├── AnalysisNotes.R ├── Assumptions.R ├── anova ├── .Rapp.history ├── ANOVA_demo.R ├── Contrast.dat ├── Dummy.dat ├── Videos_clean.Rdata ├── anova.R ├── anove_viagra.R └── untitled.R ├── common_coding_issues.R ├── correlation ├── Exam Anxiety.dat ├── The Biggest Liar.dat ├── correlation.R └── correlation_chi_square_demo.R ├── linear regression ├── .Rapp.history ├── Album Sales 1.dat ├── Album Sales 2.dat ├── Chapter 7 DSUR Regression.R ├── Countries3.Rdata ├── Linear Regression.R ├── simple_regression.R └── simple_regression_transformation.R ├── logistic regression ├── .Rapp.history ├── Chat-Up Lines.dat ├── Lacourse et al. (2001) Females.dat ├── eel.dat ├── logistic regression.R └── penalty.dat ├── multiple regression ├── .Rapp.history ├── Album Sales 2.dat ├── Chamorro-Premuzic.dat ├── GlastonburyFestivalRegression.dat ├── multiple regression.R └── neste-model.r └── plots ├── .Rapp.history ├── ChickFlick.dat ├── DownloadFestival.dat ├── Exam Anxiety.dat ├── FacebookNarcissism.dat ├── Fertility.csv ├── InternetUsers.RData └── ggplot_scripts.R /AnalysisNotes.R: -------------------------------------------------------------------------------- 1 | # This is a list of commands and processes to use with R when exploring data set 2 | 3 | # Set the working directory, usually where the data files are located 4 | setwd("~/OneDrive/MIDS/W203/Data/08") 5 | 6 | # Load a data file. There are several types, from delimited data, CSV files and RData file 7 | dataframe <- read.delim("handlebars.dat", header=TRUE) 8 | 9 | # CSV file 10 | dataframe <- read.csv("Removal_Requests.csv", header=TRUE) 11 | 12 | # RData file restores a data session 13 | load("Countries2.RData") 14 | 15 | ########################################################################################## 16 | 17 | # Summarize a dataframe 18 | summary(dataframe) 19 | 20 | # list the items in the environment 21 | ls() 22 | # list the items and describe each - good to see a dataframe and the type of each vector or column 23 | ls.str() 24 | 25 | # dump the top 5 rows of a data frame 26 | head(dataframe) 27 | head(Countries) 28 | 29 | ########################################################################################## 30 | 31 | # Examine Data visually 32 | 33 | # Load the ggplot2 library 34 | library(ggplot2) 35 | 36 | 37 | # Load the Facebook data into a dataframe 38 | facebookData = read.delim("~/OneDrive/MIDS/W203/Data/04/FacebookNarcissism.dat", header = TRUE) 39 | 40 | # scatterplot with different elements 41 | graph <- ggplot(facebookData, aes(NPQC_R_Total, Rating)) 42 | graph + geom_point() 43 | graph + geom_point(shape = 7, size = 6) 44 | graph + geom_point(aes(color = Rating_Type)) 45 | graph + geom_point(color = "RED") 46 | graph + geom_point(aes(color = Rating_Type), position = "jitter") 47 | graph + geom_point(aes(shape = Rating_Type), position = "jitter") 48 | 49 | # add a smoothing line 50 | # color can also be a factor in the data frame. 
51 | # the method parameter can be left off for the default, which is not linear 52 | graph + geom_point() + geom_smooth(method = "lm", alpha=0.1, colour="red", fill="blue") + labs(x = "Narcisisim", y = "Rating") 53 | 54 | # The car library has a nice default scatter plot 55 | library(car) 56 | scatterplot(Countries$gdp, Countries$fertility_rate) 57 | head(Countries) 58 | 59 | # histograms 60 | -------------------------------------------------------------------------------- /Assumptions.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------------------------------- 2 | #R Code for Chapter 5 of: 3 | # 4 | #Field, A. P., Miles, J. N. V., & Field, Z. C. (2012). Discovering Statistics Using R: and Sex and Drugs and Rock 'N' Roll. #London Sage 5 | # 6 | #(c) 2011 Andy P. Field, Jeremy N. V. Miles & Zoe C. Field 7 | #----------------------------------------------------------------------------------------------------------- 8 | 9 | #----Set the working directory------ 10 | setwd("~/Dropbox/Zoe/R Book Chapter 7 Stuff") 11 | setwd("~/Documents/Academic/Data/DSU_R/Chapter 05 (Exploring Data)") 12 | imageDirectory<-"~/Documents/Academic/Books/Discovering Statistics/DSU R/DSU R I/DSUR I Images" 13 | 14 | setwd("~/Public/Academic/Data/DSU_R/Chapter 05 (Exploring Data)") 15 | imageDirectory<-"~/Public/Academic/Books/Discovering Statistics/DSU R/DSU R I/DSUR I Images" 16 | 17 | 18 | 19 | names(dlf) 20 | 21 | #----Install and Load Packages----- 22 | install.packages("car") 23 | install.packages("ggplot2") 24 | install.packages("pastecs") 25 | install.packages("psych") 26 | 27 | library(car) 28 | library(ggplot2) 29 | library(pastecs) 30 | library(psych) 31 | library(Rcmdr) 32 | 33 | #Read in the download data: 34 | 35 | dlf <- read.delim("DownloadFestival.dat", header=TRUE) 36 | 37 | #Remove the outlier from the day1 hygiene score 38 | dlf$day1 <- ifelse(dlf$day1 > 20, NA, dlf$day1) 39 | 40 | 41 | 42 | #Histograms for hygiene scores on day 1, day 2 and day 3. 
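# (Note, not in the original script: this file uses opts(), which was removed from ggplot2
#  in favour of theme(), and ..density.. is now written after_stat(density) in current
#  releases. A minimal sketch of the day 1 histogram in current ggplot2 syntax, reusing the
#  dlf data frame loaded above; the object name hist.day1.modern is illustrative only.)
hist.day1.modern <- ggplot(dlf, aes(day1)) +
  theme(legend.position = "none") +
  geom_histogram(aes(y = after_stat(density)), colour = "black", fill = "white") +
  labs(x = "Hygiene score on day 1", y = "Density")
hist.day1.modern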
43 | 44 | #Histogram for day 1: 45 | 46 | hist.day1 <- ggplot(dlf, aes(day1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Hygiene score on day 1", y = "Density") 47 | hist.day1 48 | 49 | #Histogram for day 2: 50 | hist.day2 <- ggplot(dlf, aes(day2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Hygiene score on day 2", y = "Density") 51 | hist.day2 52 | 53 | #Histogram for day 3: 54 | 55 | hist.day3 <- ggplot(dlf, aes(day3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Hygiene score on day 3", y = "Density") 56 | hist.day3 57 | 58 | #Add the curves to the Histograms: 59 | 60 | hist.day1 + stat_function(fun = dnorm, args = list(mean = mean(dlf$day1, na.rm = TRUE), sd = sd(dlf$day1, na.rm = TRUE)), colour = "black", size = 1) 61 | 62 | ggsave(file = paste(imageDirectory,"05 DLF Day 1 Hist.png",sep="/")) 63 | 64 | hist.day2 + stat_function(fun = dnorm, args = list(mean = mean(dlf$day2, na.rm = TRUE), sd = sd(dlf$day2, na.rm = TRUE)), colour = "black", size = 1) 65 | 66 | ggsave(file = paste(imageDirectory,"05 DLF Day 2 Hist.png",sep="/")) 67 | 68 | 69 | hist.day3 + stat_function(fun = dnorm, args = list(mean = mean(dlf$day3, na.rm = TRUE), sd = sd(dlf$day3, na.rm = TRUE)), colour = "black", size = 1) 70 | 71 | ggsave(file = paste(imageDirectory,"05 DLF Day 3 Hist.png",sep="/")) 72 | 73 | 74 | #Q-Q plot for day 1: 75 | qqplot.day1 <- qplot(sample = dlf$day1, stat="qq") 76 | qqplot.day1 77 | 78 | ggsave(file = paste(imageDirectory,"05 DLF Day 1 QQ.png",sep="/")) 79 | 80 | #Q-Q plot for day 2: 81 | 82 | qqplot.day2 <- qplot(sample = dlf$day2, stat="qq") 83 | qqplot.day2 84 | ggsave(file = paste(imageDirectory,"05 DLF Day 2 QQ.png",sep="/")) 85 | 86 | #Q-Q plot of the hygiene scores on day 3: 87 | qqplot.day3 <- qplot(sample = dlf$day3, stat="qq") 88 | qqplot.day3 89 | ggsave(file = paste(imageDirectory,"05 DLF Day 3 QQ.png",sep="/")) 90 | 91 | #Quantifying normality with numbers 92 | library(psych) #load the psych library, if you haven't already, for the describe() function. 93 | 94 | #Using the describe() function for a single variable. 95 | describe(dlf$day1) 96 | 97 | #Two alternative ways to describe multiple variables. 98 | describe(cbind(dlf$day1, dlf$day2, dlf$day3)) 99 | describe(dlf[,c("day1", "day2", "day3")]) 100 | 101 | library(pastecs) 102 | stat.desc(dlf$day1, basic = FALSE, norm = TRUE) 103 | 104 | stat.desc(cbind(dlf$day1, dlf$day2, dlf$day3), basic = FALSE, norm = TRUE) 105 | 106 | round(stat.desc(dlf[, c("day1", "day2", "day3")], basic = FALSE, norm = TRUE), digits = 3) 107 | 108 | 109 | 110 | #Read in R exam data. 
111 | rexam <- read.delim("rexam.dat", header=TRUE) 112 | 113 | #Set the variable uni to be a factor: 114 | rexam$uni<-factor(rexam$uni, levels = c(0:1), labels = c("Duncetown University", "Sussex University")) 115 | 116 | #Self test task: 117 | 118 | round(stat.desc(rexam[, c("exam", "computer", "lectures", "numeracy")], basic = FALSE, norm = TRUE), digits = 3) 119 | 120 | hexam <- ggplot(rexam, aes(exam)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "First Year Exam Score", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$exam, na.rm = TRUE), sd = sd(rexam$exam, na.rm = TRUE)), colour = "red", size = 1) 121 | hexam 122 | ggsave(file = paste(imageDirectory,"05 Rexam exam Hist.png",sep="/")) 123 | 124 | 125 | hcomputer <- ggplot(rexam, aes(computer)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "Computer Literacy", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$computer, na.rm = TRUE), sd = sd(rexam$computer, na.rm = TRUE)), colour = "red", size = 1) 126 | hcomputer 127 | ggsave(file = paste(imageDirectory,"05 Rexam computer Hist.png",sep="/")) 128 | 129 | hlectures <- ggplot(rexam, aes(lectures)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "Percentage of Lectures Attended", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$lectures, na.rm = TRUE), sd = sd(rexam$lectures, na.rm = TRUE)), colour = "red", size = 1) 130 | hlectures 131 | ggsave(file = paste(imageDirectory,"05 Rexam lectures Hist.png",sep="/")) 132 | 133 | hnumeracy <- ggplot(rexam, aes(numeracy)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "Numeracy", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$numeracy, na.rm = TRUE), sd = sd(rexam$numeracy, na.rm = TRUE)), colour = "red", size = 1) 134 | hnumeracy 135 | ggsave(file = paste(imageDirectory,"05 Rexam numeracy Hist.png",sep="/")) 136 | 137 | 138 | 139 | #Use by() to get descriptives for one variable, split by uni 140 | by(data=rexam$exam, INDICES=rexam$uni, FUN=describe) 141 | by(rexam$exam, rexam$uni, stat.desc, basic = FALSE, norm = TRUE) 142 | 143 | #Use by() to get descriptives for two variables, split by uni 144 | by(cbind(data=rexam$exam, data=rexam$numeracy), rexam$uni, describe) 145 | by(rexam[, c("exam", "numeracy")], rexam$uni, stat.desc,basic = FALSE, norm = TRUE) 146 | 147 | 148 | #Use describe for four variables in the rexam dataframe. 
149 | describe(cbind(rexam$exam, rexam$computer, rexam$lectures, rexam$numeracy)) 150 | 151 | #Use by() to get descriptives for four variables, split by uni 152 | by(data=cbind(rexam$exam, rexam$computer, rexam$lectures, rexam$numeracy), rexam$uni, describe) 153 | 154 | 155 | #Self test: 156 | #Use by() to get descriptives for computer literacy and percentage of lectures attended, split by uni 157 | by(cbind(data=rexam$computer, data=rexam$lectures), rexam$uni, describe) 158 | by(rexam[, c("computer", "lectures")], rexam$uni, stat.desc, basic = FALSE, norm = TRUE) 159 | 160 | 161 | 162 | #using subset to plot histograms for different groups: 163 | 164 | dunceData<-subset(rexam, rexam$uni=="Duncetown University") 165 | sussexData<-subset(rexam, rexam$uni=="Sussex University") 166 | 167 | hist.numeracy.duncetown <- ggplot(dunceData, aes(numeracy)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black", binwidth = 1) + labs(x = "Numeracy Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$numeracy, na.rm = TRUE), sd = sd(dunceData$numeracy, na.rm = TRUE)), colour = "red", size=1) 168 | hist.numeracy.duncetown 169 | ggsave(file = paste(imageDirectory,"05 dunce numeracy Hist.png",sep="/")) 170 | 171 | hist.exam.duncetown <- ggplot(dunceData, aes(exam)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "First Year Exam Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$exam, na.rm = TRUE), sd = sd(dunceData$exam, na.rm = TRUE)), colour = "red", size=1) 172 | hist.exam.duncetown 173 | ggsave(file = paste(imageDirectory,"05 dunce exam Hist.png",sep="/")) 174 | 175 | 176 | hist.numeracy.sussex <- ggplot(sussexData, aes(numeracy)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black", binwidth = 1) + labs(x = "Numeracy Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData $numeracy, na.rm = TRUE), sd = sd(sussexData $numeracy, na.rm = TRUE)), colour = "red", size=1) 177 | hist.numeracy.sussex 178 | ggsave(file = paste(imageDirectory,"05 sussex numeracy Hist.png",sep="/")) 179 | 180 | hist.exam.sussex <- ggplot(sussexData, aes(exam)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "First Year Exam Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData$exam, na.rm = TRUE), sd = sd(sussexData$exam, na.rm = TRUE)), colour = "red", size=1) 181 | hist.exam.sussex 182 | ggsave(file = paste(imageDirectory,"05 sussex exam Hist.png",sep="/")) 183 | 184 | #self test: 185 | 186 | dunceData<-subset(rexam, rexam$uni=="Duncetown University") 187 | sussexData<-subset(rexam, rexam$uni=="Sussex University") 188 | 189 | 190 | hist.computer.duncetown <- ggplot(dunceData, aes(computer)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Computer Literacy", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$computer, na.rm = TRUE), sd = sd(dunceData$computer, na.rm = TRUE)), colour = "red", size=1) 191 | hist.computer.duncetown 192 | ggsave(file = paste(imageDirectory,"05 dunce computer Hist.png",sep="/")) 193 | 194 | #To plot a histogram for percentage of lectures attended for Duncetown University we would execute: 195 | 196 | hist.lectures.duncetown <- ggplot(dunceData, 
aes(lectures)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Percentage of Lectures Attended", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$lectures, na.rm = TRUE), sd = sd(dunceData$lectures, na.rm = TRUE)), colour = "red", size=1) 197 | hist.lectures.duncetown 198 | ggsave(file = paste(imageDirectory,"05 dunce lectures Hist.png",sep="/")) 199 | 200 | #To plot a histogram for computer literacy for Sussex University we would execute: 201 | 202 | hist.computer.sussex <- ggplot(sussexData, aes(computer)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Computer Literacy", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData$computer, na.rm = TRUE), sd = sd(sussexData$computer, na.rm = TRUE)), colour = "red", size=1) 203 | hist.computer.sussex 204 | ggsave(file = paste(imageDirectory,"05 sussex computer Hist.png",sep="/")) 205 | 206 | #To plot a histogram for percentage of lectures attended for Sussex University we would execute: 207 | 208 | hist.lectures.sussex <- ggplot(sussexData, aes(lectures)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Percentage of Lectures Attended", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData$lectures, na.rm = TRUE), sd = sd(sussexData$lectures, na.rm = TRUE)), colour = "red", size=1) 209 | hist.lectures.sussex 210 | ggsave(file = paste(imageDirectory,"05 sussex lectures Hist.png",sep="/")) 211 | 212 | 213 | 214 | 215 | #Shapiro-Wilks test for exam and numeracy for whole sample 216 | shapiro.test(rexam$exam) 217 | shapiro.test(rexam$numeracy) 218 | 219 | 220 | 221 | #Shapiro-Wilks test for exam and numeracy split by university 222 | by(rexam$exam, rexam$uni, shapiro.test) 223 | by(rexam$numeracy, rexam$uni, shapiro.test) 224 | 225 | 226 | #qqplots for the two variables 227 | qplot(sample = rexam$exam, stat="qq") 228 | ggsave(file = paste(imageDirectory,"05 exam QQ.png",sep="/")) 229 | qplot(sample = rexam$numeracy, stat="qq") 230 | ggsave(file = paste(imageDirectory,"05 numeracy QQ.png",sep="/")) 231 | 232 | 233 | #Levene's test for comparison of variances of exam scores in the two universities. 
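# (Note, not in the original script: leveneTest() comes from the car package loaded above.
#  By default it centres each group at its median (the Brown-Forsythe variant); the second
#  call below, with center = mean, gives the original Levene's test.)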
234 | leveneTest(rexam$exam, rexam$uni) 235 | leveneTest(rexam$exam, rexam$uni, center = mean) 236 | leveneTest(rexam$numeracy, rexam$uni) 237 | 238 | 239 | #########Log, square root, and reciprocal transformation: 240 | dlf$day1LessThanOne <- dlf$day1 < 1 241 | dlf$day1LessThanorEqualOne <- dlf$day1 <= 1 242 | dlf$day1GreaterThanOne <- dlf$day1 > 1 243 | dlf$day1GreaterThanorEqualOne <- dlf$day1 >= 1 244 | 245 | #self-help tasks 246 | dlf$logday1 <- log(dlf$day1 + 1) 247 | dlf$logday2 <- log(dlf$day2 + 1) 248 | dlf$logday3 <- log(dlf$day3 + 1) 249 | 250 | #Histograms of the log transformed scores: 251 | 252 | #Histogram for logday1: 253 | 254 | hist.logday1 <- ggplot(dlf, aes(logday1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Log Transformed Hygiene Score on Day 1", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$logday1, na.rm = TRUE), sd = sd(dlf$logday1, na.rm = TRUE)), colour = "red", size = 1) 255 | hist.logday1 256 | ggsave(file = paste(imageDirectory,"05 DLF Log Day 1 Hist.png",sep="/")) 257 | 258 | 259 | 260 | #Histogram for logday2: 261 | 262 | hist.logday2 <- ggplot(dlf, aes(logday2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Log Transformed Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$logday2, na.rm = TRUE), sd = sd(dlf$logday2, na.rm = TRUE)), colour = "red", size = 1) 263 | hist.logday2 264 | ggsave(file = paste(imageDirectory,"05 DLF Log Day 2 Hist.png",sep="/")) 265 | 266 | 267 | #Histogram for logday3: 268 | 269 | hist.logday3 <- ggplot(dlf, aes(logday3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Log Transformed Hygiene Score on Day 3", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$logday3, na.rm = TRUE), sd = sd(dlf$logday3, na.rm = TRUE)), colour = "red", size = 1) 270 | hist.logday3 271 | ggsave(file = paste(imageDirectory,"05 DLF Log Day 3 Hist.png",sep="/")) 272 | 273 | #Create square root scores 274 | 275 | dlf$sqrtday1 <- sqrt(dlf$day1) 276 | dlf$sqrtday2 <- sqrt(dlf$day2) 277 | dlf$sqrtday3 <- sqrt(dlf$day3) 278 | 279 | #Histograms of the square root transformed scores: 280 | 281 | #Histogram for sqrtday1: 282 | 283 | hist.sqrtday1 <- ggplot(dlf, aes(sqrtday1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Square Root of Hygiene Score on Day 1", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$sqrtday1, na.rm = TRUE), sd = sd(dlf$sqrtday1, na.rm = TRUE)), colour = "red", size = 1) 284 | hist.sqrtday1 285 | ggsave(file = paste(imageDirectory,"05 DLF sqrt Day 1 Hist.png",sep="/")) 286 | 287 | 288 | 289 | #Histogram for sqrtday2: 290 | 291 | hist.sqrtday2 <- ggplot(dlf, aes(sqrtday2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Square Root of Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$sqrtday2, na.rm = TRUE), sd = sd(dlf$sqrtday2, na.rm = TRUE)), colour = "red", size = 1) 292 | hist.sqrtday2 293 | ggsave(file = paste(imageDirectory,"05 DLF sqrt Day 2 Hist.png",sep="/")) 294 | 295 | 296 | 297 | #Histogram for sqrtday3: 298 | 299 | hist.sqrtday3 <- ggplot(dlf, aes(sqrtday3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", 
fill="white") + labs(x="Square Root of Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$sqrtday3, na.rm = TRUE), sd = sd(dlf$sqrtday3, na.rm = TRUE)), colour = "red", size = 1) 300 | hist.sqrtday3 301 | ggsave(file = paste(imageDirectory,"05 DLF sqrt Day 3 Hist.png",sep="/")) 302 | 303 | 304 | #Create reciprocal scores 305 | 306 | dlf$recday1 <- 1/(dlf$day1 + 1) 307 | dlf$recday2 <- 1/(dlf$day2 + 1) 308 | dlf$recday3 <- 1/(dlf$day3 + 1) 309 | 310 | #Histograms of the reciprocal transformed scores: 311 | 312 | #Histogram for recday1: 313 | 314 | hist.recday1 <- ggplot(dlf, aes(recday1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Reciprocal of of Hygiene Score on Day 1", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$recday1, na.rm = TRUE), sd = sd(dlf$recday1, na.rm = TRUE)), colour = "red", size = 1) 315 | hist.recday1 316 | ggsave(file = paste(imageDirectory,"05 DLF rec Day 1 Hist.png",sep="/")) 317 | 318 | 319 | 320 | #Histogram for recday2: 321 | 322 | hist.recday2 <- ggplot(dlf, aes(recday2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Reciprocal of of Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$recday2, na.rm = TRUE), sd = sd(dlf$recday2, na.rm = TRUE)), colour = "red", size = 1) 323 | hist.recday2 324 | ggsave(file = paste(imageDirectory,"05 DLF rec Day 2 Hist.png",sep="/")) 325 | 326 | 327 | #Histogram for recday3: 328 | 329 | 330 | hist.recday3 <- ggplot(dlf, aes(recday3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Reciprocal of of Hygiene Score on Day 3", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$recday3, na.rm = TRUE), sd = sd(dlf$recday3, na.rm = TRUE)), colour = "red", size = 1) 331 | hist.recday3 332 | ggsave(file = paste(imageDirectory,"05 DLF rec Day 3 Hist.png",sep="/")) 333 | 334 | 335 | #Ifelse 336 | 337 | dlf$day1NoOutlier <- ifelse(dlf$day1 > 5, NA, dlf$day1) 338 | 339 | #----------Smart Alex task 1--------- 340 | 341 | chickFlick <- read.delim(file="ChickFlick.dat", header=TRUE) 342 | 343 | #Use by() to get descriptives for arousal, split by film 344 | by(data=chickFlick$arousal, INDICES=chickFlick$film, FUN=describe) 345 | 346 | 347 | #Shapiro-Wilks test for arousal split by film 348 | by(data=chickFlick$arousal, INDICES=chickFlick$film, FUN=shapiro.test) 349 | 350 | 351 | #Levene's test for comparison of variances of arousal scores for the two films. 
352 | leveneTest(chickFlick$arousal, chickFlick$film, center=median) 353 | 354 | #To plot a histogram for arousal for Bridget Jones' Diary we would execute: 355 | hist.arousal.Bridget <- ggplot(subset(chickFlick, chickFlick$film=="Bridget Jones' Diary"),aes(arousal)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), fill="white", colour="black", binwidth=1) + labs(x="Arousal", y = "Density") + stat_function(fun=dnorm, args=list(mean=mean(subset(chickFlick, chickFlick$film=="Bridget Jones' Diary")$arousal, na.rm=TRUE), sd=sd(subset(chickFlick, chickFlick$film=="Bridget Jones' Diary")$arousal, na.rm=TRUE) ), colour="black", size=1) 356 | hist.arousal.Bridget 357 | 358 | 359 | #To plot a histogram for arousal for Memento we would execute: 360 | 361 | hist.arousal.Memento <- ggplot(subset(chickFlick, chickFlick$film=="Memento"),aes(arousal)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), fill="white", colour="black", binwidth=1) + labs(x="Arousal", y = "Density") + stat_function(fun=dnorm, args=list(mean=mean(subset(chickFlick, chickFlick$film=="Memento")$arousal, na.rm=TRUE), sd=sd(subset(chickFlick, chickFlick$film=="Memento")$arousal, na.rm=TRUE) ), colour="black", size=1) 362 | hist.arousal.Memento 363 | 364 | 365 | #----------Smart Alex task 2--------- 366 | 367 | 368 | #Read in R exam data. 369 | rexam <- read.table(file="rexam.dat", header=TRUE) 370 | 371 | #Set the variable uni to be a factor: 372 | rexam$uni<-factor(rexam$uni, levels = c(0:1), labels = c("Duncetown University", "Sussex University")) 373 | 374 | rexam$lognumeracy <- log(rexam$numeracy) 375 | rexam$sqrtnumeracy <- sqrt(rexam$numeracy) 376 | rexam$recnumeracy <- 1/(rexam$numeracy) 377 | 378 | #Histogram for numeracy: 379 | hist.numeracy <- ggplot(rexam, aes(numeracy)) + 380 | opts(legend.position = "none")+ 381 | geom_histogram(aes(y=..density..), 382 | colour="black", fill="white") + 383 | labs(x="Numeracy scores", y = "Density") + 384 | stat_function(fun=dnorm, 385 | args=list (mean=mean (rexam$numeracy, na.rm=TRUE), 386 | sd=sd(rexam$numeracy, na.rm=TRUE) ), 387 | colour="black", size=1) 388 | hist.numeracy 389 | 390 | #Histogram for log numeracy: 391 | 392 | hist.lognumeracy <- ggplot(rexam, aes(lognumeracy)) + 393 | opts(legend.position = "none")+ 394 | geom_histogram(aes(y=..density..), 395 | colour="black", fill="white") + 396 | labs(x="Log Transformed Numeracy scores", y = "Density") + 397 | stat_function(fun=dnorm, 398 | args=list (mean=mean (rexam$lognumeracy, na.rm=TRUE), 399 | sd=sd(rexam$lognumeracy, na.rm=TRUE) ), 400 | colour="black", size=1) 401 | hist.lognumeracy 402 | 403 | #Histogram for square root numeracy: 404 | 405 | hist.sqrtnumeracy <- ggplot(rexam, aes(sqrtnumeracy)) + 406 | opts(legend.position = "none")+ 407 | geom_histogram(aes(y=..density..), 408 | colour="black", fill="white") + 409 | labs(x="Square Root of Numeracy scores", y = "Density") + 410 | stat_function(fun=dnorm, 411 | args=list (mean=mean (rexam$sqrtnumeracy, na.rm=TRUE), 412 | sd=sd(rexam$sqrtnumeracy, na.rm=TRUE) ), 413 | colour="black", size=1) 414 | hist.sqrtnumeracy 415 | 416 | #Histogram of the reciprocal transformed numeracy scores: 417 | hist.recnumeracy <- ggplot(rexam, aes(recnumeracy)) + 418 | opts(legend.position = "none")+ 419 | geom_histogram(aes(y=..density..), 420 | colour="black", fill="white") + 421 | labs(x="Reciprocal of Numeracy Scores", y = "Density") + 422 | stat_function(fun=dnorm, 423 | args=list (mean=mean (rexam$recnumeracy, na.rm=TRUE), 424 | 
sd=sd(rexam$recnumeracy, na.rm=TRUE) ), 425 | colour="black", size=1) 426 | hist.recnumeracy 427 | 428 | 429 | #Shapiro-Wilks test for numeracy: 430 | shapiro.test(rexam$numeracy) 431 | 432 | #Shapiro-Wilks test for lognumeracy: 433 | shapiro.test(rexam$lognumeracy) 434 | 435 | #Shapiro-Wilks test for sqrtnumeracy: 436 | shapiro.test(rexam$sqrtnumeracy) 437 | 438 | #Shapiro-Wilks test for recnumeracy: 439 | shapiro.test(rexam$recnumeracy) 440 | 441 | -------------------------------------------------------------------------------- /anova/.Rapp.history: -------------------------------------------------------------------------------- 1 | 1000/sqrt(10)= 2 | 0 3 | 1000/sqrt(10) 4 | pnorm(1.58, lower.tail=FALSE) 5 | pnorm(1.58) 6 | pnorm(`1.58) 7 | pnorm(-1.58) 8 | c 9 | 3 10 | 4 11 | 2 12 | #This is a comment 13 | library(car) 14 | This is a command 15 | # This is a comment 16 | this 17 | ls.str() 18 | exit 19 | setwd("~/OneDrive/MIDS/W203/Data/anova") 20 | dummy <- read.delim("dummy.day", headers=TRUE) 21 | dummy <- read.delim("dummy.day", header=TRUE) 22 | dummy <- read.delim("dummy.dat", header=TRUE) 23 | head(dummy) 24 | summary(dummy) 25 | ls.str() 26 | dummyData$dose<-factor(dummyData$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 27 | dummy$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 28 | ls.str() 29 | summary(dummy) 30 | dummy.1 <- lm(libido ~ dummy1 + dummy2, data = dummy) 31 | summary(dummy.1) 32 | # gm <- mean( dummy$libido) 33 | gm <- mean( dummy$libido) 34 | length(dummy$libido) 35 | sst <- gm/length(dummy$libido) 36 | sst 37 | gm 38 | gsd <- sd(dummy$libido) 39 | sst <- sd^2/length(dummy$libido) 40 | sst <- gsd^2/length(dummy$libido) 41 | sst 42 | sst <- (gsd^2)*length(dummy$libido) 43 | sst 44 | gsd 45 | 1.7^2 46 | 1.767^2 47 | sst <- (gsd^2)*(length(dummy$libido)-) 48 | sst <- (gsd^2)*(length(dummy$libido)-1) 49 | sst 50 | gm <- mean(dummy$libido) 51 | placebo <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 0, ] 52 | low <- dummy[dummy$dummy1 ==1 & dummy$dummy2 == 0, ] 53 | gm.grand <- mean(dummy$libido) 54 | gm.low <- mean(low$libido) 55 | gm.high <- mean(high$libido) 56 | high <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 1, ] 57 | gm.high <- mean(high$libido) 58 | m.grand <- mean(dummy$libido) 59 | m.low <- mean(low$libido) 60 | m.high <- mean(high$libido) 61 | m.grand 62 | m.low 63 | m.high 64 | high <- dummy[dummy$dummy1 == 1 & dummy$dummy2 == 0, ] 65 | low <- dummy[dummy$dummy1 ==0 & dummy$dummy2 == 1, ] 66 | m.low <- mean(low$libido) 67 | m.high <- mean(high$libido) 68 | m.placebo <- mean(placebo$libido) 69 | m.low 70 | m.placebo 71 | ssm <- length(placebo$libido)*(m.placebo - m.grand)^2 + length(low.libido)*(m.low - m.grand)^2 + length(high.libido)*(m.high - m.grand)^2 72 | ssm <- length(placebo$libido)*(m.placebo - m.grand)^2 + length(low$libido)*(m.low - m.grand)^2 + length(high$libido)*(m.high - m.grand)^2 73 | ssm 74 | s.low <- sd(low$libido) 75 | s.high <- sd(high$libido) 76 | s.placebo <- sd(placebo$libido) 77 | ssr <- (length(placebo$libido) - 1) * s.placebo^2 + (length(low$libido) - 1)) * s.low^2 + (length(high$libido) - 1) * s.high^2 78 | ssr <- (length(placebo$libido) - 1) * s.placebo^2 + (length(low$libido) - 1) * s.low^2 + (length(high$libido) - 1) * s.high^2 79 | ssr 80 | msm <- ssm/(3 - 1) 81 | msr <- ssr/(14 - 2) 82 | fr <- msm/msr 83 | fr 84 | contrast <- read.delim("contrast.dat", header = TRUE) 85 | summary(contrast) 86 | head(contrast) 87 | contrast.planned <- lm(libido ~ dummy + dummy2, 
data = contrast) 88 | dummy$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 89 | ls.str() 90 | contrast$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 91 | contrast$dose<-factor(contrast$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 92 | ls.str() 93 | ls.str(contrast) 94 | contrast.planned <- lm(libido ~ dummy + dummy2, data = contrast) 95 | contrast.planned <- lm(libido ~ dummy1 + dummy2, data = contrast) 96 | contrast.planned 97 | summary(contrast.planned) 98 | -------------------------------------------------------------------------------- /anova/ANOVA_demo.R: -------------------------------------------------------------------------------- 1 | # A demonstration of ANOVA in R 2 | 3 | # Load Youtube video data 4 | load("Videos_clean.Rdata") 5 | summary(Videos) 6 | 7 | 8 | # check the rate variable for normality 9 | hist(Videos$rate) 10 | 11 | # That's not great, but remember that ANOVA is a robust-test 12 | # and the data is on a 1-5 scale, which isn't normally 13 | # a place we'd worry 14 | 15 | # Let's look at the means, by each category and overall 16 | by(Videos$rate, Videos$category, mean, na.rm=T) 17 | mean(Videos$rate, na.rm=T) 18 | 19 | # We can get nicer output with the tapply function 20 | tapply(Videos$rate, Videos$category, mean, na.rm=T) 21 | 22 | # Perform the analysis of variance and check the significance 23 | aovm = aov(rate ~ category, Videos) 24 | summary(aovm) 25 | 26 | # Post-hoc, we can compare our groups pairwise 27 | tt = pairwise.t.test(Videos$rate, Videos$category, p.adjust.method = "bonferroni") 28 | tt 29 | 30 | # That's hard to read, so let's create a prettier table 31 | # Write a function to add significance stars to a p-value 32 | sig_stars = function(p) 33 | { 34 | stars = symnum(p, na = F, cutpoints = c(0, .001, .01, .05, .1, 1), symbols=c("***","**", "*", ".", " ")) 35 | return( paste(round(p, 3), stars) ) 36 | } 37 | 38 | # apply our new function to every element in our matrix 39 | t_table = apply(tt$p.value, c(1,2), sig_stars) 40 | t_table 41 | # get rid of the quotes 42 | t_table = noquote( t_table ) 43 | t_table 44 | 45 | # tidy up the column names and erase the NAs 46 | colnames(t_table) = abbreviate(colnames(t_table)) 47 | 48 | # Notice that upper.tri indexes the upper right 49 | # triangle of the matrix 50 | upper.tri(t_table) 51 | 52 | # set those positions to the empty string 53 | t_table[upper.tri(t_table)] = "" 54 | 55 | # the finished table 56 | t_table 57 | 58 | -------------------------------------------------------------------------------- /anova/Contrast.dat: -------------------------------------------------------------------------------- 1 | dose libido dummy1 dummy2 2 | 1 3 -2 0 3 | 1 2 -2 0 4 | 1 1 -2 0 5 | 1 1 -2 0 6 | 1 4 -2 0 7 | 2 5 1 -1 8 | 2 2 1 -1 9 | 2 4 1 -1 10 | 2 2 1 -1 11 | 2 3 1 -1 12 | 3 7 1 1 13 | 3 4 1 1 14 | 3 5 1 1 15 | 3 3 1 1 16 | 3 6 1 1 17 | -------------------------------------------------------------------------------- /anova/Dummy.dat: -------------------------------------------------------------------------------- 1 | dose libido dummy1 dummy2 2 | 1 3 0 0 3 | 1 2 0 0 4 | 1 1 0 0 5 | 1 1 0 0 6 | 1 4 0 0 7 | 2 5 0 1 8 | 2 2 0 1 9 | 2 4 0 1 10 | 2 2 0 1 11 | 2 3 0 1 12 | 3 7 1 0 13 | 3 4 1 0 14 | 3 5 1 0 15 | 3 3 1 0 16 | 3 6 1 0 17 | -------------------------------------------------------------------------------- /anova/Videos_clean.Rdata: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/anova/Videos_clean.Rdata
--------------------------------------------------------------------------------
/anova/anova.R:
--------------------------------------------------------------------------------
# ANOVA and Multiple Regression - Chapter 10 of Discovering Statistics Using R, Andy Field
#
setwd("~/OneDrive/MIDS/W203/Data/anova")

# use the downloaded dummy.dat file
dummy <- read.delim("dummy.dat", header=TRUE)
head(dummy)
summary(dummy)
ls.str()

# Looking at the dummy data, there is a vector for libido, and vectors for dummy1 and dummy2 that
# appear to be coded 0/1 as dummy variables. There is also a vector for dose that ranges from 1-3
# but is supposed to represent "placebo", "low dose", "high dose", so we need to recode it as a factor
dummy$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose"))
ls.str()
summary(dummy)

# The libido vector is the single outcome measurement here, and each value belongs to one of the three
# groups. Therefore dummy1,dummy2 = (0,0) identifies the placebo group, while (1,0) and (0,1) identify
# the high-dose and low-dose groups respectively.

# The number of dummy variables needed is the number of groups - 1.

# now we can run the linear regression
dummy.1 <- lm(libido ~ dummy1 + dummy2, data = dummy)
summary(dummy.1)

# We see from the output that F(2, 12) is significant at p < .05
# Therefore using the group means is a better model than using the overall mean
# We also see that beta(1) is significant at p < .05 but beta(2) is not (p = .282)

# --------------------- The Logic of ANOVA ---------------------- #
#
# The simplest model we can fit to a set of data is the grand mean (the mean of the outcome variable)
# This basic model represents "no effect" of the predictor variable on the outcome variable
#
# We can fit a different model to the data that represents our hypotheses. If this model fits the data
# well then it must be better than using the grand mean.
#
# The intercept and one or more coefficients describe the chosen model
#
# The bigger the coefficients, the greater the deviation between the regression line and the grand mean
#
# In correlational research the coefficients represent the slope of the line, but in experimental
# research they represent the differences between the group means
#
# If the differences between the group means are large enough then the resulting model will be a better
# fit of the data than the grand mean and we can infer that our group means are significantly different.
#
# The F-ratio is the ratio of the explained variance (due to the model) to the unexplained variance
#
# ---------------------------------------------------------------#
#
# Calculate the total variation of the data (total sum of squares)
#

gsd <- sd(dummy$libido)
sst <- (gsd^2)*(length(dummy$libido)-1)

# Total amount of variation in the data is sst = 43.7333
# How much of this variation can the regression model explain?
# This is given by the model sum of squares: the squared difference between each group mean and the
# grand mean, multiplied by the number of scores in that group
placebo <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 0, ]
high <- dummy[dummy$dummy1 == 1 & dummy$dummy2 == 0, ]
low <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 1, ]

m.grand <- mean(dummy$libido)
m.low <- mean(low$libido)
m.high <- mean(high$libido)
m.placebo <- mean(placebo$libido)

ssm <- length(placebo$libido)*(m.placebo - m.grand)^2 + length(low$libido)*(m.low - m.grand)^2 + length(high$libido)*(m.high - m.grand)^2

# The variation explained by the model is ssm = 20.1333, so of the 43.733 total variation in the data, the model explains 20.1333

# The residual sum of squares is the variation not explained by the model, which is the sum of each
# group's variance multiplied by that group's degrees of freedom (n - 1)

s.low <- sd(low$libido)
s.high <- sd(high$libido)
s.placebo <- sd(placebo$libido)

ssr <- (length(placebo$libido) - 1) * s.placebo^2 + (length(low$libido) - 1) * s.low^2 + (length(high$libido) - 1) * s.high^2

# The variation not explained by the model is ssr = 23.6

# The mean squares
# the mean square for the model (msm) is ssm divided by the model degrees of freedom

msm <- ssm/(3 - 1)

# the mean square for the residuals (msr) is ssr divided by the residual degrees of freedom

msr <- ssr/(14 - 2)

# The F-ratio is msm/msr

fr <- msm/msr

# which gives 5.1184, comparable to the F-ratio of 5.119 reported by the linear regression

# ---------------------- Planned Contrasts ------------------------ #
#
# Using the contrast.dat file downloaded from the supplemental website
contrast <- read.delim("contrast.dat", header = TRUE)
summary(contrast)
head(contrast)

contrast$dose<-factor(contrast$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose"))
ls.str(contrast)
# this data is coded with the contrast weights as dummy1 and dummy2

contrast.planned <- lm(libido ~ dummy1 + dummy2, data = contrast)
summary(contrast.planned)

--------------------------------------------------------------------------------
/anova/anove_viagra.R:
--------------------------------------------------------------------------------
#-------------------------- ANOVA Test Procedure -----------------------#
#
# This script uses the Viagra data as described in Discovering Statistics Using R, pg. 434, by Andy Field
#
#-----------------------------------------------------------------------#
#
# Load libraries
library(ggplot2)
library(granova)
library(car)
library(Rcmdr)
library(pastecs)
library(multcomp)
library(compute.es)

# First we enter the data manually

libido <- c(3,2,1,1,4,5,2,4,2,3,7,4,5,3,6)
dose <- gl(3, 5, labels = c("Placebo", "Low Dose", "High Dose"))
viagraData <- data.frame(dose, libido)

ls.str(viagraData)

# Let's examine the data and test assumptions
shapiro.test(viagraData$libido)

# The Shapiro-Wilk test is non-significant for libido, p = .581, so there is no evidence that the
# scores deviate from normality


leveneTest(viagraData$libido, viagraData$dose, center=median)
#
# Levene's Test is non-significant at F(2, 12) = 0.1176 and p = 0.89
# Levene's test indicates that the variances are not significantly different
# Homogeneity of variance is ok - so we use regular ANOVA
# otherwise we would use Welch's F, or one of the robust methods based on trimmed means or bootstrapping

# Graph - this gets very fancy by placing 2 points for the mean to give the appearance of a dot with a border
#
# From the graph we see that the error bars overlap from one mean to the next and that there seems to
# be a linear trend as the dose increases
line <- ggplot(viagraData, aes(dose, libido))
line + stat_summary(fun.y = mean, geom = "line", size = 1, aes(group=1), colour = "#FF6633") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 0.75, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 4, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 3, colour = "#FF6633") + labs(x = "Dose of Viagra", y = "Mean Libido")

# Descriptive statistics
by(viagraData$libido, viagraData$dose, stat.desc)

# We can use the lm() or aov() function to create the model.
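# (A sketch, not in the original script: aov() is a wrapper around lm(), so the same model can
#  be fitted explicitly with lm(); the object name viagraLM below is illustrative only.)
viagraLM <- lm(libido ~ dose, data = viagraData)
anova(viagraLM)    # the same F-ratio and p-value as the aov() fit below
summary(viagraLM)  # dummy-coded comparisons of each dose group against the Placebo baseline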
viagraModel <- aov(libido ~ dose, data = viagraData)

summary.lm(viagraModel)

# Our viagra model shows that the probability of an F value this large occurring if there were
# no effect in the population is p = .0247, i.e. significant at p < .05

# Let's take a look at the plots the aov constructed
plot(viagraModel)

# Welch's F Test
oneway.test(libido ~ dose, data = viagraData)

# Welch's F is more conservative at F(2, 7.943) = 4.32, p = .054, which indicates non-significance

# Using robust methods (Wilcox)
# Load the latest Wilcox R tests
source("~/OneDrive/MIDS/W203/Data/Rallfun-V27.txt")

# The Wilcox tests want the data in a matrix format, so first we convert the data frame
viagraMatrix <- unstack(viagraData, libido ~ dose)
viagraMatrix

# Now we use the first test, based on a 10% trimmed mean
t1way(viagraMatrix, tr = .1)

# The outcome of the trimmed mean function yields F(2, 7.943) = 4.32, p = 0.0537
# Moving on to the median-based test
med1way(viagraMatrix)

# The median one-way test yields F = 4.783 and p = 0.07
# Finally, we can use the bootstrap one-way test with a 5% trimmed mean and 2000 bootstrap samples
t1waybt(viagraMatrix, tr = 0.05, nboot = 2000)

# This yields F = 4.32, p = 0.061
# All the one-way robust tests indicate that there is no significant relationship between the dose of
# viagra and libido

# Planned Contrasts of the Viagra Data
summary(viagraModel)

# Here we see from the output that the low dose effect is non-significant at p = 0.28
# but that the effect of high dose is significant at p = .008

# let's set our own contrasts
contrast1 <- c(-2, 1, 1)
contrast2 <- c(0, -1, 1)
contrasts(viagraData$dose) <- cbind(contrast1, contrast2)
viagraData$dose

# Contrast 1 compares the placebo group against the two experimental groups
# Contrast 2 compares the low-dose group to the high-dose group

viagraPlanned <- aov(libido ~ dose, data = viagraData)

summary.lm(viagraPlanned)

# Our hypothesis was that the experimental groups would increase libido above the control group
# Therefore we should use a 1-tailed comparison
# Contrast 1 (experimental groups vs. control group) is significant at p = .0293 / 2 = .0147
.0293/2

# We also predicted that a higher dose would increase the effect over the low dose
# Contrast 2 is significant (one-tailed) at p = .0652 / 2 = .0326
.0652/2

# The planned contrasts show that taking viagra significantly increased libido compared to a
# control group, t(12) = 2.47, p < .05, and taking a high dose significantly increased libido
# compared to a low dose, t(12) = 2.029, p < .05 (one-tailed)

# Trend analysis
contrasts(viagraData$dose) <- contr.poly(3)
viagraTrend <- aov(libido ~ dose, data = viagraData)
summary.lm(viagraTrend)

# Based on the trend analysis there is a significant linear trend, t = 3.157, p = .008
# The quadratic trend is not significant, t = 0.521, p = .612

# Post hoc tests
# Bonferroni
pairwise.t.test(viagraData$libido, viagraData$dose, paired = FALSE, p.adjust.method = "bonferroni")

# Benjamini-Hochberg
pairwise.t.test(viagraData$libido, viagraData$dose, paired = FALSE, p.adjust.method = "BH")

# Tukey (requires multcomp library)
postHocs <- glht(viagraModel, linfct = mcp(dose = "Tukey")) 135 | summary(postHocs) 136 | confint(postHocs) 137 | 138 | # Dunnett 139 | postHocs <- glht(viagraModel, linfct = mcp(dose = "Dunnett"), base = 1) 140 | summary(postHocs) 141 | confint(postHocs) 142 | 143 | # Rand Wilcox's robust tests 144 | lincon(viagraMatrix) 145 | mcppb20(viagraMatrix) 146 | 147 | # Effect size, omega squared 148 | 149 | omega<-function(SSm, SSr, dfm, MSr) 150 | { 151 | SSt = SSm + SSr 152 | omega = (SSm-(dfm*MSr))/(SSt+MSr) 153 | print(paste("Omega-Squared: ", omega)) 154 | } 155 | omega(20.13, 23.60, 2, 1.967) 156 | 157 | # Effect size between low-dose and placebo group 158 | mes(2.2, 3.2, 1.3038405, 1.3038405, 5, 5) 159 | # d = -0.77, r = 0.46 (.74 sd) 160 | 161 | # between high-dose and placebo group 162 | mes(2.2, 5, 1.3038405, 1.5811388, 5, 5) 163 | # d = -1.93, r = -0.69 ( ~ 2 sd's) 164 | 165 | # between low and high dose groups 166 | mes(3.2, 5, 1.3038405, 1.5811388, 5, 5) 167 | # d = -1.24, r = -0.53 (1.24 sd's) 168 | 169 | # Effect sizes for orthogonal contrasts 170 | rcontrast <- function(t, df) 171 | {r <- sqrt(t^2/(t^2 + df)) 172 | print(paste("r = ", r)) 173 | } 174 | 175 | rcontrast(2.474, 12) 176 | rcontrast(2.029, 12) -------------------------------------------------------------------------------- /anova/untitled.R: -------------------------------------------------------------------------------- 1 | #-------------------------- ANOVA Test Procedure -----------------------# 2 | # 3 | # This script uses the Viagra data as described in Discovering Statistics Using R, pg. 434, by Andy Field 4 | # 5 | #-----------------------------------------------------------------------# 6 | # 7 | # Load libraries 8 | library(ggplot2) 9 | library(granova) 10 | library(car) 11 | library(Rcmdr) 12 | library(pastecs) 13 | library(multcomp) 14 | library(compute.es) 15 | 16 | # First we enter the data manually 17 | 18 | libido <- c(3,2,1,1,4,5,2,4,2,3,7,4,5,3,6) 19 | dose <- gl(3, 5, labels = c("Placebo", "Low Dose", "High Dose")) 20 | viagraData <- data.frame(dose, libido) 21 | 22 | ls.str(viagraData) 23 | 24 | # Let's examine the data and test assumptions 25 | shapiro.test(viagraData$libido) 26 | 27 | # Shapiro-Wilk's test indicates non-normal sampling distribution for libido at p = .581 28 | 29 | 30 | leveneTest(viagraData$libido, viagraData$dose, center=median) 31 | # 32 | # Levene's Test is non-significant at F(2, 12) = 0.1176 and p = 0.89 33 | # Levene's test indicates that the variances are not significantly different 34 | # Homogeneity of variance is ok 35 | 36 | # Graph - this gets very fancy by placing 2 points for the mean to give the appearance of a dot with a border 37 | # 38 | # From the graph we see that the error bars overlap from one mean to the next and that there seems to 39 | # be a linear trend as the data changes 40 | line <- ggplot(viagraData, aes(dose, libido)) 41 | line + stat_summary(fun.y = mean, geom = "line", size = 1, aes(group=1), colour = "#FF6633") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 0.75, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 4, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 3, colour = "#FF6633") + labs(x = "Dose of Viagra", y = "Mean Libido") 42 | 43 | # Descriptive statistics 44 | by(viagraData$libido, viagraData$dose, stat.desc) 45 | 46 | # We can use the lm() or aov() function to create the model. 
47 | viagraModel <- aov(libido ~ dose, data = viagraData) 48 | 49 | summary(viagraModel) 50 | 51 | # Our viagra model shows that the probability of the F value occuring if there were 52 | # no effect in the population at p = .0247, or significant at p < .05 53 | 54 | # Let's take a look at the plots the aov constructed 55 | plot(viagraModel) 56 | 57 | # Welch's F Test 58 | oneway.test(libido ~ dose, data = viagraData) 59 | 60 | # Welch's F is more conservative at F(2, 7.943) = 4.23, p = 0.54 which indicates non-significance 61 | 62 | # Using robust methods (Wilcox) -------------------------------------------------------------------------------- /common_coding_issues.R: -------------------------------------------------------------------------------- 1 | # Correcting common coding issues in data 2 | 3 | # We'll look at a dataset of video attributes 4 | # from Youtube and prepare it for further study. 5 | # This data is taken from webcrawls conducted 6 | # by Cheng, Dale, and Liu at Simon Fraser University. 7 | # Details of their methodology can be found at 8 | # http://netsg.cs.sfu.ca/youtubedata/ 9 | 10 | Videos = read.delim("videos.txt") 11 | summary(Videos) 12 | 13 | # Let's examine the categories variable 14 | summary(Videos$category) 15 | 16 | # Notice the mysterious first entry in the summary, 17 | # which has a number but no text 18 | # We can confirm this by checking the values of this variable 19 | levels(Videos$category) 20 | 21 | # It looks like missing categories are incorrectly 22 | # coded as empty strings. We should correct these to NA's 23 | Videos$category[Videos$category == ""] = NA 24 | 25 | # the 9 data points are gone, but the "" still 26 | # appears as a level. 27 | summary(Videos$category) 28 | 29 | # Can fix this by recreating the factor in R 30 | Videos$category = factor(Videos$category) 31 | 32 | # Next, let's look at the rate variable, which 33 | # represents the average rating given to a video on a 34 | # 5-point scale. We would typically begin by looking 35 | # at a histogram 36 | hist(Videos$rate) 37 | 38 | # Notice the surprising peak around zero - is that meaningful? 39 | 40 | # Let's check if these values are exactly zero, or just close to zero. 41 | # Create a vector of videos that have a zero rating exactly 42 | zero_rate = Videos$rate == 0 43 | 44 | # And see how many of these videos there are 45 | summary(zero_rate) 46 | 47 | # We may check to see how many ratings these videos have. 48 | # According to the documentation for the dataset, the number 49 | # of ratings a video has is coded in the ratings vector 50 | Videos$ratings[zero_rate] 51 | 52 | # See all the zeros? We conclude that videos with no 53 | # ratings are being coded 54 | # with an average rating of zero. This would throw off 55 | # our means and other statistics 56 | # get the vector of videos with no ratings 57 | no_rate = Videos$ratings == 0 58 | 59 | # and recode the rate variable as missing for these videos 60 | Videos$rate[no_rate] = NA 61 | 62 | 63 | # Let's similarly inspect the age variable 64 | hist(Videos$age) 65 | # Notice the peak at age 0 66 | 67 | # get the vector of videos with age zero to look closer 68 | zero_age = Videos$age == 0 69 | 70 | # could it be that these are the unrated videos again? 
71 | Videos$ratings[zero_age] 72 | 73 | # Let's look at all of the columns to see if anything 74 | # jumps out about these datapoints 75 | # We need to subset the dataset, pulling out just the rows 76 | # which for the zero-age videos 77 | Videos[Videos$age == 0,] 78 | 79 | # Notice that they all come from the UNA category 80 | # We can check that all Videos in UNA also have zero age 81 | Videos$age[Videos$category == " UNA "] 82 | 83 | 84 | # At this point, I went and read the documentation 85 | # on the researchers' website. The age is supposed to 86 | # show days from the formation of Youtube 87 | # to the time the video was uploaded. 88 | 89 | # Because we don't expect many videos the day 90 | # youtube was created, this confirms out suspicion 91 | # that the zeros represent missing values 92 | # This also suggests that the UNA category represents 93 | # missing values, so lets recode all of these as NAs 94 | Videos$age[zero_age] = NA 95 | Videos$category[zero_age] = NA 96 | Videos$category = factor(Videos$category) 97 | 98 | # Moreover, for an age variable, we'd rather 99 | # measure from the day the video was created to 100 | # the day the data was gathered. 101 | # We can do that by subtracting the maximum age from each 102 | # video's age. 103 | Videos$age = max(Videos$age, na.rm = T)- Videos$age 104 | 105 | # Let's see if that looks more like what we'd expect 106 | hist(Videos$age) 107 | 108 | # Finally, let's save our dataframe so we can study it 109 | # more later 110 | save(Videos, file= "Videos_clean.Rdata") 111 | 112 | -------------------------------------------------------------------------------- /correlation/Exam Anxiety.dat: -------------------------------------------------------------------------------- 1 | Code Revise Exam Anxiety Gender 2 | 1 4 40 86.298 Male 3 | 2 11 65 88.716 Female 4 | 3 27 80 70.178 Male 5 | 4 53 80 61.312 Male 6 | 5 4 40 89.522 Male 7 | 6 22 70 60.506 Female 8 | 7 16 20 81.462 Female 9 | 8 21 55 75.82 Female 10 | 9 25 50 69.372 Female 11 | 10 18 40 82.268 Female 12 | 11 18 45 79.044 Male 13 | 12 16 85 80.656 Male 14 | 13 13 70 70.178 Male 15 | 14 18 50 75.014 Female 16 | 15 98 95 34.714 Male 17 | 16 1 70 95.164 Male 18 | 17 14 95 75.82 Male 19 | 18 29 95 79.044 Female 20 | 19 4 50 91.134 Female 21 | 20 23 60 64.536 Male 22 | 21 14 80 80.656 Male 23 | 22 12 75 77.432 Male 24 | 23 22 85 65.342 Female 25 | 24 84 90 .0560000000000116 Female 26 | 25 23 30 71.79 Female 27 | 26 26 60 81.462 Female 28 | 27 24 75 63.73 Male 29 | 28 72 75 27.46 Female 30 | 29 37 27 73.402 Female 31 | 30 10 20 89.522 Male 32 | 31 3 75 89.522 Female 33 | 32 36 90 75.014 Female 34 | 33 43 60 43.58 Male 35 | 34 19 30 82.268 Male 36 | 35 12 80 79.044 Male 37 | 36 9 10 79.044 Female 38 | 37 72 85 37.132 Male 39 | 38 10 7 81.462 Male 40 | 39 12 5 83.074 Female 41 | 40 30 85 50.834 Male 42 | 41 15 20 82.268 Male 43 | 42 8 45 78.238 Female 44 | 43 34 60 72.596 Male 45 | 44 22 70 74.208 Female 46 | 45 21 50 75.82 Female 47 | 46 27 25 70.984 Male 48 | 47 6 50 97.582 Male 49 | 48 18 40 67.76 Male 50 | 49 8 80 75.014 Male 51 | 50 19 50 73.402 Female 52 | 51 0 35 93.552 Female 53 | 52 52 80 58.894 Female 54 | 53 38 50 53.252 Female 55 | 54 19 49 84.686 Male 56 | 55 23 75 89.522 Female 57 | 56 11 25 71.79 Female 58 | 57 27 65 82.268 Male 59 | 58 17 80 69.372 Male 60 | 59 13 50 62.118 Male 61 | 60 42 70 68.566 Female 62 | 61 4 40 93.552 Male 63 | 62 8 80 84.686 Female 64 | 63 6 10 82.268 Male 65 | 64 11 20 81.462 Female 66 | 65 7 40 82.268 Male 67 | 66 15 40 91.134 Male 68 | 67 4 70 91.94 
Female 69 | 68 28 52 86.298 Female 70 | 69 22 50 72.596 Male 71 | 70 29 60 63.73 Female 72 | 71 2 80 63.73 Male 73 | 72 16 60 71.79 Female 74 | 73 59 65 57.282 Male 75 | 74 10 15 84.686 Female 76 | 75 13 85 84.686 Male 77 | 76 8 20 77.432 Female 78 | 77 5 80 82.268 Female 79 | 78 2 100 10 Male 80 | 79 38 100 50.834 Female 81 | 80 4 80 87.91 Male 82 | 81 10 10 83.88 Male 83 | 82 6 70 84.686 Female 84 | 83 68 100 20.206 Female 85 | 84 8 70 87.104 Male 86 | 85 1 70 83.88 Female 87 | 86 14 65 67.76 Male 88 | 87 42 75 95.97 Female 89 | 88 13 85 62.118 Female 90 | 89 1 30 84.686 Male 91 | 90 3 5 92.746 Male 92 | 91 5 10 84.686 Female 93 | 92 12 90 83.074 Female 94 | 93 19 70 73.402 Male 95 | 94 2 20 87.91 Female 96 | 95 19 85 71.79 Male 97 | 96 11 35 86.298 Male 98 | 97 15 30 84.686 Female 99 | 98 23 70 75.82 Male 100 | 99 13 55 70.984 Female 101 | 100 14 75 78.238 Female 102 | 101 1 2 82.268 Male 103 | 102 9 40 79.044 Male 104 | 103 20 50 91.134 Female 105 | -------------------------------------------------------------------------------- /correlation/The Biggest Liar.dat: -------------------------------------------------------------------------------- 1 | Creativity Position Novice 2 | 53 1 0 3 | 36 3 1 4 | 31 4 0 5 | 43 2 0 6 | 30 4 1 7 | 41 1 0 8 | 32 4 0 9 | 54 1 1 10 | 47 2 1 11 | 50 2 0 12 | 56 1 0 13 | 50 1 0 14 | 34 1 1 15 | 41 4 0 16 | 52 5 0 17 | 38 1 0 18 | 35 2 1 19 | 35 3 1 20 | 48 1 0 21 | 38 1 1 22 | 43 2 1 23 | 38 3 1 24 | 39 1 1 25 | 51 1 0 26 | 26 3 0 27 | 31 4 1 28 | 45 1 0 29 | 31 3 0 30 | 34 2 1 31 | 35 5 1 32 | 39 2 1 33 | 50 2 1 34 | 53 1 1 35 | 38 2 0 36 | 37 3 0 37 | 34 2 1 38 | 30 1 0 39 | 46 6 0 40 | 36 2 1 41 | 26 1 1 42 | 41 1 1 43 | 50 6 1 44 | 43 1 0 45 | 54 2 0 46 | 39 1 1 47 | 51 2 0 48 | 47 1 0 49 | 38 3 1 50 | 32 1 0 51 | 28 4 1 52 | 39 1 0 53 | 42 1 1 54 | 44 2 0 55 | 21 5 1 56 | 43 1 1 57 | 39 3 1 58 | 42 1 1 59 | 35 2 0 60 | 52 1 0 61 | 39 1 0 62 | 45 1 0 63 | 25 4 1 64 | 41 1 1 65 | 35 3 1 66 | 27 3 0 67 | 44 4 0 68 | 37 3 1 69 | 40 1 1 70 | -------------------------------------------------------------------------------- /correlation/correlation.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages 2 | setwd("~/OneDrive/MIDS/W203/Data/08") 3 | 4 | library(boot) 5 | library(ggm) 6 | library(ggplot2) 7 | library(Hmisc) 8 | library(polycor) 9 | library(pastecs) 10 | 11 | examData <- read.delim("Exam Anxiety.dat", header = TRUE) 12 | 13 | cor(examData[,c("Anxiety","Exam","Revise")], use="complete.obs", method = "kendall") 14 | 15 | # create a matrix from the dataframe so that rcorr() can use it 16 | # first remove non-numeric vectors in the data frame 17 | exams <- subset(examData, select = c("Exam","Anxiety","Revise")) 18 | # convert that dataset to a matrix 19 | examMatrix <- as.matrix(exams) 20 | 21 | # need to disambiguate rcorr from the version in ggm package 22 | Hmisc::rcorr(as.matrix(examData[, c("Exam", "Anxiety", "Revise")])) 23 | 24 | examData2 <- examData[, c("Exam", "Anxiety", "Revise")] 25 | cor(examData2) 26 | 27 | cor.test(examData$Anxiety, examData$Exam) 28 | cor.test(examData$Revise, examData$Exam) 29 | cor.test(examData$Anxiety, examData$Revise) 30 | 31 | cor(examData2)^2 * 100 32 | 33 | liarData <- read.delim("The Biggest Liar.dat", header = TRUE) 34 | liarData 35 | liarHist.creativity <- ggplot(liarData, aes(Creativity), theme(legend.position = "none")) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x = "Creativity Score", y = "Density") + 
stat_function(fun = dnorm, args = list(mean = mean(liarData$Creativity, na.rm = TRUE), sd = sd(liarData$Creativity, na.rm = TRUE)), colour = "black", size = 1) 36 | liarHist.creativity 37 | 38 | stat.desc(liarData$Creativity, basic = FALSE, norm = TRUE) 39 | 40 | cor(liarData$Position, liarData$Creativity, method = "spearman") 41 | Hmisc::rcorr(as.matrix(liarData[, c("Position", "Creativity")])) 42 | 43 | cor.test(liarData$Position, liarData$Creativity, alternative = "less", method = "spearman") 44 | 45 | # partial correlation for Exam Anxiety while accounting for Revise 46 | pc <- pcor(c("Exam", "Anxiety", "Revise"), var(examData2)) 47 | pc^2 48 | pcor.test(pc, 1, 103) 49 | 50 | cpData <- read.delim("Chamorro-Premuzic.dat", header = TRUE) 51 | cor(cpData[,c("studentN", "studentE", "studentO", "studentA", "studentC", "lectureN", "lecturE", "lecturO", "lecturA", "lecturC")], use="pairwise.complete.obs", method = "pearson") 52 | 53 | # extract just the data in which we're interested 54 | cpMatrix <- as.matrix(cpData[, c("studentN", "studentE", "studentO", "studentA", "studentC", "lectureN", "lecturE", "lecturO", "lecturA", "lecturC")]) 55 | Hmisc::rcorr(cpMatrix) 56 | 57 | # results -> studentN prefers lecturA r=.10, p < 0.05 58 | # results -> studentE prefers lecturE r=.15, p < 0.05 59 | # results -> studentO does not like lecureN r=-.10, p < 0.05 60 | # results -> studentO prefers lecturO r=0.2, p < 0.01 61 | # results -> studentO does not like lecturA, r=-.16, p < 0.05 62 | -------------------------------------------------------------------------------- /correlation/correlation_chi_square_demo.R: -------------------------------------------------------------------------------- 1 | ### A demonstration of correlation and chi-square in R 2 | 3 | ### Preparation 4 | 5 | # car gives us nice scatterplots 6 | library(car) 7 | 8 | # We'll use our Country-by-Country dataset 9 | load("Countries2.Rdata") 10 | summary(Countries) 11 | 12 | 13 | # We'll also use Google's dataset of takedown requests - 14 | # that is, orders that come from governments of 15 | # different countries to remove certain content 16 | # from Youtube, search results, and other online products. 17 | # Each row of this dataset corresponds to a specific 18 | # country and a specific online product (you can think 19 | # of the unit of analysis as country x product), and there 20 | # are several variables of interest: 21 | # 22 | # Country - the country making specific takedown requests 23 | # Product - the online product the content is hosted on 24 | # (Youtube, Blogger, etc) 25 | # Reason - a reason why the content is being targeted 26 | # (copyright violation, government criticism, etc..) 27 | # Court.Orders - the number of requests from the Country's 28 | # court system 29 | # Executive..Police..etc. - the number of requests from the 30 | # executive and other branches of government 31 | # Items.Requested.To.Be.Removed - the number of separate items 32 | # of content. However, this variable seems to 33 | # have a lot of missing values 34 | 35 | # Read in the data 36 | Requests = read.csv("Removal_Requests.csv") 37 | head(Requests) 38 | 39 | # Note that there are multiple rows per country in 40 | # the Requests dataframe. 41 | 42 | # Create a new variable for total number of requests from 43 | # all branches of government 44 | Requests$total.takedowns = Requests$Court.Orders + Requests$Executive..Police..etc. 
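# Note: the simple sum above returns NA for any row where either count column is
# missing. If that turned out to be a problem, an NA-tolerant version of the same
# variable (using the same two column names) could be computed with rowSums:
# Requests$total.takedowns = rowSums(Requests[, c("Court.Orders", "Executive..Police..etc.")], na.rm = TRUE)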
45 | 46 | # To merge our datasets, we first need to sum together all the 47 | # rows for each country in the Requests dataset, so that 48 | # each country only appears in one row. 49 | # (we'll lose some variables when we do this, such as the product 50 | # the request referred to) 51 | R2 = aggregate(Requests[,c("Court.Orders", "Executive..Police..etc.", "total.takedowns")], list(Country = Requests$Country), sum) 52 | 53 | # Notice that there's one row per country now. 54 | head(R2) 55 | 56 | # Perform the merge 57 | Countries = merge(Countries, R2, by="Country", all=T) 58 | 59 | head(Countries) 60 | 61 | 62 | 63 | ### Correlation: Linear relationships between metric variables 64 | 65 | # Let's examine the relationship between corruption 66 | # and takedown requests. 67 | 68 | # Use a scatterplot to see how linear the relationship looks 69 | scatterplot(Countries$cpi, Countries$total.takedowns) 70 | 71 | #check the correlation 72 | cor.test(Countries$cpi, Countries$total.takedowns) 73 | 74 | # the cor function allows us to construct a correlation matrix 75 | cor(Countries[,c("gdp", "cpi", "total.takedowns")], use = "pairwise.complete.obs") 76 | 77 | # the output is actually a matrix object, so we can 78 | # do things like square each value to get R-squared 79 | cor(Countries[,c("gdp", "cpi", "total.takedowns")], use = "pairwise.complete.obs")**2 80 | 81 | 82 | 83 | ### Chi-square: Testing for relationships between categorical variables 84 | # Here are three different approaches, depending on structure of dataset 85 | 86 | ## 1. Two categorical variables 87 | 88 | # Look at the frequency table between region and whether a country is corrupt 89 | table(Countries$region, Countries$high_cpi) 90 | 91 | # We store the results of our chi-square test so we can extract more 92 | # values from the output 93 | cs = chisq.test(Countries$region, Countries$high_cpi) 94 | 95 | # Examine the test result 96 | cs 97 | 98 | # Look at the std. residuals to see which regions contribute most to the result 99 | cs$stdres 100 | 101 | # Check the expected counts to see if any are less than 5 and 102 | # if we should therefore try Fisher's exact test 103 | cs$expected 104 | 105 | # Use Fisher's exact test in this case: 106 | fisher.test(Countries$region, Countries$high_cpi) 107 | 108 | # For an effect size, we could compute Cramer's V manually 109 | # We may wish to put the code in a function so we can use 110 | # it again whenever we want. 111 | cramers_v = function(cs) 112 | { 113 | cv = sqrt(cs$statistic / (sum(cs$observed) * (min(dim(cs$observed))-1))) 114 | print.noquote("Cramer's V:") 115 | return(as.numeric(cv)) 116 | } 117 | 118 | # run our new function on our chi-square test 119 | cramers_v(cs) 120 | 121 | # As a rule of thumb, 122 | # Cramer's V under .2 is weak 123 | # between .2 and .4 is strong 124 | # and above .4 is very strong 125 | 126 | ## 2. Count data, one variable in columns 127 | 128 | # Consider each request to be the unit of analysis, and consider two variables: 129 | # Whether it came from a corrupt or trustworthy country; and whether it came 130 | # through a court order or executive/police action. We want to know if these 131 | # variables are independent or related. 
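# As a reminder of what chisq.test actually computes: under independence the
# expected count for each cell is (row total * column total) / grand total, and
# the statistic is sum((observed - expected)^2 / expected).
# A tiny illustration on a made-up 2x2 count matrix (not our data):
obs = matrix(c(30, 10, 20, 40), nrow = 2)
expected = outer(rowSums(obs), colSums(obs)) / sum(obs)
sum((obs - expected)^2 / expected)  # matches chisq.test(obs, correct = FALSE)$statistic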
132 | 133 | # We can use aggregate to collapse the rows to just the high_cpi variable 134 | Corrupt_Source = aggregate(Countries[,c("Court.Orders", "Executive..Police..etc.")], list(high_cpi = Countries$high_cpi), sum, na.rm=T) 135 | 136 | # Note that we've created a table of counts: 137 | Corrupt_Source 138 | 139 | # Not required, but we can add row names to make the chi-square output prettier 140 | rownames(Corrupt_Source)=Corrupt_Source$high_cpi 141 | 142 | # We want to plug our count table into the chi-square test 143 | # but we first have to remove the first column, 144 | # because it's a factor. 145 | # Otherwise, R will throw an error. 146 | # Notice that we can use a negative index to omit columns 147 | # That is, we can choose columns 2 and 3 with c(2,3) 148 | # or we can get the same thing by skipping column 1 with c(-1) 149 | Corrupt_Source[,c(-1)] 150 | 151 | # Plug this into the Chi-square test 152 | cs = chisq.test(Corrupt_Source[,c(-1)]) 153 | cs 154 | 155 | # Look at the standardized residuals to see which direction the effect is in 156 | cs$stdres 157 | 158 | # Check the expected counts to see if any are less than 5 and 159 | # if we should therefore try Fisher's exact test 160 | cs$expected 161 | 162 | # Since we have a 2x2 matrix, we can measure the effect 163 | # size elegantly as an odds ratio. 164 | # First, get the odds an order came from a Court for 165 | # corrupt countries 166 | corrupt_odds = Corrupt_Source["Corrupt","Court.Orders"] / Corrupt_Source["Corrupt","Executive..Police..etc."] 167 | 168 | # Do the same for the trustworth countries. 169 | trustworthy_odds = Corrupt_Source["Trustworthy","Court.Orders"] / Corrupt_Source["Trustworthy","Executive..Police..etc."] 170 | 171 | # The odds ratio is just one divided by the other 172 | corrupt_odds / trustworthy_odds 173 | 174 | ## 3. Count data, both variables in rows 175 | 176 | # Let's see if corrupt countries are likely to target different products 177 | # than trustworthy ones. For this, we can't aggregate our data by Country 178 | # so go back to the original request data, and merge in the high_cpi variable 179 | # also, remove countries that are missing corruption data 180 | Requests2 = merge(Countries[,c("Country", "high_cpi")], Requests, by="Country") 181 | Requests2 = Requests2[ ! is.na(Requests2$high_cpi),] 182 | head(Requests2) 183 | 184 | # We want separate columns for takedown requests from corrupt countries 185 | # and from trustworthy countries. Here, we create both columns, and copy 186 | # each value for total.takedowns to the appropriate one. 187 | Corrupt_Product = Requests2[,c("Product","high_cpi")] 188 | Corrupt_Product$Corrupt = ifelse(Requests2$high_cpi == "Corrupt", Requests2$total.takedowns, 0) 189 | Corrupt_Product$Trustworthy = ifelse(Requests2$high_cpi == "Trustworthy", Requests2$total.takedowns, 0) 190 | 191 | # Observe that each row only has a positive value in one of the two new columns 192 | head(Corrupt_Product) 193 | 194 | # Next we sum Corrupt and Trustworthy columns for each product. 
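# (aggregate keeps both summed columns together in a data frame, which is the
# shape chisq.test needs below; tapply(Corrupt_Product$Corrupt, Corrupt_Product$Product, sum)
# would give the same totals, but only one column at a time.)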
195 | Corrupt_Product = aggregate(Corrupt_Product[,c("Corrupt","Trustworthy")], list( Product = Corrupt_Product$Product), sum) 196 | 197 | # We are left with a contingency table 198 | Corrupt_Product 199 | 200 | # We could have also created the table in one step, using the cast command 201 | library(reshape) 202 | Corrupt_Product = cast(Requests2, Product ~ high_cpi , fun = sum, value = c("total.takedowns")) 203 | Corrupt_Product 204 | 205 | # Run a chi-square test as before 206 | cs = chisq.test(Corrupt_Product[,c(-1)]) 207 | cs 208 | 209 | # Check standardized residuals 210 | cs$stdres 211 | 212 | # And expected values 213 | cs$expected 214 | 215 | # The fisher test is probably too computationally intensive to run 216 | fisher.test(Corrupt_Product[,c(-1)]) 217 | 218 | # could also use monte-carlo simulation to check significance 219 | chisq.test(Corrupt_Product[,c(-1)], simulate.p.value = T) 220 | 221 | # let's use the function we wrote earlier to check the effect size 222 | cramers_v(cs) -------------------------------------------------------------------------------- /linear regression/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab= "Monthly Sunspot Numbers") 12 | pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("~/Documents/R/lr") 53 | setwd("~/Documents/R/lr") 54 | setwd("~/Documents/R/lr") 55 | load("Country3.dat") 56 | load("Countries3.dat") 57 | load("Countries3.Rdata") 58 | summary(Countries) 59 | scatterplot(Countries$gdp, Countries$internet_users_2011) 60 | library(car) 61 | scatterplot(Countries$gdp, Countries$internet_users_2011) 62 | rownames(Countries) <- Countries$country 63 | scatterplot(Countries$gdp, Countries$internet_users_2011) 64 | Countries$loggdp <- log(Countries$gpd) 65 | Countries$loggdp <- log(10, Countries$gpd) 66 | ?log 67 | Countries$loggdp <- log10(Countries$gpd) 68 | Countries$loggdp <- log10(Countries$gdp) 69 | scatterplot(Countries$loggdp, Countries$internet_users_2011) 70 | model = lm(internet_users_2011 ~ loggdp, data = Countries) 71 | summary(model) 72 | plot(model) 73 | library(lmtest) 74 | bptest(model) 75 | summary(model) 76 | 10**31.978 77 | Countries$loggdp <- log(Countries$gdp) 78 | 
scatterplot(Countries$loggdp, Countries$internet_users_2011) 79 | model = lm(internet_users_2011 ~ loggdp, data = Countries) 80 | summary(model) 81 | exp(13.89) 82 | summary(Countries) 83 | model = lm(internet_users_2011 ~ gdp, data = Countries)# 84 | summary(model) 85 | -------------------------------------------------------------------------------- /linear regression/Album Sales 1.dat: -------------------------------------------------------------------------------- 1 | adverts sales 2 | 10.256 330 3 | 985.685 120 4 | 1445.563 360 5 | 1188.193 270 6 | 574.513 220 7 | 568.954 170 8 | 471.814 70 9 | 537.352 210 10 | 514.068 200 11 | 174.093 300 12 | 1720.806 290 13 | 611.479 70 14 | 251.192 150 15 | 97.972 190 16 | 406.814 240 17 | 265.398 100 18 | 1323.287 250 19 | 196.65 210 20 | 1326.598 280 21 | 1380.689 230 22 | 792.345 210 23 | 957.167 230 24 | 1789.659 320 25 | 656.137 210 26 | 613.697 230 27 | 313.362 250 28 | 336.51 60 29 | 1544.899 330 30 | 68.954 150 31 | 785.692 150 32 | 125.628 180 33 | 377.925 80 34 | 217.994 180 35 | 759.862 130 36 | 1163.444 320 37 | 842.957 280 38 | 125.179 200 39 | 236.598 130 40 | 669.811 190 41 | 612.234 150 42 | 922.019 230 43 | 50 310 44 | 2000 340 45 | 1054.027 240 46 | 385.045 180 47 | 1507.972 220 48 | 102.568 40 49 | 204.568 190 50 | 1170.918 290 51 | 689.547 340 52 | 784.22 250 53 | 405.913 190 54 | 179.778 120 55 | 607.258 230 56 | 1542.329 190 57 | 1112.47 210 58 | 856.985 170 59 | 836.331 310 60 | 236.908 90 61 | 1077.855 140 62 | 579.321 300 63 | 1500 340 64 | 731.364 170 65 | 25.689 100 66 | 391.749 200 67 | 233.999 80 68 | 275.7 100 69 | 56.895 70 70 | 255.117 50 71 | 566.501 240 72 | 102.568 160 73 | 250.568 290 74 | 68.594 140 75 | 642.786 210 76 | 1500 300 77 | 102.563 230 78 | 756.984 280 79 | 51.229 160 80 | 644.151 200 81 | 15.313 110 82 | 243.237 110 83 | 256.894 70 84 | 22.464 100 85 | 45.689 190 86 | 724.938 70 87 | 1126.461 360 88 | 1985.119 360 89 | 1837.516 300 90 | 135.986 120 91 | 237.703 150 92 | 976.641 220 93 | 1452.689 280 94 | 1600 300 95 | 268.598 140 96 | 900.889 290 97 | 982.063 180 98 | 201.356 140 99 | 746.024 210 100 | 1132.877 250 101 | 1000 250 102 | 75.896 120 103 | 1351.254 290 104 | 202.705 60 105 | 365.985 140 106 | 305.268 290 107 | 263.268 160 108 | 513.694 100 109 | 152.609 160 110 | 35.987 150 111 | 102.568 140 112 | 215.368 230 113 | 426.784 230 114 | 507.772 30 115 | 233.291 80 116 | 1035.433 190 117 | 102.642 90 118 | 526.142 120 119 | 624.538 150 120 | 912.349 230 121 | 215.994 150 122 | 561.963 210 123 | 474.76 180 124 | 231.523 140 125 | 678.596 360 126 | 70.922 10 127 | 1567.548 240 128 | 263.598 270 129 | 1423.568 290 130 | 715.678 220 131 | 777.237 230 132 | 509.43 220 133 | 964.11 240 134 | 583.627 260 135 | 923.373 170 136 | 344.392 130 137 | 1095.578 270 138 | 100.025 140 139 | 30.425 60 140 | 1080.342 210 141 | 799.899 210 142 | 1071.752 240 143 | 893.355 210 144 | 283.161 200 145 | 917.017 140 146 | 234.568 90 147 | 456.897 120 148 | 206.973 100 149 | 1294.099 360 150 | 826.859 180 151 | 564.158 150 152 | 192.607 110 153 | 10.652 90 154 | 45.689 160 155 | 42.568 230 156 | 20.456 40 157 | 635.192 60 158 | 1002.273 230 159 | 1177.047 230 160 | 507.638 120 161 | 215.689 150 162 | 526.48 120 163 | 26.895 60 164 | 883.877 280 165 | 9.104 120 166 | 103.568 230 167 | 169.583 230 168 | 429.504 40 169 | 223.639 140 170 | 145.585 360 171 | 985.968 210 172 | 500.922 260 173 | 226.652 250 174 | 1051.168 200 175 | 68.093 150 176 | 1547.159 250 177 | 393.774 100 178 | 804.282 260 179 | 801.577 210 180 | 
450.562 290 181 | 26.598 220 182 | 179.061 70 183 | 345.687 110 184 | 295.84 250 185 | 2271.86 320 186 | 1134.575 300 187 | 601.434 180 188 | 45.298 180 189 | 759.518 200 190 | 832.869 320 191 | 56.894 140 192 | 709.399 100 193 | 56.895 120 194 | 767.134 230 195 | 503.172 150 196 | 700.929 250 197 | 910.851 190 198 | 888.569 240 199 | 800.615 250 200 | 1500 230 201 | 785.694 110 202 | -------------------------------------------------------------------------------- /linear regression/Album Sales 2.dat: -------------------------------------------------------------------------------- 1 | adverts sales airplay attract 2 | 10.256 330 43 10 3 | 985.685 120 28 7 4 | 1445.563 360 35 7 5 | 1188.193 270 33 7 6 | 574.513 220 44 5 7 | 568.954 170 19 5 8 | 471.814 70 20 1 9 | 537.352 210 22 9 10 | 514.068 200 21 7 11 | 174.093 300 40 7 12 | 1720.806 290 32 7 13 | 611.479 70 20 2 14 | 251.192 150 24 8 15 | 97.972 190 38 6 16 | 406.814 240 24 7 17 | 265.398 100 25 5 18 | 1323.287 250 35 5 19 | 196.65 210 36 8 20 | 1326.598 280 27 8 21 | 1380.689 230 33 8 22 | 792.345 210 33 7 23 | 957.167 230 28 6 24 | 1789.659 320 30 9 25 | 656.137 210 34 7 26 | 613.697 230 49 7 27 | 313.362 250 40 8 28 | 336.51 60 20 4 29 | 1544.899 330 42 7 30 | 68.954 150 35 8 31 | 785.692 150 8 6 32 | 125.628 180 49 7 33 | 377.925 80 19 8 34 | 217.994 180 42 6 35 | 759.862 130 6 7 36 | 1163.444 320 36 6 37 | 842.957 280 32 7 38 | 125.179 200 28 6 39 | 236.598 130 25 8 40 | 669.811 190 34 8 41 | 612.234 150 21 6 42 | 922.019 230 34 7 43 | 50 310 63 7 44 | 2000 340 31 7 45 | 1054.027 240 25 7 46 | 385.045 180 42 7 47 | 1507.972 220 37 7 48 | 102.568 40 25 8 49 | 204.568 190 26 7 50 | 1170.918 290 39 7 51 | 689.547 340 46 7 52 | 784.22 250 36 6 53 | 405.913 190 12 4 54 | 179.778 120 2 8 55 | 607.258 230 29 8 56 | 1542.329 190 33 8 57 | 1112.47 210 28 7 58 | 856.985 170 10 6 59 | 836.331 310 38 7 60 | 236.908 90 19 4 61 | 1077.855 140 13 6 62 | 579.321 300 30 7 63 | 1500 340 38 8 64 | 731.364 170 22 8 65 | 25.689 100 23 6 66 | 391.749 200 22 9 67 | 233.999 80 20 7 68 | 275.7 100 18 6 69 | 56.895 70 37 7 70 | 255.117 50 16 8 71 | 566.501 240 32 8 72 | 102.568 160 26 5 73 | 250.568 290 53 9 74 | 68.594 140 28 7 75 | 642.786 210 32 7 76 | 1500 300 24 7 77 | 102.563 230 37 6 78 | 756.984 280 30 8 79 | 51.229 160 19 7 80 | 644.151 200 47 6 81 | 15.313 110 22 5 82 | 243.237 110 10 8 83 | 256.894 70 1 4 84 | 22.464 100 1 6 85 | 45.689 190 39 6 86 | 724.938 70 8 5 87 | 1126.461 360 38 7 88 | 1985.119 360 35 5 89 | 1837.516 300 40 5 90 | 135.986 120 22 7 91 | 237.703 150 27 8 92 | 976.641 220 31 6 93 | 1452.689 280 19 7 94 | 1600 300 24 9 95 | 268.598 140 1 7 96 | 900.889 290 38 8 97 | 982.063 180 26 6 98 | 201.356 140 11 6 99 | 746.024 210 34 6 100 | 1132.877 250 55 7 101 | 1000 250 5 7 102 | 75.896 120 34 6 103 | 1351.254 290 37 9 104 | 202.705 60 13 8 105 | 365.985 140 23 6 106 | 305.268 290 54 6 107 | 263.268 160 18 7 108 | 513.694 100 2 7 109 | 152.609 160 11 6 110 | 35.987 150 30 8 111 | 102.568 140 22 7 112 | 215.368 230 36 6 113 | 426.784 230 37 8 114 | 507.772 30 9 3 115 | 233.291 80 2 7 116 | 1035.433 190 12 8 117 | 102.642 90 5 9 118 | 526.142 120 14 7 119 | 624.538 150 20 5 120 | 912.349 230 57 6 121 | 215.994 150 19 8 122 | 561.963 210 35 7 123 | 474.76 180 22 5 124 | 231.523 140 16 7 125 | 678.596 360 53 7 126 | 70.922 10 4 6 127 | 1567.548 240 29 6 128 | 263.598 270 43 7 129 | 1423.568 290 26 7 130 | 715.678 220 28 7 131 | 777.237 230 37 8 132 | 509.43 220 32 5 133 | 964.11 240 34 7 134 | 583.627 260 30 7 135 | 923.373 170 15 7 
136 | 344.392 130 23 7 137 | 1095.578 270 31 8 138 | 100.025 140 21 5 139 | 30.425 60 28 1 140 | 1080.342 210 18 7 141 | 799.899 210 28 7 142 | 1071.752 240 37 8 143 | 893.355 210 26 6 144 | 283.161 200 30 8 145 | 917.017 140 10 7 146 | 234.568 90 21 7 147 | 456.897 120 18 9 148 | 206.973 100 14 7 149 | 1294.099 360 38 7 150 | 826.859 180 36 6 151 | 564.158 150 32 7 152 | 192.607 110 9 5 153 | 10.652 90 39 5 154 | 45.689 160 24 7 155 | 42.568 230 45 7 156 | 20.456 40 13 8 157 | 635.192 60 17 6 158 | 1002.273 230 32 7 159 | 1177.047 230 23 6 160 | 507.638 120 0 6 161 | 215.689 150 35 5 162 | 526.48 120 26 6 163 | 26.895 60 19 6 164 | 883.877 280 26 7 165 | 9.104 120 53 8 166 | 103.568 230 29 8 167 | 169.583 230 28 7 168 | 429.504 40 17 6 169 | 223.639 140 26 8 170 | 145.585 360 42 8 171 | 985.968 210 17 6 172 | 500.922 260 36 8 173 | 226.652 250 45 7 174 | 1051.168 200 20 7 175 | 68.093 150 15 7 176 | 1547.159 250 28 8 177 | 393.774 100 27 6 178 | 804.282 260 17 8 179 | 801.577 210 32 8 180 | 450.562 290 46 9 181 | 26.598 220 47 8 182 | 179.061 70 19 1 183 | 345.687 110 22 8 184 | 295.84 250 55 9 185 | 2271.86 320 31 5 186 | 1134.575 300 39 8 187 | 601.434 180 21 6 188 | 45.298 180 36 6 189 | 759.518 200 21 7 190 | 832.869 320 44 7 191 | 56.894 140 27 7 192 | 709.399 100 16 6 193 | 56.895 120 33 6 194 | 767.134 230 33 8 195 | 503.172 150 21 7 196 | 700.929 250 35 9 197 | 910.851 190 26 7 198 | 888.569 240 14 6 199 | 800.615 250 34 6 200 | 1500 230 11 8 201 | 785.694 110 20 9 202 | -------------------------------------------------------------------------------- /linear regression/Chapter 7 DSUR Regression.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/linear regression/Chapter 7 DSUR Regression.R -------------------------------------------------------------------------------- /linear regression/Countries3.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/linear regression/Countries3.Rdata -------------------------------------------------------------------------------- /linear regression/Linear Regression.R: -------------------------------------------------------------------------------- 1 | # Simple Linear Regression in R 2 | # 3 | # Use the albumSales.dat file for these examples 4 | 5 | setwd("~/OneDrive/MIDS/W203/Data/lr") 6 | album1 <- read.delim("Album Sales 1.dat") 7 | head(album1) 8 | summary(album1) 9 | 10 | # run a regression analysis using a linear model, excluding any NA entries 11 | newModel <- lm(sales ~ adverts, data=album1, na.action=na.exclude) 12 | 13 | summary(newModel) 14 | 15 | # The summary() command gives the output from the linear regression 16 | # It tells us that the value of R^2 is 0.3346, so we can get the Pearson's 17 | # correlation co-efficient by taking the square root of R^2: 18 | newModel.pearsonscc = sqrt(0.3346) # which comes to 0.5784462 19 | 20 | # Another way we can determine the pearsons correlation co-efficient is 21 | # to run a correlation test 22 | cor(album1$adverts, album1$sales, use = "complete.obs", method="pearson") 23 | 24 | # Which results in the same value of 0.5784 25 | 26 | # The R^2 value tells us that the adverts accounts for 33.5% of the variation 27 | # in album sales. Therefore there must be other variables that account for 28 | # the remaining 66.5% of variation in album sales. 
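# The same numbers can be read straight off the fitted model object instead of
# being typed in by hand (just another way to get the values quoted above):
summary(newModel)$r.squared           # R^2, approximately 0.3346
sqrt(summary(newModel)$r.squared)     # Pearson's r for a one-predictor model
1 - summary(newModel)$r.squared       # share of the variation left unexplained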
29 | 30 | # Examining the F-statistic from the summary output we have: 31 | # F-statistic: 99.59 on 1 and 198 DF, p-value: < 2.2e-16 32 | 33 | # This tells us that the model is a significant improvement over the mean-only 34 | # model, with p < .001 35 | 36 | # The regression coefficients are given as 37 | # intercept -> 134.1, gradient -> 0.09612 38 | # so we can say that with no additional change in adverts we will have 134100 39 | # albums sold, and that for every additional 1000 spent in adverts we will 40 | # sell an additional 96 albums (not the best investment in the world) 41 | # Both the intercept and gradient have t-test p-values < .001, so we 42 | # can reject the null hypothesis that these values are zero; they represent 43 | # a genuine effect in the population. 44 | 45 | # We can examine the individual residuals from the model on the actual data 46 | # and look for any residuals that seem particularly large 47 | resid(newModel) 48 | 49 | # examine the standardized residuals. 95% of z-scores should lie between +/- 1.96 50 | # and 99% between +/-2.58. Anything over 3 sd should be suspect, or values outside 51 | # +/- 3.29 52 | rstandard(newModel) 53 | 54 | # Examine for residuals that have undue influence on the model, using the studentized residuals 55 | rstudent(newModel) 56 | 57 | # We can calculate the Pearson residual as 58 | PearsonResidual <- (resid(newModel)-mean(resid(newModel)))/sd(resid(newModel)) 59 | 60 | 61 | 62 | 63 | 64 | # Multiple linear regression - we extend the model to multiple predictors 65 | # first get the album2 data 66 | album2<-read.delim("Album Sales 2.dat", header = TRUE) 67 | head(album2) 68 | summary(album2) 69 | 70 | # run the multiple regression model 71 | albumSales.2<-lm(sales ~ adverts, data = album2) 72 | albumSales.3<-lm(sales ~ adverts + airplay + attract, data = album2) 73 | summary(albumSales.2) 74 | summary(albumSales.3) 75 | 76 | # We can see from the R^2 that if the first model (albumSales.2) has multiple R^2 77 | # of 0.3346 and the second model adds two more predictors to get an R^2 of .6647 78 | # that we have accounted for an additional 33% of album sales by the inclusion of the 79 | # other two predictors.
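# A useful supplementary check when adding predictors is adjusted R^2, which
# penalizes the extra terms; both values can be read off the fitted models:
summary(albumSales.2)$adj.r.squared
summary(albumSales.3)$adj.r.squared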
80 | 81 | 82 | 83 | 84 | #---We can obtain standardized parameter estimates with the lm.beta() function--- 85 | library(car) 86 | library(QuantPsyc) 87 | library(boot) 88 | 89 | # the lm.beta() gives us the standardized parameters, such that if parameter 1 90 | # varies by 1 sd then the outcome variable changes by beta sd's 91 | lm.beta(albumSales.3) 92 | sd(album2$sales) 93 | sd(album2$adverts) 94 | 95 | # sd of sales = 80.698, sd of adverts = 485.655 (these are in 1000's), so 96 | # 80,698 and 485,655, respectively 97 | # the lm.beta shows 0.511 on adverts, so an increase of 1 sd in adverts results in 98 | # an increase of 0.511 sd's of sales -> increase of 485,655 in adverts results in 99 | # 0.511 * 80,698 in sales = 41,240 100 | 101 | # the larger the beta values the more important the predictor 102 | 103 | #---Confidence intervals are obtained with the confint() function---- 104 | confint(albumSales.3) 105 | 106 | # compare the two models with anova 107 | anova(albumSales.2, albumSales.3) 108 | 109 | # The anova gives us an F=96.447 and p < .001, so the updated model is an 110 | # improvement over the first model 111 | 112 | # Model Diagnostics 113 | #----Obtain casewise diagnostics and add them to the original data file.--- 114 | 115 | album2$residuals<-resid(albumSales.3) 116 | album2$standardized.residuals <- rstandard(albumSales.3) 117 | album2$studentized.residuals <- rstudent(albumSales.3) 118 | album2$cooks.distance<-cooks.distance(albumSales.3) 119 | album2$dfbeta <- dfbeta(albumSales.3) 120 | album2$dffit <- dffits(albumSales.3) 121 | album2$leverage <- hatvalues(albumSales.3) 122 | album2$covariance.ratios <- covratio(albumSales.3) 123 | 124 | #----List of standardized residuals greater than 2-------------- 125 | album2$standardized.residuals>2| album2$standardized.residuals < -2 126 | 127 | #---Create a variable called large.residual, which is TRUE (or 1) if the residual is greater than 2, or less than -2.---------- 128 | album2$large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 129 | 130 | #---Count the number of large residuals------------- 131 | sum(album2$large.residual) 132 | 133 | 134 | #---Display the value of sales, airplay, attract, adverts, and the standardized residual, for those cases which have a residual greater than 2 or less than -2.------------- 135 | album2[album2$large.residual,c("sales", "airplay", "attract", "adverts", "standardized.residuals")] 136 | 137 | #-----Cook's distance, leverage and covariance ratio for cases with large residuals.--------- 138 | album2[album2$large.residual , c("cooks.distance", "leverage", "covariance.ratios")] 139 | 140 | 141 | #----The Durbin-Watson test is obtained with either dwt() or durbinWatsonTest()--- 142 | durbinWatsonTest(albumSales.3) 143 | dwt(albumSales.3) 144 | 145 | #----Obtaining the VIF--- 146 | vif(albumSales.3) 147 | 148 | #----The tolerance is 1/VIF--- 149 | 1/vif(albumSales.3) 150 | 151 | #----The mean VIF--- 152 | mean(vif(albumSales.3)) 153 | 154 | 155 | #---Histogram of studentized residuals--- 156 | 157 | hist(album2$studentized.residuals) 158 | hist(rstudent(albumSales.3)) 159 | 160 | #--Plot of residuals against fitted (predicted) values, with a flat line at the mean-- 161 | plot(albumSales.3$fitted.values,rstandard(albumSales.3)) 162 | abline(0, 0) 163 | 164 | #same as above 165 | plot(albumSales.3) 166 | 167 | -------------------------------------------------------------------------------- /linear regression/simple_regression.R: 
-------------------------------------------------------------------------------- 1 | ### A demonstration of simple OLS regression in R 2 | 3 | # 1. Setting up a regression 4 | 5 | # Use car for scatterplots 6 | library(car) 7 | 8 | # Load the Countries dataset 9 | load("Countries3.Rdata") 10 | summary(Countries) 11 | 12 | # Let's look at the number of internet users as predicted by 13 | # a country's GDP. As we'll see, this model isn't very well 14 | # chosen, but it will give us a chance to see some of the problems 15 | # that often come up with linear regression. 16 | 17 | # First, check the scatterplot to get a sense of the underlying 18 | # relationship. 19 | scatterplot(Countries$gdp, Countries$internet_users_2011) 20 | 21 | # That doesn't look too linear, so we already know that our 22 | # regression is somewhat problematic. 23 | 24 | # Set the rownames so that they appear in the diagnostic plots 25 | rownames(Countries) = Countries$Country 26 | 27 | # Create the linear model, and look at a summary 28 | model = lm(internet_users_2011 ~ gdp, data = Countries) 29 | summary(model) 30 | 31 | # 2. Regression diagnostics 32 | 33 | # use the plot command for common regression diagnostics 34 | plot(model) 35 | # This command pulls up a series of plots that can be used to 36 | # check the standard OLS assumptions 37 | # 38 | # plot 1: residuals versus fitted values. check for 39 | # heteroskedasticity. This means that the spread of points 40 | # on the graph changes from the left side to the right side. 41 | # We can also look for non-linear relationships. That would 42 | # mean that the band of points moves up and down as you look 43 | # from the left to the right of the plot. This particular 44 | # regression seems to suffer from both problems. 45 | # 46 | # plot 2: qqplot of standardized residuals vs. normal curve. 47 | # Check to see if the errors are normally distributed. That 48 | # means that the points should follow the diagonal line closely. 49 | # In this regression, we don't seem to have normal residuals, 50 | # but it's a large sample size, so we can use the central limit 51 | # theorem and we don't have to worry about this. 52 | # 53 | # plot 3: scale-location. This is a variation on the 54 | # residuals vs fitted values plot. Since we take the absolute 55 | # value of the resuduals, all points appear above the x-axis, 56 | # and more variance appears as higher points. Heteroskedasticity 57 | # can then be detected if the mass of points moves up and down 58 | # as you look from the left to the right of the graph. 59 | # Curiously, this regression doesn't seem to have a problem on 60 | # this plot. It seems the non-linearity is actually obscuring 61 | # the problem with heteroskedasticity. 62 | # 63 | # plot 4: residuals vs leverage. Look for outliers that 64 | # may be biasing the model. 65 | # In this regression, no points have Cook's distance > 1, so 66 | # we don't need to worry. 67 | 68 | # Here's an explicit way to check for points with large residuals 69 | outlierTest(model) 70 | 71 | # Could use the Durbin-Watson test to check for autocorrelation 72 | # It's really not necessary here since the order of Countries 73 | # is alphabetical, and there's no reason to suspect that alphabetical 74 | # proximity influences residuals. 75 | dwt(model) 76 | 77 | # We suspect heteroskedasticity here, and we could check 78 | # with a Breusch-Pagan test. 
Since we are testing a null 79 | # hypothesis of homoskedasticity, this test will tend to 80 | # be significant for large sample sizes, so interpret the 81 | # results with caution. 82 | library(lmtest) 83 | bptest(model) 84 | # In this case, the test is negative. Breusch-Pagan is based 85 | # on squaring the residuals, so again, it could be that the 86 | # non-linear relationship is obscuring the heteroskedasticity. 87 | 88 | # Just in case, let's check the heteroskedasticity-robust errors 89 | # We are keeping our regression line the same, just adjusting 90 | # our estimates of significance. 91 | library(sandwich) 92 | coeftest(model, vcov = vcovHC) 93 | 94 | # As an exercise, you could try the log of gdp instead 95 | # to see what really nice regression output looks like -------------------------------------------------------------------------------- /linear regression/simple_regression_transformation.R: -------------------------------------------------------------------------------- 1 | # We're going to use a similar script to the simple_regression script and 2 | # the Countries3.Rdata file to take a look at Internet Users as a function 3 | # of GDP. However, this time we'll transform the GDP data by taking the 4 | # log(GDP) and performing the regression with that to help deal with the 5 | # heteroskedasticity issues we saw in the non-transformed data. 6 | # 7 | # 8 | # Set the working directory 9 | setwd("~/Documents/R/lr") 10 | # 11 | # Load the car library for the scatterplot function 12 | library(car) 13 | # 14 | # Load the countries dataset 15 | load("Countries3.Rdata") 16 | # 17 | # Take a look at the Countries data 18 | summary(Countries) 19 | # 20 | # Use the country name as the row name as a convenience 21 | rownames(Countries) <- Countries$country 22 | # 23 | # Look at the number of internet users per country by gdp 24 | scatterplot(Countries$gdp, Countries$internet_users_2011) 25 | # 26 | # This looks like there will be some problems with linear regression 27 | # assumptions, and in the previous version of the script we saw 28 | # heteroskedasticity issues. 29 | # So let's try transforming gdp by taking its natural log, log(gdp) 30 | # 31 | Countries$loggdp <- log(Countries$gdp) 32 | # 33 | scatterplot(Countries$loggdp, Countries$internet_users_2011) 34 | # 35 | # Now we see a little better scatterplot 36 | # Let's go ahead and construct our linear regression model 37 | model = lm(internet_users_2011 ~ loggdp, data = Countries) 38 | summary(model) 39 | # 40 | # the coefficients are highly significant as we saw before but 41 | # let's take a look at the plots of the model to see how they look 42 | plot(model) 43 | # 44 | # plot 1: Residuals vs. Fitted 45 | # We still see some variation of the band of residuals but it's much better 46 | # than before the transformation. There's a better overall uniformity to the 47 | # band. 48 | # 49 | # plot 2: QQ Plot 50 | # Again we see that the ends of the plot move away from the diagonal, but it 51 | # is much better than before the transformation. The lower left of the plot 52 | # is still away from the diagonal; however, normality is probably not as much 53 | # of an issue here because of the Central Limit Theorem and sample size. 54 | # 55 | # plot 3: Scale Location 56 | # This alternative homoskedasticity plot looks much better than before the 57 | # transformation as well. 58 | # 59 | # plot 4: Residuals vs. Leverage 60 | # There's nothing to worry about on this plot.
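# To back up the visual check in plot 4 with numbers, we can also list the
# largest Cook's distances directly; values approaching 1 would deserve a closer look.
sort(cooks.distance(model), decreasing = TRUE)[1:5]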
61 | # 62 | library(lmtest) 63 | bptest(model) 64 | # Again the Beusch-Pagan test doesn't really show anything for heterskedacity 65 | # 66 | # So if we look at the summary of the model again, we should be able to 67 | # convert the gdp coefficients back to something that makes sense instead 68 | # of logarithm. 69 | summary(model) 70 | # 71 | # The intercept is -66.518 but the beta co-efficient is 13.89. 72 | # That would make an equation of Y = 13.89*loggdp - 66.518 73 | # So to convert the beta co-efficent we take e^beta 74 | exp(13.89) 75 | # 76 | # e^13.89 = 1077334 77 | # for every 1077334 change in gdp we have 1 unit change in internet users. -------------------------------------------------------------------------------- /logistic regression/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab= "Monthly Sunspot Numbers") 12 | pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("~/Documents/R/logistic regression") 53 | eelData <- read.delim("eel.dat", header = TRUE) 54 | str(eelData) 55 | head(eelData) 56 | eelData$Cured <- relevel(eelData$Cured, "Not Cured") 57 | eelData$Intervention <- relevel(eelData$Intervention, "No Treatment") 58 | str(eelData) 59 | eelModel.1 <- glm(Cured ~ Intervention, data = eelData, family = binomial(link = "logit")) 60 | eelModel.2 <- glm(Cured ~ Interention + Duration, data = eelData, family = binomial(link = "logit")) 61 | eelModel.2 <- glm(Cured ~ Intervention + Duration, data = eelData, family = binomial(link = "logit")) 62 | summary(eelModel.1) 63 | summary(eelModel.2) 64 | chisq.test((154.08 - 144.16)) 65 | chisq.prob <- 1 - pchisq(eelModel.1$null.deviance - eelModel.1$deviance, eelModel.1$df.null - eelModel.1$df.residual) 66 | chisq.prob 67 | print(154.08-144.16) 68 | print(sqrt((3.074^2-(2*1))/154.08)) 69 | str(eelModel.1) 70 | eelModel.1 71 | eelModel.1$z 72 | eelModel.1$z.statistic 73 | eelModel.1$z.value 74 | eelModel.1$effects 75 | coef(summary(eelModel.1))[, 3] 76 | summary(eelModel.1)$coefficients[-1,3] 77 | coef(summary(eelModel.1))[-1, 3] 78 | z.stat <- coef(summary(eelModel.1))[-1, 3] 79 | eelModel.1.z <- coef(summary(eelModel.1))[-1, 3] 80 | eelModel.1.R <- sqrt((eelModel.1.z^2 - 
2*(eelModel.1$df.null - eelModel.1$df.residual))/eelModel.1$null.deviance) 81 | eelModel.1.R 82 | summary(eelModel.1) 83 | eelModel.1$residual.deviance 84 | eelModel.1$deviance 85 | eelModel.1.chisq <- eelModel.1$null.deviance - eelModel.1$deviance 86 | eelModel.1.R2L <- eelModel.1.chisq / eelModel.1$null.deviance 87 | eelModel.1.R2L 88 | eelModel.1.R 89 | eelModel.1.RCS <- 1 - exp((eelModel.1$deviance - eelModel.1$null.deviance) / (eelModel.1$df.null + 1)) 90 | eelModel.1.RCS 91 | eelModel.1.RCN <- eelModel.1.RCS / (1 - (exp(-(eelModel.1$null.deviance / (eelModel.1$df.null + 1))))) 92 | eelModel.1.RCN 93 | eelModel.1.RCS <- 1 - exp((eelModel.1$deviance - eelModel.1$null.deviance) / length(eelModel.1$fitted.values)) 94 | eelModel.1.RCN <- eelModel.1.RCS / (1 - (exp(-(eelModel.1$null.deviance / length(eelModel.1$fitted.values))))) 95 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients) 96 | eelModel.1.oddsRatio 97 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[ , 1]) 98 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[1]) 99 | eelModel.1.oddsRatio 100 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[2]) 101 | eelModel.1.oddsRatio 102 | exp(confint(eel.Model.1)) 103 | exp(confint(eelModel.1)) 104 | eelModel.2.chisq <- eelModel.1$deviance - eelModel.2$deviance # deviance between the 2 models 105 | chisq.prob <- 1 - pchisq(eelModel.2.chisq, eelModel.1$df.residual - eelModel.2$df.residual) 106 | chisq.prob 107 | eelModel.2.chisq 108 | anova(eelModel.1, eelModel.2) 109 | eelData$predicted.probabilities <- fitted(eelModel.1) 110 | eelData$standardized.residuals <- rstandard(eelModel.1) 111 | eelData$studentized.residuals <- rstudent(eelModel.1) 112 | eelData$dfbeta <- dfbeta(eelModel.1) 113 | eelData$dffit <- dffit(eelModel.1) 114 | eelData$dffit <- dffits(eelModel.1) 115 | eelData$leverage <- hatvalues(eelModel.1) 116 | head(eelData[, c("Cured", "Intervantion", "Duration", "predicted.probabilities")]) 117 | head(eelData[, c("Cured", "Intervention", "Duration", "predicted.probabilities")]) 118 | eelData[, c("leverage", "studentized.residuals", "dfbeta")] 119 | penaltyData <- read.delim("penalty.day", header = TRUE) 120 | penaltyData <- read.delim("penalty.dat", header = TRUE) 121 | head(penaltyData) 122 | str(penaltyData) 123 | penaltyModel.1 = glm(Scored ~ Previous, data = penaltyData, family = binomial(link = "logit")) 124 | summary(penatlyModel.1) 125 | summary(penaltyModel.1) 126 | penaltyModel.1.chisq <- penaltyModel.1$null.deviance - penaltyModel.1$deviance 127 | penaltyModel.1.chisqProbability <- pchisq(penalyModel.1.chisq, penaltyModel.1$df.null - penaltyModel.1$df.residual) 128 | penaltyModel.1.chisqProbability <- pchisq(penaltyModel.1.chisq, penaltyModel.1$df.null - penaltyModel.1$df.residual) 129 | penaltyModel.1.chisq 130 | penaltyModel.1.chisqProbability 131 | penaltyModel.1.chisqProbability <- pchisq(penaltyModel.1.chisq, 1) 132 | penaltyModel.1.chisqProbability 133 | pchisq(penaltyModel.1.chisq, 1) 134 | penaltyModel.1$null.deviance 135 | penaltyModel.1$deviance 136 | penaltyModel.1$null.deviance - penaltyModel.1$deviance 137 | pchisq(42.466, 1) 138 | penaltyModel.1.z <- coef(summary(penaltyModel.1))[-1, 3] 139 | penaltyModel.1.z 140 | penaltyModel.1.R <- sqrt((penaltyModel.1.z^2 - 2*(penaltyModel.1$df.null - penaltyModel.1$df.residual))/penaltyModel.1$null.deviance) 141 | penaltyModel.1.R 142 | penaltyModel.1.R2L <- penaltyModel.1.chisq / penaltyModel.1$null.deviance 143 | penaltyModel.1.R2L 144 | penaltyModel.1.RCS <- 1 - exp((penaltyModel.1$deviance - 
penaltyModel.1$null.deviance) / length(penaltyModel.1$fitted.values)) 145 | penaltyModel.1.RCS 146 | penaltyModel.1.RCN <- penaltyModel.1.RCS / (1 - (exp(-(penaltyModel.1$null.deviance / length(penaltyModel.1$fitted.values))))) 147 | penaltyModel.1.RCN 148 | penaltyModel.1.oddsRatio <- exp(penaltyModel.1$coefficients[2]) 149 | penaltyModel.1.oddsRatio 150 | exp(confint(penaltyModel.1)) 151 | penaltyModel.2 <- glm(Scored ~ Previous + PSWQ, data = penaltyData, family = binomial(link = "logit")) 152 | summary(penaltyModel.2) 153 | penaltyModel.2.chisq <- penaltyModel.1$deviance - penaltyModel.2$deviance 154 | penaltyModel.2.chisqProbability <- pchisq(penaltyModel.2.chisq, penaltyModel.1$df.residual - penaltyModel.2$df.residual) 155 | penaltyModel.2.chisq 156 | penaltyModel.2.chisqProbability 157 | penaltyModel.2.z <- coef(summary(penaltyModel.2))[-1, 3] 158 | penaltyModel.2.R <- sqrt((penaltyModel.2.z^2 - 2*(penaltyModel.1$df.residual - penaltyModel.2$df.residual))/penaltyModel.1$deviance) 159 | penaltyModel.2.R 160 | penaltyModel.2.R2L <- penaltyModel.2.chisq / penaltyModel.1$deviance 161 | penaltyModel.2.R2L 162 | penaltyModel.2.RCS <- e - exp((penaltyModel.2$deviance - penaltyModel.1$deviance) / length(penaltyModel.2$fitted.values)) 163 | penaltyModel.2.RCS <- 1 - exp((penaltyModel.2$deviance - penaltyModel.1$deviance) / length(penaltyModel.2$fitted.values)) 164 | penaltyModel.2.RCS 165 | penaltyModel.2.RCN <- penaltyModel.2.RCS / (1 - (exp(-(penaltyModel.1$deviance / length(penaltyModel.2$fitted.values))))) 166 | penaltyModel.2.RCN 167 | penaltyModel.2.oddsRatio <- exp(penaltyModel.2$coefficents[2]) 168 | penaltyModel.3 <- glm(Scored ~ Previous + PSWQ + Anxious, data = penalty.dat, family = binomial()) 169 | penaltyModel.3 <- glm(Scored ~ Previous + PSWQ + Anxious, data = penaltyData, family = binomial()) 170 | summary(penalyModel.3) 171 | summary(penaltyModel.3) 172 | vif(penaltyModel.3) 173 | library(car) 174 | vif(penaltyModel.3) 175 | 1/vif(penaltyModel.3) 176 | library(polycor) 177 | cor(penaltyData$Previous, penaltyData$PSWQ, penaltyData$Anxious) 178 | penaltyVars <- penaltyData[, c("Previous", "PSWQ", "Anxious")] 179 | cor(penaltyVars) 180 | cor.test(penaltyData$Previous, penaltyData$Anxious) 181 | cor.test(penaltyData$Previous, penaltyData$PSWQ) 182 | cor.test(penaltyData$Anxious, penaltyData$PSWQ) 183 | penaltyData$logPSWQInt <- log(penaltyData$PSWQ)*penaltyData$PSWQ 184 | penaltyData$logPreviousInt <- log(penaltyData$Previous)*penaltyData$Previous 185 | penaltyData$logAnxiousInt <- log(penaltyData$Anxious)*penaltyData$Anxious 186 | penaltyData$logPSWQInt <- log(penaltyData$PSWQ)*penaltyData$PSWQ 187 | penaltyModel.test <- glm(Scored ~ Previous + PSWQ + Anxious + logPreviousInt + logPSWQInt + logAnxiousInt, data = penaltyData, family = binomial()) 188 | summary(penaltyModel.test) 189 | -------------------------------------------------------------------------------- /logistic regression/Lacourse et al. 
(2001) Females.dat: -------------------------------------------------------------------------------- 1 | Age Age_Group Drug_Use Father_Negligence Gender Isolation Marital_Status Meaninglessness Metal Mother_Negligence Normlessness Self_Estrangement Suicide_Risk Vicarious Worshipping 2 | 15.83 14-16 Years Old 8 17 Female 6 Together 10 4.83762518844781 10 6 15 Non-Suicidal 5 4 3 | 14.92 14-16 Years Old 9 23 Female 8 Together 26 6 12 8 20 Non-Suicidal 4 6 4 | 15.33 14-16 Years Old 5 15 Female 18 Together 19 6 16 7 17 Non-Suicidal 6 3 5 | 15.83 14-16 Years Old 11 11 Female 9 Separated or Divorced 13 4 10 5 12 Non-Suicidal 3 3 6 | 14.92 14-16 Years Old 7 13 Female 5 Together 13 8 16 3 6 Non-Suicidal 3 9 7 | 14.58 14-16 Years Old 4 29 Female 15 Separated or Divorced 18 7 18 5 15 Suicidal 2 4 8 | 14.5 14-16 Years Old 5 10 Female 8 Together 12 8 9 6 10 Non-Suicidal 3 4 9 | 15.67 14-16 Years Old 7 27 Female 6 Separated or Divorced 18 4 12 7 12 Non-Suicidal 3 4 10 | 14.92 14-16 Years Old 5 23 Female 10 Together 29 14 21 4 28 Non-Suicidal 8 9 11 | 15 14-16 Years Old 4 12 Female 5 Together 22 4 15 7 7 Non-Suicidal 5 9 12 | 15.17 14-16 Years Old 3 19 Female 12 Together 20 8 11 8 19 Non-Suicidal 5 7 13 | 17.17 16-19 Years Old 8 30 Female 14 Separated or Divorced 13 10 29 6 12 Suicidal 6 12 14 | 16.83 16-19 Years Old 11 12 Female 7 Together 22 4 22 7 19 Non-Suicidal 6 7 15 | 16.75 16-19 Years Old 6 16 Female 14 Together 25 7 9 11 10 Non-Suicidal 5 3 16 | 16.58 16-19 Years Old 8.43970783004555 20 Female 5 Together 16 4 15 4 9 Non-Suicidal 8 4 17 | 16.5 16-19 Years Old 5 17 Female 12 Together 17 7 9 6 19 Non-Suicidal 3 7 18 | 16.5 16-19 Years Old 3 24 Female 13 Separated or Divorced 21 7 24 7 16 Non-Suicidal 4 6 19 | 16.75 16-19 Years Old 3 22 Female 16 Together 30 4 31 7 11 Non-Suicidal 4 5 20 | 17 16-19 Years Old 5 20 Female 12 Together 13 4 14 4 12 Non-Suicidal 3 3 21 | 17.25 16-19 Years Old 5 31 Female 16 Separated or Divorced 23 5 21 6 22 Suicidal 5 7 22 | 17.17 16-19 Years Old 3 26 Female 15 Together 16 5 19.5824136069868 4 24 Non-Suicidal 5 9 23 | 17.42 16-19 Years Old 7.15478004110386 23 Female 15 Together 18 5 36 9 14 Non-Suicidal 6 3 24 | 17.08 16-19 Years Old 5 16 Female 10.9559005734461 Together 21 6.53961748140933 13 8 15 Non-Suicidal 6 4 25 | 17.92 16-19 Years Old 11 16 Female 6 Together 17 10 13 12 16 Suicidal 7 6 26 | 17.25 16-19 Years Old 7.87460242195194 26 Female 13 Together 24 6 10 8 10 Non-Suicidal 8 7 27 | 18.5 16-19 Years Old 6 33 Female 6 Separated or Divorced 25 4 9 6 14 Non-Suicidal 4 3 28 | 16.92 16-19 Years Old 5 27 Female 23 Together 10 12 19 5 22 Non-Suicidal 5 4 29 | 17.33 16-19 Years Old 12 14 Female 10 Together 10 6 15 7 16 Suicidal 3 5 30 | 17.42 16-19 Years Old 7 16 Female 6 Together 22 5 13 7 18 Non-Suicidal 5 4 31 | 17.17 16-19 Years Old 7 10 Female 5 Together 11 6 9 5 12 Suicidal 7 4 32 | 16.58 16-19 Years Old 8 12 Female 6 Together 13 9 12 6 11 Non-Suicidal 5 10 33 | 16.58 16-19 Years Old 5 21 Female 8 Together 15 4 18 7 14 Non-Suicidal 6 7 34 | 17.42 16-19 Years Old 6 30 Female 7 Together 25 9 24 4 14 Non-Suicidal 5 3 35 | 16.75 16-19 Years Old 11 24 Female 9 Together 15 7 15 8 12 Non-Suicidal 4 4 36 | 15 14-16 Years Old 11 29 Female 7 Together 26 11 10 6 7 Suicidal 5 10 37 | 16.5 16-19 Years Old 9 10 Female 4 Together 10 4 9 7 5 Non-Suicidal 3 4 38 | 16.58 16-19 Years Old 5 16 Female 13 Together 13 7 16 4 17 Non-Suicidal 5 4 39 | 16.67 16-19 Years Old 6 18 Female 11 Together 19 4 14 3 10 Non-Suicidal 5 3 40 | 17 16-19 Years Old 5 16 Female 8 Together 14 
8.52820197005175 12 5 5 Suicidal 3 3 41 | 17.25 16-19 Years Old 7 12 Female 4 Together 8 4 16 10 10 Suicidal 2 3 42 | 14.58 14-16 Years Old 5 9 Female 4 Separated or Divorced 8 11 12 6 8 Non-Suicidal 8 6 43 | 14.75 14-16 Years Old 6 15 Female 6 Together 17 5 19 4 17 Non-Suicidal 5 5 44 | 14.5 14-16 Years Old 11 29 Female 10 Together 15 6.91536148045185 21 9 13 Non-Suicidal 2 3 45 | 14.75 14-16 Years Old 3 9 Female 4 Together 13 5 9 3 8 Non-Suicidal 3 9 46 | 14.58 14-16 Years Old 14 22 Female 14 Together 27 4 15 12 18 Suicidal 5 10 47 | 14.92 14-16 Years Old 7 13 Female 6 Separated or Divorced 14 4 13 5 11 Non-Suicidal 3 9 48 | 14.5 14-16 Years Old 14 27 Female 14 Together 25 7 18 8 16 Non-Suicidal 6 11 49 | 16.5 16-19 Years Old 9 10 Female 18 Separated or Divorced 15 6 24 6 26 Suicidal 6 5 50 | 14.67 14-16 Years Old 9 24 Female 6 Together 24 13 26 9 21 Suicidal 3 3 51 | 16.83 16-19 Years Old 13 22 Female 9 Separated or Divorced 15 4 9 9 14 Non-Suicidal 5 3 52 | 15.08 14-16 Years Old 8 29 Female 8 Separated or Divorced 30 9 21 17 29 Suicidal 7 10 53 | 15.33 14-16 Years Old 13 14 Female 5 Separated or Divorced 21 12 10 9 22 Suicidal 6 6 54 | 14.67 14-16 Years Old 9 11 Female 8 Together 19 6 10 7 18 Non-Suicidal 3 7 55 | 15.17 14-16 Years Old 11 22 Female 18 Together 20 12 13 8 22 Suicidal 8 9 56 | 16.17 14-16 Years Old 3 15 Female 18 Together 17 12 32 4 27 Non-Suicidal 7 3 57 | 16.08 14-16 Years Old 7 12 Female 5 Together 13.6292687605901 6 9 8 18 Suicidal 4 4 58 | 16.42 16-19 Years Old 3 13 Female 10 Together 14 6 20 3 10 Non-Suicidal 2 9 59 | 15.83 14-16 Years Old 5 11 Female 5 Together 16 5 11 4 8 Non-Suicidal 3 5 60 | 15.92 14-16 Years Old 10 35 Female 12 Separated or Divorced 25 4 17 8 14 Suicidal 7 5 61 | 16.25 16-19 Years Old 5 13 Female 8 Together 14 5 19 5 14 Non-Suicidal 6 6 62 | 17.75 16-19 Years Old 11 9 Female 9 Together 19 6 9 6 21 Suicidal 3 6 63 | 16 14-16 Years Old 6 14 Female 11 Together 17 9 18 11 17 Suicidal 5 10 64 | 15.42 14-16 Years Old 3 17 Female 16 Together 23 14 13 6 18 Non-Suicidal 3 3 65 | 16 14-16 Years Old 11 13 Female 6 Together 30 15 36 8 11 Non-Suicidal 8 6 66 | 15.17 14-16 Years Old 6 12 Female 12 Together 11 6 15 6 18 Non-Suicidal 4 5 67 | 15 14-16 Years Old 7 17 Female 11 Together 16 4 29 9 27 Non-Suicidal 8 4 68 | 14.5 14-16 Years Old 3 21 Female 5 Together 12 5 14 3 6 Non-Suicidal 8 12 69 | 15.08 14-16 Years Old 13 13 Female 7 Separated or Divorced 21 4 12 12 17 Non-Suicidal 8 9 70 | 15 14-16 Years Old 13 17 Female 9 Separated or Divorced 26 6 24 6 24 Non-Suicidal 8 9 71 | 15.08 14-16 Years Old 6 9 Female 7 Separated or Divorced 16 5 16 8 16 Non-Suicidal 4 3 72 | 15.25 14-16 Years Old 5 24 Female 18 Separated or Divorced 23.9099750687457 4 17 15 24 Non-Suicidal 2 4 73 | 15 14-16 Years Old 7 13 Female 11 Together 13 6 9 13 17 Suicidal 4 8 74 | 15.42 14-16 Years Old 13 22 Female 7 Together 18 5 13 5 28 Suicidal 8 6 75 | 14.67 14-16 Years Old 5 13 Female 8 Together 23 6 12 5 18 Non-Suicidal 5 7 76 | 15.42 14-16 Years Old 11 21 Female 6 Together 19 5 20 6 16 Non-Suicidal 5 4 77 | 15.17 14-16 Years Old 15 34 Female 22 Separated or Divorced 27 20 17 12 29 Suicidal 8 10 78 | 15.67 14-16 Years Old 6.36189616889847 11 Female 10 Together 11 5 9 8 15 Non-Suicidal 6 7 79 | 16.42 16-19 Years Old 6 10 Female 18 Together 21 6 14 10 21 Non-Suicidal 3 4 80 | 14.67 14-16 Years Old 4 11 Female 7 Together 11 10 9 4 11 Non-Suicidal 2 10 81 | 16.17 14-16 Years Old 7.35574617962391 15 Female 10 Together 14 7 14 5 20 Non-Suicidal 4 3 82 | 16.25 16-19 Years Old 5 12 Female 5 
Together 16 4 13 5 14 Non-Suicidal 2 3 83 | 15.75 14-16 Years Old 13 14 Female 4 Together 12 7 15 12 9 Non-Suicidal 7 9 84 | 16.33 16-19 Years Old 4 15 Female 12 Together 14 5 13 4 10 Non-Suicidal 4 4 85 | 16.42 16-19 Years Old 12 25 Female 5 Together 18 5 16 6 20 Suicidal 3 5 86 | 17.08 16-19 Years Old 10 31 Female 16 Separated or Divorced 20 11 23 7 23 Suicidal 8 5 87 | 16.67 16-19 Years Old 7 15 Female 8 Together 21 6 10 3 11 Non-Suicidal 2 3 88 | 17.67 16-19 Years Old 4 10 Female 10 Together 22 16 10 5 15 Non-Suicidal 4 3 89 | 17.08 16-19 Years Old 7 25 Female 7 Separated or Divorced 13 6 13 5 13 Non-Suicidal 5 3 90 | 16.83 16-19 Years Old 5 17 Female 12 Separated or Divorced 13 4 20 4 14 Non-Suicidal 5 5 91 | 17 16-19 Years Old 5 20 Female 6 Separated or Divorced 11 6 12 5 19 Non-Suicidal 6 3 92 | 17.25 16-19 Years Old 11 27 Female 10 Separated or Divorced 13 12 12 8 17 Suicidal 7 7 93 | 17.25 16-19 Years Old 9 20 Female 9 Together 30 18 9 15 27 Suicidal 8 10 94 | 16.83 16-19 Years Old 11 31 Female 7 Separated or Divorced 21 7 14 13 16 Suicidal 3 5 95 | 16.42 16-19 Years Old 3 14 Female 17 Together 11 4 17 4 12 Non-Suicidal 6 4 96 | 17.42 16-19 Years Old 3 17 Female 12 Together 12 5 14 5 14 Non-Suicidal 6 3 97 | 16.42 16-19 Years Old 5 12 Female 4 Together 15 4 12 4 12 Non-Suicidal 5 5 98 | 16.75 16-19 Years Old 3 19 Female 11 Separated or Divorced 21 6 18 6 13 Non-Suicidal 5 3 99 | 17.25 16-19 Years Old 3 19 Female 8 Together 10 6 23 3 12 Non-Suicidal 2 3 100 | 17.08 16-19 Years Old 11 11 Female 5 Together 14.433093821544 8 10 6 8 Non-Suicidal 2 5 101 | 17.25 16-19 Years Old 7 16 Female 9 Together 23 6 14 10 10 Suicidal 4 3 102 | 17.33 16-19 Years Old 6 13 Female 4 Together 22 8 11 6 9 Non-Suicidal 4 3 103 | 17.08 16-19 Years Old 6 10 Female 10 Together 10 6 9 5 10 Non-Suicidal 5 4 104 | 17.25 16-19 Years Old 3 19 Female 4 Together 14 12 16 3 9 Non-Suicidal 4 3 105 | 17.42 16-19 Years Old 3 11 Female 9 Together 12 5 10 3 12 Non-Suicidal 2 8 106 | 17.42 16-19 Years Old 5 12 Female 11 Together 26 7 9 5 18 Non-Suicidal 3 4 107 | 15.75 14-16 Years Old 13 13 Female 18 Together 18 14 9 8 21 Suicidal 4 7 108 | 16 14-16 Years Old 5 29 Female 8 Separated or Divorced 19 6 11 7 18 Non-Suicidal 6 6 109 | 16.42 16-19 Years Old 5 10 Female 9 Together 17 4 11.8560541141899 4 17 Non-Suicidal 6 3 110 | 15.58 14-16 Years Old 6 22 Female 18 Together 20 5.06898320036459 15.7517495914531 12 17 Non-Suicidal 6 7 111 | 15.83 14-16 Years Old 5 12 Female 6 Together 13 6 14 3 14 Non-Suicidal 3 4 112 | 15.58 14-16 Years Old 11 21 Female 10 Together 20 4 16 9 21 Suicidal 5 3 113 | 15.83 14-16 Years Old 3 18 Female 9 Together 18 10 14 5 10 Non-Suicidal 3 6 114 | 15.5 14-16 Years Old 3 13 Female 5 Together 19 5 15 6 11 Non-Suicidal 3 4 115 | 15.58 14-16 Years Old 3 10 Female 4 Together 25 4 9 3 14 Non-Suicidal 2 4 116 | 16.33 16-19 Years Old 3 17 Female 5 Together 18 13 13 5 14 Non-Suicidal 4 4 117 | 15.83 14-16 Years Old 11 26 Female 8 Separated or Divorced 22 6 17 9 7 Non-Suicidal 5 4 118 | 15.5 14-16 Years Old 11 12 Female 4 Together 14 18 12 14 15 Non-Suicidal 7 8 119 | 15.58 14-16 Years Old 5 16 Female 14 Together 29 4 12 13 14 Non-Suicidal 5 6 120 | 15.58 14-16 Years Old 4 13 Female 8 Together 15 5 13 8 16 Non-Suicidal 5 3 121 | 15.67 14-16 Years Old 7 20 Female 6 Together 14 6 18 4 11 Non-Suicidal 7 4 122 | 16 14-16 Years Old 5 9 Female 4 Separated or Divorced 11 4 9 4 10 Non-Suicidal 2 3 123 | -------------------------------------------------------------------------------- /logistic regression/eel.dat: 
-------------------------------------------------------------------------------- 1 | Cured Intervention Duration 2 | Not Cured No Treatment 7 3 | Not Cured No Treatment 7 4 | Not Cured No Treatment 6 5 | Cured No Treatment 8 6 | Cured Intervention 7 7 | Cured No Treatment 6 8 | Not Cured Intervention 7 9 | Cured Intervention 7 10 | Cured No Treatment 8 11 | Not Cured No Treatment 7 12 | Cured Intervention 7 13 | Cured No Treatment 7 14 | Cured No Treatment 5 15 | Not Cured Intervention 9 16 | Not Cured No Treatment 6 17 | Not Cured No Treatment 7 18 | Cured Intervention 8 19 | Not Cured Intervention 7 20 | Cured Intervention 7 21 | Cured Intervention 9 22 | Not Cured No Treatment 7 23 | Cured Intervention 9 24 | Cured Intervention 8 25 | Not Cured No Treatment 7 26 | Cured Intervention 6 27 | Cured Intervention 8 28 | Cured No Treatment 7 29 | Cured No Treatment 6 30 | Cured Intervention 7 31 | Cured Intervention 6 32 | Not Cured No Treatment 7 33 | Cured No Treatment 6 34 | Cured Intervention 5 35 | Not Cured Intervention 6 36 | Cured Intervention 7 37 | Cured No Treatment 7 38 | Cured No Treatment 8 39 | Not Cured Intervention 7 40 | Cured No Treatment 5 41 | Cured Intervention 7 42 | Cured No Treatment 9 43 | Not Cured Intervention 10 44 | Cured Intervention 7 45 | Not Cured Intervention 8 46 | Not Cured No Treatment 5 47 | Not Cured No Treatment 8 48 | Not Cured No Treatment 4 49 | Not Cured Intervention 7 50 | Not Cured Intervention 8 51 | Cured No Treatment 6 52 | Cured Intervention 6 53 | Cured Intervention 6 54 | Cured Intervention 7 55 | Cured No Treatment 7 56 | Not Cured No Treatment 8 57 | Cured Intervention 7 58 | Cured Intervention 7 59 | Not Cured No Treatment 7 60 | Cured Intervention 7 61 | Cured Intervention 8 62 | Cured No Treatment 7 63 | Not Cured No Treatment 9 64 | Cured No Treatment 7 65 | Not Cured Intervention 8 66 | Cured Intervention 8 67 | Not Cured No Treatment 7 68 | Not Cured No Treatment 7 69 | Cured No Treatment 7 70 | Not Cured No Treatment 8 71 | Not Cured No Treatment 7 72 | Cured Intervention 8 73 | Not Cured No Treatment 7 74 | Not Cured Intervention 8 75 | Cured Intervention 8 76 | Cured Intervention 9 77 | Cured No Treatment 7 78 | Cured Intervention 10 79 | Not Cured Intervention 5 80 | Cured No Treatment 7 81 | Not Cured Intervention 8 82 | Cured No Treatment 9 83 | Not Cured No Treatment 5 84 | Cured Intervention 10 85 | Cured Intervention 8 86 | Not Cured Intervention 7 87 | Cured Intervention 6 88 | Cured No Treatment 5 89 | Not Cured Intervention 6 90 | Cured Intervention 7 91 | Not Cured No Treatment 7 92 | Cured Intervention 7 93 | Not Cured No Treatment 7 94 | Cured Intervention 7 95 | Cured Intervention 7 96 | Not Cured Intervention 8 97 | Cured Intervention 5 98 | Cured No Treatment 6 99 | Not Cured No Treatment 7 100 | Not Cured No Treatment 6 101 | Cured Intervention 7 102 | Not Cured No Treatment 7 103 | Not Cured No Treatment 9 104 | Cured Intervention 6 105 | Cured No Treatment 6 106 | Not Cured No Treatment 7 107 | Cured No Treatment 7 108 | Not Cured No Treatment 6 109 | Not Cured No Treatment 7 110 | Cured Intervention 8 111 | Cured Intervention 9 112 | Not Cured No Treatment 4 113 | Not Cured No Treatment 6 114 | Cured Intervention 9 115 | -------------------------------------------------------------------------------- /logistic regression/logistic regression.R: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # 3 
| # W203 Week 14 - Logistic Regression 4 | # 5 | # Exercises from Andy Field's Discovering Statistics With R, Chapter 8 6 | # 7 | ############################################################################ 8 | # 9 | # Eel fun, using the eel.dat data set from http://www.sagepub.com/dsur/study/default.htm 10 | # 11 | setwd("~/Documents/R/logistic regression") 12 | # 13 | # Get the eel data set 14 | eelData <- read.delim("eel.dat", header = TRUE) 15 | # 16 | str(eelData) 17 | head(eelData) 18 | # 19 | # reset the factor levels for the dichotomous variables "Intervention" and 20 | # "Cured" to the opposite of what R defaulted them to be 21 | eelData$Cured <- relevel(eelData$Cured, "Not Cured") 22 | eelData$Intervention <- relevel(eelData$Intervention, "No Treatment") 23 | str(eelData) 24 | # 25 | # OK, that looks better. Let's get on with some logistic regression 26 | # Use the glm() method, and use the Intervention variable as a predictor of 27 | # the Cured outcome. In addition, specify the family of distributions to use as 28 | # the binomial distribution with the default logit link function. 29 | # 30 | eelModel.1 <- glm(Cured ~ Intervention, data = eelData, family = binomial(link = "logit")) 31 | # 32 | # Now let's add Duration to the model and create a second model 33 | eelModel.2 <- glm(Cured ~ Intervention + Duration, data = eelData, family = binomial(link = "logit")) 34 | # 35 | # Let's see what we've got 36 | summary(eelModel.1) 37 | summary(eelModel.2) 38 | # 39 | # Based on the model summaries, Intervention is statistically significantly different 40 | # from zero, p < .003, but Duration is not. 41 | # The chi square statistic on the first model is the difference between the two 42 | # deviances (null deviance and residual deviance) = 154.08 - 144.16 43 | print(154.08-144.16) # 9.92 (9.93 using the unrounded deviances) 44 | # 45 | # Let's compute the chi square from the model itself 46 | # 47 | eelModel.1.chisq <- eelModel.1$null.deviance - eelModel.1$deviance 48 | # 49 | # We can calculate the probability of the chi square statistic with the following 50 | chisq.prob <- 1 - pchisq(eelModel.1$null.deviance - eelModel.1$deviance, eelModel.1$df.null - eelModel.1$df.residual) 51 | # 52 | # p < .002 based on the above calculation, so we can reject the null hypothesis 53 | # that the model is no better than chance at predicting the outcome. 54 | # Therefore: including Intervention in the model produced a significant improvement 55 | # in the fit of the model, chi-sq(1) = 9.93, p = .002. 56 | #
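# An equivalent form of the same calculation, using the eelModel.1.chisq object created
# above: pchisq() can return the upper tail directly, which avoids the "1 minus" step.
pchisq(eelModel.1.chisq, eelModel.1$df.null - eelModel.1$df.residual, lower.tail = FALSE)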
57 | # If we examine the coefficients for model 1, we see that Intervention is a 58 | # significant predictor of being cured, b = 1.23, z = 3.07, p = .002 59 | # 60 | # Calculating the various R values for this model (model 1) 61 | # 62 | # First, the standard R 63 | # sqrt((z^2 - 2*degFreedom)/null deviance) 64 | print(sqrt((3.074^2-(2*1))/154.08)) # this is pulling data from the summary display 65 | # 66 | # but let's do it directly from the model. First we extract the z-statistic to make 67 | # it a little easier to deal with, then we compute R 68 | eelModel.1.z <- coef(summary(eelModel.1))[-1, 3] 69 | eelModel.1.R <- sqrt((eelModel.1.z^2 - 2*(eelModel.1$df.null - eelModel.1$df.residual))/eelModel.1$null.deviance) 70 | # 0.22 71 | # 72 | # R^2 L (Hosmer & Lemeshow) is chi-square / -2LL(baseline) 73 | eelModel.1.R2L <- eelModel.1.chisq / eelModel.1$null.deviance # .0644 74 | # 75 | # Cox and Snell R^2 76 | eelModel.1.RCS <- 1 - exp((eelModel.1$deviance - eelModel.1$null.deviance) / length(eelModel.1$fitted.values)) 77 | # 0.0841 78 | # 79 | # Nagelkerke's R^2 80 | eelModel.1.RCN <- eelModel.1.RCS / (1 - (exp(-(eelModel.1$null.deviance / length(eelModel.1$fitted.values))))) 81 | # 0.113 82 | # 83 | # The book goes into a huge calculation using probabilities to calculate the odds ratio 84 | # when all you need to do is take the exponential of the predictor coefficient. 85 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[2]) # 3.42 86 | # 87 | # Confidence Intervals 88 | exp(confint(eelModel.1)) 89 | # 90 | # Both ends of the confidence interval are greater than 1, which means that as the predictor 91 | # increases, so do the odds of being cured. Values less than one would mean that as the predictor 92 | # increases the odds of being cured decrease. Since we observed an 93 | # increase in odds with an increase in the predictor variable, this corresponds to and supports that 94 | # observation. Furthermore, since the confidence interval doesn't dip below 1, it doesn't 95 | # include the possibility that in the population the direction of the relationship is opposite to 96 | # what we observed. 97 | # 98 | # 99 | # Model #2 Analysis - Adding Duration 100 | # 101 | # The b value is very small and not significant, p = 0.964. The residual deviance is essentially 102 | # the same in both models and the AIC is slightly higher in model 2, so model 1 is a better model than 103 | # model 2. 104 | #
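# The AIC values referred to above can be pulled out directly, and the deviance comparison
# done in the next section can also be obtained in one call by asking anova() for a
# chi-square test (a quick cross-check, assuming eelModel.1 and eelModel.2 as fitted above):
AIC(eelModel.1)
AIC(eelModel.2)
anova(eelModel.1, eelModel.2, test = "Chisq")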
105 | # Comparing the models - manual calculations vs. anova 106 | # Manual -- 107 | # 108 | eelModel.2.chisq <- eelModel.1$deviance - eelModel.2$deviance # deviance between the 2 models 109 | chisq.prob <- 1 - pchisq(eelModel.2.chisq, eelModel.1$df.residual - eelModel.2$df.residual) 110 | # confirms p = 0.9644, chi squared is 0.002 111 | # 112 | # Use anova 113 | anova(eelModel.1, eelModel.2) # gives same output of deviance (chi sq) = 0.002, df = 1 114 | # 115 | # Residuals 116 | # 117 | eelData$predicted.probabilities <- fitted(eelModel.1) 118 | eelData$standardized.residuals <- rstandard(eelModel.1) 119 | eelData$studentized.residuals <- rstudent(eelModel.1) 120 | eelData$dfbeta <- dfbeta(eelModel.1) 121 | eelData$dffit <- dffits(eelModel.1) 122 | eelData$leverage <- hatvalues(eelModel.1) 123 | # 124 | # 125 | head(eelData[, c("Cured", "Intervention", "Duration", "predicted.probabilities")]) 126 | # 127 | # from the output we see that the Cured, No Treatment -> 43% probability 128 | # and that Cured, Intervention -> 72% probability, which helps to validate the model 129 | # 130 | # Let's see if there are points for which the model is a poor fit, or which have undue leverage 131 | eelData[, c("leverage", "studentized.residuals", "dfbeta")] 132 | # 133 | # Leverage -> expected value = (k + 1)/N = 2/113 = 0.018 134 | # All the leverage values in the output are very close to this value, so that's good 135 | #
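# A quick check on that benchmark (using the leverage column added above): the hat values
# of a fitted model sum to the number of parameters, so their mean is exactly (k + 1)/N.
mean(eelData$leverage)   # 2/113, about 0.018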
136 | # Residuals (standard or studentized) should have only 5% outside +/- 1.96, 1% outside +/-2.58 137 | # All the values in the output look very good 138 | # 139 | # dfBeta for the constant and for Intervention should be less than 1 -> all look OK. 140 | # 141 | # Soccer Penalty Data 142 | penaltyData <- read.delim("penalty.dat", header = TRUE) 143 | head(penaltyData) 144 | str(penaltyData) 145 | # 146 | # data looks ok, let's start a hierarchical logistic regression model setup, 147 | # starting with Scored ~ Previous 148 | penaltyModel.1 = glm(Scored ~ Previous, data = penaltyData, family = binomial(link = "logit")) 149 | summary(penaltyModel.1) 150 | # 151 | # highly significant coefficient of 0.0964, p < .001 152 | penaltyModel.1.chisq <- penaltyModel.1$null.deviance - penaltyModel.1$deviance 153 | penaltyModel.1.chisqProbability <- 1 - pchisq(penaltyModel.1.chisq, penaltyModel.1$df.null - penaltyModel.1$df.residual) 154 | # 155 | # We end up with a chi squared of 42.466, p < .001 -> we can reject the null hypothesis 156 | # that the model is no better than chance 157 | # 158 | penaltyModel.1.z <- coef(summary(penaltyModel.1))[-1, 3] 159 | penaltyModel.1.R <- sqrt((penaltyModel.1.z^2 - 2*(penaltyModel.1$df.null - penaltyModel.1$df.residual))/penaltyModel.1$null.deviance) 160 | # 161 | # R = 0.427 162 | penaltyModel.1.R2L <- penaltyModel.1.chisq / penaltyModel.1$null.deviance 163 | # 164 | # R^2 L = 0.41 165 | penaltyModel.1.RCS <- 1 - exp((penaltyModel.1$deviance - penaltyModel.1$null.deviance) / length(penaltyModel.1$fitted.values)) 166 | # R^2 CS = 0.432 167 | # 168 | penaltyModel.1.RCN <- penaltyModel.1.RCS / (1 - (exp(-(penaltyModel.1$null.deviance / length(penaltyModel.1$fitted.values))))) 169 | # R^2 N = 0.577 170 | # All the R and R^2 values are very high 171 | # 172 | # odds ratio 173 | penaltyModel.1.oddsRatio <- exp(penaltyModel.1$coefficients[2]) # 1.10 174 | # confidence intervals 175 | exp(confint(penaltyModel.1)) 176 | # 177 | # Move on to add PSWQ to the model 178 | penaltyModel.2 <- glm(Scored ~ Previous + PSWQ, data = penaltyData, family = binomial(link = "logit")) 179 | summary(penaltyModel.2) 180 | # 181 | # b(Previous) = 0.0648, p = .0034 182 | # b(PSWQ) = -0.23009, p = .00395 183 | penaltyModel.2.chisq <- penaltyModel.1$deviance - penaltyModel.2$deviance 184 | penaltyModel.2.chisqProbability <- 1 - pchisq(penaltyModel.2.chisq, penaltyModel.1$df.residual - penaltyModel.2$df.residual) 185 | # chi squared = 12.51, p < .001 186 | penaltyModel.2.z <- coef(summary(penaltyModel.2))[-1, 3] 187 | penaltyModel.2.R <- sqrt((penaltyModel.2.z^2 - 2*(penaltyModel.1$df.residual - penaltyModel.2$df.residual))/penaltyModel.1$deviance) 188 | # R values have come down to ~0.33 and 0.32, respectively 189 | penaltyModel.2.R2L <- penaltyModel.2.chisq / penaltyModel.1$deviance 190 | penaltyModel.2.RCS <- 1 - exp((penaltyModel.2$deviance - penaltyModel.1$deviance) / length(penaltyModel.2$fitted.values)) 191 | penaltyModel.2.RCN <- penaltyModel.2.RCS / (1 - (exp(-(penaltyModel.1$deviance / length(penaltyModel.2$fitted.values))))) 192 | # R^2 values also show improvement 193 | # 194 | # add Anxious to the penalty model 195 | penaltyModel.3 <- glm(Scored ~ Previous + PSWQ + Anxious, data = penaltyData, family = binomial()) 196 | summary(penaltyModel.3) 197 | # 198 | # testing assumptions 199 | library(car) 200 | vif(penaltyModel.3) 201 | 1/vif(penaltyModel.3) 202 | # Previous and Anxious have VIF 35.5, way over 10. 203 | # 204 | # Check Pearson's correlation between predictor variables 205 | library(polycor) 206 | penaltyVars <- penaltyData[, c("Previous", "PSWQ", "Anxious")] 207 | cor(penaltyVars) 208 | # extremely high correlation between Anxious and Previous, and a high correlation between Anxious and PSWQ 209 | cor.test(penaltyData$Previous, penaltyData$Anxious) # highly significant, cor = -0.9929 210 | cor.test(penaltyData$Previous, penaltyData$PSWQ) # highly significant, cor = -0.644 211 | cor.test(penaltyData$Anxious, penaltyData$PSWQ) # highly significant, cor = 0.65 212 | # 213 | # The better a player's previous score, the lower the anxiety 214 | # Anxiety and PSWQ are correlated with each other and both have an inverse relationship with Previous 215 | # This model is unreliable. 216 | # 217 | # Testing for linearity 218 | # We need to test for linearity of the continuous predictors with the logit function. 219 | # To do this we compute the interaction variable ln(predictor) * predictor for each predictor 220 | # Then we add all the variables and the interaction variables into a new model 221 | # We examine the significance of the interaction variables.
If any are significant 222 | # then we have violated linearity for that variable/predictor 223 | penaltyData$logPSWQInt <- log(penaltyData$PSWQ)*penaltyData$PSWQ 224 | penaltyData$logAnxiousInt <- log(penaltyData$Anxious)*penaltyData$Anxious 225 | penaltyData$logPreviousInt <- log(penaltyData$Previous)*penaltyData$Previous 226 | penaltyModel.test <- glm(Scored ~ Previous + PSWQ + Anxious + logPreviousInt + logPSWQInt + logAnxiousInt, data = penaltyData, family = binomial()) 227 | summary(penaltyModel.test) 228 | # None of the Int variables show significance, so linearity test passes 229 | -------------------------------------------------------------------------------- /logistic regression/penalty.dat: -------------------------------------------------------------------------------- 1 | PSWQ Anxious Previous Scored 2 | 18 21 56 Scored Penalty 3 | 17 32 35 Scored Penalty 4 | 16 34 35 Scored Penalty 5 | 14 40 15 Scored Penalty 6 | 5 24 47 Scored Penalty 7 | 1 15 67 Scored Penalty 8 | 4 10 75 Scored Penalty 9 | 12 19 53 Scored Penalty 10 | 11 29 35 Scored Penalty 11 | 15 14 65 Scored Penalty 12 | 23 5 85 Scored Penalty 13 | 11 6 86 Scored Penalty 14 | 14 34 23 Scored Penalty 15 | 22 27 46 Scored Penalty 16 | 12 26 46 Scored Penalty 17 | 5 16 67 Scored Penalty 18 | 6 14 67 Scored Penalty 19 | 4 5 84 Scored Penalty 20 | 7 10 74 Scored Penalty 21 | 8 13 64 Scored Penalty 22 | 12 19 55 Scored Penalty 23 | 14 26 46 Scored Penalty 24 | 13 27 43 Scored Penalty 25 | 1 29 35 Scored Penalty 26 | 7 30 35 Scored Penalty 27 | 10 31 33 Scored Penalty 28 | 15 26 45 Scored Penalty 29 | 16 28 35 Scored Penalty 30 | 18 10 74 Scored Penalty 31 | 17 8 76 Scored Penalty 32 | 14 12 75 Scored Penalty 33 | 20 24 44 Scored Penalty 34 | 18 23 47 Scored Penalty 35 | 2 16 65 Scored Penalty 36 | 6 14 66 Scored Penalty 37 | 4 19 55 Scored Penalty 38 | 4 18 56 Scored Penalty 39 | 2 17 64 Scored Penalty 40 | 4 20 53 Scored Penalty 41 | 1 23 47 Scored Penalty 42 | 20 26 45 Missed Penalty 43 | 25 29 35 Missed Penalty 44 | 16 21 55 Missed Penalty 45 | 18 34 24 Missed Penalty 46 | 17 38 15 Missed Penalty 47 | 29 39 15 Missed Penalty 48 | 24 46 4 Missed Penalty 49 | 25 49 0 Missed Penalty 50 | 22 47 0 Missed Penalty 51 | 26 42 5 Missed Penalty 52 | 23 41 16 Missed Penalty 53 | 25 40 18 Missed Penalty 54 | 28 35 24 Missed Penalty 55 | 18 36 23 Missed Penalty 56 | 17 31 34 Missed Penalty 57 | 14 35 24 Missed Penalty 58 | 15 42 16 Missed Penalty 59 | 11 20 56 Missed Penalty 60 | 10 24 42 Missed Penalty 61 | 15 19 56 Missed Penalty 62 | 14 18 56 Missed Penalty 63 | 16 34 24 Missed Penalty 64 | 17 35 24 Missed Penalty 65 | 25 36 26 Missed Penalty 66 | 24 32 33 Missed Penalty 67 | 17 35 24 Missed Penalty 68 | 27 39 16 Missed Penalty 69 | 28 38 15 Missed Penalty 70 | 27 41 14 Missed Penalty 71 | 25 42 14 Missed Penalty 72 | 26 45 8 Missed Penalty 73 | 22 48 0 Missed Penalty 74 | 28 49 0 Missed Penalty 75 | 28 46 4 Missed Penalty 76 | 24 26 45 Missed Penalty 77 | -------------------------------------------------------------------------------- /multiple regression/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab= 
"Monthly Sunspot Numbers") 12 | pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("~/OneDrive/MIDS/W203/Data/multiple regression") 53 | album2 <- read.delim("Album Sales 2.dat", header = TRUE) 54 | summary(album2) 55 | library(psych) 56 | scatterplot(sales, adverts) 57 | library(car) 58 | scatterplot(sales, adverts) 59 | scatterplot(album2$sales, album2$adverts) 60 | cor.test(album2$sales, album2$adverts) 61 | albumSales.2 <- lm(sales ~ adverts, data = album2) 62 | summary(albumSales.2) 63 | albmuSales.3 <- lm(sales ~ adverts + airplay, data = album2) 64 | summary(albumSales.3) 65 | albumSales.3 <- lm(sales ~ adverts + airplay, data = album2) 66 | summary(albumSales.3) 67 | plot(albumSales.2) 68 | plot(albumSales.3) 69 | albumSales.4 <- lm(sales ~ adverts + airplay + attract, data = album2) 70 | summary(albumSales.4) 71 | plot(albumSales.4) 72 | library(QuantPsych) 73 | library(QuantPsyc) 74 | library(QuantPsyc) 75 | library(boot) 76 | lm.beta(albumSales.4) 77 | confint(albumSales.4) 78 | anova(albumSales.2, albumSales.3) 79 | anova(albumSales.3, albumSales.4) 80 | album2$standardize.residuals <- rstandard(albumSales.4) 81 | album2$studentized.residuals <- rstudent(albumSales.4) 82 | album2$cooks.distance <- cooks.distance(albumSales.4) 83 | album2$dfbeta <- dfbeta(albumSales.4) 84 | album2$dffit <- dffits(albumSales.4) 85 | album2$leverage <- hatvalues(albumSales.4) 86 | album2$covariance.ratios <- covratio(albumSales.4) 87 | album2 88 | album2.large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 89 | album2$large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 90 | album2$large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 91 | album2$large.residual <- (album2$standardized.residuals > 2) | (album2$standardized.residuals < -2) 92 | album2.large.residual 93 | album2$standardized.residuals <- rstandard(albumSales.4) 94 | album2$large.residual <- (album2$standardized.residuals > 2) | (album2$standardized.residuals < -2) 95 | sum(album2$large.residual) 96 | album2[album2$large.residual, c("sales","airplay","attract","adverts","standardized.residuals")] 97 | album2[album2$large.residual, c("cooks.distance", "leverage", "covariance.ratios")] 98 | 1 + 3(3 + 1)/200 99 | 1 + 12/200 100 | 1 - 12/200 101 | dwt(albumSales.4) 102 | vif(albumSales.4) 103 | 1/vif(albumSales.4) 104 | mean(vif(albumSales.4)) 105 | -------------------------------------------------------------------------------- /multiple regression/Album Sales 2.dat: 
-------------------------------------------------------------------------------- 1 | adverts sales airplay attract 2 | 10.256 330 43 10 3 | 985.685 120 28 7 4 | 1445.563 360 35 7 5 | 1188.193 270 33 7 6 | 574.513 220 44 5 7 | 568.954 170 19 5 8 | 471.814 70 20 1 9 | 537.352 210 22 9 10 | 514.068 200 21 7 11 | 174.093 300 40 7 12 | 1720.806 290 32 7 13 | 611.479 70 20 2 14 | 251.192 150 24 8 15 | 97.972 190 38 6 16 | 406.814 240 24 7 17 | 265.398 100 25 5 18 | 1323.287 250 35 5 19 | 196.65 210 36 8 20 | 1326.598 280 27 8 21 | 1380.689 230 33 8 22 | 792.345 210 33 7 23 | 957.167 230 28 6 24 | 1789.659 320 30 9 25 | 656.137 210 34 7 26 | 613.697 230 49 7 27 | 313.362 250 40 8 28 | 336.51 60 20 4 29 | 1544.899 330 42 7 30 | 68.954 150 35 8 31 | 785.692 150 8 6 32 | 125.628 180 49 7 33 | 377.925 80 19 8 34 | 217.994 180 42 6 35 | 759.862 130 6 7 36 | 1163.444 320 36 6 37 | 842.957 280 32 7 38 | 125.179 200 28 6 39 | 236.598 130 25 8 40 | 669.811 190 34 8 41 | 612.234 150 21 6 42 | 922.019 230 34 7 43 | 50 310 63 7 44 | 2000 340 31 7 45 | 1054.027 240 25 7 46 | 385.045 180 42 7 47 | 1507.972 220 37 7 48 | 102.568 40 25 8 49 | 204.568 190 26 7 50 | 1170.918 290 39 7 51 | 689.547 340 46 7 52 | 784.22 250 36 6 53 | 405.913 190 12 4 54 | 179.778 120 2 8 55 | 607.258 230 29 8 56 | 1542.329 190 33 8 57 | 1112.47 210 28 7 58 | 856.985 170 10 6 59 | 836.331 310 38 7 60 | 236.908 90 19 4 61 | 1077.855 140 13 6 62 | 579.321 300 30 7 63 | 1500 340 38 8 64 | 731.364 170 22 8 65 | 25.689 100 23 6 66 | 391.749 200 22 9 67 | 233.999 80 20 7 68 | 275.7 100 18 6 69 | 56.895 70 37 7 70 | 255.117 50 16 8 71 | 566.501 240 32 8 72 | 102.568 160 26 5 73 | 250.568 290 53 9 74 | 68.594 140 28 7 75 | 642.786 210 32 7 76 | 1500 300 24 7 77 | 102.563 230 37 6 78 | 756.984 280 30 8 79 | 51.229 160 19 7 80 | 644.151 200 47 6 81 | 15.313 110 22 5 82 | 243.237 110 10 8 83 | 256.894 70 1 4 84 | 22.464 100 1 6 85 | 45.689 190 39 6 86 | 724.938 70 8 5 87 | 1126.461 360 38 7 88 | 1985.119 360 35 5 89 | 1837.516 300 40 5 90 | 135.986 120 22 7 91 | 237.703 150 27 8 92 | 976.641 220 31 6 93 | 1452.689 280 19 7 94 | 1600 300 24 9 95 | 268.598 140 1 7 96 | 900.889 290 38 8 97 | 982.063 180 26 6 98 | 201.356 140 11 6 99 | 746.024 210 34 6 100 | 1132.877 250 55 7 101 | 1000 250 5 7 102 | 75.896 120 34 6 103 | 1351.254 290 37 9 104 | 202.705 60 13 8 105 | 365.985 140 23 6 106 | 305.268 290 54 6 107 | 263.268 160 18 7 108 | 513.694 100 2 7 109 | 152.609 160 11 6 110 | 35.987 150 30 8 111 | 102.568 140 22 7 112 | 215.368 230 36 6 113 | 426.784 230 37 8 114 | 507.772 30 9 3 115 | 233.291 80 2 7 116 | 1035.433 190 12 8 117 | 102.642 90 5 9 118 | 526.142 120 14 7 119 | 624.538 150 20 5 120 | 912.349 230 57 6 121 | 215.994 150 19 8 122 | 561.963 210 35 7 123 | 474.76 180 22 5 124 | 231.523 140 16 7 125 | 678.596 360 53 7 126 | 70.922 10 4 6 127 | 1567.548 240 29 6 128 | 263.598 270 43 7 129 | 1423.568 290 26 7 130 | 715.678 220 28 7 131 | 777.237 230 37 8 132 | 509.43 220 32 5 133 | 964.11 240 34 7 134 | 583.627 260 30 7 135 | 923.373 170 15 7 136 | 344.392 130 23 7 137 | 1095.578 270 31 8 138 | 100.025 140 21 5 139 | 30.425 60 28 1 140 | 1080.342 210 18 7 141 | 799.899 210 28 7 142 | 1071.752 240 37 8 143 | 893.355 210 26 6 144 | 283.161 200 30 8 145 | 917.017 140 10 7 146 | 234.568 90 21 7 147 | 456.897 120 18 9 148 | 206.973 100 14 7 149 | 1294.099 360 38 7 150 | 826.859 180 36 6 151 | 564.158 150 32 7 152 | 192.607 110 9 5 153 | 10.652 90 39 5 154 | 45.689 160 24 7 155 | 42.568 230 45 7 156 | 20.456 40 13 8 157 | 635.192 60 17 6 158 | 
1002.273 230 32 7 159 | 1177.047 230 23 6 160 | 507.638 120 0 6 161 | 215.689 150 35 5 162 | 526.48 120 26 6 163 | 26.895 60 19 6 164 | 883.877 280 26 7 165 | 9.104 120 53 8 166 | 103.568 230 29 8 167 | 169.583 230 28 7 168 | 429.504 40 17 6 169 | 223.639 140 26 8 170 | 145.585 360 42 8 171 | 985.968 210 17 6 172 | 500.922 260 36 8 173 | 226.652 250 45 7 174 | 1051.168 200 20 7 175 | 68.093 150 15 7 176 | 1547.159 250 28 8 177 | 393.774 100 27 6 178 | 804.282 260 17 8 179 | 801.577 210 32 8 180 | 450.562 290 46 9 181 | 26.598 220 47 8 182 | 179.061 70 19 1 183 | 345.687 110 22 8 184 | 295.84 250 55 9 185 | 2271.86 320 31 5 186 | 1134.575 300 39 8 187 | 601.434 180 21 6 188 | 45.298 180 36 6 189 | 759.518 200 21 7 190 | 832.869 320 44 7 191 | 56.894 140 27 7 192 | 709.399 100 16 6 193 | 56.895 120 33 6 194 | 767.134 230 33 8 195 | 503.172 150 21 7 196 | 700.929 250 35 9 197 | 910.851 190 26 7 198 | 888.569 240 14 6 199 | 800.615 250 34 6 200 | 1500 230 11 8 201 | 785.694 110 20 9 202 | -------------------------------------------------------------------------------- /multiple regression/Chamorro-Premuzic.dat: -------------------------------------------------------------------------------- 1 | Age Gender studentN studentE studentO studentA studentC lectureN lecturE lecturO lecturA lecturC 2 | Female 33 35 26 43 30 -14 3 6 10 3 | Female 18 29 19 31 27 -17 4 19 9 4 | 18 Female 35 29 30 45 26 -30 13 15 25 5 | 18 Female 38 43 17 49 29 -24 2 9 8 6 | Female 30 30 29 56 40 -21 11 21 12 7 | Female 19 31 31 35 22 -22 2 13 8 8 | 19 Female 33 27 33 60 38 -30 10 15 11 9 | Male 30 31 25 60 28 -23 15 15 24 10 | 18 Female 30 23 26 48 27 -25 5 7 18 11 | 18 Male 13 28 34 48 29 -30 10 9 7 12 | 18 Female 37 36 37 45 32 -29 28 25 16 13 | 18 Male 23 25 25 48 34 -13 5 4 14 | 18 Male 21 20 24 38 26 -18 6 13 8 15 | 0 Female 30 28 21 41 25 0 16 22 21 16 | 18 Female 8 27 34 55 40 -26 17 22 29 17 | 18 Female 21 28 38 37 27 -30 20 20 20 18 | 18 Female 40 30 23 37 40 -23 10 1 6 19 | Female 28 36 40 45 29 -5 24 22 19 20 | 18 Female 40 24 20 35 21 -29 8 21 18 21 | 17 Female 30 42 16 51 34 -26 12 24 21 22 | 18 Male 31 13 20 47 27 -30 -4 9 27 23 | Female 41 30 37 51 34 -26 0 14 16 24 | 18 Female 25 30 19 50 31 -28 17 23 17 25 | 18 Female 37 21 35 45 24 -30 28 8 17 26 | 19 Male 14 36 27 34 -27 11 7 14 27 | 18 Male 17 33 32 53 31 -17 6 2 12 28 | 17 Male 10 38 26 52 39 -22 -6 15 8 29 | 18 Female 28 29 25 58 34 -24 6 10 15 30 | 18 Female 19 35 23 46 30 -24 13 18 22 31 | Male 33 31 30 36 28 -24 19 17 14 32 | 18 Male 15 34 37 43 25 -17 11 2 10 33 | 18 Female 20 24 25 57 39 -29 2 26 20 34 | 18 Female 15 34 35 50 34 -24 22 -3 20 35 | 18 Male 25 38 20 47 58 0 0 6 4 36 | Male 28 33 30 46 23 -19 2 3 17 37 | 18 Male 26 26 28 25 -1 5 10 1 38 | 18 Female 26 35 30 51 33 -27 11 20 18 39 | 18 Female 16 29 33 49 34 -30 13 19 11 40 | 18 Female 21 38 36 45 29 -22 15 14 25 41 | 18 Female 57 42 | 18 Female 32 33 27 57 36 -30 9 28 4 43 | 18 Female 24 41 50 32 -24 15 4 10 44 | 19 Female 15 34 37 50 25 -26 9 19 15 45 | 18 Female 15 34 35 50 34 -26 0 14 16 46 | 18 Female 25 38 20 45 58 -24 9 2 23 47 | 19 Male 14 36 27 34 -28 17 23 17 48 | 18 Male 17 33 32 53 31 -27 11 7 14 49 | 17 Male 10 38 26 52 39 -6 17 18 19 50 | 18 Female 28 29 25 58 34 -17 6 2 12 51 | 18 Female 19 35 23 46 30 -22 -6 15 8 52 | Male 33 31 30 36 28 -24 6 10 15 53 | 18 Male 41 30 37 43 34 -24 13 18 22 54 | 18 Female 25 27 31 57 30 -24 19 17 14 55 | 18 Female 25 30 19 50 31 -17 11 2 10 56 | 18 Male 37 21 35 47 24 -29 2 26 20 57 | Male 28 33 30 46 23 -24 22 -3 20 58 | 18 Male 26 26 
28 25 0 0 6 4 59 | 18 Female 24 41 50 32 -1 5 10 1 60 | 21 Female 34 28 36 42 30 -30 16 -5 4 61 | 18 Female 25 27 31 55 30 -24 9 2 23 62 | 18 Female 18 26 22 43 29 -6 17 18 19 63 | 18 Female 12 26 29 45 34 -26 9 19 15 64 | 18 Female 20 24 25 55 39 -24 15 4 10 65 | 18 Female 18 26 22 43 29 -30 28 8 17 66 | 18 Female 12 26 29 45 34 -19 2 3 17 67 | 18 Male 26 26 28 25 -1 5 10 1 68 | 19 Female 33 27 33 60 38 -30 10 15 11 69 | 18 Female 8 27 34 55 40 -26 17 22 29 70 | 18 Female 25 30 19 50 31 -17 11 2 10 71 | 17 Female 30 42 16 51 34 -26 12 24 21 72 | 18 Female 25 30 19 50 31 -28 17 23 17 73 | 20 Female 24 31 23 55 40 -30 1 -4 23 74 | 18 Female 13 36 26 50 31 -30 7 21 23 75 | 18 Female 30 29 40 59 43 -30 26 25 18 76 | 19 Female 27 21 29 49 27 -22 23 19 21 77 | 23 Female 14 32 21 57 35 -15 0 15 23 78 | Female 17 33 28 41 33 -20 9 20 21 79 | 18 Male 19 27 22 51 32 -17 7 12 15 80 | 18 Male 17 34 31 49 29 -21 11 14 81 | 18 Female 26 36 28 41 29 -25 2 8 -1 82 | 17 Female 15 38 33 53 38 -24 12 13 19 83 | 19 Male 31 37 30 51 34 -16 7 5 5 84 | 18 Female 30 33 24 36 23 -27 9 17 15 85 | 18 Female 13 32 30 41 41 -23 6 11 19 86 | 19 Female 22 37 28 55 34 -30 13 17 26 87 | 20 Male 19 31 35 45 26 -3 88 | 16 Male 35 21 23 56 8 8 13 0 89 | 18 Male 10 38 40 50 32 -29 25 19 15 90 | 18 Female 49 -30 20 21 30 91 | 18 Female 20 37 30 41 30 -23 2 10 17 92 | 18 Male 14 24 32 43 24 -17 18 10 7 93 | 18 Female 27 39 24 42 32 -30 19 5 94 | 18 Male 20 21 38 21 -11 10 10 12 95 | 18 Female 18 31 23 50 -16 6 12 8 96 | 18 Female 16 35 26 49 29 -24 5 -1 4 97 | 18 Female 31 37 22 53 32 -30 16 23 24 98 | 17 Female 36 35 31 54 36 -30 6 3 24 99 | 19 Male 15 31 36 45 24 -18 17 1 7 100 | 18 Female 35 31 28 49 30 -25 27 16 28 101 | 19 Female 18 42 35 49 31 -30 19 21 14 102 | 18 Female 18 28 37 51 33 -19 18 5 14 103 | 18 Female 32 27 27 54 24 -5 17 13 11 104 | 18 Female 27 32 21 46 37 -21 1 23 18 105 | 17 Female 21 38 23 49 32 -30 8 1 19 106 | 17 Female 15 32 26 41 25 -20 1 12 5 107 | 19 Male 10 36 23 48 32 -20 17 19 16 108 | 18 Male 8 30 22 45 26 -25 4 -5 8 109 | 18 Female 19 29 35 43 31 -26 21 20 21 110 | 18 Female 25 28 25 38 28 -11 20 22 25 111 | 19 Male 5 31 33 43 41 -28 3 -3 23 112 | 18 Female 17 33 32 48 33 -30 7 -1 22 113 | 18 Female 27 30 32 50 32 -18 9 17 9 114 | 18 Female 16 34 31 46 34 -13 6 17 20 115 | 20 Female 19 35 39 61 39 -24 18 11 20 116 | 18 Female 13 30 25 51 39 -24 1 -14 30 117 | Female 39 31 42 42 23 -20 4 -6 -1 118 | 17 Male 36 23 29 41 23 -24 3 1 15 119 | 19 Female 26 34 38 64 39 -27 24 25 19 120 | 18 Female 14 39 41 57 37 -30 25 -5 28 121 | Female 23 27 27 42 33 -30 16 8 23 122 | Female 34 28 34 38 25 -28 12 6 29 123 | Female 23 26 32 50 34 -24 3 12 -6 124 | 18 Male 4 35 23 43 27 -13 0 4 12 125 | 26 Female 25 28 33 49 35 -24 16 3 16 126 | Female 20 34 29 60 42 -29 20 17 26 127 | 18 Male 19 27 30 51 23 -24 1 9 128 | 18 Female 38 29 46 45 33 -24 14 7 22 129 | 20 Male 6 33 39 34 20 -28 12 -3 8 130 | Female 25 36 29 61 39 -29 19 22 24 131 | 19 Male 7 38 33 62 42 -30 19 12 24 132 | Male 26 32 36 44 28 -6 10 0 13 133 | 18 Male 19 36 39 50 33 -17 15 6 21 134 | 17 Male 36 23 29 41 23 -24 3 1 15 135 | 18 Male 20 21 38 21 -11 10 10 12 136 | 18 Female 18 31 23 50 -16 6 12 8 137 | 17 Female 30 42 16 51 34 -26 12 24 21 138 | Female 39 31 42 42 23 -20 4 -6 -1 139 | 19 Female 27 21 29 49 27 -22 23 19 21 140 | 23 Female 14 32 21 57 35 -15 0 15 23 141 | 142 | 18 Female 28 19 21 48 35 -29 16 -7 10 23 143 | 19 Female 20 28 32 46 28 -10 21 17 26 25 144 | 18 Female 34 22 28 25 7 -6 3 -1 1 10 145 | 24 Female 20 32 32 44 27 -18 8 6 
5 18 146 | 18 Female 29 17 31 52 27 -25 20 8 -2 20 147 | 18 Female 32 29 26 47 33 -29 7 -4 12 28 148 | 18 Male 0 41 32 50 45 -30 9 9 9 16 149 | 18 Female 21 25 25 45 33 -17 11 5 13 16 150 | 22 Female 26 33 32 37 27 -3 7 14 6 12 151 | 17 Female 19 33 27 34 35 -23 11 -8 19 27 152 | 19 Female 20 17 36 54 38 -27 13 -2 1 26 153 | 19 Male 26 35 29 46 18 -27 28 -7 -2 154 | 19 Female 37 17 35 47 18 -16 8 5 9 16 155 | 18 Female 30 34 31 44 32 -20 5 7 12 30 156 | 20 Female 19 36 28 50 36 -19 15 10 -6 15 157 | 18 Female 28 27 37 32 26 -22 12 12 11 5 158 | 19 Female 34 29 30 47 23 -23 12 12 14 30 159 | 19 Male 13 29 27 47 26 -4 3 4 15 16 160 | 19 Female 35 20 23 46 28 -30 16 3 10 14 161 | 18 Female 15 31 23 51 32 -21 8 -3 14 21 162 | 18 Female 14 23 31 54 33 -14 10 1 3 11 163 | 19 Female 32 32 42 36 16 -29 24 27 12 18 164 | 19 Male 10 39 37 44 34 -30 25 12 2 15 165 | 18 Female 23 34 26 45 34 -15 16 4 3 11 166 | 18 Female 26 28 35 50 36 -17 12 10 2 13 167 | 19 Female 24 25 22 63 32 -18 15 5 4 16 168 | 18 Female 23 31 22 58 41 -27 20 18 2 16 169 | 19 Female 34 26 31 58 28 -27 11 -1 13 23 170 | 18 Female 13 35 37 37 24 -30 2 3 -1 26 171 | 18 Female 28 5 28 27 24 -26 9 1 -1 9 172 | 18 Female 20 42 24 46 31 -29 14 2 -7 12 173 | 19 Female 21 37 33 57 43 -30 12 15 20 19 174 | 18 Female 25 24 19 54 39 -24 13 16 18 21 175 | 18 Female 14 32 24 50 36 -17 16 9 20 23 176 | 19 Female 27 23 27 55 26 -28 10 3 14 24 177 | 24 Male 40 19 37 59 41 -19 27 27 -6 15 178 | 20 Male 24 29 35 34 24 -21 12 19 12 19 179 | 18 Female 27 26 34 47 30 -25 7 8 4 16 180 | 18 Female 25 28 33 43 35 -21 4 -5 7 15 181 | 19 Male 24 29 24 52 32 -15 10 7 12 7 182 | 18 Female 14 36 23 51 28 -30 13 10 -7 17 183 | 18 Female 22 33 31 50 41 -20 14 3 -7 24 184 | 18 Female 23 41 14 50 37 -25 25 10 -1 4 185 | 19 Female 43 33 30 43 17 -11 11 3 6 15 186 | 19 Female 35 28 36 50 30 -21 5 7 -3 19 187 | 18 Female 34 30 40 52 23 -20 8 18 22 9 188 | 17 Female 37 33 36 42 21 -23 11 2 10 18 189 | 18 Male 24 29 44 43 24 -21 23 30 26 -8 190 | 18 Female 19 36 26 25 -30 10 -5 1 15 191 | 19 Female 39 24 21 44 36 -22 2 -5 17 16 192 | 18 Female 28 36 30 50 39 -30 27 16 9 20 193 | 23 Male 24 23 37 47 15 -6 -4 -2 15 194 | 18 Female 18 43 29 57 34 -20 20 10 7 20 195 | 18 Male 2 39 40 33 20 -18 22 20 6 6 196 | 20 Female 14 30 41 51 27 -20 18 14 -1 22 197 | 18 Female 22 30 34 51 36 -30 1 6 2 17 198 | 19 Female 29 27 32 38 26 -19 4 4 3 13 199 | 20 Female 28 34 22 47 31 -21 22 19 22 27 200 | 18 Female 15 29 31 54 33 -16 12 11 -2 7 201 | 20 Female 32 25 37 43 17 -30 10 10 -11 9 202 | 18 Female 29 29 29 41 31 -14 14 1 -8 18 203 | 19 Male 16 30 29 38 31 -27 14 6 4 23 204 | 20 Male 30 9 36 35 22 5 16 19 13 10 205 | 21 Male 2 35 29 53 42 -27 11 6 10 24 206 | 19 Female 33 21 15 50 29 -29 8 4 22 27 207 | 29 Female 19 31 30 47 36 -23 10 10 1 12 208 | 18 Female 22 26 30 48 28 -10 3 10 4 10 209 | 20 Male 29 20 39 37 21 -20 6 15 -6 -5 210 | 19 Female 37 18 32 47 31 -30 12 6 13 18 211 | 19 Female 20 26 22 49 32 -26 2 10 7 16 212 | 19 Female 21 31 32 51 39 -27 7 -1 11 26 213 | 18 Male 25 32 37 44 30 -27 18 19 26 3 214 | 19 Female 35 11 32 58 44 -26 9 2 11 215 | 19 Female 40 21 29 50 26 -30 12 6 19 29 216 | 18 Female 33 26 30 40 26 -26 16 12 1 17 217 | 19 Female 41 26 26 56 31 -24 5 1 13 17 218 | 18 Female 20 40 31 58 41 -30 18 5 11 22 219 | 18 Female 11 32 28 45 36 -18 1 14 7 15 220 | 18 Female 21 27 35 46 28 -13 24 18 15 15 221 | 18 Female 29 31 28 53 29 -24 10 -3 0 25 222 | 19 Female 42 34 33 44 30 -25 0 10 9 17 223 | 18 Male 8 32 36 49 28 -24 13 7 15 20 224 | 20 Male 29 23 41 39 20 
-21 -3 3 4 21 225 | 19 Female 39 21 41 45 33 -21 19 24 -3 15 226 | 17 Female 24 33 32 49 30 -29 16 1 11 16 227 | 18 Female 24 33 27 50 37 -28 20 12 1 24 228 | 20 Female 19 20 14 39 30 -25 10 6 17 20 229 | 24 Male 22 38 40 45 23 -30 3 -7 7 18 230 | 18 Female 33 29 27 35 21 -20 19 7 2 8 231 | 18 Male 2 34 38 44 28 -30 10 -1 9 19 232 | 18 Female 23 35 22 47 31 -30 14 7 18 23 233 | 19 Female 13 33 34 37 27 -19 12 3 0 14 234 | 18 Female 35 27 40 47 25 -24 8 25 7 26 235 | 19 Female 25 36 28 54 31 -18 14 10 3 26 236 | 237 | 22 Female 20 25 30 44 32 -29 4 4 17 22 238 | 19 Female 19 38 29 41 33 -23 14 -1 -4 27 239 | 22 Female 27 26 26 51 38 -27 18 8 8 23 240 | 23 Female 13 23 30 60 44 -23 6 5 4 15 241 | 19 Female 19 34 39 55 31 -9 12 6 1 9 242 | 18 Male 23 26 23 51 25 -21 4 11 9 19 243 | 21 Female 11 28 29 54 39 -17 15 9 7 14 244 | 19 Female 16 35 28 43 29 -16 26 20 18 23 245 | 18 Male 22 18 24 41 19 -28 4 -9 0 17 246 | 33 Male 29 26 26 48 33 -27 10 2 -13 6 247 | 22 Female 24 33 27 51 42 -29 13 9 7 21 248 | 25 Female 27 25 33 38 28 -4 5 2 -4 10 249 | 24 30 31 38 33 0 4 -4 7 15 250 | 19 Female 20 31 32 48 33 -24 -3 1 1 12 251 | 40 Female 17 35 30 61 42 -30 6 4 -19 24 252 | 21 Male 19 11 32 27 14 -12 5 10 -6 21 253 | 2 Female 21 25 29 43 29 -27 14 14 2 12 254 | 26 Female 22 38 29 49 37 -23 13 18 1 -4 255 | 43 Female 28 31 27 45 30 -28 17 8 -2 13 256 | 20 Male 35 24 19 44 16 -26 12 4 8 18 257 | 21 Female 13 29 22 48 30 -30 20 16 -14 25 258 | 21 Female 24 33 28 50 34 -30 9 5 9 22 259 | 19 Female 25 31 25 55 34 -17 19 14 15 23 260 | 28 Female 12 34 30 38 35 -30 25 -5 -10 21 261 | 21 Female 15 27 27 44 23 -24 16 4 6 6 262 | 39 Female 10 36 28 50 39 -17 14 8 3 20 263 | 25 Male 30 30 42 41 34 -26 22 9 2 16 264 | 30 Female 37 28 21 40 29 3 10 265 | 19 Female 31 31 26 52 25 -30 -1 13 -5 12 266 | 27 Male 15 31 40 46 36 -19 14 11 -5 13 267 | 27 Female 25 24 22 46 37 -27 10 12 22 26 268 | 22 Male 26 38 29 53 22 -28 7 -2 -6 15 269 | 27 Female 20 33 29 47 31 -26 7 -1 -3 14 270 | 24 Female 27 28 38 45 25 -24 21 14 -4 19 271 | 24 Female 19 31 24 57 42 -22 19 2 13 20 272 | 20 Female 20 24 31 55 28 -30 15 0 -14 29 273 | 19 Female 18 34 22 46 38 -22 12 4 5 4 274 | 20 Female 18 26 39 52 44 -28 17 9 4 23 275 | 21 Female 14 38 36 50 42 -30 4 16 9 16 276 | 19 Female 19 37 25 51 36 -26 17 19 21 24 277 | 22 Female 22 23 25 54 36 278 | 39 Female 31 33 15 51 29 22 12 11 19 27 279 | 18 Male 26 33 19 56 37 -21 20 8 29 13 280 | 18 Female 40 35 31 51 24 -26 4 -10 -4 21 281 | 18 Female 27 45 23 55 36 282 | 18 Female 21 33 31 53 41 -23 9 6 23 25 283 | 21 Male 20 27 26 55 24 10 17 12 19 18 284 | 32 Female 22 27 24 33 24 -6 18 18 17 18 285 | 22 Female 26 25 23 46 -13 14 10 21 21 286 | 20 Female 20 31 25 37 26 -27 16 14 11 26 287 | 28 Male 25 20 22 51 24 4 10 13 1 13 288 | 18 Female 21 18 21 42 24 -30 13 20 5 26 289 | 18 Female 35 16 23 51 38 -22 7 6 22 16 290 | 18 Female 17 29 22 42 19 -18 7 -3 6 5 291 | 18 Female 27 33 27 47 30 -26 16 4 1 27 292 | 24 Female 16 27 30 53 31 -30 17 5 2 21 293 | 20 Female 24 26 28 50 31 -29 -5 -2 13 3 294 | 18 Female 26 18 18 42 24 -8 1 6 0 15 295 | 18 Male 23 19 23 41 24 -14 9 3 4 22 296 | 297 | 21 Male 27 30 24 43 36 -30 13 -2 7 20 298 | 18 Female 36 27 20 43 26 21 6 5 11 299 | 21 Male 3 34 38 49 32 -25 11 9 13 29 300 | 20 Female 9 35 25 52 32 -30 14 7 0 24 301 | 18 Male 17 39 33 32 17 -30 14 8 6 16 302 | 20 Female 28 33 21 47 31 -22 16 -5 -1 15 303 | 18 Male 28 27 20 41 23 -30 10 6 12 21 304 | 19 Male 14 35 34 38 25 -26 5 8 -3 16 305 | 20 Male 17 28 29 31 32 -25 18 3 10 306 | 21 Female 27 31 27 43 30 
-19 5 12 10 9 307 | 21 Female 24 25 22 48 33 -30 15 9 22 26 308 | 18 Female 36 27 30 53 36 -12 24 8 15 18 309 | 20 Female 36 17 28 47 39 -30 26 17 25 30 310 | 19 Male 24 28 18 46 35 -24 1 -9 25 30 311 | 20 Female 19 39 39 47 26 -14 10 5 -3 15 312 | 19 Male 21 19 29 54 42 -30 8 7 21 30 313 | 21 Female 37 28 30 46 29 -24 7 -10 26 24 314 | 20 Female 40 25 26 38 25 -30 6 5 2 26 315 | 19 Female 26 30 35 55 32 -16 11 5 9 23 316 | 20 Male 31 32 21 44 39 -18 10 -1 1 6 317 | 20 Male 14 36 22 33 19 -26 26 3 15 16 318 | 20 Male 10 31 36 35 24 -20 22 14 2 7 319 | 19 Female 16 25 23 39 38 -15 20 0 14 15 320 | 20 Female 24 32 34 49 36 -24 19 3 -5 26 321 | 20 Female 35 16 41 41 28 -29 17 2 2 8 322 | 20 Female 19 29 33 50 37 -30 24 5 23 17 323 | 18 Male 16 39 41 51 41 -30 23 20 0 18 324 | 19 Female 23 30 25 47 36 -13 24 13 16 16 325 | 19 Female 21 39 35 33 23 -19 23 13 5 -1 326 | 19 Female 44 18 28 67 34 -29 14 29 29 27 327 | 20 Female 27 27 30 29 24 -18 8 -3 -3 9 328 | 21 Female 40 23 33 50 40 -21 15 8 26 22 329 | 20 Female 29 33 35 41 30 -15 13 1 2 5 330 | 19 Male 21 30 19 47 36 -23 15 23 22 22 331 | 20 Female 18 42 34 50 35 -13 21 14 4 17 332 | 18 Male 19 25 31 41 30 -30 18 8 -4 23 333 | 23 Male 18 41 41 37 23 -30 17 3 16 26 334 | 21 Male 34 13 20 42 34 -11 20 11 8 12 335 | 18 Female 23 26 25 46 36 -30 13 9 -6 17 336 | 21 Female 27 30 24 39 31 -28 7 1 27 22 337 | 21 Female 8 43 31 52 32 -24 19 15 15 2 338 | 20 Male 8 29 21 56 35 -30 22 21 -3 12 339 | 21 Female 20 38 35 50 35 -29 15 15 15 13 340 | 341 | 21 Female 34 28 24 39 22 -29 0 -2 26 17 342 | 19 Female 36 26 18 34 17 6 2 8 343 | 19 Female 44 23 29 34 14 -19 22 19 20 15 344 | 20 Female 37 37 24 53 38 -30 7 -15 14 20 345 | 19 Female 20 24 24 43 34 -30 17 6 15 28 346 | 20 Female 23 36 24 41 32 -10 19 5 9 14 347 | 19 Male 17 37 22 28 19 -14 7 8 13 3 348 | 20 Female 15 46 18 37 35 -28 18 15 7 16 349 | 20 Female 39 38 26 31 12 -30 10 16 -12 20 350 | 30 Female 36 28 26 38 18 -29 25 23 8 18 351 | 20 Male 28 18 22 46 21 -20 18 13 22 20 352 | 21 Female 42 43 35 44 33 -20 23 13 19 26 353 | 21 Male 19 18 31 39 33 -30 13 8 7 30 354 | 19 Female 20 24 33 35 20 -22 18 7 -9 9 355 | 19 Female 30 25 28 43 30 -30 12 15 -6 20 356 | 19 Male 6 38 27 27 16 -16 11 9 -7 2 357 | 20 Female 12 41 27 36 31 -30 27 5 -2 19 358 | 19 Female 22 31 25 42 28 -30 12 17 0 20 359 | 20 Female 13 43 29 28 27 -24 28 24 14 29 360 | 20 Male 11 35 34 39 31 -30 11 1 -15 20 361 | 20 Male 24 28 36 35 9 -30 23 13 9 30 362 | 24 Male 22 31 30 42 22 -25 3 0 4 17 363 | 19 Female 32 28 30 44 26 -14 13 -9 3 27 364 | 19 Female 26 30 28 43 33 -27 7 -9 1 7 365 | 34 Male 21 33 40 54 35 -26 19 20 1 23 366 | 19 Male 8 33 31 39 25 -9 14 20 -3 -2 367 | 18 Female 34 20 20 48 22 -27 13 3 -6 18 368 | 20 Male 24 34 22 51 33 -24 19 3 7 20 369 | 20 Female 27 30 29 29 -30 26 7 -21 18 370 | 18 Female 31 -22 28 26 22 23 371 | 18 Female 7 37 34 40 33 -30 13 3 -1 4 372 | 19 Female 22 29 27 27 -22 28 16 23 21 373 | 19 Female 15 33 27 26 -23 14 8 -3 2 374 | 23 Female 12 34 36 47 27 -28 13 5 9 13 375 | 19 Female 22 30 23 23 -23 13 -3 9 13 376 | 20 Male 15 35 16 47 38 -22 14 6 22 21 377 | 21 Female 17 38 39 29 -30 8 -3 20 23 378 | 20 Female 25 33 34 41 30 -22 0 -3 3 18 379 | 20 Female 25 31 25 40 28 -23 12 7 14 16 380 | 19 Female 31 33 28 52 29 -20 13 6 12 17 381 | 19 Female 34 20 22 53 36 -29 21 8 12 10 382 | 20 Female 36 38 28 56 35 -30 4 9 16 23 383 | 19 Female 16 31 26 43 33 -17 10 19 17 27 384 | 20 Female 14 41 27 33 25 -27 14 2 13 21 385 | 20 Female 19 31 25 44 30 -25 8 7 10 8 386 | 19 Female 20 31 32 32 26 -30 14 17 
11 16 387 | 20 Female 27 25 21 33 28 -29 6 2 19 26 388 | 19 Female 29 24 21 43 27 -27 25 13 18 10 389 | 20 Female 29 28 33 41 25 -19 18 11 15 15 390 | 25 Female 37 20 32 40 29 -12 15 7 -4 15 391 | 19 Female 27 35 23 49 30 -29 6 2 18 392 | 21 Female 25 26 31 45 34 -30 18 6 6 16 393 | 21 Female 15 33 24 34 25 -30 -4 1 -3 17 394 | 20 Male 32 20 31 45 25 -9 9 8 4 10 395 | 19 Female 24 30 27 50 30 -28 11 5 6 22 396 | 19 Male 39 33 42 40 13 -27 13 18 16 12 397 | 20 Male 10 32 30 46 35 -19 21 22 20 22 398 | 19 Female 30 27 23 51 37 -30 13 2 12 14 399 | 31 Female 28 25 34 29 30 -10 13 13 4 23 400 | 401 | 23 Female 28 28 17 52 21 2 12 12 6 3 402 | 19 Female 24 29 25 36 29 -28 10 18 11 5 403 | 19 Female 30 34 23 45 25 -23 7 7 5 10 404 | 20 Female 28 30 31 41 31 -6 14 19 8 18 405 | 20 Male 26 31 32 44 17 -21 17 18 18 23 406 | 19 Male 24 32 25 46 22 -18 11 8 10 12 407 | 18 Female 36 33 39 44 26 -30 10 -3 5 4 408 | 19 Female 28 37 28 47 28 0 2 -4 0 0 409 | 19 Female 24 27 26 48 31 -25 12 11 12 5 410 | 18 Female 15 33 23 48 35 -24 5 8 10 12 411 | 19 Female 26 22 32 53 22 -30 7 15 14 16 412 | 19 Male 24 14 26 45 22 1 6 2 2 0 413 | 25 Male 26 29 33 43 23 -15 10 6 4 6 414 | 19 Female 13 33 28 39 29 -8 9 0 5 6 415 | 19 Female 20 33 27 48 34 8 15 16 15 16 416 | 22 Male 17 31 24 44 29 -30 14 -3 22 25 417 | 18 Female 28 35 34 39 18 -30 25 20 9 21 418 | 19 Female 25 30 25 36 25 -9 3 1 -5 -7 419 | 19 Female 54 -29 23 15 26 420 | 25 Female 27 25 19 57 35 25 19 25 26 27 421 | 19 Male 21 33 26 54 25 -23 17 16 4 20 422 | 19 Female 24 27 29 49 20 -30 10 -7 17 9 423 | 21 Male 12 40 40 63 32 25 28 27 28 28 424 | 19 Female 54 4 6 2 2 1 425 | 21 Male 27 38 24 58 19 -8 16 14 7 9 426 | 20 Male 21 25 27 44 26 13 15 3 16 4 427 | 20 Male 23 31 21 51 29 -25 19 17 18 24 428 | 22 Female 24 26 23 59 24 0 13 8 12 22 429 | 19 Male 18 21 36 44 26 -12 11 1 7 8 430 | 22 Male 40 32 29 73 24 -18 26 16 19 27 431 | 19 Female 10 37 43 62 39 -23 16 20 -4 22 432 | -------------------------------------------------------------------------------- /multiple regression/multiple regression.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | # 3 | # Multiple Regression in R 4 | # 5 | # From Andy Field's book Discovering Statistics Using R 6 | # 7 | ############################################################# 8 | # 9 | setwd("~/OneDrive/MIDS/W203/Data/multiple regression") 10 | 11 | # Load up the album2.dat file as discussed in 7.8.2.2 12 | album2 <- read.delim("Album Sales 2.dat", header = TRUE) 13 | 14 | summary(album2) 15 | # 16 | # From the Album Sales 2 data set there are 4 vectors in the 17 | # data frame: adverts, sales, airplay and attract 18 | # 19 | # The goal is to create a model that takes into account these 20 | # predictors on the sales as the outcome variable. 21 | # The first thing we can do is load the car library to use 22 | # the handy scatterplot() in that library. 23 | library(car) 24 | scatterplot(album2$sales, album2$adverts) 25 | 26 | # The data show a pretty wide variance for these variables 27 | # but we see that there is a line that may indicate a linear 28 | # relationship exists. 29 | # 30 | # Out of curiousity, let's see what the correlation is between 31 | # these two vectors. 
32 | cor.test(album2$sales, album2$adverts) 33 | 34 | # The correlation t-test shows that we can reject the null 35 | # hypothesis that there is no correlation between adverts and 36 | # album sales and support the hypothesis that there is a 37 | # correlation, p < 2.2e-16 (highly significant). The 38 | # correlation is 0.5785 with a 95% confidence interval of 39 | # 0.4781 - 0.66394. 40 | # 41 | # Performing a linear regression and obtaining the first model 42 | albumSales.2 <- lm(sales ~ adverts, data = album2) 43 | summary(albumSales.2) 44 | 45 | # The model shows that an increase of 1 unit of adverts results in 46 | # a 0.09612 unit increase in album sales, highly significant, with a multiple 47 | # R^2 = 0.3346, and an adjusted R^2 of 0.3313 48 | # 49 | # Let's take a look at the plot output 50 | plot(albumSales.2) 51 | # 52 | # There appears to be a bit of heteroscedasticity in the Residuals 53 | # vs. Fitted graph; not a huge amount, but it's there. 54 | # The QQ Plot actually looks somewhat reasonable. 55 | # Scale-Location plot shows a similar indication of 56 | # heteroscedasticity 57 | # Don't see anything with respect to leverage that is of concern 58 | # 59 | # 60 | # Now, we think that airplay is also a significant predictor of 61 | # album sales, so let's create another model that now includes 62 | # airplay 63 | albumSales.3 <- lm(sales ~ adverts + airplay, data = album2) 64 | summary(albumSales.3) 65 | 66 | # As we expected, airplay has a highly significant non-zero 67 | # coefficient. For each unit increase in airplay, album sales 68 | # increase 3.59 units. The multiple R^2 has increased to 0.6293, 69 | # so our model accounts for almost 63% of the variation in album sales. 70 | # 71 | plot(albumSales.3) 72 | # 73 | # A little better on homoscedasticity than the first model 74 | # QQ Plot still looks reasonable 75 | # Other diagnostics look good 76 | # 77 | # Now let's add in the final variable, attractiveness of the band 78 | albumSales.4 <- lm(sales ~ adverts + airplay + attract, data = album2) 79 | summary(albumSales.4) 80 | # 81 | # Now we have all 3 beta coefficients and all are statistically 82 | # significantly different from zero. 83 | # The R^2 has increased to 0.665 84 | plot(albumSales.4) 85 | # 86 | # Diagnostics from the plot() all look OK 87 | # 88 | # We can examine the standardized beta estimates by loading the 89 | # QuantPsyc library and using lm.beta() 90 | library(QuantPsyc) 91 | library(boot) 92 | 93 | lm.beta(albumSales.4) 94 | # 95 | # The betas are given in terms of standard deviations. So now 96 | # for adverts with a standardized beta of 0.511 we can say that 97 | # as the advert budget increases by 1 SD, album sales increase 98 | # by 0.511 SD. 99 | # Also, every 1 SD increase in airplay results in a 0.512 SD increase 100 | # in album sales. 101 | # Finally, every 1 SD increase in band attractiveness results 102 | # in a 0.192 SD increase in album sales. 103 | # 104 | # Compute the confidence intervals for the model 105 | confint(albumSales.4) 106 | 107 | # We see that our coefficients fall within each of these 108 | # confidence intervals and that the confidence intervals don't 109 | # cross 0.
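# As a cross-check on the standardized betas quoted above (an optional sketch using the
# album2 predictors loaded earlier): refitting the model on z-scored variables gives
# slopes that should match the lm.beta() output.
album2.scaled <- data.frame(scale(album2[, c("sales", "adverts", "airplay", "attract")]))
coef(lm(sales ~ adverts + airplay + attract, data = album2.scaled))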
110 | # 111 | # Compare models using anova 112 | anova(albumSales.2, albumSales.3) 113 | 114 | # Adding airplay significantly improved the model compared to the adverts-only model, 115 | # F(1, 197) = 156.57, p < .001 116 | anova(albumSales.3, albumSales.4) 117 | 118 | # Compared to the previous model, the final model is an improvement, 119 | # F(1, 196) = 20.681, p < .001 120 | 121 | # Compute some useful diagnostic statistics and place them in the 122 | # dataframe 123 | album2$standardized.residuals <- rstandard(albumSales.4) 124 | album2$studentized.residuals <- rstudent(albumSales.4) 125 | album2$cooks.distance <- cooks.distance(albumSales.4) 126 | album2$dfbeta <- dfbeta(albumSales.4) 127 | album2$dffit <- dffits(albumSales.4) 128 | album2$leverage <- hatvalues(albumSales.4) 129 | album2$covariance.ratios <- covratio(albumSales.4) 130 | # 131 | album2$large.residual <- (album2$standardized.residuals > 2) | (album2$standardized.residuals < -2) 132 | # how many residuals do we have outside 2 SD? 133 | sum(album2$large.residual) 134 | # 135 | # There are 12 data points with standardized residuals outside +/- 2 (roughly the 95% range). 136 | # The next command will display those rows and the specified columns 137 | album2[album2$large.residual, c("sales","airplay","attract","adverts","standardized.residuals")] 138 | # 139 | # There's nothing too huge except for data point 169, which is ~ 3.1 140 | # Let's take a look at some of the other diagnostic variables for these 141 | # 12 data points. 142 | album2[album2$large.residual, c("cooks.distance", "leverage", "covariance.ratios")] 143 | # 144 | # We know that the CVR upper bound is 1 + 3(k + 1)/n = 1 + 3(3 + 1)/200 145 | # and the CVR lower bound is 1 - 3(k + 1)/n = 1 - 3(3 + 1)/200 146 | 1 + 12/200 147 | 1 - 12/200 148 | # 1.06 and 0.94, respectively. 149 | # Looking at the covariance ratios, the only point outside these bounds 150 | # is 169, again. However, the Cook's distance value for this point is 151 | # of little concern, so this point is not exercising undue influence 152 | # 153 | # Durbin-Watson test of independence 154 | dwt(albumSales.4) 155 | # 156 | # This shows that we can't reject the null hypothesis of no autocorrelation 157 | # 158 | # Multicollinearity 159 | vif(albumSales.4) 160 | 1/vif(albumSales.4) # Tolerance is 1/VIF 161 | mean(vif(albumSales.4)) 162 | # 163 | # If the largest VIF > 10 then there is cause for concern 164 | # If the average VIF is substantially > 1 then the regression may be biased 165 | # Tolerance < 0.1 indicates a serious issue 166 | # Tolerance < 0.2 indicates a potential issue
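# The casewise diagnostics computed above are also available in a single call from base R;
# summarising the result flags the cases that R marks as potentially influential (a quick
# complementary check on the albumSales.4 model fitted above).
summary(influence.measures(albumSales.4))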
-------------------------------------------------------------------------------- /multiple regression/neste-model.r: -------------------------------------------------------------------------------- 1 | # Today's Agenda 2 | # Using the 1994 GSS you will examine the dependent variable “partyid”, which is “political party identification” 3 | # (i.e., strong democrat to strong republican on a 7-point scale). You will treat this as a metric variable for the purposes of this investigation. 4 | # Your task: 5 | # 0) load the GSS1994_daughters dataset. 6 | # 1) Recode “sex of the first child” (kdsex1) into a dummy variable (daughter1) where female is coded as 1 or ‘true’. 7 | # Examine partyid and turn it into a numeric variable (the as.numeric function should be helpful here). 8 | # 2) Conduct a linear regression with “daughter1” as your independent variable. 9 | # 3) Conduct another linear regression with “daughter1” and one or more additional independent variables. You may use the following function to view the description 10 | # of each variable: 11 | # desc = function(var, df) { 12 | # attr(df,"var.labels")[match(var, colnames(df))] 13 | # } 14 | # For example, typing desc(“partyid”, GSS) will return a description of partyid. You may also use the following command to pull up a table of all variables and descriptions: 15 | # data.frame(names = colnames(GSS), labels = attr(GSS, "var.labels")) 16 | # 4) Assess the improvement, if any, from the first model to the second model. 17 | # 5) Did the coefficient for daughter1 change from the first model to the second model? If so, how do you explain this change? 18 | 19 | 20 | # X (first child) -> Y (Party ID) 21 | 22 | # Z = control, want it to be exogenous to X and Y. 23 | 24 | # Post-treatment bias 25 | 26 | # X -> Z -> Y 27 | 28 | # X -> Y will be downwardly biased. 29 | 30 | 31 | setwd('C:/Users/thomasa/Downloads/MIDS/EAD/Week13') 32 | load('daughters.rdata') 33 | 34 | #1) 35 | #dummy code the daughter variable 36 | GSS$daughter1 = as.numeric(GSS$kdsex1 == 'female') 37 | 38 | #create numeric party variable 39 | GSS$party_num = as.numeric(GSS$partyid) 40 | 41 | #Other party is coded as 8 so recode to NA 42 | GSS$party_num[GSS$party_num==8] = NA 43 | 44 | #check distribution 45 | table(GSS$party_num) 46 | hist(GSS$party_num) 47 | 48 | #2) 49 | mod = lm(party_num ~ daughter1,data=GSS) 50 | mod 51 | summary(mod) 52 | plot(mod) 53 | 54 | #3) 55 | #desc() looks up the human-readable label for a variable, stored in the data frame's "var.labels" attribute 56 | desc = function(var, df) { 57 | attr(df,"var.labels")[match(var, colnames(df))] 58 | } 59 | desc('age',GSS) 60 | 61 | #Try Age 62 | mod1 = lm(party_num ~ daughter1 + age,data=GSS) 63 | mod1 64 | summary(mod1) 65 | plot(mod1) 66 | #not statistically significant - don't include 67 | 68 | #Try Race 69 | #recode to remove iap 70 | GSS$race = factor(as.character(GSS$race),levels=c('black','white','other')) 71 | 72 | #contrasts 73 | contrasts(GSS$race) 74 | 75 | #model 76 | mod2 = lm(party_num ~ daughter1 + race,data=GSS) 77 | mod2 78 | summary(mod2) 79 | plot(mod2) 80 | 81 | 82 | 83 | # 4) 84 | #model comparison base with Age 85 | AIC(mod) 86 | AIC(mod1) 87 | 88 | #anova 89 | anova(mod,mod1) 90 | #no improvement 91 | 92 | #try race 93 | AIC(mod) 94 | AIC(mod2) 95 | anova(mod,mod2) 96 | 97 | # 5) Does the effect of daughter1 change from base model (daughter1) to model2 (daughter1 + race) 98 | # highly overlapping confidence intervals 99 | confint(mod) 100 | confint(mod2) 101 | 102 | 103 | t.test(GSS$party_num[GSS$race=='black'],GSS$party_num[GSS$race=='other']) 104 | 105 | 106 | 107 | #income: peek at the numeric lower bound extracted from each income bracket label 108 | cbind(as.character(GSS$income),as.numeric(gsub('.*\\$| .*','',as.character(GSS$income)))) 109 | 110 | 111 | mod3 = lm(party_num ~ daughter1 + race + polviews,data=GSS) 112 | mod3 113 | summary(mod3) 114 | plot(mod3) 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /plots/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab=
pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("/Users/rcordell/Documents/Data/W203/R Book Data") 53 | setwd("/Users/rcordell/Documents/R/W203") 54 | library(ggplot2) 55 | facebookData = read.delim("FacebookNarcissism.dat", header = TRUE) 56 | graph <- ggplot(facebookData, aes(NPQC_R_Total, Rating)) 57 | graph + geom_point() 58 | graph + geom_point(shape = 17) 59 | graph + geom_point(shape = 7) 60 | graph + geom_point(shape = 7, size = 6) 61 | graph + geom_point(shape = 2, size = 6) 62 | graph + geom_point(shape = 3, size = 6) 63 | graph + geom_point(shape = 4, size = 6) 64 | graph + geom_point(shape = 17) 65 | graph + geom_point(shape = 4, size = 6) 66 | graph + geom_point(shape = 5, size = 6) 67 | graph + geom_point(aes(color = Rating_Type)) 68 | graph + geom_point(color = "RED") 69 | graph + geom_point(aes(color = Rating_Type), position = "jitter") 70 | graph + geom_point(aes(shape = Rating_Type), position = "jitter") 71 | examData <- read.delim("Exam.dat", header = TRUE) 72 | examData <- read.delim("Exam Anxiety.dat", header = TRUE) 73 | scatter <- ggplot(examData, aes(Anxiety, Exam)) 74 | scatter + geom_point() 75 | scatter + geom_point() + labs(x = "Exam Anxiety", y = "Exam Performance %") 76 | scatter + geom_point() + geom_smooth() + labs(x = "Exam Anxiety", y = "Exam Performance %") 77 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 78 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED") + labs(x = "Exam Anxiety", y = "Exam Performance %") 79 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED", se = F) + labs(x = "Exam Anxiety", y = "Exam Performance %") 80 | scatter + geom_point() + geom_smooth(method = "lm", alpha = 0.1, fill = "Blue") + labs(x = "Exam Anxiety", y = "Exam Performance %") 81 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 82 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 83 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 84 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 85 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 86 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 87 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 88 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 89 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 90 | 
scatter + geom_point(color = Gender) + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 91 | summary(examData) 92 | scatter <- ggplot(examData, aes(Anxiety, Exam)) 93 | scatter + geom_point(aes(color = Gender) + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 94 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 95 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 96 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 97 | scatter + geom_point() 98 | scatter + geom_point() + geom_smooth(method = "lm") 99 | scatter + geom_point() + geom_smooth(method = "lm", aes(fill = Gender), alpha = 0.1) 100 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 101 | scatter <- ggplot(examData, aes(Anxiety, Exam, color = Gender)) 102 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 103 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 104 | -------------------------------------------------------------------------------- /plots/ChickFlick.dat: -------------------------------------------------------------------------------- 1 | gender film arousal 2 | Male Bridget Jones' Diary 22 3 | Male Bridget Jones' Diary 13 4 | Male Bridget Jones' Diary 16 5 | Male Bridget Jones' Diary 10 6 | Male Bridget Jones' Diary 18 7 | Male Bridget Jones' Diary 24 8 | Male Bridget Jones' Diary 13 9 | Male Bridget Jones' Diary 14 10 | Male Bridget Jones' Diary 19 11 | Male Bridget Jones' Diary 23 12 | Male Memento 37 13 | Male Memento 20 14 | Male Memento 16 15 | Male Memento 28 16 | Male Memento 27 17 | Male Memento 18 18 | Male Memento 32 19 | Male Memento 24 20 | Male Memento 21 21 | Male Memento 35 22 | Female Bridget Jones' Diary 3 23 | Female Bridget Jones' Diary 15 24 | Female Bridget Jones' Diary 5 25 | Female Bridget Jones' Diary 16 26 | Female Bridget Jones' Diary 13 27 | Female Bridget Jones' Diary 20 28 | Female Bridget Jones' Diary 11 29 | Female Bridget Jones' Diary 19 30 | Female Bridget Jones' Diary 15 31 | Female Bridget Jones' Diary 7 32 | Female Memento 30 33 | Female Memento 25 34 | Female Memento 31 35 | Female Memento 36 36 | Female Memento 23 37 | Female Memento 14 38 | Female Memento 21 39 | Female Memento 31 40 | Female Memento 22 41 | Female Memento 14 42 | -------------------------------------------------------------------------------- /plots/DownloadFestival.dat: -------------------------------------------------------------------------------- 1 | ticknumb gender day1 day2 day3 2 | 2111 Male 2.64 1.35 1.61 3 | 2229 Female .97 1.41 .29 4 | 2338 Male .84 5 | 2384 Female 3.03 6 | 2401 Female .88 .08 7 | 2405 Male .85 8 | 2467 Female 1.56 9 | 2478 Female 3.02 10 | 2490 Male 2.29 11 | 2504 Female 1.11 .44 .55 12 | 2509 Male 2.17 13 | 2510 Female .82 .2 .47 14 | 2514 Male 1.41 15 | 2515 Female 1.76 1.64 1.58 16 | 2520 Male 1.38 .02 17 | 2521 Female 2.79 18 | 2529 Male 1.5 19 | 2533 Female 1.91 2.05 20 | 2535 Female 2.32 21 | 2538 Male 2.05 22 | 2549 Male 2.17 .7 .76 23 | 2551 Female 2.05 24 | 2558 Female 1.61 25 | 2562 Female 1.66 .85 26 | 2565 Female 2.3 27 | 2566 Female 2.76 28 | 2568 Female 1.44 29 | 2586 Female 1.06 30 | 2601 Male 3.23 31 | 2602 Male 
.97 .38 .76 32 | 2604 Male 2.57 .11 .02 33 | 2606 Female .26 34 | 2609 Female .47 35 | 2611 Female 1.73 36 | 2612 Male 1.94 .82 1.67 37 | 2616 Male 1.91 38 | 2624 Female 2.08 .91 .96 39 | 2633 Female 1.91 40 | 2642 Female 1.42 41 | 2644 Male 1.5 42 | 2662 Female .11 43 | 2663 Male 1.67 44 | 2664 Male 2.08 45 | 2669 Female 2.05 46 | 2670 Male 2 47 | 2675 Male 1.52 48 | 2676 Female 1.58 49 | 2677 Male 1.28 .38 .14 50 | 2681 Male 1.88 51 | 2685 Female 1.32 52 | 2686 Female 2.09 53 | 2687 Male 2 54 | 2688 Female 2.64 55 | 2689 Male .85 .32 .52 56 | 2692 Female 2.47 .23 .38 57 | 2698 Female 1.79 58 | 2709 Female 1.64 59 | 2710 Male 1.32 60 | 2712 Female 2.97 61 | 2726 Female 1.44 .14 62 | 2727 Female 2.02 63 | 2728 Male 1.79 64 | 2730 Male 1.34 65 | 2731 Female 2.29 1.9 66 | 2732 Female 1.66 67 | 2734 Female .6 68 | 2735 Female 1.76 .76 .29 69 | 2736 Female 1.5 70 | 2737 Female 2.08 .7 71 | 2748 Female 1 .55 72 | 2752 Male 1.73 73 | 2756 Female 1.05 .38 74 | 2758 Male 2.81 75 | 2759 Female 1.52 76 | 2768 Male 1.47 77 | 2769 Female 2.64 78 | 2770 Female 2.2 1.18 79 | 2771 Male .55 .79 80 | 2772 Male 2.29 81 | 2773 Female 2 82 | 2775 Male 2.23 83 | 2779 Female 2.45 84 | 2780 Male 1.2 85 | 2783 Male 2.91 2.08 2.11 86 | 2784 Female 1.14 1 87 | 2788 Male 1.88 88 | 2789 Male .94 89 | 2791 Female 1.85 90 | 2794 Female 2.58 91 | 2796 Female .61 92 | 2799 Male .7 93 | 2806 Male 1.38 94 | 2807 Female 1.94 95 | 2811 Female 2.29 96 | 2812 Male 1.59 97 | 2813 Female 2.46 98 | 2814 Female 1.67 .14 99 | 2825 Female 2.02 .58 .5 100 | 2827 Male 1.5 101 | 2828 Female 2.7 1.7 1.91 102 | 2829 Female 1.61 103 | 2830 Male 2.29 104 | 2831 Female .97 1.06 .76 105 | 2839 Male 1.85 106 | 2842 Female 2.76 107 | 2857 Male 1.64 108 | 2858 Male 1.17 109 | 2859 Male 1.57 110 | 2862 Female 2.23 111 | 2863 Female 2.05 1.58 2.15 112 | 2875 Female 2.05 113 | 2876 Female 2.94 114 | 2883 Female 2.39 115 | 2890 Male 1.94 116 | 2894 Female 2.12 117 | 2895 Female 1.11 118 | 2897 Female .97 119 | 2898 Female 1.35 120 | 2899 Female 2.81 2.08 121 | 2900 Female 2.5 122 | 2901 Male 1.87 123 | 2920 Male 1.33 124 | 2921 Female 1.26 125 | 2922 Female 1.44 126 | 2923 Male .55 127 | 2924 Female 1.75 128 | 2925 Female 2.08 129 | 2932 Male .85 130 | 2933 Female 2.52 131 | 2935 Female 3 132 | 2936 Female 1.41 133 | 2937 Female 1.08 134 | 2938 Male 1.2 1.38 1.5 135 | 2940 Male 1.94 1.44 136 | 2941 Female 2.26 1.73 1.73 137 | 2942 Male 1.41 138 | 2948 Female 2.5 139 | 2952 Male 2.17 140 | 2953 Male 1.82 1.11 141 | 2954 Female 1.44 1.14 142 | 2956 Female 1.66 143 | 2957 Male 1.82 144 | 2958 Male 1.26 145 | 2959 Female 2.67 146 | 2961 Female 1.47 147 | 2962 Female 1.84 148 | 2964 Female 2.58 149 | 2966 Male 1.73 150 | 2967 Male 1.23 151 | 2968 Male 2.32 152 | 2972 Female 2.67 153 | 2974 Female 1.02 154 | 2975 Female 1.66 2.12 2.7 155 | 2976 Female 1.88 156 | 2977 Female 1.91 157 | 2978 Female 1.64 158 | 2979 Male 1.34 159 | 2982 Male 1.85 160 | 2983 Male 2.08 161 | 2984 Male 1.02 162 | 2985 Female 1.79 163 | 2988 Female 1.94 164 | 2989 Female 3.26 1.97 1.67 165 | 2990 Male 1.14 .58 .11 166 | 3008 Female 1.5 .7 .38 167 | 3009 Female 2.03 168 | 3010 Female 2.24 169 | 3013 Female 1.11 170 | 3014 Female 2.21 171 | 3016 Male 1.94 172 | 3017 Male 2.41 173 | 3018 Male .88 174 | 3019 Female 1.17 1.35 175 | 3027 Male 2.23 176 | 3028 Female 1.64 177 | 3029 Female 2.14 178 | 3030 Male .11 .29 179 | 3031 Male 2.17 180 | 3032 Female 1.67 181 | 3033 Female 1 182 | 3034 Female .88 183 | 3035 Male 2.2 184 | 3048 Male 2.17 185 | 3049 Female 2.32 186 | 3051 Male 1.64 
187 | 3052 Female 3 188 | 3053 Female 2.38 .85 189 | 3054 Female 1.6 1.02 190 | 3055 Female 1.58 191 | 3056 Female 2.61 192 | 3057 Male 1.44 .05 .2 193 | 3064 Male 1.57 194 | 3065 Female 2.32 195 | 3068 Female 1.14 196 | 3069 Male 1.93 197 | 3070 Male 2.47 198 | 3072 Female 2.29 199 | 3073 Female 1 200 | 3092 Female 1.58 201 | 3093 Male 2.44 202 | 3094 Female .83 203 | 3095 Male 2.71 .78 .33 204 | 3096 Female 1.73 205 | 3097 Male 1.58 206 | 3098 Male 1.5 207 | 3100 Female 1.05 208 | 3106 Male 2.05 209 | 3107 Female 2.63 210 | 3109 Male 2.55 2.29 211 | 3111 Female 2 212 | 3112 Male 2 213 | 3114 Female 1.32 214 | 3116 Female 3.14 215 | 3118 Male 1.44 216 | 3129 Male 1.85 .23 217 | 3131 Female 1.41 .44 218 | 3132 Female 1.94 219 | 3133 Male 2.91 220 | 3135 Female 1.85 221 | 3136 Male 1.7 222 | 3137 Female 2.23 223 | 3138 Male 1.11 224 | 3139 Female 1.47 225 | 3146 Female 2.2 226 | 3147 Male 1.82 227 | 3148 Female 1.42 228 | 3168 Female 2.44 229 | 3171 Female 2.66 230 | 3172 Male 1.52 231 | 3173 Female 1.35 .47 .73 232 | 3180 Female 1.29 233 | 3182 Female 2.32 234 | 3190 Male .78 235 | 3192 Female 2.84 236 | 3202 Male .97 237 | 3236 Female 1.52 238 | 3245 Female 1.7 239 | 3246 Female .94 1.17 1.29 240 | 3247 Male 1.41 241 | 3248 Male 1.79 242 | 3249 Male 1.08 .44 .44 243 | 3250 Male 1.47 244 | 3251 Female 1.79 .47 245 | 3253 Male 2 246 | 3254 Female .76 247 | 3255 Male 2.2 248 | 3256 Female .94 .17 249 | 3257 Female 1.38 250 | 3258 Female 1.38 .85 251 | 3260 Male .32 252 | 3261 Male 2.58 253 | 3262 Male .51 254 | 3264 Female .32 255 | 3267 Female .91 1.11 1.7 256 | 3273 Male 1.51 257 | 3275 Female 1.47 258 | 3276 Female 2.5 259 | 3277 Female 2.26 260 | 3278 Female 2.81 261 | 3279 Female 1.87 262 | 3281 Female 2 263 | 3282 Female 2.23 .41 1.02 264 | 3284 Male 2 .76 265 | 3290 Female 1.41 266 | 3291 Male 1.64 267 | 3292 Male 1.64 268 | 3296 Male 1.26 269 | 3306 Female 1.52 .55 1.88 270 | 3307 Male 2.44 1.02 .76 271 | 3308 Female 2.18 272 | 3309 Female 3.02 273 | 3310 Female 1.02 274 | 3311 Female 2.88 275 | 3312 Male 1.54 276 | 3313 Female 1.64 277 | 3314 Female 2.44 2.5 1.7 278 | 3315 Female 1.29 279 | 3316 Female 1.61 .32 .26 280 | 3321 Female 1.77 281 | 3325 Male .91 .17 282 | 3326 Female .85 .2 .38 283 | 3327 Male .85 .52 .44 284 | 3328 Female 1.5 285 | 3329 Male 1.05 .23 286 | 3338 Female 3.38 287 | 3340 Female 1.42 .52 2 288 | 3341 Female 1.85 289 | 3348 Female 1.91 .84 290 | 3349 Male .82 .26 291 | 3350 Female 1.32 .76 292 | 3351 Female 2.23 .85 .39 293 | 3352 Female 1.47 1.52 .17 294 | 3363 Female 2.7 295 | 3365 Male 1.58 296 | 3366 Male 1 297 | 3367 Female 1.44 298 | 3368 Female 2 299 | 3369 Male 1.6 300 | 3370 Female 2.32 2.53 1.67 301 | 3371 Female 3.41 302 | 3372 Female 2.02 303 | 3373 Male .64 .52 304 | 3374 Male 3.58 3.35 305 | 3375 Male 1.5 306 | 3376 Male 1.08 307 | 3377 Female 1.52 308 | 3378 Male 1.26 309 | 3379 Female 1.68 310 | 3380 Male 1.47 1.08 .58 311 | 3390 Female 1.47 312 | 3391 Female 1.67 1.55 313 | 3392 Female 2.47 1.97 314 | 3393 Male 1.82 315 | 3394 Female 2.17 316 | 3395 Female 3.21 317 | 3397 Female 1.6 1.38 1.02 318 | 3398 Female .32 319 | 3407 Female .55 320 | 3411 Female 1.42 321 | 3412 Male 1.14 322 | 3413 Female 2.64 323 | 3416 Female 2.58 324 | 3418 Female 2.02 325 | 3419 Male 2 326 | 3420 Female 2.9 327 | 3423 Male 1.82 328 | 3429 Female .5 329 | 3431 Male 1.53 330 | 3449 Female 2.48 331 | 3450 Female 2.05 332 | 3453 Male 2.52 333 | 3454 Female 1.88 334 | 3455 Male 2.73 335 | 3456 Female 2.88 336 | 3457 Female 1.67 337 | 3458 Female 1.93 338 | 3460 Female 
1.67 339 | 3461 Male 1.2 340 | 3464 Female 2.75 341 | 3467 Female 1.94 .97 342 | 3468 Female .59 343 | 3469 Female 1.5 344 | 3477 Male 1.58 .94 .94 345 | 3480 Female 2.23 .11 .17 346 | 3483 Female 2.35 347 | 3490 Female 2.55 .82 .29 348 | 3493 Male 1.55 349 | 3494 Female 2.31 350 | 3495 Male 2.23 351 | 3500 Female .67 .5 352 | 3501 Male 2.51 353 | 3503 Male 1.08 .58 .61 354 | 3510 Female 2.44 355 | 3511 Female .23 .14 356 | 3512 Female 2.17 357 | 3518 Male 1.9 1.17 358 | 3519 Female 1.67 .44 359 | 3521 Female 2 .58 .52 360 | 3522 Female 2.44 361 | 3523 Male 1.44 362 | 3524 Male .82 363 | 3525 Female 2.5 364 | 3526 Male 1.82 365 | 3536 Female 1.97 366 | 3539 Female 2.52 367 | 3540 Female .05 368 | 3542 Female 2.08 369 | 3545 Female 2.39 370 | 3546 Male 1.45 .82 371 | 3551 Male 2.58 372 | 3557 Female 2.12 373 | 3565 Female 2.02 .76 .55 374 | 3567 Male 1.78 1.14 .44 375 | 3568 Male .73 .17 .76 376 | 3569 Female 2.26 .9 1.85 377 | 3570 Female 2.79 378 | 3571 Male .43 .67 .14 379 | 3572 Male .52 .38 .7 380 | 3573 Female 2.32 381 | 3574 Male 2.22 382 | 3575 Male .58 383 | 3576 Male 2 384 | 3577 Female .7 385 | 3586 Male 1 386 | 3587 Male .3 387 | 3588 Male 1.52 388 | 3593 Female 1.58 .35 389 | 3594 Male 2.34 390 | 3595 Female .79 391 | 3596 Female 2.26 392 | 3597 Male 2.35 393 | 3598 Female 1.7 394 | 3599 Female 3.09 395 | 3600 Female 1.52 396 | 3601 Female .35 397 | 3602 Female 2.7 398 | 3603 Female 1.64 399 | 3605 Male .82 400 | 3606 Male 2.73 401 | 3607 Female 2.23 402 | 3609 Female 1.06 403 | 3610 Male 2.05 .2 .35 404 | 3613 Female 1.73 1.44 405 | 3614 Female .93 .91 406 | 3620 Female 2.5 2.44 407 | 3621 Male 1.44 408 | 3622 Female 2.88 409 | 3625 Female .67 .23 .44 410 | 3626 Male 1.85 .35 411 | 3627 Female 1.21 .79 412 | 3628 Male 1.06 .76 .7 413 | 3629 Male .61 .26 .33 414 | 3631 Female 2 415 | 3634 Female 1.17 .73 1.17 416 | 3635 Female 1.48 .79 1.55 417 | 3645 Female 1.55 418 | 3646 Female 3.29 419 | 3647 Male 1.47 420 | 3648 Male .96 421 | 3649 Male 1 1.11 1.2 422 | 3652 Male 1.47 423 | 3653 Female 2.55 2.38 424 | 3654 Female .44 .06 425 | 3655 Female 2.35 2.41 426 | 3656 Female 1.71 .85 427 | 3659 Male 1.84 .58 .7 428 | 3660 Female 1.11 .23 .55 429 | 3667 Female 1.38 430 | 3669 Male .88 431 | 3670 Female .94 432 | 3672 Female 1.91 433 | 3676 Male 2.76 434 | 3677 Male 1.55 .32 .47 435 | 3678 Male 2.67 436 | 3680 Female 1.03 .29 .72 437 | 3696 Female 2.5 438 | 3697 Male 1.64 439 | 3698 Female 2.26 440 | 3709 Female 2.14 441 | 3710 Male .52 442 | 3711 Male 1.08 443 | 3712 Male 1.69 444 | 3713 Male 2.73 445 | 3714 Male 1.91 446 | 3715 Male 1.73 447 | 3716 Female 3.21 448 | 3717 Female 2.11 449 | 3718 Female 2.05 450 | 3719 Male 2.17 451 | 3720 Female 2.17 452 | 3721 Female 2.3 453 | 3722 Female 2.56 454 | 3723 Female 2.11 .41 .47 455 | 3724 Male 1.7 456 | 3725 Female 1.23 457 | 3726 Female 3.2 458 | 3727 Female 2.02 459 | 3728 Female 2.64 460 | 3729 Male 2.52 .14 461 | 3730 Male 1.61 462 | 3731 Male 1.5 1.2 .91 463 | 3734 Male 1.15 .45 .44 464 | 3736 Male 1.82 465 | 3753 Female 1.5 466 | 3754 Female 2.32 467 | 3759 Female 2.92 468 | 3764 Male 1.41 469 | 3769 Male 1.35 470 | 3778 Male .61 .14 471 | 3779 Female .73 472 | 3782 Female 2.23 1.88 473 | 3783 Female 1.32 .91 474 | 3784 Female 2.94 1.79 475 | 3785 Male 1.61 476 | 3786 Female 1 477 | 3787 Female 3.15 3 478 | 3788 Female 2.88 479 | 3791 Female 2.09 1.21 480 | 3792 Female 1.32 1.7 2.29 481 | 3793 Male 1.47 .35 482 | 3794 Male 1.61 483 | 3796 Male 2.2 1.5 1.88 484 | 3797 Female 2.78 485 | 3798 Female 2.06 486 | 3799 Female .47 487 | 
3804 Female 2.87 488 | 3826 Male 1.14 489 | 3828 Female 3.32 3.21 490 | 3831 Female 2.08 1.38 491 | 3836 Female 2.38 2.5 492 | 3837 Female 2.08 493 | 3838 Male 1.85 494 | 3840 Male 1.38 495 | 3844 Male 1.14 496 | 3846 Male 1.58 497 | 3849 Male 1.23 .7 1.02 498 | 3850 Female 2.53 499 | 3851 Male .67 500 | 3854 Female .73 501 | 3855 Female 1.34 502 | 3856 Female 2.14 .7 503 | 3857 Female 1 504 | 3858 Male 1.35 505 | 3859 Male 1.94 .79 506 | 3860 Male .5 507 | 3869 Female 3.08 508 | 3870 Female 2.88 509 | 3871 Female 1.91 510 | 3872 Male 1.41 511 | 3873 Male 2.02 512 | 3874 Female .76 513 | 3875 Male 1.94 514 | 3876 Male .67 .28 515 | 3879 Female 2.41 516 | 3880 Female 2.17 517 | 3882 Female 2.67 .41 518 | 3883 Male 1.94 .64 519 | 3888 Female 2.05 .85 1.7 520 | 3890 Male 2.17 521 | 3909 Male .47 522 | 3912 Male .62 .76 523 | 3913 Female 2 524 | 3914 Male .45 525 | 3916 Female 2.29 .91 1.38 526 | 3917 Female 2.55 527 | 3918 Female .82 528 | 3919 Female 3.12 2.2 529 | 3920 Male 2.5 2.23 .41 530 | 3921 Male 1.79 531 | 3922 Female 2.28 532 | 3923 Male .58 1.05 .58 533 | 3924 Female 2.5 1.29 534 | 3925 Female 1.41 535 | 3926 Female 2.14 536 | 3927 Male .76 .26 537 | 3929 Male 1.79 1.11 538 | 3930 Male 1.02 .35 539 | 3932 Female 2.62 540 | 3942 Male .88 541 | 3944 Male 1.58 .2 .2 542 | 3945 Female 2.2 543 | 3946 Female 1.14 544 | 3947 Male 1.47 .52 .35 545 | 3948 Female 1.41 .23 .38 546 | 3949 Male 1.44 1.76 1.18 547 | 3950 Female 1.23 1.17 548 | 3951 Female 1.82 549 | 3952 Female 2.44 550 | 3954 Female 1.94 1.2 551 | 3955 Male 2.41 552 | 3964 Female 2.27 553 | 3965 Male 1.79 .23 554 | 3966 Male 1.88 555 | 3972 Male 1.85 .64 1.32 556 | 3973 Female 2.21 557 | 3986 Male 1.97 1.94 558 | 3987 Female 2.51 559 | 3988 Female 2.05 560 | 3989 Female 1.29 1 .82 561 | 3990 Female 2.05 562 | 3991 Female 2.23 563 | 3992 Female 1.76 564 | 3993 Male 1.05 565 | 3994 Male 1.79 566 | 3995 Female 1.02 .73 .47 567 | 3996 Female 2.76 1.58 568 | 3997 Female 1.67 .55 569 | 4010 Male 2.85 570 | 4011 Female .23 .84 571 | 4012 Female 1.9 572 | 4013 Male 1.23 .52 573 | 4014 Male 1.97 574 | 4015 Female 1.5 575 | 4016 Female 3.69 576 | 4017 Female .5 577 | 4022 Female 2.18 578 | 4023 Female 2.17 579 | 4024 Female 1.58 580 | 4025 Male 2.88 581 | 4030 Male 2.52 582 | 4032 Female 2.2 583 | 4033 Male 1.73 584 | 4034 Female 2.23 585 | 4079 Male 1.97 586 | 4089 Male 1.2 .67 .91 587 | 4090 Male 2 588 | 4092 Female 1.91 589 | 4093 Male .81 590 | 4095 Female 1.31 591 | 4096 Female .38 592 | 4101 Female 1.97 593 | 4104 Female .38 594 | 4105 Female 2.11 .76 .85 595 | 4106 Female 3.2 596 | 4107 Female .02 597 | 4109 Female 2.56 598 | 4110 Female 2.02 599 | 4111 Female 2.3 600 | 4114 Male 2.02 601 | 4147 Female 2.05 602 | 4148 Female 1.7 603 | 4149 Male 1.61 604 | 4150 Male .73 605 | 4151 Male 2.5 1.64 606 | 4152 Female 2.18 1.75 1.91 607 | 4153 Female 2.46 1.08 1.91 608 | 4154 Female 1.5 .91 609 | 4155 Female 1.73 .94 610 | 4156 Male 1.44 611 | 4157 Male 1.64 .32 .52 612 | 4158 Female 20.02 2.44 613 | 4159 Female 1.2 .17 .2 614 | 4160 Male .38 .02 615 | 4161 Female 1.58 1.54 1.76 616 | 4162 Female 1.67 .5 617 | 4163 Female 1 .48 618 | 4164 Female 2.58 1.35 619 | 4165 Female 2.82 2.61 3.02 620 | 4166 Female 2.29 2.05 621 | 4167 Female 1.14 622 | 4168 Female 1.64 .76 623 | 4171 Female 1.82 .08 624 | 4172 Female 3.32 2.91 3.02 625 | 4193 Male 3.32 626 | 4194 Male 1.85 1 1.5 627 | 4195 Male 2.29 628 | 4196 Female 1.47 .47 1.58 629 | 4197 Male 2.08 .7 .67 630 | 4198 Male 2.2 631 | 4199 Female 1.06 1.45 632 | 4216 Female .97 .14 .02 633 | 4218 
Male 2 634 | 4219 Female 1.67 .38 635 | 4220 Female 2.94 636 | 4221 Female 1.55 637 | 4222 Male .88 .26 638 | 4223 Female 1.35 2.32 639 | 4224 Female .61 .2 .17 640 | 4225 Male 1 641 | 4226 Female 1.52 2.72 .52 642 | 4234 Male 1 643 | 4235 Male 1.76 .41 .88 644 | 4236 Female 2.52 645 | 4237 Female 2 .88 1.44 646 | 4238 Female 2.63 647 | 4239 Female .73 .85 648 | 4240 Male 1.58 .23 1.66 649 | 4241 Female .58 650 | 4245 Female 1.67 651 | 4247 Female 1.47 652 | 4249 Female 1.81 653 | 4256 Male 1.91 654 | 4257 Male 1.06 655 | 4259 Male 1.47 1.23 .94 656 | 4262 Male 2.52 657 | 4263 Female 1.85 .2 658 | 4264 Male 3.44 659 | 4265 Female 1.55 1.32 1.19 660 | 4266 Female 2.29 2.7 661 | 4267 Female 1.76 662 | 4268 Male 1.9 663 | 4269 Male 2.52 2.55 664 | 4271 Female 2.52 665 | 4274 Female 2.82 .17 666 | 4281 Female 2.02 667 | 4284 Female 1.29 668 | 4285 Male 1.26 669 | 4287 Female .94 670 | 4291 Female 2 1.13 .53 671 | 4292 Female .73 672 | 4294 Male 2.26 .79 673 | 4295 Female 2.23 674 | 4297 Male 2.35 675 | 4298 Female .55 .38 676 | 4301 Female 1.85 677 | 4302 Male .67 678 | 4305 Male 1.85 1 679 | 4306 Male 1.23 .2 680 | 4308 Female 2.35 681 | 4309 Male 1.35 682 | 4310 Female 1.94 683 | 4311 Male 1.55 684 | 4312 Female 1.29 685 | 4313 Male 2.17 686 | 4314 Female 1.91 687 | 4333 Female 2.88 688 | 4334 Female 2.36 689 | 4335 Female 2.36 690 | 4336 Female 2.2 691 | 4349 Female 2.17 .47 .38 692 | 4351 Male .52 693 | 4352 Male .32 694 | 4353 Female 1.52 .55 695 | 4354 Male 2 .94 .08 696 | 4355 Male 1.32 1.02 1.2 697 | 4356 Female 2.05 698 | 4357 Female 1.73 699 | 4358 Male 1.94 700 | 4359 Female 1.81 701 | 4360 Male .9 .64 702 | 4361 Male 1.58 .67 703 | 4363 Female 2.29 1.87 .91 704 | 4366 Female 2.57 705 | 4370 Female 1.58 706 | 4382 Female 2.33 .82 707 | 4383 Female 3.15 708 | 4384 Female 2.29 709 | 4385 Female .82 710 | 4386 Female 1.93 711 | 4387 Female 1.82 712 | 4388 Female 1.96 713 | 4389 Female 1.32 .64 .47 714 | 4391 Female 1.02 715 | 4392 Female 1.14 716 | 4393 Female 2.32 717 | 4394 Male 2.16 718 | 4396 Female 2.42 1.7 719 | 4397 Female 1.14 720 | 4398 Male 1.55 .79 1.76 721 | 4404 Female 1.17 722 | 4405 Female 1 .58 .32 723 | 4410 Female 1.05 .11 724 | 4419 Female 1.38 725 | 4428 Female 1.93 2.42 726 | 4430 Female 2.73 727 | 4435 Female 2.02 728 | 4436 Female 2.81 729 | 4437 Female 2.47 730 | 4439 Male 1.35 731 | 4440 Male 2.08 732 | 4441 Female 2.5 733 | 4442 Female 2.45 734 | 4443 Female 2.17 735 | 4444 Male 1.7 0 736 | 4445 Male .7 .23 .45 737 | 4446 Male 1.51 738 | 4447 Female 1.23 739 | 4448 Female 2.14 .85 740 | 4452 Male 1.14 1.14 741 | 4453 Female .96 742 | 4454 Male 1.52 1.14 1.26 743 | 4455 Male .52 744 | 4456 Male 1.56 745 | 4459 Female 3.29 .26 746 | 4464 Male .45 747 | 4467 Female 2.63 748 | 4468 Female 1.7 749 | 4470 Male 3.11 750 | 4479 Female 1.82 751 | 4481 Female 1.58 .14 752 | 4482 Female 2.73 753 | 4485 Female 1.5 1.14 754 | 4486 Male 1.78 1.02 755 | 4488 Female 2.02 756 | 4507 Male .67 .94 .91 757 | 4509 Female 1.41 .55 758 | 4510 Male .9 759 | 4511 Female 1.23 1.11 .76 760 | 4512 Female 2.7 761 | 4514 Female 1.97 762 | 4515 Male .84 763 | 4516 Male 1.79 764 | 4517 Female 2.84 765 | 4518 Male 2.02 766 | 4519 Male 1.64 .7 767 | 4525 Female 1.08 768 | 4533 Female 2.97 769 | 4552 Female .94 770 | 4553 Female 2.97 1.94 771 | 4554 Female .97 772 | 4555 Male 1.47 773 | 4559 Female 2.61 774 | 4563 Female 1.73 .2 .91 775 | 4564 Female 3.38 3.44 3.41 776 | 4569 Male 3.17 1 1.73 777 | 4570 Female 2.2 .91 778 | 4571 Female 2.14 779 | 4582 Male 1.29 1.58 .81 780 | 4590 Female 3.21 
2.85 781 | 4597 Male 2.67 782 | 4598 Male 1.85 .79 783 | 4601 Female 1.35 784 | 4607 Female 2.14 .76 785 | 4611 Male 1.24 .56 786 | 4654 Female 2.02 1.78 2.55 787 | 4655 Female 2.32 788 | 4663 Male 1.08 .23 789 | 4666 Male 1.14 1.35 1.02 790 | 4690 Female 2.14 1.82 791 | 4692 Female 2.88 792 | 4693 Male 1.35 .17 .44 793 | 4694 Male 1 1.7 794 | 4695 Male 2.02 795 | 4696 Female .64 1.32 796 | 4697 Male .29 .14 797 | 4698 Female 1.73 .94 798 | 4699 Female 1.82 1.52 799 | 4704 Female 2.11 800 | 4710 Female 1.23 801 | 4711 Male .64 802 | 4724 Male 2.23 1.41 2.11 803 | 4725 Female 2.44 .32 804 | 4740 Male 1.17 .58 805 | 4744 Female .61 .44 806 | 4749 Female .52 807 | 4756 Female 2.91 .94 808 | 4758 Female 2.61 1.44 809 | 4759 Female 1.47 810 | 4760 Male 1.28 811 | 4765 Female 1.26 812 | -------------------------------------------------------------------------------- /plots/Exam Anxiety.dat: -------------------------------------------------------------------------------- 1 | Code Revise Exam Anxiety Gender 2 | 1 4 40 86.298 Male 3 | 2 11 65 88.716 Female 4 | 3 27 80 70.178 Male 5 | 4 53 80 61.312 Male 6 | 5 4 40 89.522 Male 7 | 6 22 70 60.506 Female 8 | 7 16 20 81.462 Female 9 | 8 21 55 75.82 Female 10 | 9 25 50 69.372 Female 11 | 10 18 40 82.268 Female 12 | 11 18 45 79.044 Male 13 | 12 16 85 80.656 Male 14 | 13 13 70 70.178 Male 15 | 14 18 50 75.014 Female 16 | 15 98 95 34.714 Male 17 | 16 1 70 95.164 Male 18 | 17 14 95 75.82 Male 19 | 18 29 95 79.044 Female 20 | 19 4 50 91.134 Female 21 | 20 23 60 64.536 Male 22 | 21 14 80 80.656 Male 23 | 22 12 75 77.432 Male 24 | 23 22 85 65.342 Female 25 | 24 84 90 .0560000000000116 Female 26 | 25 23 30 71.79 Female 27 | 26 26 60 81.462 Female 28 | 27 24 75 63.73 Male 29 | 28 72 75 27.46 Female 30 | 29 37 27 73.402 Female 31 | 30 10 20 89.522 Male 32 | 31 3 75 89.522 Female 33 | 32 36 90 75.014 Female 34 | 33 43 60 43.58 Male 35 | 34 19 30 82.268 Male 36 | 35 12 80 79.044 Male 37 | 36 9 10 79.044 Female 38 | 37 72 85 37.132 Male 39 | 38 10 7 81.462 Male 40 | 39 12 5 83.074 Female 41 | 40 30 85 50.834 Male 42 | 41 15 20 82.268 Male 43 | 42 8 45 78.238 Female 44 | 43 34 60 72.596 Male 45 | 44 22 70 74.208 Female 46 | 45 21 50 75.82 Female 47 | 46 27 25 70.984 Male 48 | 47 6 50 97.582 Male 49 | 48 18 40 67.76 Male 50 | 49 8 80 75.014 Male 51 | 50 19 50 73.402 Female 52 | 51 0 35 93.552 Female 53 | 52 52 80 58.894 Female 54 | 53 38 50 53.252 Female 55 | 54 19 49 84.686 Male 56 | 55 23 75 89.522 Female 57 | 56 11 25 71.79 Female 58 | 57 27 65 82.268 Male 59 | 58 17 80 69.372 Male 60 | 59 13 50 62.118 Male 61 | 60 42 70 68.566 Female 62 | 61 4 40 93.552 Male 63 | 62 8 80 84.686 Female 64 | 63 6 10 82.268 Male 65 | 64 11 20 81.462 Female 66 | 65 7 40 82.268 Male 67 | 66 15 40 91.134 Male 68 | 67 4 70 91.94 Female 69 | 68 28 52 86.298 Female 70 | 69 22 50 72.596 Male 71 | 70 29 60 63.73 Female 72 | 71 2 80 63.73 Male 73 | 72 16 60 71.79 Female 74 | 73 59 65 57.282 Male 75 | 74 10 15 84.686 Female 76 | 75 13 85 84.686 Male 77 | 76 8 20 77.432 Female 78 | 77 5 80 82.268 Female 79 | 78 2 100 10 Male 80 | 79 38 100 50.834 Female 81 | 80 4 80 87.91 Male 82 | 81 10 10 83.88 Male 83 | 82 6 70 84.686 Female 84 | 83 68 100 20.206 Female 85 | 84 8 70 87.104 Male 86 | 85 1 70 83.88 Female 87 | 86 14 65 67.76 Male 88 | 87 42 75 95.97 Female 89 | 88 13 85 62.118 Female 90 | 89 1 30 84.686 Male 91 | 90 3 5 92.746 Male 92 | 91 5 10 84.686 Female 93 | 92 12 90 83.074 Female 94 | 93 19 70 73.402 Male 95 | 94 2 20 87.91 Female 96 | 95 19 85 71.79 Male 97 | 96 11 35 86.298 Male 98 | 
97 15 30 84.686 Female 99 | 98 23 70 75.82 Male 100 | 99 13 55 70.984 Female 101 | 100 14 75 78.238 Female 102 | 101 1 2 82.268 Male 103 | 102 9 40 79.044 Male 104 | 103 20 50 91.134 Female 105 | -------------------------------------------------------------------------------- /plots/FacebookNarcissism.dat: -------------------------------------------------------------------------------- 1 | id NPQC_R_Total Rating_Type Rating 2 | 1 31 Attractive 2 3 | 1 31 Fashionable 2 4 | 1 31 Glamourous 2 5 | 1 31 Cool 2 6 | 2 37 Attractive 2 7 | 2 37 Fashionable 2 8 | 2 37 Glamourous 2 9 | 2 37 Cool 2 10 | 5 44.47 Attractive 3 11 | 5 44.47 Fashionable 3 12 | 5 44.47 Glamourous 3 13 | 5 44.47 Cool 3 14 | 6 30 Attractive 1 15 | 6 30 Fashionable 3 16 | 6 30 Glamourous 3 17 | 6 30 Cool 3 18 | 7 28 Fashionable 3 19 | 7 28 Cool 3 20 | 7 28 Attractive 4 21 | 7 28 Glamourous 4 22 | 13 42 Attractive 5 23 | 13 42 Fashionable 5 24 | 13 42 Glamourous 5 25 | 13 42 Cool 5 26 | 14 23.47 Attractive 3 27 | 14 23.47 Fashionable 3 28 | 14 23.47 Glamourous 3 29 | 14 23.47 Cool 3 30 | 15 28 Fashionable 1 31 | 15 28 Attractive 2 32 | 15 28 Glamourous 2 33 | 15 28 Cool 2 34 | 18 44 Attractive 3 35 | 18 44 Fashionable 3 36 | 18 44 Glamourous 3 37 | 18 44 Cool 3 38 | 21 40 Cool 2 39 | 21 40 Attractive 4 40 | 21 40 Glamourous 4 41 | 21 40 Fashionable 5 42 | 22 32.46 Attractive 3 43 | 22 32.46 Fashionable 3 44 | 22 32.46 Glamourous 3 45 | 22 32.46 Cool 3 46 | 23 24 Attractive 1 47 | 23 24 Fashionable 1 48 | 23 24 Glamourous 1 49 | 23 24 Cool 5 50 | 24 38 Attractive 3 51 | 24 38 Fashionable 3 52 | 24 38 Cool 3 53 | 24 38 Glamourous 4 54 | 25 19 Attractive 3 55 | 25 19 Fashionable 3 56 | 25 19 Glamourous 3 57 | 25 19 Cool 3 58 | 27 22 Attractive 3 59 | 27 22 Fashionable 3 60 | 27 22 Glamourous 3 61 | 27 22 Cool 3 62 | 28 48 Attractive 3 63 | 28 48 Fashionable 3 64 | 28 48 Glamourous 3 65 | 28 48 Cool 3 66 | 31 22 Attractive 1 67 | 31 22 Fashionable 1 68 | 31 22 Glamourous 1 69 | 31 22 Cool 1 70 | 32 26 Fashionable 2 71 | 32 26 Glamourous 2 72 | 32 26 Cool 2 73 | 32 26 Attractive 3 74 | 33 33 Attractive 3 75 | 33 33 Fashionable 3 76 | 33 33 Glamourous 3 77 | 33 33 Cool 3 78 | 34 26 Attractive 3 79 | 34 26 Fashionable 3 80 | 34 26 Glamourous 3 81 | 34 26 Cool 3 82 | 35 22 Fashionable 2 83 | 35 22 Attractive 3 84 | 35 22 Glamourous 3 85 | 35 22 Cool 5 86 | 36 31 Attractive 3 87 | 36 31 Fashionable 3 88 | 36 31 Glamourous 3 89 | 36 31 Cool 3 90 | 37 50 Attractive 4 91 | 37 50 Glamourous 4 92 | 37 50 Cool 4 93 | 37 50 Fashionable 5 94 | 39 37 Fashionable 3 95 | 39 37 Glamourous 3 96 | 39 37 Cool 3 97 | 39 37 Attractive 4 98 | 45 30 Fashionable 2 99 | 45 30 Glamourous 2 100 | 45 30 Cool 2 101 | 45 30 Attractive 3 102 | 46 33 Fashionable 2 103 | 46 33 Glamourous 3 104 | 46 33 Attractive 4 105 | 46 33 Cool 5 106 | 48 38 Attractive 3 107 | 48 38 Fashionable 3 108 | 48 38 Cool 3 109 | 48 38 Glamourous 4 110 | 50 30 Attractive 3 111 | 50 30 Fashionable 4 112 | 50 30 Glamourous 4 113 | 50 30 Cool 4 114 | 51 45 Attractive 2 115 | 51 45 Fashionable 2 116 | 51 45 Glamourous 2 117 | 51 45 Cool 3 118 | 53 23 Attractive 1 119 | 53 23 Fashionable 1 120 | 53 23 Glamourous 1 121 | 53 23 Cool 1 122 | 54 30 Cool 2 123 | 54 30 Attractive 3 124 | 54 30 Fashionable 3 125 | 54 30 Glamourous 3 126 | 55 39 Attractive 3 127 | 55 39 Fashionable 3 128 | 55 39 Glamourous 3 129 | 55 39 Cool 3 130 | 56 21 Attractive 1 131 | 56 21 Glamourous 1 132 | 56 21 Cool 1 133 | 56 21 Fashionable 2 134 | 59 31 Attractive 3 135 | 59 31 Fashionable 3 136 | 59 31 Glamourous 3 137 | 
59 31 Cool 4 138 | 60 41 Cool 2 139 | 60 41 Attractive 3 140 | 60 41 Fashionable 3 141 | 60 41 Glamourous 3 142 | 62 38 Fashionable 1 143 | 62 38 Glamourous 1 144 | 62 38 Cool 2 145 | 62 38 Attractive 3 146 | 66 35 Fashionable 2 147 | 66 35 Glamourous 2 148 | 66 35 Attractive 3 149 | 66 35 Cool 3 150 | 67 26 Attractive 2 151 | 67 26 Fashionable 2 152 | 67 26 Glamourous 2 153 | 67 26 Cool 2 154 | 68 35 Attractive 3 155 | 68 35 Glamourous 3 156 | 68 35 Fashionable 5 157 | 68 35 Cool 5 158 | 69 48 Fashionable 2 159 | 69 48 Glamourous 3 160 | 69 48 Attractive 4 161 | 69 48 Cool 4 162 | 70 23 Glamourous 1 163 | 70 23 Attractive 2 164 | 70 23 Fashionable 2 165 | 70 23 Cool 3 166 | 71 41 Fashionable 3 167 | 71 41 Glamourous 4 168 | 71 41 Cool 4 169 | 71 41 Attractive 5 170 | 72 36 Attractive 3 171 | 72 36 Fashionable 3 172 | 72 36 Glamourous 3 173 | 72 36 Cool 3 174 | 73 29 Attractive 3 175 | 73 29 Fashionable 3 176 | 73 29 Glamourous 3 177 | 73 29 Cool 3 178 | 74 34 Attractive 2 179 | 74 34 Glamourous 2 180 | 74 34 Fashionable 3 181 | 74 34 Cool 3 182 | 75 50 Attractive 3 183 | 75 50 Glamourous 3 184 | 75 50 Fashionable 4 185 | 75 50 Cool 4 186 | 76 36 Attractive 3 187 | 76 36 Fashionable 3 188 | 76 36 Glamourous 3 189 | 76 36 Cool 4 190 | 77 27 Attractive 3 191 | 77 27 Fashionable 3 192 | 77 27 Glamourous 3 193 | 77 27 Cool 3 194 | 78 31 Attractive 3 195 | 78 31 Fashionable 3 196 | 78 31 Glamourous 3 197 | 78 31 Cool 3 198 | 79 35 Fashionable 3 199 | 79 35 Cool 3 200 | 79 35 Attractive 4 201 | 79 35 Glamourous 4 202 | 80 25 Fashionable 2 203 | 80 25 Glamourous 2 204 | 80 25 Cool 2 205 | 80 25 Attractive 3 206 | 81 23 Attractive 3 207 | 81 23 Fashionable 3 208 | 81 23 Glamourous 3 209 | 81 23 Cool 3 210 | 82 32 Attractive 1 211 | 82 32 Fashionable 1 212 | 82 32 Glamourous 1 213 | 82 32 Cool 3 214 | 83 32 Attractive 3 215 | 83 32 Fashionable 3 216 | 83 32 Glamourous 3 217 | 83 32 Cool 4 218 | 84 32 Attractive 3 219 | 84 32 Fashionable 3 220 | 84 32 Glamourous 3 221 | 84 32 Cool 3 222 | 85 25 Glamourous 2 223 | 85 25 Attractive 3 224 | 85 25 Fashionable 3 225 | 85 25 Cool 3 226 | 86 36 Cool 2 227 | 86 36 Attractive 3 228 | 86 36 Fashionable 3 229 | 86 36 Glamourous 3 230 | 87 37 Attractive 3 231 | 87 37 Fashionable 3 232 | 87 37 Glamourous 3 233 | 87 37 Cool 3 234 | 88 33 Attractive 3 235 | 88 33 Fashionable 3 236 | 88 33 Glamourous 3 237 | 88 33 Cool 3 238 | 89 19 Attractive 2 239 | 89 19 Fashionable 2 240 | 89 19 Glamourous 2 241 | 89 19 Cool 2 242 | 90 25 Fashionable 1 243 | 90 25 Glamourous 1 244 | 90 25 Cool 1 245 | 90 25 Attractive 2 246 | 91 36 Fashionable 1 247 | 91 36 Cool 1 248 | 91 36 Attractive 2 249 | 91 36 Glamourous 2 250 | 92 33 Attractive 3 251 | 92 33 Fashionable 3 252 | 92 33 Glamourous 3 253 | 92 33 Cool 3 254 | 93 29 Attractive 3 255 | 93 29 Fashionable 3 256 | 93 29 Glamourous 3 257 | 93 29 Cool 3 258 | 94 31 Attractive 1 259 | 94 31 Fashionable 1 260 | 94 31 Glamourous 1 261 | 94 31 Cool 1 262 | 98 34 Attractive 3 263 | 98 34 Fashionable 3 264 | 98 34 Glamourous 3 265 | 98 34 Cool 3 266 | 99 34 Glamourous 4 267 | 99 34 Cool 4 268 | 99 34 Attractive 5 269 | 99 34 Fashionable 5 270 | 100 26 Attractive 1 271 | 100 26 Fashionable 1 272 | 100 26 Glamourous 1 273 | 100 26 Cool 1 274 | 101 20 Attractive 1 275 | 101 20 Glamourous 1 276 | 101 20 Fashionable 2 277 | 101 20 Cool 3 278 | 102 36.41 Attractive 2 279 | 102 36.41 Fashionable 3 280 | 102 36.41 Glamourous 3 281 | 102 36.41 Cool 4 282 | 103 28.46 Attractive 5 283 | 103 28.46 Fashionable 5 284 | 103 28.46 Glamourous 5 285 | 103 
28.46 Cool 5 286 | 104 45 Fashionable 3 287 | 104 45 Glamourous 3 288 | 104 45 Attractive 4 289 | 104 45 Cool 5 290 | 105 28 Attractive 2 291 | 105 28 Glamourous 2 292 | 105 28 Fashionable 3 293 | 105 28 Cool 3 294 | 107 29 Fashionable 2 295 | 107 29 Cool 2 296 | 107 29 Attractive 3 297 | 107 29 Glamourous 3 298 | 109 36 Attractive 3 299 | 109 36 Fashionable 3 300 | 109 36 Glamourous 3 301 | 109 36 Cool 3 302 | 110 42 Fashionable 1 303 | 110 42 Glamourous 2 304 | 110 42 Attractive 3 305 | 110 42 Cool 3 306 | 111 27.44 Cool 1 307 | 111 27.44 Attractive 2 308 | 111 27.44 Fashionable 2 309 | 111 27.44 Glamourous 2 310 | 112 23 Glamourous 1 311 | 112 23 Attractive 2 312 | 112 23 Fashionable 2 313 | 112 23 Cool 2 314 | 114 40.52 Fashionable 1 315 | 114 40.52 Glamourous 1 316 | 114 40.52 Attractive 2 317 | 114 40.52 Cool 2 318 | 116 41 Attractive 3 319 | 116 41 Glamourous 3 320 | 116 41 Cool 3 321 | 116 41 Fashionable 4 322 | 117 17 Attractive 1 323 | 117 17 Fashionable 1 324 | 117 17 Glamourous 1 325 | 117 17 Cool 1 326 | 119 39 Glamourous 2 327 | 119 39 Attractive 3 328 | 119 39 Fashionable 3 329 | 119 39 Cool 3 330 | 120 24 Attractive 3 331 | 120 24 Fashionable 3 332 | 120 24 Glamourous 3 333 | 120 24 Cool 3 334 | 121 50 Attractive 3 335 | 121 50 Glamourous 3 336 | 121 50 Fashionable 4 337 | 121 50 Cool 5 338 | 123 35 Attractive 3 339 | 123 35 Fashionable 3 340 | 123 35 Glamourous 3 341 | 123 35 Cool 3 342 | 124 21 Attractive 1 343 | 124 21 Glamourous 1 344 | 124 21 Cool 1 345 | 124 21 Fashionable 2 346 | 125 28 Attractive 2 347 | 125 28 Glamourous 2 348 | 125 28 Cool 2 349 | 125 28 Fashionable 3 350 | 126 22 Attractive 3 351 | 126 22 Fashionable 3 352 | 126 22 Glamourous 3 353 | 126 22 Cool 3 354 | 127 35 Attractive 3 355 | 127 35 Fashionable 3 356 | 127 35 Glamourous 3 357 | 127 35 Cool 3 358 | 129 29 Attractive 4 359 | 129 29 Fashionable 4 360 | 129 29 Glamourous 4 361 | 129 29 Cool 4 362 | 131 42 Attractive 4 363 | 131 42 Fashionable 4 364 | 131 42 Glamourous 4 365 | 131 42 Cool 4 366 | 132 27 Cool 2 367 | 132 27 Attractive 3 368 | 132 27 Fashionable 3 369 | 132 27 Glamourous 3 370 | 133 27 Attractive 3 371 | 133 27 Fashionable 3 372 | 133 27 Glamourous 3 373 | 133 27 Cool 3 374 | 134 36 Cool 3 375 | 134 36 Attractive 4 376 | 134 36 Fashionable 5 377 | 134 36 Glamourous 5 378 | 136 40 Glamourous 2 379 | 136 40 Attractive 3 380 | 136 40 Fashionable 3 381 | 136 40 Cool 3 382 | 137 25 Cool 1 383 | 137 25 Fashionable 2 384 | 137 25 Glamourous 2 385 | 137 25 Attractive 3 386 | 138 42 Attractive 3 387 | 138 42 Fashionable 3 388 | 138 42 Glamourous 3 389 | 138 42 Cool 3 390 | 140 25 Attractive 1 391 | 140 25 Fashionable 1 392 | 140 25 Glamourous 3 393 | 140 25 Cool 5 394 | 141 33 Attractive 3 395 | 141 33 Fashionable 3 396 | 141 33 Glamourous 3 397 | 141 33 Cool 3 398 | 142 36 Attractive 5 399 | 142 36 Fashionable 5 400 | 142 36 Glamourous 5 401 | 142 36 Cool 5 402 | 143 45 Attractive 5 403 | 143 45 Fashionable 5 404 | 143 45 Glamourous 5 405 | 143 45 Cool 5 406 | 147 31 Attractive 3 407 | 147 31 Fashionable 3 408 | 147 31 Glamourous 3 409 | 147 31 Cool 3 410 | 148 23 Fashionable 1 411 | 148 23 Glamourous 1 412 | 148 23 Attractive 2 413 | 148 23 Cool 2 414 | 150 28 Fashionable 2 415 | 150 28 Glamourous 2 416 | 150 28 Cool 2 417 | 150 28 Attractive 3 418 | 152 26 Attractive 3 419 | 152 26 Fashionable 3 420 | 152 26 Glamourous 3 421 | 152 26 Cool 3 422 | 153 37.06 Fashionable 3 423 | 153 37.06 Cool 3 424 | 153 37.06 Attractive 4 425 | 153 37.06 Glamourous 4 426 | 154 38 Attractive 3 427 | 154 38 
Fashionable 3 428 | 154 38 Glamourous 3 429 | 154 38 Cool 3 430 | 157 23 Attractive 2 431 | 157 23 Fashionable 2 432 | 157 23 Glamourous 2 433 | 157 23 Cool 2 434 | 158 40 Fashionable 4 435 | 158 40 Glamourous 4 436 | 158 40 Attractive 5 437 | 158 40 Cool 5 438 | 159 41 Fashionable 2 439 | 159 41 Cool 2 440 | 159 41 Glamourous 3 441 | 159 41 Attractive 4 442 | 161 28 Attractive 1 443 | 161 28 Fashionable 1 444 | 161 28 Glamourous 3 445 | 161 28 Cool 5 446 | 162 47 Attractive 5 447 | 162 47 Fashionable 5 448 | 162 47 Glamourous 5 449 | 162 47 Cool 5 450 | 163 46 Attractive 3 451 | 163 46 Fashionable 3 452 | 163 46 Glamourous 3 453 | 163 46 Cool 3 454 | 165 30 Attractive 3 455 | 165 30 Fashionable 3 456 | 165 30 Glamourous 3 457 | 165 30 Cool 3 458 | 167 41 Attractive 3 459 | 167 41 Fashionable 3 460 | 167 41 Glamourous 3 461 | 167 41 Cool 4 462 | 168 34 Glamourous 2 463 | 168 34 Fashionable 3 464 | 168 34 Attractive 4 465 | 168 34 Cool 5 466 | 169 26 Attractive 3 467 | 169 26 Fashionable 3 468 | 169 26 Glamourous 3 469 | 169 26 Cool 3 470 | 170 35 Cool 4 471 | 170 35 Attractive 5 472 | 170 35 Fashionable 5 473 | 170 35 Glamourous 5 474 | 173 36 Fashionable 1 475 | 173 36 Glamourous 1 476 | 173 36 Attractive 2 477 | 173 36 Cool 4 478 | 174 30 Attractive 3 479 | 174 30 Fashionable 3 480 | 174 30 Glamourous 3 481 | 174 30 Cool 3 482 | 175 31 Attractive 3 483 | 175 31 Fashionable 3 484 | 175 31 Glamourous 3 485 | 175 31 Cool 3 486 | 178 38 Attractive 3 487 | 178 38 Fashionable 4 488 | 178 38 Glamourous 4 489 | 178 38 Cool 5 490 | 179 44 Attractive 3 491 | 179 44 Fashionable 3 492 | 179 44 Glamourous 3 493 | 179 44 Cool 3 494 | 180 24.52 Attractive 3 495 | 180 24.52 Fashionable 3 496 | 180 24.52 Glamourous 3 497 | 180 24.52 Cool 3 498 | 183 36 Attractive 3 499 | 183 36 Fashionable 3 500 | 183 36 Glamourous 3 501 | 183 36 Cool 3 502 | 184 36 Attractive 1 503 | 184 36 Fashionable 1 504 | 184 36 Glamourous 1 505 | 184 36 Cool 1 506 | 186 37 Attractive 2 507 | 186 37 Fashionable 2 508 | 186 37 Glamourous 2 509 | 186 37 Cool 2 510 | 187 52 Attractive 5 511 | 187 52 Fashionable 5 512 | 187 52 Glamourous 5 513 | 187 52 Cool 5 514 | 188 33 Attractive 3 515 | 188 33 Fashionable 3 516 | 188 33 Glamourous 3 517 | 188 33 Cool 3 518 | 189 31 Attractive 3 519 | 189 31 Glamourous 3 520 | 189 31 Fashionable 4 521 | 189 31 Cool 4 522 | 190 39 Attractive 3 523 | 190 39 Fashionable 3 524 | 190 39 Glamourous 3 525 | 190 39 Cool 3 526 | 191 27 Attractive 1 527 | 191 27 Glamourous 2 528 | 191 27 Fashionable 3 529 | 191 27 Cool 3 530 | 192 35 Attractive 2 531 | 192 35 Cool 2 532 | 192 35 Fashionable 3 533 | 192 35 Glamourous 3 534 | 193 33 Attractive 3 535 | 193 33 Fashionable 3 536 | 193 33 Glamourous 3 537 | 193 33 Cool 3 538 | 194 29 Attractive 2 539 | 194 29 Fashionable 2 540 | 194 29 Glamourous 2 541 | 194 29 Cool 2 542 | 195 38 Attractive 3 543 | 195 38 Fashionable 3 544 | 195 38 Glamourous 3 545 | 195 38 Cool 3 546 | 196 34 Fashionable 2 547 | 196 34 Glamourous 2 548 | 196 34 Attractive 3 549 | 196 34 Cool 3 550 | 198 41 Attractive 4 551 | 198 41 Fashionable 4 552 | 198 41 Glamourous 4 553 | 198 41 Cool 4 554 | 199 38 Attractive 1 555 | 199 38 Fashionable 1 556 | 199 38 Glamourous 1 557 | 199 38 Cool 1 558 | 200 32 Fashionable 3 559 | 200 32 Glamourous 3 560 | 200 32 Cool 3 561 | 200 32 Attractive 5 562 | 201 21 Attractive 3 563 | 201 21 Fashionable 3 564 | 201 21 Glamourous 3 565 | 201 21 Cool 3 566 | 202 37 Attractive 1 567 | 202 37 Fashionable 1 568 | 202 37 Glamourous 1 569 | 202 37 Cool 1 570 | 203 36 
Attractive 3 571 | 203 36 Fashionable 3 572 | 203 36 Glamourous 3 573 | 203 36 Cool 3 574 | 205 45 Attractive 3 575 | 205 45 Fashionable 3 576 | 205 45 Glamourous 3 577 | 205 45 Cool 3 578 | 207 19 Attractive 3 579 | 207 19 Fashionable 3 580 | 207 19 Glamourous 3 581 | 207 19 Cool 3 582 | 208 31 Attractive 4 583 | 208 31 Fashionable 4 584 | 208 31 Glamourous 4 585 | 208 31 Cool 4 586 | 209 35 Attractive 2 587 | 209 35 Fashionable 2 588 | 209 35 Glamourous 2 589 | 209 35 Cool 5 590 | 210 23 Attractive 2 591 | 210 23 Fashionable 2 592 | 210 23 Glamourous 2 593 | 210 23 Cool 3 594 | 211 31.06 Fashionable 2 595 | 211 31.06 Glamourous 2 596 | 211 31.06 Cool 2 597 | 211 31.06 Attractive 3 598 | 214 38 Glamourous 4 599 | 214 38 Attractive 5 600 | 214 38 Fashionable 5 601 | 214 38 Cool 5 602 | 215 30 Attractive 2 603 | 215 30 Fashionable 2 604 | 215 30 Glamourous 2 605 | 215 30 Cool 5 606 | 217 39 Attractive 3 607 | 217 39 Fashionable 3 608 | 217 39 Glamourous 3 609 | 217 39 Cool 3 610 | 218 34 Attractive 1 611 | 218 34 Fashionable 1 612 | 218 34 Glamourous 1 613 | 218 34 Cool 1 614 | 220 38 Attractive 3 615 | 220 38 Fashionable 3 616 | 220 38 Glamourous 3 617 | 220 38 Cool 3 618 | 221 40 Attractive 3 619 | 221 40 Glamourous 3 620 | 221 40 Cool 3 621 | 221 40 Fashionable 4 622 | 222 27 Attractive 2 623 | 222 27 Fashionable 2 624 | 222 27 Glamourous 2 625 | 222 27 Cool 3 626 | 224 33 Fashionable 2 627 | 224 33 Cool 2 628 | 224 33 Attractive 3 629 | 224 33 Glamourous 3 630 | 226 39.47 Attractive 3 631 | 226 39.47 Fashionable 3 632 | 226 39.47 Glamourous 3 633 | 226 39.47 Cool 3 634 | 227 31 Attractive 2 635 | 227 31 Fashionable 2 636 | 227 31 Glamourous 2 637 | 227 31 Cool 2 638 | 228 24 Attractive 3 639 | 228 24 Fashionable 3 640 | 228 24 Glamourous 3 641 | 228 24 Cool 3 642 | 229 49 Fashionable 3 643 | 229 49 Glamourous 3 644 | 229 49 Cool 4 645 | 229 49 Attractive 5 646 | 230 30 Cool 2 647 | 230 30 Glamourous 3 648 | 230 30 Attractive 5 649 | 230 30 Fashionable 5 650 | 231 34 Attractive 4 651 | 231 34 Fashionable 4 652 | 231 34 Glamourous 4 653 | 231 34 Cool 4 654 | 232 32 Attractive 1 655 | 232 32 Fashionable 1 656 | 232 32 Glamourous 1 657 | 232 32 Cool 1 658 | 234 44 Attractive 3 659 | 234 44 Fashionable 3 660 | 234 44 Glamourous 3 661 | 234 44 Cool 3 662 | 235 35 Attractive 2 663 | 235 35 Fashionable 2 664 | 235 35 Glamourous 2 665 | 235 35 Cool 4 666 | 236 24 Attractive 3 667 | 236 24 Fashionable 3 668 | 236 24 Glamourous 3 669 | 236 24 Cool 3 670 | 237 32 Fashionable 2 671 | 237 32 Glamourous 2 672 | 237 32 Cool 2 673 | 237 32 Attractive 3 674 | 238 45 Attractive 5 675 | 238 45 Fashionable 5 676 | 238 45 Glamourous 5 677 | 238 45 Cool 5 678 | 240 14 Attractive 2 679 | 240 14 Fashionable 2 680 | 240 14 Glamourous 2 681 | 240 14 Cool 2 682 | 242 36 Fashionable 2 683 | 242 36 Glamourous 2 684 | 242 36 Attractive 3 685 | 242 36 Cool 4 686 | 243 41 Attractive 3 687 | 243 41 Fashionable 3 688 | 243 41 Glamourous 3 689 | 243 41 Cool 3 690 | 245 31 Attractive 3 691 | 245 31 Glamourous 3 692 | 245 31 Fashionable 4 693 | 245 31 Cool 4 694 | 247 26 Attractive 2 695 | 247 26 Fashionable 2 696 | 247 26 Glamourous 2 697 | 247 26 Cool 2 698 | 248 47 Attractive 3 699 | 248 47 Fashionable 3 700 | 248 47 Glamourous 3 701 | 248 47 Cool 3 702 | 250 29 Attractive 3 703 | 250 29 Fashionable 3 704 | 250 29 Glamourous 3 705 | 250 29 Cool 3 706 | 251 22 Attractive 3 707 | 251 22 Fashionable 3 708 | 251 22 Glamourous 3 709 | 251 22 Cool 3 710 | 255 25 Attractive 2 711 | 255 25 Fashionable 2 712 | 255 25 Glamourous 2 
713 | 255 25 Cool 2 714 | 256 23 Attractive 2 715 | 256 23 Fashionable 2 716 | 256 23 Glamourous 2 717 | 256 23 Cool 2 718 | 257 22 Glamourous 2 719 | 257 22 Cool 2 720 | 257 22 Attractive 3 721 | 257 22 Fashionable 3 722 | 258 51 Glamourous 3 723 | 258 51 Attractive 4 724 | 258 51 Fashionable 5 725 | 258 51 Cool 5 726 | 260 30 Cool 3 727 | 260 30 Fashionable 4 728 | 260 30 Glamourous 4 729 | 260 30 Attractive 5 730 | 264 26 Attractive 2 731 | 264 26 Fashionable 2 732 | 264 26 Glamourous 3 733 | 264 26 Cool 3 734 | 265 34 Attractive 2 735 | 265 34 Fashionable 2 736 | 265 34 Glamourous 2 737 | 265 34 Cool 2 738 | 266 41.39 Attractive 4 739 | 266 41.39 Fashionable 4 740 | 266 41.39 Glamourous 4 741 | 266 41.39 Cool 5 742 | 267 42 Attractive 3 743 | 267 42 Fashionable 3 744 | 267 42 Glamourous 3 745 | 267 42 Cool 4 746 | 268 33 Fashionable 2 747 | 268 33 Glamourous 3 748 | 268 33 Cool 3 749 | 268 33 Attractive 4 750 | 269 36 Attractive 3 751 | 269 36 Fashionable 3 752 | 269 36 Glamourous 3 753 | 269 36 Cool 3 754 | 270 37 Attractive 5 755 | 270 37 Fashionable 5 756 | 270 37 Glamourous 5 757 | 270 37 Cool 5 758 | 271 30 Attractive 3 759 | 271 30 Glamourous 3 760 | 271 30 Fashionable 4 761 | 271 30 Cool 4 762 | 272 36 Attractive 3 763 | 272 36 Fashionable 3 764 | 272 36 Glamourous 3 765 | 272 36 Cool 3 766 | 273 36 Attractive 3 767 | 273 36 Fashionable 3 768 | 273 36 Glamourous 3 769 | 273 36 Cool 3 770 | 274 38 Attractive 3 771 | 274 38 Fashionable 4 772 | 274 38 Glamourous 4 773 | 274 38 Cool 5 774 | 275 42 Attractive 4 775 | 275 42 Fashionable 4 776 | 275 42 Glamourous 4 777 | 275 42 Cool 4 778 | -------------------------------------------------------------------------------- /plots/Fertility.csv: -------------------------------------------------------------------------------- 1 | Country,fertility_rate,contraception,region Afghanistan,6.9,NA,Asia Albania,2.6,NA,Europe Algeria,3.81,52,Africa American-Samoa,NA,NA,Oceania Andorra,NA,NA,Europe Angola,6.69,NA,Africa Antigua,NA,53,Americas Argentina,2.62,NA,Americas Armenia,1.7,22,Europe Australia,1.89,76,Oceania Austria,1.42,71,Europe Azerbaijan,2.3,17,Asia Bahamas,1.95,62,Americas Bahrain,2.97,53,Asia Bangladesh,3.14,49,Asia Barbados,1.73,55,Americas Belarus,1.4,50,Europe Belgium,1.62,79,Europe Belize,3.66,47,Americas Benin,5.83,16,Africa Bhutan,5.89,19,Asia Bolivia,4.36,45,Americas Bosnia,1.4,NA,Europe Botswana,4.45,33,Africa Brazil,2.17,74,Americas Brunei,2.7,NA,Asia Bulgaria,1.45,NA,Europe Burkina-Faso,6.57,8,Africa Burundi,6.28,9,Africa Cambodia,4.5,NA,Asia Cameroon,5.3,16,Africa Canada,1.61,66,Americas Cape-Verde,3.56,NA,Africa Central-African-Rep,4.95,24,Africa Chad,5.51,NA,Africa Chile,2.44,30,Americas China,1.8,83,Asia Colombia,2.69,72,Americas Comoros,5.51,21,Africa Congo,5.87,NA,Africa Cook-Islands,3.5,38,Oceania Costa-Rica,2.95,75,Americas Croatia,1.6,NA,Europe Cuba,1.55,70,Americas Cyprus,2.31,NA,Europe Czech-Republic,1.4,69,Europe Dem-Rep-of-the-Congo,6.24,8,Africa Denmark,1.82,78,Europe Djibouti,5.39,NA,Africa Dominica,NA,50,Americas Dominican-Republic,2.8,64,Americas East-Timor,4.32,NA,Oceania Ecuador,3.1,57,Americas Egypt,3.4,47,Africa El-Salvador,3.09,53,Americas Equatorial-Guinea,5.51,NA,Africa Eritrea,5.34,5,Africa Estonia,1.3,70,Europe Ethiopia,7,4,Africa Fiji,2.76,40,Oceania Finland,1.83,NA,Europe France,1.63,77,Europe French-Guiana,NA,NA,Americas French-Polynesia,2.85,NA,Oceania Gabon,5.4,NA,Africa Gambia,5.2,12,Africa Gaza-Strip,8,NA,Asia Georgia,1.9,17,Europe Germany,1.3,75,Europe Ghana,5.28,20,Africa 
Greece,1.38,NA,Europe Grenada,NA,54,Americas Guadeloupe,2.1,NA,Americas Guam,3.04,NA,Oceania Guatemala,4.9,31,Americas Guinea,6.61,2,Africa Guinea-Bissau,5.42,NA,Africa Guyana,2.32,NA,Americas Haiti,4.6,18,Americas Honduras,4.3,47,Americas Hong-Kong,1.32,86,Asia Hungary,1.4,73,Europe Iceland,2.19,NA,Europe India,3.07,41,Asia Indonesia,2.63,55,Oceania Iran,4.77,65,Asia Iraq,5.25,14,Asia Ireland,1.8,NA,Europe Israel,2.75,NA,Asia Italy,1.19,NA,Europe Ivory-Coast,5.1,11,Africa Jamaica,2.44,62,Americas Japan,1.48,59,Oceania Jordan,5.13,35,Asia Kazakhstan,2.3,59,Asia Kenya,4.85,33,Africa Kiribati,3.8,37,Oceania Korea-Dem-Peoples-Rep,2.1,62,Asia Korea-Republic-of,1.65,79,Asia Kuwait,2.77,35,Asia Kyrgyzstan,3.21,31,Asia Laos,6.69,19,Asia Latvia,1.4,47,Europe Lebanon,2.75,NA,Asia Lesotho,4.86,23,Africa Liberia,6.33,6,Africa Libya,5.92,40,Africa Liechtenstein,1.45,NA,Europe Lithuania,1.5,59,Europe Luxembourg,1.76,NA,Europe Macau,1.6,NA,Asia Macedonia,1.9,NA,Europe Madagascar,5.65,17,Africa Malawi,6.69,22,Africa Malaysia,3.24,48,Oceania Maldives,6.8,NA,Asia Mali,6.6,7,Africa Malta,2.1,NA,Europe Marshall-Islands,4.49,27,Oceania Martinique,2,NA,Americas Mauritania,5.03,3,Africa Mauritius,2.28,75,Africa Mexico,2.75,53,Americas Micronesia,5.6,NA,Oceania Moldova,1.8,22,Europe Monaco,NA,NA,Europe Mongolia,3.27,61,Asia Morocco,3.1,50,Africa Mozambique,6.06,NA,Africa Myanmar,3.3,17,Asia Namibia,4.9,29,Africa Nepal,4.95,29,Asia Netherlands,1.55,78,Europe Netherlands-Antilles,2.1,NA,Americas New-Caledonia,2.53,25,Oceania New-Zealand,2.02,69,Oceania Nicaragua,3.85,49,Americas Niger,7.1,4,Africa Nigeria,5.97,6,Africa Northern-Mariana-Islands,5.11,NA,Oceania Norway,1.88,76,Europe Oman,7.2,9,Asia Pakistan,5.02,18,Asia Palau,3,NA,Oceania Panama,2.63,64,Americas Papua-New-Guinea,4.65,NA,Oceania Paraguay,4.17,56,Americas Peru,2.98,64,Americas Philippines,3.62,40,Oceania Poland,1.65,NA,Europe Portugal,1.48,66,Europe Puerto-Rico,2.1,64,Americas Qatar,3.77,32,Asia Reunion,2.1,67,Africa Romania,1.4,57,Europe Russia,1.35,32,Europe Rwanda,6,21,Africa Saint-Kitts,2.63,41,Americas Saint-Lucia,3.82,47,Americas Samoa,3.8,34,Oceania San-Marino,NA,NA,Europe Sao-Tome,NA,NA,Africa Saudi-Arabia,5.9,NA,Asia Senegal,5.62,13,Africa Seychelles,2.59,NA,Africa Sierra-Leone,6.06,NA,Africa Singapore,1.79,74,Oceania Slovakia,1.5,74,Europe Slovenia,1.3,NA,Europe Solomon-Islands,4.98,NA,Oceania Somalia,7,NA,Africa South-Africa,3.81,50,Africa Spain,1.22,59,Europe Sri-Lanka,2.1,66,Asia St-Vincent,3.86,58,Americas Sudan,4.61,8,Africa Suriname,2.39,NA,Americas Swaziland,4.46,20,Africa Sweden,1.8,78,Europe Switzerland,1.46,71,Europe Syria,4,36,Asia Tajikistan,3.93,21,Asia Tanzania,5.48,18,Africa Thailand,1.74,74,Asia Togo,6.08,12,Africa Tonga,4.02,74,Oceania Trinidad-and-Tobago,2.1,53,Americas Tunisia,2.92,60,Africa Turkey,2.5,63,Asia Turkmenistan,3.58,20,Asia Tuvalu,NA,NA,Oceania Uganda,7.1,15,Africa Ukraine,1.38,23,Europe United-Arab-Emirates,3.46,NA,Asia United-Kingdom,1.72,82,Europe United-States,1.96,71,Americas Uruguay,2.25,NA,Americas Uzbekistan,3.48,56,Asia Vanuatu,4.36,15,Oceania Venezuela,2.98,52,Americas Viet-Nam,2.97,65,Asia Virgin-Islands,3.03,NA,Americas Western-Sahara,3.98,NA,Africa Yemen,7.6,7,Asia Yugoslavia,1.8,NA,Europe Zambia,5.49,25,Africa Zimbabwe,4.68,48,Africa -------------------------------------------------------------------------------- /plots/InternetUsers.RData: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/plots/InternetUsers.RData -------------------------------------------------------------------------------- /plots/ggplot_scripts.R: -------------------------------------------------------------------------------- 1 | # Set the working directory 2 | setwd("/Users/rcordell/Documents/R/W203") 3 | 4 | # Load the ggplot2 library 5 | library(ggplot2) 6 | 7 | # Load the Facebook data into a dataframe 8 | facebookData = read.delim("FacebookNarcissism.dat", header = TRUE) 9 | 10 | # experiment with different graph elements 11 | graph <- ggplot(facebookData, aes(NPQC_R_Total, Rating)) 12 | # simple scatterplot 13 | graph + geom_point() 14 | # scatterplot with solid triangles instead of circles 15 | graph + geom_point(shape = 17) 16 | # scatterplot with squares with x's in them 17 | graph + geom_point(shape = 7) 18 | # scatterplot with big squares with x's in them 19 | graph + geom_point(shape = 7, size = 6) 20 | # scatterplot with large right-side up triangles 21 | graph + geom_point(shape = 2, size = 6) 22 | # scatterplot with large + signs 23 | graph + geom_point(shape = 3, size = 6) 24 | # scatterplot with large X's 25 | graph + geom_point(shape = 4, size = 6) 26 | # scatterplot with large diamonds 27 | graph + geom_point(shape = 5, size = 6) 28 | # scatterplot with default shape and size, but vary color by the variable "Rating_Type" 29 | # Rating_Type is a 4-level factor. A legend for the colors is shown on the right 30 | graph + geom_point(aes(color = Rating_Type)) 31 | # scatterplot with red dots 32 | graph + geom_point(color = "RED") 33 | # scatterplot that uses jitter to spread the points out so they're not on top of one another 34 | # and vary color by Rating_Type 35 | graph + geom_point(aes(color = Rating_Type), position = "jitter") 36 | # vary shape by "Rating_Type".
A legend for shapes is shown on the right 37 | graph + geom_point(aes(shape = Rating_Type), position = "jitter") 38 | # 39 | # 40 | # Let's play with a different data set, Exam Anxiety 41 | examData <- read.delim("Exam Anxiety.dat", header = TRUE) 42 | summary(examData) 43 | # scatter plot of Exam data 44 | scatter <- ggplot(examData, aes(Anxiety, Exam)) 45 | scatter + geom_point() 46 | # custom labels for the scatter plot 47 | scatter + geom_point() + labs(x = "Exam Anxiety", y = "Exam Performance %") 48 | # add a smoothed curve that follows the trend of the data 49 | scatter + geom_point() + geom_smooth() + labs(x = "Exam Anxiety", y = "Exam Performance %") 50 | # add a line that follows a linear model of the data 51 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 52 | # change the color of the line to RED 53 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED") + labs(x = "Exam Anxiety", y = "Exam Performance %") 54 | # remove the envelope from the line 55 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED", se = F) + labs(x = "Exam Anxiety", y = "Exam Performance %") 56 | # change the color of the envelope and line to blue and make it transparent 57 | scatter + geom_point() + geom_smooth(method = "lm", alpha = 0.1, fill = "Blue") + labs(x = "Exam Anxiety", y = "Exam Performance %") 58 | # color the scatterplot points by gender - this doesn't work because an aesthetic mapping has to go inside aes(), not be passed as a separate argument to ggplot() 59 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 60 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 61 | # scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 62 | # scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 63 | 64 | scatter + geom_point() 65 | scatter + geom_point() + geom_smooth(method = "lm") 66 | scatter + geom_point() + geom_smooth(method = "lm", aes(fill = Gender), alpha = 0.1) 67 | # fit a separate regression line for each Gender and color the envelope accordingly 68 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) 69 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 70 | scatter <- ggplot(examData, aes(Anxiety, Exam, color = Gender)) 71 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 72 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 73 | 74 | # Now let's graph some of the Festival Hygiene data (note: the graph + ... lines that follow still use the Facebook plot object) 75 | festivalData <- read.delim("DownloadFestival.dat", header = TRUE) 76 | 77 | graph + geom_point() 78 | graph + geom_point() + geom_smooth(method = "lm") 79 | graph + geom_point() + geom_smooth(method = "lm", se = F) 80 | graph + geom_point() + geom_smooth(method = "lm", aes(color = Rating_Type)) 81 | graph + geom_point() + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) 82 | graph + geom_point(aes(color = Rating_Type)) + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) 83 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) 84 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color
= Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 85 | graph + geom_point(aes(color = Rating_Type)) + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 86 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 87 | graph + geom_smooth(aes(color = Rating_Type)) + labs(x = "Total Score", y = "Narcissism Rating") 88 | 89 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 90 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Narcissism (NPQC)", y = "Facebook Picture Rating") 91 | 92 | # Create a histogram of festival data 93 | festivalHistogram <- ggplot(festivalData, aes(day1)) 94 | festivalHistogram + geom_histogram() 95 | 96 | # Adjust bin width 97 | festivalHistogram + geom_histogram(binwidth = 0.4) 98 | festivalHistogram + geom_histogram(binwidth = 0.4) + labs(x = "Hygiene (Day 1)", y = "Frequency") 99 | 100 | festivalHistogram <- ggplot(festivalData, aes(day1), theme(legend.position = "none")) 101 | festivalHistogram + geom_histogram(binwidth = 0.4) + labs(x = "Hygiene (Day 1)", y = "Frequency") 102 | festivalHistogram <- ggplot(festivalData, aes(day1)) + theme(legend.position = "none") 103 | festivalHistogram + geom_histogram(binwidth = 0.4) + labs(x = "Hygiene (Day 1)", y = "Frequency") 104 | festivalHistogram <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none") 105 | festivalBoxplot <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none") 106 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)") 107 | 108 | # Order the data to look for outliers 109 | festivalData <- festivalData[order(festivalData$day1),] 110 | festivalData 111 | 112 | # Use R Commander to edit the data to correct the error 113 | library(Rcmdr) 114 | 115 | festivalBoxplot <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none") 116 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)") 117 | festivalHistogram <- ggplot(festivalData, aes(day1)) + theme(legend.position = "none") 118 | festivalHistogram + geom_histogram() 119 | 120 | festivalHistogram + geom_histogram(binwidth = 0.4) 121 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)") 122 | festivalBoxplot <- ggplot(festivalData, aes(gender, day2)) + theme(legend.position = "none") 123 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 2 of Festival)") 124 | 125 | festivalBoxplot <- ggplot(festivalData, aes(gender, day3)) + theme(legend.position = "none") 126 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 3 of Festival)") 127 | 128 | density <- ggplot(festivalData, aes(day1)) 129 | density + geom_density() 130 | 131 | # Create the function outlierSummary as given by the book 132 | outlierSummary <- function(variable, digits = 2){ 133 | 134 | zvariable<-(variable-mean(variable, na.rm = TRUE))/sd(variable, na.rm = TRUE) 135 | 136 | outlier95<-abs(zvariable) >= 1.96 137 | outlier99<-abs(zvariable) >= 2.58 138 | outlier999<-abs(zvariable) >= 3.29 139 | 140 | ncases<-length(na.omit(zvariable)) 141 | 142 |
# Re-draw the plots after correcting the outlier
festivalBoxplot <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none")
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")
festivalHistogram <- ggplot(festivalData, aes(day1)) + theme(legend.position = "none")
festivalHistogram + geom_histogram()

festivalHistogram + geom_histogram(binwidth = 0.4)
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")
festivalBoxplot <- ggplot(festivalData, aes(gender, day2)) + theme(legend.position = "none")
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 2 of Festival)")

festivalBoxplot <- ggplot(festivalData, aes(gender, day3)) + theme(legend.position = "none")
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 3 of Festival)")

# Density plot of day 1 hygiene
density <- ggplot(festivalData, aes(day1))
density + geom_density()

# Create the function outlierSummary as given by the book
outlierSummary <- function(variable, digits = 2){

  zvariable <- (variable - mean(variable, na.rm = TRUE))/sd(variable, na.rm = TRUE)

  outlier95 <- abs(zvariable) >= 1.96
  outlier99 <- abs(zvariable) >= 2.58
  outlier999 <- abs(zvariable) >= 3.29

  ncases <- length(na.omit(zvariable))

  percent95 <- round(100*length(subset(outlier95, outlier95 == TRUE))/ncases, digits)
  percent99 <- round(100*length(subset(outlier99, outlier99 == TRUE))/ncases, digits)
  percent999 <- round(100*length(subset(outlier999, outlier999 == TRUE))/ncases, digits)

  cat("Absolute z-score greater than 1.96 = ", percent95, "%", "\n")
  cat("Absolute z-score greater than 2.58 = ", percent99, "%", "\n")
  cat("Absolute z-score greater than 3.29 = ", percent999, "%", "\n")
}

outlierSummary(festivalData$day2)

# Now let's use the ChickFlick data
chickFlick <- read.delim("ChickFlick.dat", header = TRUE)

# Bar chart of mean arousal per film, with point-range confidence intervals
bar <- ggplot(chickFlick, aes(film, arousal))
bar + stat_summary(fun.y = mean, geom = "bar", fill = "White", color = "Black")
bar + stat_summary(fun.y = mean, geom = "bar", fill = "White", color = "Black") + stat_summary(fun.data = mean_cl_normal, geom = "pointrange")
bar + stat_summary(fun.y = mean, geom = "bar", fill = "White", color = "Black") + stat_summary(fun.data = mean_cl_boot, geom = "pointrange")

# Grouped bars: fill by gender, dodge the bars, and add normal-theory error bars
bar <- ggplot(chickFlick, aes(film, arousal, fill = gender))
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2)
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2) + labs(x = "Film", y = "Mean Arousal", fill = "Gender")

# Fill by film instead
bar <- ggplot(chickFlick, aes(film, arousal, fill = film))
bar + stat_summary(fun.y = mean, geom = "bar")
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2)

# Let's see how facets work
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender)
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender) + labs(x = "Film", y = "Mean Arousal") + theme(legend.position = "none")

# Change the colour of the bars (note: the value names must match the levels of
# the fill aesthetic, which here is film, not gender)
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender) + labs(x = "Film", y = "Mean Arousal") + theme(legend.position = "none") + scale_fill_manual(values = c("Female" = "Blue", "Male" = "Green"))

# Remove the legend
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender) + labs(x = "Film", y = "Mean Arousal") + theme(legend.position = "none")

bar <- ggplot(chickFlick, aes(film, arousal, fill = gender))
# scale_fill_manual() passed as an argument to ggplot() is ignored; add it with + instead
bar <- ggplot(chickFlick, aes(film, arousal, fill = gender), scale_fill_manual("Gender", values = c("Female" = "Blue", "Male" = "Green")))
bar + stat_summary(fun.y = mean, geom = "bar")
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge")
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + scale_fill_manual("Gender", values = c("Female" = "Blue", "Male" = "Green"))
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633"))
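# --- Added note (not part of the original notes) ---------------------------------
# mean_cl_normal and mean_cl_boot are wrappers that ggplot2 provides around
# functions from the Hmisc package, so Hmisc must be installed for those
# stat_summary() calls to work. Recent ggplot2 releases also renamed the fun.y
# argument to fun (the old spelling still runs, with a deprecation warning).
# A minimal restatement of the grouped bar chart with those points in mind:
# install.packages("Hmisc")   # uncomment if Hmisc is not installed
ggplot(chickFlick, aes(film, arousal, fill = gender)) +
  stat_summary(fun = mean, geom = "bar", position = "dodge") +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar",
               position = position_dodge(width = 0.90), width = 0.2) +
  labs(x = "Film", y = "Mean Arousal", fill = "Gender")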
"dodge") + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633")) 184 | 185 | bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633")) 186 | bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.90)) + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633")) 187 | 188 | # From Section 4 class - working with z-values and normal distrubutions 189 | # Population mean = 10,000, Standard error = 1000/sqrt(10) where 10 = sample size and 1000 is population standard deviation, and 10,300 is the sample mean we're testing 190 | # This will give P(sample mean < 10,300) 191 | pnorm(10300,10000,316.22) 192 | [1] 0.8286151 193 | 194 | # This will give P(sample mean > 10,300) 195 | 1 - pnorm(10300,10000,316.22) 196 | 197 | # Checking the Standard Error value 198 | 1000/sqrt(10) 199 | 200 | # Now check to see what is probability of sample mean between 10,500 and 9,500 201 | # First find P(sample mean < 10500) 202 | pnorm(10500, 10000, 316.22) 203 | 204 | # Hmmmm - it's going to be the same for 9,500 - so if we take the probability of the two tails, so to speak 205 | 2 * (1 - pnorm(10500, 10000, 316.22)) 206 | 207 | # I wonder if it is mor accurate to put in the SE computation 208 | 2 * (1 - pnorm(10500, 10000, 1000/sqrt(10))) 209 | 210 | # Actually we want 1 - P 211 | 1 - 2 * (1 - pnorm(10500, 10000, 1000/sqrt(10))) 212 | # Here's a better way to do it using the z-values (need to look at the pnorm function in more detail) 213 | pnorm(-1.58) - pnorm(1.58) 214 | 215 | # And another way 216 | 1 - 2 * pnorm(1.58, lower.tail = FALSE) 217 | --------------------------------------------------------------------------------