├── AnalysisNotes.R ├── Assumptions.R ├── anova ├── .Rapp.history ├── ANOVA_demo.R ├── Contrast.dat ├── Dummy.dat ├── Videos_clean.Rdata ├── anova.R ├── anove_viagra.R └── untitled.R ├── common_coding_issues.R ├── correlation ├── Exam Anxiety.dat ├── The Biggest Liar.dat ├── correlation.R └── correlation_chi_square_demo.R ├── linear regression ├── .Rapp.history ├── Album Sales 1.dat ├── Album Sales 2.dat ├── Chapter 7 DSUR Regression.R ├── Countries3.Rdata ├── Linear Regression.R ├── simple_regression.R └── simple_regression_transformation.R ├── logistic regression ├── .Rapp.history ├── Chat-Up Lines.dat ├── Lacourse et al. (2001) Females.dat ├── eel.dat ├── logistic regression.R └── penalty.dat ├── multiple regression ├── .Rapp.history ├── Album Sales 2.dat ├── Chamorro-Premuzic.dat ├── GlastonburyFestivalRegression.dat ├── multiple regression.R └── neste-model.r └── plots ├── .Rapp.history ├── ChickFlick.dat ├── DownloadFestival.dat ├── Exam Anxiety.dat ├── FacebookNarcissism.dat ├── Fertility.csv ├── InternetUsers.RData └── ggplot_scripts.R /AnalysisNotes.R: -------------------------------------------------------------------------------- 1 | # This is a list of commands and processes to use with R when exploring data set 2 | 3 | # Set the working directory, usually where the data files are located 4 | setwd("~/OneDrive/MIDS/W203/Data/08") 5 | 6 | # Load a data file. There are several types, from delimited data, CSV files and RData file 7 | dataframe <- read.delim("handlebars.dat", header=TRUE) 8 | 9 | # CSV file 10 | dataframe <- read.csv("Removal_Requests.csv", header=TRUE) 11 | 12 | # RData file restores a data session 13 | load("Countries2.RData") 14 | 15 | ########################################################################################## 16 | 17 | # Summarize a dataframe 18 | summary(dataframe) 19 | 20 | # list the items in the environment 21 | ls() 22 | # list the items and describe each - good to see a dataframe and the type of each vector or column 23 | ls.str() 24 | 25 | # dump the top 5 rows of a data frame 26 | head(dataframe) 27 | head(Countries) 28 | 29 | ########################################################################################## 30 | 31 | # Examine Data visually 32 | 33 | # Load the ggplot2 library 34 | library(ggplot2) 35 | 36 | 37 | # Load the Facebook data into a dataframe 38 | facebookData = read.delim("~/OneDrive/MIDS/W203/Data/04/FacebookNarcissism.dat", header = TRUE) 39 | 40 | # scatterplot with different elements 41 | graph <- ggplot(facebookData, aes(NPQC_R_Total, Rating)) 42 | graph + geom_point() 43 | graph + geom_point(shape = 7, size = 6) 44 | graph + geom_point(aes(color = Rating_Type)) 45 | graph + geom_point(color = "RED") 46 | graph + geom_point(aes(color = Rating_Type), position = "jitter") 47 | graph + geom_point(aes(shape = Rating_Type), position = "jitter") 48 | 49 | # add a smoothing line 50 | # color can also be a factor in the data frame. 
51 | # the method parameter can be left off for the default, which is not linear 52 | graph + geom_point() + geom_smooth(method = "lm", alpha=0.1, colour="red", fill="blue") + labs(x = "Narcisisim", y = "Rating") 53 | 54 | # The car library has a nice default scatter plot 55 | library(car) 56 | scatterplot(Countries$gdp, Countries$fertility_rate) 57 | head(Countries) 58 | 59 | # histograms 60 | -------------------------------------------------------------------------------- /Assumptions.R: -------------------------------------------------------------------------------- 1 | #--------------------------------------------------------------------------------------------------------- 2 | #R Code for Chapter 5 of: 3 | # 4 | #Field, A. P., Miles, J. N. V., & Field, Z. C. (2012). Discovering Statistics Using R: and Sex and Drugs and Rock 'N' Roll. #London Sage 5 | # 6 | #(c) 2011 Andy P. Field, Jeremy N. V. Miles & Zoe C. Field 7 | #----------------------------------------------------------------------------------------------------------- 8 | 9 | #----Set the working directory------ 10 | setwd("~/Dropbox/Zoe/R Book Chapter 7 Stuff") 11 | setwd("~/Documents/Academic/Data/DSU_R/Chapter 05 (Exploring Data)") 12 | imageDirectory<-"~/Documents/Academic/Books/Discovering Statistics/DSU R/DSU R I/DSUR I Images" 13 | 14 | setwd("~/Public/Academic/Data/DSU_R/Chapter 05 (Exploring Data)") 15 | imageDirectory<-"~/Public/Academic/Books/Discovering Statistics/DSU R/DSU R I/DSUR I Images" 16 | 17 | 18 | 19 | names(dlf) 20 | 21 | #----Install and Load Packages----- 22 | install.packages("car") 23 | install.packages("ggplot2") 24 | install.packages("pastecs") 25 | install.packages("psych") 26 | 27 | library(car) 28 | library(ggplot2) 29 | library(pastecs) 30 | library(psych) 31 | library(Rcmdr) 32 | 33 | #Read in the download data: 34 | 35 | dlf <- read.delim("DownloadFestival.dat", header=TRUE) 36 | 37 | #Remove the outlier from the day1 hygiene score 38 | dlf$day1 <- ifelse(dlf$day1 > 20, NA, dlf$day1) 39 | 40 | 41 | 42 | #Histograms for hygiene scores on day 1, day 2 and day 3. 
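# (Note, not in the original script: this file uses opts(), which was removed from ggplot2
#  in favour of theme(), and ..density.. is now written after_stat(density) in current
#  releases. A minimal sketch of the day 1 histogram in current ggplot2 syntax, reusing the
#  dlf data frame loaded above; the object name hist.day1.modern is illustrative only.)
hist.day1.modern <- ggplot(dlf, aes(day1)) +
  theme(legend.position = "none") +
  geom_histogram(aes(y = after_stat(density)), colour = "black", fill = "white") +
  labs(x = "Hygiene score on day 1", y = "Density")
hist.day1.modern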
43 | 44 | #Histogram for day 1: 45 | 46 | hist.day1 <- ggplot(dlf, aes(day1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Hygiene score on day 1", y = "Density") 47 | hist.day1 48 | 49 | #Histogram for day 2: 50 | hist.day2 <- ggplot(dlf, aes(day2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Hygiene score on day 2", y = "Density") 51 | hist.day2 52 | 53 | #Histogram for day 3: 54 | 55 | hist.day3 <- ggplot(dlf, aes(day3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Hygiene score on day 3", y = "Density") 56 | hist.day3 57 | 58 | #Add the curves to the Histograms: 59 | 60 | hist.day1 + stat_function(fun = dnorm, args = list(mean = mean(dlf$day1, na.rm = TRUE), sd = sd(dlf$day1, na.rm = TRUE)), colour = "black", size = 1) 61 | 62 | ggsave(file = paste(imageDirectory,"05 DLF Day 1 Hist.png",sep="/")) 63 | 64 | hist.day2 + stat_function(fun = dnorm, args = list(mean = mean(dlf$day2, na.rm = TRUE), sd = sd(dlf$day2, na.rm = TRUE)), colour = "black", size = 1) 65 | 66 | ggsave(file = paste(imageDirectory,"05 DLF Day 2 Hist.png",sep="/")) 67 | 68 | 69 | hist.day3 + stat_function(fun = dnorm, args = list(mean = mean(dlf$day3, na.rm = TRUE), sd = sd(dlf$day3, na.rm = TRUE)), colour = "black", size = 1) 70 | 71 | ggsave(file = paste(imageDirectory,"05 DLF Day 3 Hist.png",sep="/")) 72 | 73 | 74 | #Q-Q plot for day 1: 75 | qqplot.day1 <- qplot(sample = dlf$day1, stat="qq") 76 | qqplot.day1 77 | 78 | ggsave(file = paste(imageDirectory,"05 DLF Day 1 QQ.png",sep="/")) 79 | 80 | #Q-Q plot for day 2: 81 | 82 | qqplot.day2 <- qplot(sample = dlf$day2, stat="qq") 83 | qqplot.day2 84 | ggsave(file = paste(imageDirectory,"05 DLF Day 2 QQ.png",sep="/")) 85 | 86 | #Q-Q plot of the hygiene scores on day 3: 87 | qqplot.day3 <- qplot(sample = dlf$day3, stat="qq") 88 | qqplot.day3 89 | ggsave(file = paste(imageDirectory,"05 DLF Day 3 QQ.png",sep="/")) 90 | 91 | #Quantifying normality with numbers 92 | library(psych) #load the psych library, if you haven't already, for the describe() function. 93 | 94 | #Using the describe() function for a single variable. 95 | describe(dlf$day1) 96 | 97 | #Two alternative ways to describe multiple variables. 98 | describe(cbind(dlf$day1, dlf$day2, dlf$day3)) 99 | describe(dlf[,c("day1", "day2", "day3")]) 100 | 101 | library(pastecs) 102 | stat.desc(dlf$day1, basic = FALSE, norm = TRUE) 103 | 104 | stat.desc(cbind(dlf$day1, dlf$day2, dlf$day3), basic = FALSE, norm = TRUE) 105 | 106 | round(stat.desc(dlf[, c("day1", "day2", "day3")], basic = FALSE, norm = TRUE), digits = 3) 107 | 108 | 109 | 110 | #Read in R exam data. 
111 | rexam <- read.delim("rexam.dat", header=TRUE) 112 | 113 | #Set the variable uni to be a factor: 114 | rexam$uni<-factor(rexam$uni, levels = c(0:1), labels = c("Duncetown University", "Sussex University")) 115 | 116 | #Self test task: 117 | 118 | round(stat.desc(rexam[, c("exam", "computer", "lectures", "numeracy")], basic = FALSE, norm = TRUE), digits = 3) 119 | 120 | hexam <- ggplot(rexam, aes(exam)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "First Year Exam Score", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$exam, na.rm = TRUE), sd = sd(rexam$exam, na.rm = TRUE)), colour = "red", size = 1) 121 | hexam 122 | ggsave(file = paste(imageDirectory,"05 Rexam exam Hist.png",sep="/")) 123 | 124 | 125 | hcomputer <- ggplot(rexam, aes(computer)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "Computer Literacy", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$computer, na.rm = TRUE), sd = sd(rexam$computer, na.rm = TRUE)), colour = "red", size = 1) 126 | hcomputer 127 | ggsave(file = paste(imageDirectory,"05 Rexam computer Hist.png",sep="/")) 128 | 129 | hlectures <- ggplot(rexam, aes(lectures)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "Percentage of Lectures Attended", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$lectures, na.rm = TRUE), sd = sd(rexam$lectures, na.rm = TRUE)), colour = "red", size = 1) 130 | hlectures 131 | ggsave(file = paste(imageDirectory,"05 Rexam lectures Hist.png",sep="/")) 132 | 133 | hnumeracy <- ggplot(rexam, aes(numeracy)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x = "Numeracy", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(rexam$numeracy, na.rm = TRUE), sd = sd(rexam$numeracy, na.rm = TRUE)), colour = "red", size = 1) 134 | hnumeracy 135 | ggsave(file = paste(imageDirectory,"05 Rexam numeracy Hist.png",sep="/")) 136 | 137 | 138 | 139 | #Use by() to get descriptives for one variable, split by uni 140 | by(data=rexam$exam, INDICES=rexam$uni, FUN=describe) 141 | by(rexam$exam, rexam$uni, stat.desc, basic = FALSE, norm = TRUE) 142 | 143 | #Use by() to get descriptives for two variables, split by uni 144 | by(cbind(data=rexam$exam, data=rexam$numeracy), rexam$uni, describe) 145 | by(rexam[, c("exam", "numeracy")], rexam$uni, stat.desc,basic = FALSE, norm = TRUE) 146 | 147 | 148 | #Use describe for four variables in the rexam dataframe. 
149 | describe(cbind(rexam$exam, rexam$computer, rexam$lectures, rexam$numeracy)) 150 | 151 | #Use by() to get descriptives for four variables, split by uni 152 | by(data=cbind(rexam$exam, rexam$computer, rexam$lectures, rexam$numeracy), rexam$uni, describe) 153 | 154 | 155 | #Self test: 156 | #Use by() to get descriptives for computer literacy and percentage of lectures attended, split by uni 157 | by(cbind(data=rexam$computer, data=rexam$lectures), rexam$uni, describe) 158 | by(rexam[, c("computer", "lectures")], rexam$uni, stat.desc, basic = FALSE, norm = TRUE) 159 | 160 | 161 | 162 | #using subset to plot histograms for different groups: 163 | 164 | dunceData<-subset(rexam, rexam$uni=="Duncetown University") 165 | sussexData<-subset(rexam, rexam$uni=="Sussex University") 166 | 167 | hist.numeracy.duncetown <- ggplot(dunceData, aes(numeracy)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black", binwidth = 1) + labs(x = "Numeracy Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$numeracy, na.rm = TRUE), sd = sd(dunceData$numeracy, na.rm = TRUE)), colour = "red", size=1) 168 | hist.numeracy.duncetown 169 | ggsave(file = paste(imageDirectory,"05 dunce numeracy Hist.png",sep="/")) 170 | 171 | hist.exam.duncetown <- ggplot(dunceData, aes(exam)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "First Year Exam Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$exam, na.rm = TRUE), sd = sd(dunceData$exam, na.rm = TRUE)), colour = "red", size=1) 172 | hist.exam.duncetown 173 | ggsave(file = paste(imageDirectory,"05 dunce exam Hist.png",sep="/")) 174 | 175 | 176 | hist.numeracy.sussex <- ggplot(sussexData, aes(numeracy)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black", binwidth = 1) + labs(x = "Numeracy Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData $numeracy, na.rm = TRUE), sd = sd(sussexData $numeracy, na.rm = TRUE)), colour = "red", size=1) 177 | hist.numeracy.sussex 178 | ggsave(file = paste(imageDirectory,"05 sussex numeracy Hist.png",sep="/")) 179 | 180 | hist.exam.sussex <- ggplot(sussexData, aes(exam)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "First Year Exam Score", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData$exam, na.rm = TRUE), sd = sd(sussexData$exam, na.rm = TRUE)), colour = "red", size=1) 181 | hist.exam.sussex 182 | ggsave(file = paste(imageDirectory,"05 sussex exam Hist.png",sep="/")) 183 | 184 | #self test: 185 | 186 | dunceData<-subset(rexam, rexam$uni=="Duncetown University") 187 | sussexData<-subset(rexam, rexam$uni=="Sussex University") 188 | 189 | 190 | hist.computer.duncetown <- ggplot(dunceData, aes(computer)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Computer Literacy", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$computer, na.rm = TRUE), sd = sd(dunceData$computer, na.rm = TRUE)), colour = "red", size=1) 191 | hist.computer.duncetown 192 | ggsave(file = paste(imageDirectory,"05 dunce computer Hist.png",sep="/")) 193 | 194 | #To plot a histogram for percentage of lectures attended for Duncetown University we would execute: 195 | 196 | hist.lectures.duncetown <- ggplot(dunceData, 
aes(lectures)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Percentage of Lectures Attended", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(dunceData$lectures, na.rm = TRUE), sd = sd(dunceData$lectures, na.rm = TRUE)), colour = "red", size=1) 197 | hist.lectures.duncetown 198 | ggsave(file = paste(imageDirectory,"05 dunce lectures Hist.png",sep="/")) 199 | 200 | #To plot a histogram for computer literacy for Sussex University we would execute: 201 | 202 | hist.computer.sussex <- ggplot(sussexData, aes(computer)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Computer Literacy", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData$computer, na.rm = TRUE), sd = sd(sussexData$computer, na.rm = TRUE)), colour = "red", size=1) 203 | hist.computer.sussex 204 | ggsave(file = paste(imageDirectory,"05 sussex computer Hist.png",sep="/")) 205 | 206 | #To plot a histogram for percentage of lectures attended for Sussex University we would execute: 207 | 208 | hist.lectures.sussex <- ggplot(sussexData, aes(lectures)) + opts(legend.position = "none") + geom_histogram(aes(y = ..density..), fill = "white", colour = "black") + labs(x = "Percentage of Lectures Attended", y = "Density") + stat_function(fun=dnorm, args=list(mean = mean(sussexData$lectures, na.rm = TRUE), sd = sd(sussexData$lectures, na.rm = TRUE)), colour = "red", size=1) 209 | hist.lectures.sussex 210 | ggsave(file = paste(imageDirectory,"05 sussex lectures Hist.png",sep="/")) 211 | 212 | 213 | 214 | 215 | #Shapiro-Wilks test for exam and numeracy for whole sample 216 | shapiro.test(rexam$exam) 217 | shapiro.test(rexam$numeracy) 218 | 219 | 220 | 221 | #Shapiro-Wilks test for exam and numeracy split by university 222 | by(rexam$exam, rexam$uni, shapiro.test) 223 | by(rexam$numeracy, rexam$uni, shapiro.test) 224 | 225 | 226 | #qqplots for the two variables 227 | qplot(sample = rexam$exam, stat="qq") 228 | ggsave(file = paste(imageDirectory,"05 exam QQ.png",sep="/")) 229 | qplot(sample = rexam$numeracy, stat="qq") 230 | ggsave(file = paste(imageDirectory,"05 numeracy QQ.png",sep="/")) 231 | 232 | 233 | #Levene's test for comparison of variances of exam scores in the two universities. 
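# (Note, not in the original script: leveneTest() comes from the car package loaded above.
#  By default it centres each group at its median (the Brown-Forsythe variant); the second
#  call below, with center = mean, gives the original Levene's test.)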
234 | leveneTest(rexam$exam, rexam$uni) 235 | leveneTest(rexam$exam, rexam$uni, center = mean) 236 | leveneTest(rexam$numeracy, rexam$uni) 237 | 238 | 239 | #########Log, square root, and reciprocal transformation: 240 | dlf$day1LessThanOne <- dlf$day1 < 1 241 | dlf$day1LessThanorEqualOne <- dlf$day1 <= 1 242 | dlf$day1GreaterThanOne <- dlf$day1 > 1 243 | dlf$day1GreaterThanorEqualOne <- dlf$day1 >= 1 244 | 245 | #self-help tasks 246 | dlf$logday1 <- log(dlf$day1 + 1) 247 | dlf$logday2 <- log(dlf$day2 + 1) 248 | dlf$logday3 <- log(dlf$day3 + 1) 249 | 250 | #Histograms of the log transformed scores: 251 | 252 | #Histogram for logday1: 253 | 254 | hist.logday1 <- ggplot(dlf, aes(logday1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Log Transformed Hygiene Score on Day 1", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$logday1, na.rm = TRUE), sd = sd(dlf$logday1, na.rm = TRUE)), colour = "red", size = 1) 255 | hist.logday1 256 | ggsave(file = paste(imageDirectory,"05 DLF Log Day 1 Hist.png",sep="/")) 257 | 258 | 259 | 260 | #Histogram for logday2: 261 | 262 | hist.logday2 <- ggplot(dlf, aes(logday2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Log Transformed Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$logday2, na.rm = TRUE), sd = sd(dlf$logday2, na.rm = TRUE)), colour = "red", size = 1) 263 | hist.logday2 264 | ggsave(file = paste(imageDirectory,"05 DLF Log Day 2 Hist.png",sep="/")) 265 | 266 | 267 | #Histogram for logday3: 268 | 269 | hist.logday3 <- ggplot(dlf, aes(logday3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Log Transformed Hygiene Score on Day 3", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$logday3, na.rm = TRUE), sd = sd(dlf$logday3, na.rm = TRUE)), colour = "red", size = 1) 270 | hist.logday3 271 | ggsave(file = paste(imageDirectory,"05 DLF Log Day 3 Hist.png",sep="/")) 272 | 273 | #Create square root scores 274 | 275 | dlf$sqrtday1 <- sqrt(dlf$day1) 276 | dlf$sqrtday2 <- sqrt(dlf$day2) 277 | dlf$sqrtday3 <- sqrt(dlf$day3) 278 | 279 | #Histograms of the square root transformed scores: 280 | 281 | #Histogram for sqrtday1: 282 | 283 | hist.sqrtday1 <- ggplot(dlf, aes(sqrtday1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Square Root of Hygiene Score on Day 1", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$sqrtday1, na.rm = TRUE), sd = sd(dlf$sqrtday1, na.rm = TRUE)), colour = "red", size = 1) 284 | hist.sqrtday1 285 | ggsave(file = paste(imageDirectory,"05 DLF sqrt Day 1 Hist.png",sep="/")) 286 | 287 | 288 | 289 | #Histogram for sqrtday2: 290 | 291 | hist.sqrtday2 <- ggplot(dlf, aes(sqrtday2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Square Root of Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$sqrtday2, na.rm = TRUE), sd = sd(dlf$sqrtday2, na.rm = TRUE)), colour = "red", size = 1) 292 | hist.sqrtday2 293 | ggsave(file = paste(imageDirectory,"05 DLF sqrt Day 2 Hist.png",sep="/")) 294 | 295 | 296 | 297 | #Histogram for sqrtday3: 298 | 299 | hist.sqrtday3 <- ggplot(dlf, aes(sqrtday3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", 
fill="white") + labs(x="Square Root of Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$sqrtday3, na.rm = TRUE), sd = sd(dlf$sqrtday3, na.rm = TRUE)), colour = "red", size = 1) 300 | hist.sqrtday3 301 | ggsave(file = paste(imageDirectory,"05 DLF sqrt Day 3 Hist.png",sep="/")) 302 | 303 | 304 | #Create reciprocal scores 305 | 306 | dlf$recday1 <- 1/(dlf$day1 + 1) 307 | dlf$recday2 <- 1/(dlf$day2 + 1) 308 | dlf$recday3 <- 1/(dlf$day3 + 1) 309 | 310 | #Histograms of the reciprocal transformed scores: 311 | 312 | #Histogram for recday1: 313 | 314 | hist.recday1 <- ggplot(dlf, aes(recday1)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Reciprocal of of Hygiene Score on Day 1", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$recday1, na.rm = TRUE), sd = sd(dlf$recday1, na.rm = TRUE)), colour = "red", size = 1) 315 | hist.recday1 316 | ggsave(file = paste(imageDirectory,"05 DLF rec Day 1 Hist.png",sep="/")) 317 | 318 | 319 | 320 | #Histogram for recday2: 321 | 322 | hist.recday2 <- ggplot(dlf, aes(recday2)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Reciprocal of of Hygiene Score on Day 2", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$recday2, na.rm = TRUE), sd = sd(dlf$recday2, na.rm = TRUE)), colour = "red", size = 1) 323 | hist.recday2 324 | ggsave(file = paste(imageDirectory,"05 DLF rec Day 2 Hist.png",sep="/")) 325 | 326 | 327 | #Histogram for recday3: 328 | 329 | 330 | hist.recday3 <- ggplot(dlf, aes(recday3)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), colour="black", fill="white") + labs(x="Reciprocal of of Hygiene Score on Day 3", y = "Density") + stat_function(fun = dnorm, args = list(mean = mean(dlf$recday3, na.rm = TRUE), sd = sd(dlf$recday3, na.rm = TRUE)), colour = "red", size = 1) 331 | hist.recday3 332 | ggsave(file = paste(imageDirectory,"05 DLF rec Day 3 Hist.png",sep="/")) 333 | 334 | 335 | #Ifelse 336 | 337 | dlf$day1NoOutlier <- ifelse(dlf$day1 > 5, NA, dlf$day1) 338 | 339 | #----------Smart Alex task 1--------- 340 | 341 | chickFlick <- read.delim(file="ChickFlick.dat", header=TRUE) 342 | 343 | #Use by() to get descriptives for arousal, split by film 344 | by(data=chickFlick$arousal, INDICES=chickFlick$film, FUN=describe) 345 | 346 | 347 | #Shapiro-Wilks test for arousal split by film 348 | by(data=chickFlick$arousal, INDICES=chickFlick$film, FUN=shapiro.test) 349 | 350 | 351 | #Levene's test for comparison of variances of arousal scores for the two films. 
352 | leveneTest(chickFlick$arousal, chickFlick$film, center=median) 353 | 354 | #To plot a histogram for arousal for Bridget Jones' Diary we would execute: 355 | hist.arousal.Bridget <- ggplot(subset(chickFlick, chickFlick$film=="Bridget Jones' Diary"),aes(arousal)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), fill="white", colour="black", binwidth=1) + labs(x="Arousal", y = "Density") + stat_function(fun=dnorm, args=list(mean=mean(subset(chickFlick, chickFlick$film=="Bridget Jones' Diary")$arousal, na.rm=TRUE), sd=sd(subset(chickFlick, chickFlick$film=="Bridget Jones' Diary")$arousal, na.rm=TRUE) ), colour="black", size=1) 356 | hist.arousal.Bridget 357 | 358 | 359 | #To plot a histogram for arousal for Memento we would execute: 360 | 361 | hist.arousal.Memento <- ggplot(subset(chickFlick, chickFlick$film=="Memento"),aes(arousal)) + opts(legend.position = "none") + geom_histogram(aes(y=..density..), fill="white", colour="black", binwidth=1) + labs(x="Arousal", y = "Density") + stat_function(fun=dnorm, args=list(mean=mean(subset(chickFlick, chickFlick$film=="Memento")$arousal, na.rm=TRUE), sd=sd(subset(chickFlick, chickFlick$film=="Memento")$arousal, na.rm=TRUE) ), colour="black", size=1) 362 | hist.arousal.Memento 363 | 364 | 365 | #----------Smart Alex task 2--------- 366 | 367 | 368 | #Read in R exam data. 369 | rexam <- read.table(file="rexam.dat", header=TRUE) 370 | 371 | #Set the variable uni to be a factor: 372 | rexam$uni<-factor(rexam$uni, levels = c(0:1), labels = c("Duncetown University", "Sussex University")) 373 | 374 | rexam$lognumeracy <- log(rexam$numeracy) 375 | rexam$sqrtnumeracy <- sqrt(rexam$numeracy) 376 | rexam$recnumeracy <- 1/(rexam$numeracy) 377 | 378 | #Histogram for numeracy: 379 | hist.numeracy <- ggplot(rexam, aes(numeracy)) + 380 | opts(legend.position = "none")+ 381 | geom_histogram(aes(y=..density..), 382 | colour="black", fill="white") + 383 | labs(x="Numeracy scores", y = "Density") + 384 | stat_function(fun=dnorm, 385 | args=list (mean=mean (rexam$numeracy, na.rm=TRUE), 386 | sd=sd(rexam$numeracy, na.rm=TRUE) ), 387 | colour="black", size=1) 388 | hist.numeracy 389 | 390 | #Histogram for log numeracy: 391 | 392 | hist.lognumeracy <- ggplot(rexam, aes(lognumeracy)) + 393 | opts(legend.position = "none")+ 394 | geom_histogram(aes(y=..density..), 395 | colour="black", fill="white") + 396 | labs(x="Log Transformed Numeracy scores", y = "Density") + 397 | stat_function(fun=dnorm, 398 | args=list (mean=mean (rexam$lognumeracy, na.rm=TRUE), 399 | sd=sd(rexam$lognumeracy, na.rm=TRUE) ), 400 | colour="black", size=1) 401 | hist.lognumeracy 402 | 403 | #Histogram for square root numeracy: 404 | 405 | hist.sqrtnumeracy <- ggplot(rexam, aes(sqrtnumeracy)) + 406 | opts(legend.position = "none")+ 407 | geom_histogram(aes(y=..density..), 408 | colour="black", fill="white") + 409 | labs(x="Square Root of Numeracy scores", y = "Density") + 410 | stat_function(fun=dnorm, 411 | args=list (mean=mean (rexam$sqrtnumeracy, na.rm=TRUE), 412 | sd=sd(rexam$sqrtnumeracy, na.rm=TRUE) ), 413 | colour="black", size=1) 414 | hist.sqrtnumeracy 415 | 416 | #Histogram of the reciprocal transformed numeracy scores: 417 | hist.recnumeracy <- ggplot(rexam, aes(recnumeracy)) + 418 | opts(legend.position = "none")+ 419 | geom_histogram(aes(y=..density..), 420 | colour="black", fill="white") + 421 | labs(x="Reciprocal of Numeracy Scores", y = "Density") + 422 | stat_function(fun=dnorm, 423 | args=list (mean=mean (rexam$recnumeracy, na.rm=TRUE), 424 | 
sd=sd(rexam$recnumeracy, na.rm=TRUE) ), 425 | colour="black", size=1) 426 | hist.recnumeracy 427 | 428 | 429 | #Shapiro-Wilks test for numeracy: 430 | shapiro.test(rexam$numeracy) 431 | 432 | #Shapiro-Wilks test for lognumeracy: 433 | shapiro.test(rexam$lognumeracy) 434 | 435 | #Shapiro-Wilks test for sqrtnumeracy: 436 | shapiro.test(rexam$sqrtnumeracy) 437 | 438 | #Shapiro-Wilks test for recnumeracy: 439 | shapiro.test(rexam$recnumeracy) 440 | 441 | -------------------------------------------------------------------------------- /anova/.Rapp.history: -------------------------------------------------------------------------------- 1 | 1000/sqrt(10)= 2 | 0 3 | 1000/sqrt(10) 4 | pnorm(1.58, lower.tail=FALSE) 5 | pnorm(1.58) 6 | pnorm(`1.58) 7 | pnorm(-1.58) 8 | c 9 | 3 10 | 4 11 | 2 12 | #This is a comment 13 | library(car) 14 | This is a command 15 | # This is a comment 16 | this 17 | ls.str() 18 | exit 19 | setwd("~/OneDrive/MIDS/W203/Data/anova") 20 | dummy <- read.delim("dummy.day", headers=TRUE) 21 | dummy <- read.delim("dummy.day", header=TRUE) 22 | dummy <- read.delim("dummy.dat", header=TRUE) 23 | head(dummy) 24 | summary(dummy) 25 | ls.str() 26 | dummyData$dose<-factor(dummyData$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 27 | dummy$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 28 | ls.str() 29 | summary(dummy) 30 | dummy.1 <- lm(libido ~ dummy1 + dummy2, data = dummy) 31 | summary(dummy.1) 32 | # gm <- mean( dummy$libido) 33 | gm <- mean( dummy$libido) 34 | length(dummy$libido) 35 | sst <- gm/length(dummy$libido) 36 | sst 37 | gm 38 | gsd <- sd(dummy$libido) 39 | sst <- sd^2/length(dummy$libido) 40 | sst <- gsd^2/length(dummy$libido) 41 | sst 42 | sst <- (gsd^2)*length(dummy$libido) 43 | sst 44 | gsd 45 | 1.7^2 46 | 1.767^2 47 | sst <- (gsd^2)*(length(dummy$libido)-) 48 | sst <- (gsd^2)*(length(dummy$libido)-1) 49 | sst 50 | gm <- mean(dummy$libido) 51 | placebo <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 0, ] 52 | low <- dummy[dummy$dummy1 ==1 & dummy$dummy2 == 0, ] 53 | gm.grand <- mean(dummy$libido) 54 | gm.low <- mean(low$libido) 55 | gm.high <- mean(high$libido) 56 | high <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 1, ] 57 | gm.high <- mean(high$libido) 58 | m.grand <- mean(dummy$libido) 59 | m.low <- mean(low$libido) 60 | m.high <- mean(high$libido) 61 | m.grand 62 | m.low 63 | m.high 64 | high <- dummy[dummy$dummy1 == 1 & dummy$dummy2 == 0, ] 65 | low <- dummy[dummy$dummy1 ==0 & dummy$dummy2 == 1, ] 66 | m.low <- mean(low$libido) 67 | m.high <- mean(high$libido) 68 | m.placebo <- mean(placebo$libido) 69 | m.low 70 | m.placebo 71 | ssm <- length(placebo$libido)*(m.placebo - m.grand)^2 + length(low.libido)*(m.low - m.grand)^2 + length(high.libido)*(m.high - m.grand)^2 72 | ssm <- length(placebo$libido)*(m.placebo - m.grand)^2 + length(low$libido)*(m.low - m.grand)^2 + length(high$libido)*(m.high - m.grand)^2 73 | ssm 74 | s.low <- sd(low$libido) 75 | s.high <- sd(high$libido) 76 | s.placebo <- sd(placebo$libido) 77 | ssr <- (length(placebo$libido) - 1) * s.placebo^2 + (length(low$libido) - 1)) * s.low^2 + (length(high$libido) - 1) * s.high^2 78 | ssr <- (length(placebo$libido) - 1) * s.placebo^2 + (length(low$libido) - 1) * s.low^2 + (length(high$libido) - 1) * s.high^2 79 | ssr 80 | msm <- ssm/(3 - 1) 81 | msr <- ssr/(14 - 2) 82 | fr <- msm/msr 83 | fr 84 | contrast <- read.delim("contrast.dat", header = TRUE) 85 | summary(contrast) 86 | head(contrast) 87 | contrast.planned <- lm(libido ~ dummy + dummy2, 
data = contrast) 88 | dummy$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 89 | ls.str() 90 | contrast$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 91 | contrast$dose<-factor(contrast$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose")) 92 | ls.str() 93 | ls.str(contrast) 94 | contrast.planned <- lm(libido ~ dummy + dummy2, data = contrast) 95 | contrast.planned <- lm(libido ~ dummy1 + dummy2, data = contrast) 96 | contrast.planned 97 | summary(contrast.planned) 98 | -------------------------------------------------------------------------------- /anova/ANOVA_demo.R: -------------------------------------------------------------------------------- 1 | # A demonstration of ANOVA in R 2 | 3 | # Load Youtube video data 4 | load("Videos_clean.Rdata") 5 | summary(Videos) 6 | 7 | 8 | # check the rate variable for normality 9 | hist(Videos$rate) 10 | 11 | # That's not great, but remember that ANOVA is a robust-test 12 | # and the data is on a 1-5 scale, which isn't normally 13 | # a place we'd worry 14 | 15 | # Let's look at the means, by each category and overall 16 | by(Videos$rate, Videos$category, mean, na.rm=T) 17 | mean(Videos$rate, na.rm=T) 18 | 19 | # We can get nicer output with the tapply function 20 | tapply(Videos$rate, Videos$category, mean, na.rm=T) 21 | 22 | # Perform the analysis of variance and check the significance 23 | aovm = aov(rate ~ category, Videos) 24 | summary(aovm) 25 | 26 | # Post-hoc, we can compare our groups pairwise 27 | tt = pairwise.t.test(Videos$rate, Videos$category, p.adjust.method = "bonferroni") 28 | tt 29 | 30 | # That's hard to read, so let's create a prettier table 31 | # Write a function to add significance stars to a p-value 32 | sig_stars = function(p) 33 | { 34 | stars = symnum(p, na = F, cutpoints = c(0, .001, .01, .05, .1, 1), symbols=c("***","**", "*", ".", " ")) 35 | return( paste(round(p, 3), stars) ) 36 | } 37 | 38 | # apply our new function to every element in our matrix 39 | t_table = apply(tt$p.value, c(1,2), sig_stars) 40 | t_table 41 | # get rid of the quotes 42 | t_table = noquote( t_table ) 43 | t_table 44 | 45 | # tidy up the column names and erase the NAs 46 | colnames(t_table) = abbreviate(colnames(t_table)) 47 | 48 | # Notice that upper.tri indexes the upper right 49 | # triangle of the matrix 50 | upper.tri(t_table) 51 | 52 | # set those positions to the empty string 53 | t_table[upper.tri(t_table)] = "" 54 | 55 | # the finished table 56 | t_table 57 | 58 | -------------------------------------------------------------------------------- /anova/Contrast.dat: -------------------------------------------------------------------------------- 1 | dose libido dummy1 dummy2 2 | 1 3 -2 0 3 | 1 2 -2 0 4 | 1 1 -2 0 5 | 1 1 -2 0 6 | 1 4 -2 0 7 | 2 5 1 -1 8 | 2 2 1 -1 9 | 2 4 1 -1 10 | 2 2 1 -1 11 | 2 3 1 -1 12 | 3 7 1 1 13 | 3 4 1 1 14 | 3 5 1 1 15 | 3 3 1 1 16 | 3 6 1 1 17 | -------------------------------------------------------------------------------- /anova/Dummy.dat: -------------------------------------------------------------------------------- 1 | dose libido dummy1 dummy2 2 | 1 3 0 0 3 | 1 2 0 0 4 | 1 1 0 0 5 | 1 1 0 0 6 | 1 4 0 0 7 | 2 5 0 1 8 | 2 2 0 1 9 | 2 4 0 1 10 | 2 2 0 1 11 | 2 3 0 1 12 | 3 7 1 0 13 | 3 4 1 0 14 | 3 5 1 0 15 | 3 3 1 0 16 | 3 6 1 0 17 | -------------------------------------------------------------------------------- /anova/Videos_clean.Rdata: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/anova/Videos_clean.Rdata
--------------------------------------------------------------------------------
/anova/anova.R:
--------------------------------------------------------------------------------
# ANOVA and Multiple Regression - Chapter 10 of Discovering Statistics Using R, Andy Field
#
setwd("~/OneDrive/MIDS/W203/Data/anova")

# use the downloaded dummy.dat file
dummy <- read.delim("dummy.dat", header=TRUE)
head(dummy)
summary(dummy)
ls.str()

# Looking at the dummy data, there is a vector for libido, and vectors for dummy1 and dummy2 that
# appear to be coded 0/1 as dummy variables. There is also a vector for dose that ranges from 1-3
# but is supposed to represent "placebo", "low dose", "high dose", so we need to recode it as a factor
dummy$dose<-factor(dummy$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose"))
ls.str()
summary(dummy)

# The libido vector is the single outcome measurement here, and each value belongs to one of the three
# groups. Therefore dummy1,dummy2 = (0,0) identifies the placebo group, while (1,0) and (0,1) identify
# the high-dose and low-dose groups respectively.

# The number of dummy variables needed is the number of groups - 1.

# now we can run the linear regression
dummy.1 <- lm(libido ~ dummy1 + dummy2, data = dummy)
summary(dummy.1)

# We see from the output that F(2, 12) is significant at p < .05
# Therefore using the group means is a better model than using the overall mean
# We also see that beta(1) is significant at p < .05 but beta(2) is not (p = .282)

# --------------------- The Logic of ANOVA ---------------------- #
#
# The simplest model we can fit to a set of data is the grand mean (the mean of the outcome variable)
# This basic model represents "no effect" of the predictor variable on the outcome variable
#
# We can fit a different model to the data that represents our hypotheses. If this model fits the data
# well then it must be better than using the grand mean.
#
# The intercept and one or more coefficients describe the chosen model
#
# The bigger the coefficients, the greater the deviation between the regression line and the grand mean
#
# In correlational research the coefficients represent the slope of the line, but in experimental
# research they represent the differences between the group means
#
# If the differences between the group means are large enough then the resulting model will be a better
# fit of the data than the grand mean and we can infer that our group means are significantly different.
#
# The F-ratio is the ratio of the explained variance (due to the model) to the unexplained variance
#
# ---------------------------------------------------------------#
#
# Calculate the total variation of the data (total sum of squares)
#

gsd <- sd(dummy$libido)
sst <- (gsd^2)*(length(dummy$libido)-1)

# Total amount of variation in the data is sst = 43.7333
# How much of this variation can the regression model explain?
# This is given by the model sum of squares: the squared difference between each group mean and the
# grand mean, multiplied by the number of scores in that group
placebo <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 0, ]
high <- dummy[dummy$dummy1 == 1 & dummy$dummy2 == 0, ]
low <- dummy[dummy$dummy1 == 0 & dummy$dummy2 == 1, ]

m.grand <- mean(dummy$libido)
m.low <- mean(low$libido)
m.high <- mean(high$libido)
m.placebo <- mean(placebo$libido)

ssm <- length(placebo$libido)*(m.placebo - m.grand)^2 + length(low$libido)*(m.low - m.grand)^2 + length(high$libido)*(m.high - m.grand)^2

# The variation explained by the model is ssm = 20.1333, so of the 43.733 total variation in the data, the model explains 20.1333

# The residual sum of squares is the variation not explained by the model, which is the sum of each
# group's variance multiplied by that group's degrees of freedom (n - 1)

s.low <- sd(low$libido)
s.high <- sd(high$libido)
s.placebo <- sd(placebo$libido)

ssr <- (length(placebo$libido) - 1) * s.placebo^2 + (length(low$libido) - 1) * s.low^2 + (length(high$libido) - 1) * s.high^2

# The variation not explained by the model is ssr = 23.6

# The mean squares
# the mean square for the model (msm) is ssm divided by the model degrees of freedom

msm <- ssm/(3 - 1)

# the mean square for the residuals (msr) is ssr divided by the residual degrees of freedom

msr <- ssr/(14 - 2)

# The F-ratio is msm/msr

fr <- msm/msr

# which gives 5.1184, comparable to the F-ratio of 5.119 reported by the linear regression

# ---------------------- Planned Contrasts ------------------------ #
#
# Using the contrast.dat file downloaded from the supplemental website
contrast <- read.delim("contrast.dat", header = TRUE)
summary(contrast)
head(contrast)

contrast$dose<-factor(contrast$dose, levels = c(1:3), labels = c("Placebo", "Low Dose", "High Dose"))
ls.str(contrast)
# this data is coded with the contrast weights as dummy1 and dummy2

contrast.planned <- lm(libido ~ dummy1 + dummy2, data = contrast)
summary(contrast.planned)

--------------------------------------------------------------------------------
/anova/anove_viagra.R:
--------------------------------------------------------------------------------
#-------------------------- ANOVA Test Procedure -----------------------#
#
# This script uses the Viagra data as described in Discovering Statistics Using R, pg. 434, by Andy Field
#
#-----------------------------------------------------------------------#
#
# Load libraries
library(ggplot2)
library(granova)
library(car)
library(Rcmdr)
library(pastecs)
library(multcomp)
library(compute.es)

# First we enter the data manually

libido <- c(3,2,1,1,4,5,2,4,2,3,7,4,5,3,6)
dose <- gl(3, 5, labels = c("Placebo", "Low Dose", "High Dose"))
viagraData <- data.frame(dose, libido)

ls.str(viagraData)

# Let's examine the data and test assumptions
shapiro.test(viagraData$libido)

# The Shapiro-Wilk test is non-significant for libido, p = .581, so there is no evidence that the
# scores deviate from normality


leveneTest(viagraData$libido, viagraData$dose, center=median)
#
# Levene's Test is non-significant at F(2, 12) = 0.1176 and p = 0.89
# Levene's test indicates that the variances are not significantly different
# Homogeneity of variance is ok - so we use regular ANOVA
# otherwise we would use Welch's F, or one of the robust methods based on trimmed means or bootstrapping

# Graph - this gets very fancy by placing 2 points for the mean to give the appearance of a dot with a border
#
# From the graph we see that the error bars overlap from one mean to the next and that there seems to
# be a linear trend as the dose increases
line <- ggplot(viagraData, aes(dose, libido))
line + stat_summary(fun.y = mean, geom = "line", size = 1, aes(group=1), colour = "#FF6633") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 0.75, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 4, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 3, colour = "#FF6633") + labs(x = "Dose of Viagra", y = "Mean Libido")

# Descriptive statistics
by(viagraData$libido, viagraData$dose, stat.desc)

# We can use the lm() or aov() function to create the model.
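# (A sketch, not in the original script: aov() is a wrapper around lm(), so the same model can
#  be fitted explicitly with lm(); the object name viagraLM below is illustrative only.)
viagraLM <- lm(libido ~ dose, data = viagraData)
anova(viagraLM)    # the same F-ratio and p-value as the aov() fit below
summary(viagraLM)  # dummy-coded comparisons of each dose group against the Placebo baseline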
viagraModel <- aov(libido ~ dose, data = viagraData)

summary.lm(viagraModel)

# Our viagra model shows that the probability of an F value this large occurring if there were
# no effect in the population is p = .0247, i.e. significant at p < .05

# Let's take a look at the plots the aov constructed
plot(viagraModel)

# Welch's F Test
oneway.test(libido ~ dose, data = viagraData)

# Welch's F is more conservative at F(2, 7.943) = 4.32, p = .054, which indicates non-significance

# Using robust methods (Wilcox)
# Load the latest Wilcox R tests
source("~/OneDrive/MIDS/W203/Data/Rallfun-V27.txt")

# The Wilcox tests want the data in a matrix format, so first we convert the data frame
viagraMatrix <- unstack(viagraData, libido ~ dose)
viagraMatrix

# Now we use the first test, based on a 10% trimmed mean
t1way(viagraMatrix, tr = .1)

# The outcome of the trimmed mean function yields F(2, 7.943) = 4.32, p = 0.0537
# Moving on to the median-based test
med1way(viagraMatrix)

# The median one-way test yields F = 4.783 and p = 0.07
# Finally, we can use the bootstrap one-way test with a 5% trimmed mean and 2000 bootstrap samples
t1waybt(viagraMatrix, tr = 0.05, nboot = 2000)

# This yields F = 4.32, p = 0.061
# All the one-way robust tests indicate that there is no significant relationship between the dose of
# viagra and libido

# Planned Contrasts of the Viagra Data
summary(viagraModel)

# Here we see from the output that the low dose effect is non-significant at p = 0.28
# but that the effect of high dose is significant at p = .008

# let's set our own contrasts
contrast1 <- c(-2, 1, 1)
contrast2 <- c(0, -1, 1)
contrasts(viagraData$dose) <- cbind(contrast1, contrast2)
viagraData$dose

# Contrast 1 compares the placebo group against the two experimental groups
# Contrast 2 compares the low-dose group to the high-dose group

viagraPlanned <- aov(libido ~ dose, data = viagraData)

summary.lm(viagraPlanned)

# Our hypothesis was that the experimental groups would increase libido above the control group
# Therefore we should use a 1-tailed comparison
# Contrast 1 (experimental groups vs. control group) is significant at p = .0293 / 2 = .0147
.0293/2

# We also predicted that a higher dose would increase the effect over the low dose
# Contrast 2 is significant (one-tailed) at p = .0652 / 2 = .0326
.0652/2

# The planned contrasts show that taking viagra significantly increased libido compared to a
# control group, t(12) = 2.47, p < .05, and taking a high dose significantly increased libido
# compared to a low dose, t(12) = 2.029, p < .05 (one-tailed)

# Trend analysis
contrasts(viagraData$dose) <- contr.poly(3)
viagraTrend <- aov(libido ~ dose, data = viagraData)
summary.lm(viagraTrend)

# Based on the trend analysis there is a significant linear trend, t = 3.157, p = .008
# The quadratic trend is not significant, t = 0.521, p = .612

# Post hoc tests
# Bonferroni
pairwise.t.test(viagraData$libido, viagraData$dose, paired = FALSE, p.adjust.method = "bonferroni")

# Benjamini-Hochberg
pairwise.t.test(viagraData$libido, viagraData$dose, paired = FALSE, p.adjust.method = "BH")

# Tukey (requires multcomp library)
postHocs <- glht(viagraModel, linfct = mcp(dose = "Tukey")) 135 | summary(postHocs) 136 | confint(postHocs) 137 | 138 | # Dunnett 139 | postHocs <- glht(viagraModel, linfct = mcp(dose = "Dunnett"), base = 1) 140 | summary(postHocs) 141 | confint(postHocs) 142 | 143 | # Rand Wilcox's robust tests 144 | lincon(viagraMatrix) 145 | mcppb20(viagraMatrix) 146 | 147 | # Effect size, omega squared 148 | 149 | omega<-function(SSm, SSr, dfm, MSr) 150 | { 151 | SSt = SSm + SSr 152 | omega = (SSm-(dfm*MSr))/(SSt+MSr) 153 | print(paste("Omega-Squared: ", omega)) 154 | } 155 | omega(20.13, 23.60, 2, 1.967) 156 | 157 | # Effect size between low-dose and placebo group 158 | mes(2.2, 3.2, 1.3038405, 1.3038405, 5, 5) 159 | # d = -0.77, r = 0.46 (.74 sd) 160 | 161 | # between high-dose and placebo group 162 | mes(2.2, 5, 1.3038405, 1.5811388, 5, 5) 163 | # d = -1.93, r = -0.69 ( ~ 2 sd's) 164 | 165 | # between low and high dose groups 166 | mes(3.2, 5, 1.3038405, 1.5811388, 5, 5) 167 | # d = -1.24, r = -0.53 (1.24 sd's) 168 | 169 | # Effect sizes for orthogonal contrasts 170 | rcontrast <- function(t, df) 171 | {r <- sqrt(t^2/(t^2 + df)) 172 | print(paste("r = ", r)) 173 | } 174 | 175 | rcontrast(2.474, 12) 176 | rcontrast(2.029, 12) -------------------------------------------------------------------------------- /anova/untitled.R: -------------------------------------------------------------------------------- 1 | #-------------------------- ANOVA Test Procedure -----------------------# 2 | # 3 | # This script uses the Viagra data as described in Discovering Statistics Using R, pg. 434, by Andy Field 4 | # 5 | #-----------------------------------------------------------------------# 6 | # 7 | # Load libraries 8 | library(ggplot2) 9 | library(granova) 10 | library(car) 11 | library(Rcmdr) 12 | library(pastecs) 13 | library(multcomp) 14 | library(compute.es) 15 | 16 | # First we enter the data manually 17 | 18 | libido <- c(3,2,1,1,4,5,2,4,2,3,7,4,5,3,6) 19 | dose <- gl(3, 5, labels = c("Placebo", "Low Dose", "High Dose")) 20 | viagraData <- data.frame(dose, libido) 21 | 22 | ls.str(viagraData) 23 | 24 | # Let's examine the data and test assumptions 25 | shapiro.test(viagraData$libido) 26 | 27 | # Shapiro-Wilk's test indicates non-normal sampling distribution for libido at p = .581 28 | 29 | 30 | leveneTest(viagraData$libido, viagraData$dose, center=median) 31 | # 32 | # Levene's Test is non-significant at F(2, 12) = 0.1176 and p = 0.89 33 | # Levene's test indicates that the variances are not significantly different 34 | # Homogeneity of variance is ok 35 | 36 | # Graph - this gets very fancy by placing 2 points for the mean to give the appearance of a dot with a border 37 | # 38 | # From the graph we see that the error bars overlap from one mean to the next and that there seems to 39 | # be a linear trend as the data changes 40 | line <- ggplot(viagraData, aes(dose, libido)) 41 | line + stat_summary(fun.y = mean, geom = "line", size = 1, aes(group=1), colour = "#FF6633") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 0.75, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 4, colour = "#990000") + stat_summary(fun.y = mean, geom = "point", size = 3, colour = "#FF6633") + labs(x = "Dose of Viagra", y = "Mean Libido") 42 | 43 | # Descriptive statistics 44 | by(viagraData$libido, viagraData$dose, stat.desc) 45 | 46 | # We can use the lm() or aov() function to create the model. 
47 | viagraModel <- aov(libido ~ dose, data = viagraData) 48 | 49 | summary(viagraModel) 50 | 51 | # Our viagra model shows that the probability of the F value occuring if there were 52 | # no effect in the population at p = .0247, or significant at p < .05 53 | 54 | # Let's take a look at the plots the aov constructed 55 | plot(viagraModel) 56 | 57 | # Welch's F Test 58 | oneway.test(libido ~ dose, data = viagraData) 59 | 60 | # Welch's F is more conservative at F(2, 7.943) = 4.23, p = 0.54 which indicates non-significance 61 | 62 | # Using robust methods (Wilcox) -------------------------------------------------------------------------------- /common_coding_issues.R: -------------------------------------------------------------------------------- 1 | # Correcting common coding issues in data 2 | 3 | # We'll look at a dataset of video attributes 4 | # from Youtube and prepare it for further study. 5 | # This data is taken from webcrawls conducted 6 | # by Cheng, Dale, and Liu at Simon Fraser University. 7 | # Details of their methodology can be found at 8 | # http://netsg.cs.sfu.ca/youtubedata/ 9 | 10 | Videos = read.delim("videos.txt") 11 | summary(Videos) 12 | 13 | # Let's examine the categories variable 14 | summary(Videos$category) 15 | 16 | # Notice the mysterious first entry in the summary, 17 | # which has a number but no text 18 | # We can confirm this by checking the values of this variable 19 | levels(Videos$category) 20 | 21 | # It looks like missing categories are incorrectly 22 | # coded as empty strings. We should correct these to NA's 23 | Videos$category[Videos$category == ""] = NA 24 | 25 | # the 9 data points are gone, but the "" still 26 | # appears as a level. 27 | summary(Videos$category) 28 | 29 | # Can fix this by recreating the factor in R 30 | Videos$category = factor(Videos$category) 31 | 32 | # Next, let's look at the rate variable, which 33 | # represents the average rating given to a video on a 34 | # 5-point scale. We would typically begin by looking 35 | # at a histogram 36 | hist(Videos$rate) 37 | 38 | # Notice the surprising peak around zero - is that meaningful? 39 | 40 | # Let's check if these values are exactly zero, or just close to zero. 41 | # Create a vector of videos that have a zero rating exactly 42 | zero_rate = Videos$rate == 0 43 | 44 | # And see how many of these videos there are 45 | summary(zero_rate) 46 | 47 | # We may check to see how many ratings these videos have. 48 | # According to the documentation for the dataset, the number 49 | # of ratings a video has is coded in the ratings vector 50 | Videos$ratings[zero_rate] 51 | 52 | # See all the zeros? We conclude that videos with no 53 | # ratings are being coded 54 | # with an average rating of zero. This would throw off 55 | # our means and other statistics 56 | # get the vector of videos with no ratings 57 | no_rate = Videos$ratings == 0 58 | 59 | # and recode the rate variable as missing for these videos 60 | Videos$rate[no_rate] = NA 61 | 62 | 63 | # Let's similarly inspect the age variable 64 | hist(Videos$age) 65 | # Notice the peak at age 0 66 | 67 | # get the vector of videos with age zero to look closer 68 | zero_age = Videos$age == 0 69 | 70 | # could it be that these are the unrated videos again? 
71 | Videos$ratings[zero_age] 72 | 73 | # Let's look at all of the columns to see if anything 74 | # jumps out about these datapoints 75 | # We need to subset the dataset, pulling out just the rows 76 | # which for the zero-age videos 77 | Videos[Videos$age == 0,] 78 | 79 | # Notice that they all come from the UNA category 80 | # We can check that all Videos in UNA also have zero age 81 | Videos$age[Videos$category == " UNA "] 82 | 83 | 84 | # At this point, I went and read the documentation 85 | # on the researchers' website. The age is supposed to 86 | # show days from the formation of Youtube 87 | # to the time the video was uploaded. 88 | 89 | # Because we don't expect many videos the day 90 | # youtube was created, this confirms out suspicion 91 | # that the zeros represent missing values 92 | # This also suggests that the UNA category represents 93 | # missing values, so lets recode all of these as NAs 94 | Videos$age[zero_age] = NA 95 | Videos$category[zero_age] = NA 96 | Videos$category = factor(Videos$category) 97 | 98 | # Moreover, for an age variable, we'd rather 99 | # measure from the day the video was created to 100 | # the day the data was gathered. 101 | # We can do that by subtracting the maximum age from each 102 | # video's age. 103 | Videos$age = max(Videos$age, na.rm = T)- Videos$age 104 | 105 | # Let's see if that looks more like what we'd expect 106 | hist(Videos$age) 107 | 108 | # Finally, let's save our dataframe so we can study it 109 | # more later 110 | save(Videos, file= "Videos_clean.Rdata") 111 | 112 | -------------------------------------------------------------------------------- /correlation/Exam Anxiety.dat: -------------------------------------------------------------------------------- 1 | Code Revise Exam Anxiety Gender 2 | 1 4 40 86.298 Male 3 | 2 11 65 88.716 Female 4 | 3 27 80 70.178 Male 5 | 4 53 80 61.312 Male 6 | 5 4 40 89.522 Male 7 | 6 22 70 60.506 Female 8 | 7 16 20 81.462 Female 9 | 8 21 55 75.82 Female 10 | 9 25 50 69.372 Female 11 | 10 18 40 82.268 Female 12 | 11 18 45 79.044 Male 13 | 12 16 85 80.656 Male 14 | 13 13 70 70.178 Male 15 | 14 18 50 75.014 Female 16 | 15 98 95 34.714 Male 17 | 16 1 70 95.164 Male 18 | 17 14 95 75.82 Male 19 | 18 29 95 79.044 Female 20 | 19 4 50 91.134 Female 21 | 20 23 60 64.536 Male 22 | 21 14 80 80.656 Male 23 | 22 12 75 77.432 Male 24 | 23 22 85 65.342 Female 25 | 24 84 90 .0560000000000116 Female 26 | 25 23 30 71.79 Female 27 | 26 26 60 81.462 Female 28 | 27 24 75 63.73 Male 29 | 28 72 75 27.46 Female 30 | 29 37 27 73.402 Female 31 | 30 10 20 89.522 Male 32 | 31 3 75 89.522 Female 33 | 32 36 90 75.014 Female 34 | 33 43 60 43.58 Male 35 | 34 19 30 82.268 Male 36 | 35 12 80 79.044 Male 37 | 36 9 10 79.044 Female 38 | 37 72 85 37.132 Male 39 | 38 10 7 81.462 Male 40 | 39 12 5 83.074 Female 41 | 40 30 85 50.834 Male 42 | 41 15 20 82.268 Male 43 | 42 8 45 78.238 Female 44 | 43 34 60 72.596 Male 45 | 44 22 70 74.208 Female 46 | 45 21 50 75.82 Female 47 | 46 27 25 70.984 Male 48 | 47 6 50 97.582 Male 49 | 48 18 40 67.76 Male 50 | 49 8 80 75.014 Male 51 | 50 19 50 73.402 Female 52 | 51 0 35 93.552 Female 53 | 52 52 80 58.894 Female 54 | 53 38 50 53.252 Female 55 | 54 19 49 84.686 Male 56 | 55 23 75 89.522 Female 57 | 56 11 25 71.79 Female 58 | 57 27 65 82.268 Male 59 | 58 17 80 69.372 Male 60 | 59 13 50 62.118 Male 61 | 60 42 70 68.566 Female 62 | 61 4 40 93.552 Male 63 | 62 8 80 84.686 Female 64 | 63 6 10 82.268 Male 65 | 64 11 20 81.462 Female 66 | 65 7 40 82.268 Male 67 | 66 15 40 91.134 Male 68 | 67 4 70 91.94 
Female 69 | 68 28 52 86.298 Female 70 | 69 22 50 72.596 Male 71 | 70 29 60 63.73 Female 72 | 71 2 80 63.73 Male 73 | 72 16 60 71.79 Female 74 | 73 59 65 57.282 Male 75 | 74 10 15 84.686 Female 76 | 75 13 85 84.686 Male 77 | 76 8 20 77.432 Female 78 | 77 5 80 82.268 Female 79 | 78 2 100 10 Male 80 | 79 38 100 50.834 Female 81 | 80 4 80 87.91 Male 82 | 81 10 10 83.88 Male 83 | 82 6 70 84.686 Female 84 | 83 68 100 20.206 Female 85 | 84 8 70 87.104 Male 86 | 85 1 70 83.88 Female 87 | 86 14 65 67.76 Male 88 | 87 42 75 95.97 Female 89 | 88 13 85 62.118 Female 90 | 89 1 30 84.686 Male 91 | 90 3 5 92.746 Male 92 | 91 5 10 84.686 Female 93 | 92 12 90 83.074 Female 94 | 93 19 70 73.402 Male 95 | 94 2 20 87.91 Female 96 | 95 19 85 71.79 Male 97 | 96 11 35 86.298 Male 98 | 97 15 30 84.686 Female 99 | 98 23 70 75.82 Male 100 | 99 13 55 70.984 Female 101 | 100 14 75 78.238 Female 102 | 101 1 2 82.268 Male 103 | 102 9 40 79.044 Male 104 | 103 20 50 91.134 Female 105 | -------------------------------------------------------------------------------- /correlation/The Biggest Liar.dat: -------------------------------------------------------------------------------- 1 | Creativity Position Novice 2 | 53 1 0 3 | 36 3 1 4 | 31 4 0 5 | 43 2 0 6 | 30 4 1 7 | 41 1 0 8 | 32 4 0 9 | 54 1 1 10 | 47 2 1 11 | 50 2 0 12 | 56 1 0 13 | 50 1 0 14 | 34 1 1 15 | 41 4 0 16 | 52 5 0 17 | 38 1 0 18 | 35 2 1 19 | 35 3 1 20 | 48 1 0 21 | 38 1 1 22 | 43 2 1 23 | 38 3 1 24 | 39 1 1 25 | 51 1 0 26 | 26 3 0 27 | 31 4 1 28 | 45 1 0 29 | 31 3 0 30 | 34 2 1 31 | 35 5 1 32 | 39 2 1 33 | 50 2 1 34 | 53 1 1 35 | 38 2 0 36 | 37 3 0 37 | 34 2 1 38 | 30 1 0 39 | 46 6 0 40 | 36 2 1 41 | 26 1 1 42 | 41 1 1 43 | 50 6 1 44 | 43 1 0 45 | 54 2 0 46 | 39 1 1 47 | 51 2 0 48 | 47 1 0 49 | 38 3 1 50 | 32 1 0 51 | 28 4 1 52 | 39 1 0 53 | 42 1 1 54 | 44 2 0 55 | 21 5 1 56 | 43 1 1 57 | 39 3 1 58 | 42 1 1 59 | 35 2 0 60 | 52 1 0 61 | 39 1 0 62 | 45 1 0 63 | 25 4 1 64 | 41 1 1 65 | 35 3 1 66 | 27 3 0 67 | 44 4 0 68 | 37 3 1 69 | 40 1 1 70 | -------------------------------------------------------------------------------- /correlation/correlation.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages 2 | setwd("~/OneDrive/MIDS/W203/Data/08") 3 | 4 | library(boot) 5 | library(ggm) 6 | library(ggplot2) 7 | library(Hmisc) 8 | library(polycor) 9 | library(pastecs) 10 | 11 | examData <- read.delim("Exam Anxiety.dat", header = TRUE) 12 | 13 | cor(examData[,c("Anxiety","Exam","Revise")], use="complete.obs", method = "kendall") 14 | 15 | # create a matrix from the dataframe so that rcorr() can use it 16 | # first remove non-numeric vectors in the data frame 17 | exams <- subset(examData, select = c("Exam","Anxiety","Revise")) 18 | # convert that dataset to a matrix 19 | examMatrix <- as.matrix(exams) 20 | 21 | # need to disambiguate rcorr from the version in ggm package 22 | Hmisc::rcorr(as.matrix(examData[, c("Exam", "Anxiety", "Revise")])) 23 | 24 | examData2 <- examData[, c("Exam", "Anxiety", "Revise")] 25 | cor(examData2) 26 | 27 | cor.test(examData$Anxiety, examData$Exam) 28 | cor.test(examData$Revise, examData$Exam) 29 | cor.test(examData$Anxiety, examData$Revise) 30 | 31 | cor(examData2)^2 * 100 32 | 33 | liarData <- read.delim("The Biggest Liar.dat", header = TRUE) 34 | liarData 35 | liarHist.creativity <- ggplot(liarData, aes(Creativity), theme(legend.position = "none")) + geom_histogram(aes(y = ..density..), colour = "black", fill = "white") + labs(x = "Creativity Score", y = "Density") + 
stat_function(fun = dnorm, args = list(mean = mean(liarData$Creativity, na.rm = TRUE), sd = sd(liarData$Creativity, na.rm = TRUE)), colour = "black", size = 1) 36 | liarHist.creativity 37 | 38 | stat.desc(liarData$Creativity, basic = FALSE, norm = TRUE) 39 | 40 | cor(liarData$Position, liarData$Creativity, method = "spearman") 41 | Hmisc::rcorr(as.matrix(liarData[, c("Position", "Creativity")])) 42 | 43 | cor.test(liarData$Position, liarData$Creativity, alternative = "less", method = "spearman") 44 | 45 | # partial correlation for Exam Anxiety while accounting for Revise 46 | pc <- pcor(c("Exam", "Anxiety", "Revise"), var(examData2)) 47 | pc^2 48 | pcor.test(pc, 1, 103) 49 | 50 | cpData <- read.delim("Chamorro-Premuzic.dat", header = TRUE) 51 | cor(cpData[,c("studentN", "studentE", "studentO", "studentA", "studentC", "lectureN", "lecturE", "lecturO", "lecturA", "lecturC")], use="pairwise.complete.obs", method = "pearson") 52 | 53 | # extract just the data in which we're interested 54 | cpMatrix <- as.matrix(cpData[, c("studentN", "studentE", "studentO", "studentA", "studentC", "lectureN", "lecturE", "lecturO", "lecturA", "lecturC")]) 55 | Hmisc::rcorr(cpMatrix) 56 | 57 | # results -> studentN prefers lecturA r=.10, p < 0.05 58 | # results -> studentE prefers lecturE r=.15, p < 0.05 59 | # results -> studentO does not like lecureN r=-.10, p < 0.05 60 | # results -> studentO prefers lecturO r=0.2, p < 0.01 61 | # results -> studentO does not like lecturA, r=-.16, p < 0.05 62 | -------------------------------------------------------------------------------- /correlation/correlation_chi_square_demo.R: -------------------------------------------------------------------------------- 1 | ### A demonstration of correlation and chi-square in R 2 | 3 | ### Preparation 4 | 5 | # car gives us nice scatterplots 6 | library(car) 7 | 8 | # We'll use our Country-by-Country dataset 9 | load("Countries2.Rdata") 10 | summary(Countries) 11 | 12 | 13 | # We'll also use Google's dataset of takedown requests - 14 | # that is, orders that come from governments of 15 | # different countries to remove certain content 16 | # from Youtube, search results, and other online products. 17 | # Each row of this dataset corresponds to a specific 18 | # country and a specific online product (you can think 19 | # of the unit of analysis as country x product), and there 20 | # are several variables of interest: 21 | # 22 | # Country - the country making specific takedown requests 23 | # Product - the online product the content is hosted on 24 | # (Youtube, Blogger, etc) 25 | # Reason - a reason why the content is being targeted 26 | # (copyright violation, government criticism, etc..) 27 | # Court.Orders - the number of requests from the Country's 28 | # court system 29 | # Executive..Police..etc. - the number of requests from the 30 | # executive and other branches of government 31 | # Items.Requested.To.Be.Removed - the number of separate items 32 | # of content. However, this variable seems to 33 | # have a lot of missing values 34 | 35 | # Read in the data 36 | Requests = read.csv("Removal_Requests.csv") 37 | head(Requests) 38 | 39 | # Note that there are multiple rows per country in 40 | # the Requests dataframe. 41 | 42 | # Create a new variable for total number of requests from 43 | # all branches of government 44 | Requests$total.takedowns = Requests$Court.Orders + Requests$Executive..Police..etc. 
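# Note: the simple sum above returns NA for any row where either count column is
# missing. If that turned out to be a problem, an NA-tolerant version of the same
# variable (using the same two column names) could be computed with rowSums:
# Requests$total.takedowns = rowSums(Requests[, c("Court.Orders", "Executive..Police..etc.")], na.rm = TRUE)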
45 | 46 | # To merge our datasets, we first need to sum together all the 47 | # rows for each country in the Requests dataset, so that 48 | # each country only appears in one row. 49 | # (we'll lose some variables when we do this, such as the product 50 | # the request referred to) 51 | R2 = aggregate(Requests[,c("Court.Orders", "Executive..Police..etc.", "total.takedowns")], list(Country = Requests$Country), sum) 52 | 53 | # Notice that there's one row per country now. 54 | head(R2) 55 | 56 | # Perform the merge 57 | Countries = merge(Countries, R2, by="Country", all=T) 58 | 59 | head(Countries) 60 | 61 | 62 | 63 | ### Correlation: Linear relationships between metric variables 64 | 65 | # Let's examine the relationship between corruption 66 | # and takedown requests. 67 | 68 | # Use a scatterplot to see how linear the relationship looks 69 | scatterplot(Countries$cpi, Countries$total.takedowns) 70 | 71 | #check the correlation 72 | cor.test(Countries$cpi, Countries$total.takedowns) 73 | 74 | # the cor function allows us to construct a correlation matrix 75 | cor(Countries[,c("gdp", "cpi", "total.takedowns")], use = "pairwise.complete.obs") 76 | 77 | # the output is actually a matrix object, so we can 78 | # do things like square each value to get R-squared 79 | cor(Countries[,c("gdp", "cpi", "total.takedowns")], use = "pairwise.complete.obs")**2 80 | 81 | 82 | 83 | ### Chi-square: Testing for relationships between categorical variables 84 | # Here are three different approaches, depending on structure of dataset 85 | 86 | ## 1. Two categorical variables 87 | 88 | # Look at the frequency table between region and whether a country is corrupt 89 | table(Countries$region, Countries$high_cpi) 90 | 91 | # We store the results of our chi-square test so we can extract more 92 | # values from the output 93 | cs = chisq.test(Countries$region, Countries$high_cpi) 94 | 95 | # Examine the test result 96 | cs 97 | 98 | # Look at the std. residuals to see which regions contribute most to the result 99 | cs$stdres 100 | 101 | # Check the expected counts to see if any are less than 5 and 102 | # if we should therefore try Fisher's exact test 103 | cs$expected 104 | 105 | # Use Fisher's exact test in this case: 106 | fisher.test(Countries$region, Countries$high_cpi) 107 | 108 | # For an effect size, we could compute Cramer's V manually 109 | # We may wish to put the code in a function so we can use 110 | # it again whenever we want. 111 | cramers_v = function(cs) 112 | { 113 | cv = sqrt(cs$statistic / (sum(cs$observed) * (min(dim(cs$observed))-1))) 114 | print.noquote("Cramer's V:") 115 | return(as.numeric(cv)) 116 | } 117 | 118 | # run our new function on our chi-square test 119 | cramers_v(cs) 120 | 121 | # As a rule of thumb, 122 | # Cramer's V under .2 is weak 123 | # between .2 and .4 is strong 124 | # and above .4 is very strong 125 | 126 | ## 2. Count data, one variable in columns 127 | 128 | # Consider each request to be the unit of analysis, and consider two variables: 129 | # Whether it came from a corrupt or trustworthy country; and whether it came 130 | # through a court order or executive/police action. We want to know if these 131 | # variables are independent or related. 
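# As a reminder of what chisq.test actually computes: under independence the
# expected count for each cell is (row total * column total) / grand total, and
# the statistic is sum((observed - expected)^2 / expected).
# A tiny illustration on a made-up 2x2 count matrix (not our data):
obs = matrix(c(30, 10, 20, 40), nrow = 2)
expected = outer(rowSums(obs), colSums(obs)) / sum(obs)
sum((obs - expected)^2 / expected)  # matches chisq.test(obs, correct = FALSE)$statistic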
132 | 133 | # We can use aggregate to collapse the rows to just the high_cpi variable 134 | Corrupt_Source = aggregate(Countries[,c("Court.Orders", "Executive..Police..etc.")], list(high_cpi = Countries$high_cpi), sum, na.rm=T) 135 | 136 | # Note that we've created a table of counts: 137 | Corrupt_Source 138 | 139 | # Not required, but we can add row names to make the chi-square output prettier 140 | rownames(Corrupt_Source)=Corrupt_Source$high_cpi 141 | 142 | # We want to plug our count table into the chi-square test 143 | # but we first have to remove the first column, 144 | # because it's a factor. 145 | # Otherwise, R will throw an error. 146 | # Notice that we can use a negative index to omit columns 147 | # That is, we can choose columns 2 and 3 with c(2,3) 148 | # or we can get the same thing by skipping column 1 with c(-1) 149 | Corrupt_Source[,c(-1)] 150 | 151 | # Plug this into the Chi-square test 152 | cs = chisq.test(Corrupt_Source[,c(-1)]) 153 | cs 154 | 155 | # Look at the standardized residuals to see which direction the effect is in 156 | cs$stdres 157 | 158 | # Check the expected counts to see if any are less than 5 and 159 | # if we should therefore try Fisher's exact test 160 | cs$expected 161 | 162 | # Since we have a 2x2 matrix, we can measure the effect 163 | # size elegantly as an odds ratio. 164 | # First, get the odds an order came from a Court for 165 | # corrupt countries 166 | corrupt_odds = Corrupt_Source["Corrupt","Court.Orders"] / Corrupt_Source["Corrupt","Executive..Police..etc."] 167 | 168 | # Do the same for the trustworth countries. 169 | trustworthy_odds = Corrupt_Source["Trustworthy","Court.Orders"] / Corrupt_Source["Trustworthy","Executive..Police..etc."] 170 | 171 | # The odds ratio is just one divided by the other 172 | corrupt_odds / trustworthy_odds 173 | 174 | ## 3. Count data, both variables in rows 175 | 176 | # Let's see if corrupt countries are likely to target different products 177 | # than trustworthy ones. For this, we can't aggregate our data by Country 178 | # so go back to the original request data, and merge in the high_cpi variable 179 | # also, remove countries that are missing corruption data 180 | Requests2 = merge(Countries[,c("Country", "high_cpi")], Requests, by="Country") 181 | Requests2 = Requests2[ ! is.na(Requests2$high_cpi),] 182 | head(Requests2) 183 | 184 | # We want separate columns for takedown requests from corrupt countries 185 | # and from trustworthy countries. Here, we create both columns, and copy 186 | # each value for total.takedowns to the appropriate one. 187 | Corrupt_Product = Requests2[,c("Product","high_cpi")] 188 | Corrupt_Product$Corrupt = ifelse(Requests2$high_cpi == "Corrupt", Requests2$total.takedowns, 0) 189 | Corrupt_Product$Trustworthy = ifelse(Requests2$high_cpi == "Trustworthy", Requests2$total.takedowns, 0) 190 | 191 | # Observe that each row only has a positive value in one of the two new columns 192 | head(Corrupt_Product) 193 | 194 | # Next we sum Corrupt and Trustworthy columns for each product. 
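# (aggregate keeps both summed columns together in a data frame, which is the
# shape chisq.test needs below; tapply(Corrupt_Product$Corrupt, Corrupt_Product$Product, sum)
# would give the same totals, but only one column at a time.)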
195 | Corrupt_Product = aggregate(Corrupt_Product[,c("Corrupt","Trustworthy")], list( Product = Corrupt_Product$Product), sum) 196 | 197 | # We are left with a contingency table 198 | Corrupt_Product 199 | 200 | # We could have also created the table in one step, using the cast command 201 | library(reshape) 202 | Corrupt_Product = cast(Requests2, Product ~ high_cpi , fun = sum, value = c("total.takedowns")) 203 | Corrupt_Product 204 | 205 | # Run a chi-square test as before 206 | cs = chisq.test(Corrupt_Product[,c(-1)]) 207 | cs 208 | 209 | # Check standardized residuals 210 | cs$stdres 211 | 212 | # And expected values 213 | cs$expected 214 | 215 | # The fisher test is probably too computationally intensive to run 216 | fisher.test(Corrupt_Product[,c(-1)]) 217 | 218 | # could also use monte-carlo simulation to check significance 219 | chisq.test(Corrupt_Product[,c(-1)], simulate.p.value = T) 220 | 221 | # let's use the function we wrote earlier to check the effect size 222 | cramers_v(cs) -------------------------------------------------------------------------------- /linear regression/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab= "Monthly Sunspot Numbers") 12 | pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("~/Documents/R/lr") 53 | setwd("~/Documents/R/lr") 54 | setwd("~/Documents/R/lr") 55 | load("Country3.dat") 56 | load("Countries3.dat") 57 | load("Countries3.Rdata") 58 | summary(Countries) 59 | scatterplot(Countries$gdp, Countries$internet_users_2011) 60 | library(car) 61 | scatterplot(Countries$gdp, Countries$internet_users_2011) 62 | rownames(Countries) <- Countries$country 63 | scatterplot(Countries$gdp, Countries$internet_users_2011) 64 | Countries$loggdp <- log(Countries$gpd) 65 | Countries$loggdp <- log(10, Countries$gpd) 66 | ?log 67 | Countries$loggdp <- log10(Countries$gpd) 68 | Countries$loggdp <- log10(Countries$gdp) 69 | scatterplot(Countries$loggdp, Countries$internet_users_2011) 70 | model = lm(internet_users_2011 ~ loggdp, data = Countries) 71 | summary(model) 72 | plot(model) 73 | library(lmtest) 74 | bptest(model) 75 | summary(model) 76 | 10**31.978 77 | Countries$loggdp <- log(Countries$gdp) 78 | 
scatterplot(Countries$loggdp, Countries$internet_users_2011) 79 | model = lm(internet_users_2011 ~ loggdp, data = Countries) 80 | summary(model) 81 | exp(13.89) 82 | summary(Countries) 83 | model = lm(internet_users_2011 ~ gdp, data = Countries)# 84 | summary(model) 85 | -------------------------------------------------------------------------------- /linear regression/Album Sales 1.dat: -------------------------------------------------------------------------------- 1 | adverts sales 2 | 10.256 330 3 | 985.685 120 4 | 1445.563 360 5 | 1188.193 270 6 | 574.513 220 7 | 568.954 170 8 | 471.814 70 9 | 537.352 210 10 | 514.068 200 11 | 174.093 300 12 | 1720.806 290 13 | 611.479 70 14 | 251.192 150 15 | 97.972 190 16 | 406.814 240 17 | 265.398 100 18 | 1323.287 250 19 | 196.65 210 20 | 1326.598 280 21 | 1380.689 230 22 | 792.345 210 23 | 957.167 230 24 | 1789.659 320 25 | 656.137 210 26 | 613.697 230 27 | 313.362 250 28 | 336.51 60 29 | 1544.899 330 30 | 68.954 150 31 | 785.692 150 32 | 125.628 180 33 | 377.925 80 34 | 217.994 180 35 | 759.862 130 36 | 1163.444 320 37 | 842.957 280 38 | 125.179 200 39 | 236.598 130 40 | 669.811 190 41 | 612.234 150 42 | 922.019 230 43 | 50 310 44 | 2000 340 45 | 1054.027 240 46 | 385.045 180 47 | 1507.972 220 48 | 102.568 40 49 | 204.568 190 50 | 1170.918 290 51 | 689.547 340 52 | 784.22 250 53 | 405.913 190 54 | 179.778 120 55 | 607.258 230 56 | 1542.329 190 57 | 1112.47 210 58 | 856.985 170 59 | 836.331 310 60 | 236.908 90 61 | 1077.855 140 62 | 579.321 300 63 | 1500 340 64 | 731.364 170 65 | 25.689 100 66 | 391.749 200 67 | 233.999 80 68 | 275.7 100 69 | 56.895 70 70 | 255.117 50 71 | 566.501 240 72 | 102.568 160 73 | 250.568 290 74 | 68.594 140 75 | 642.786 210 76 | 1500 300 77 | 102.563 230 78 | 756.984 280 79 | 51.229 160 80 | 644.151 200 81 | 15.313 110 82 | 243.237 110 83 | 256.894 70 84 | 22.464 100 85 | 45.689 190 86 | 724.938 70 87 | 1126.461 360 88 | 1985.119 360 89 | 1837.516 300 90 | 135.986 120 91 | 237.703 150 92 | 976.641 220 93 | 1452.689 280 94 | 1600 300 95 | 268.598 140 96 | 900.889 290 97 | 982.063 180 98 | 201.356 140 99 | 746.024 210 100 | 1132.877 250 101 | 1000 250 102 | 75.896 120 103 | 1351.254 290 104 | 202.705 60 105 | 365.985 140 106 | 305.268 290 107 | 263.268 160 108 | 513.694 100 109 | 152.609 160 110 | 35.987 150 111 | 102.568 140 112 | 215.368 230 113 | 426.784 230 114 | 507.772 30 115 | 233.291 80 116 | 1035.433 190 117 | 102.642 90 118 | 526.142 120 119 | 624.538 150 120 | 912.349 230 121 | 215.994 150 122 | 561.963 210 123 | 474.76 180 124 | 231.523 140 125 | 678.596 360 126 | 70.922 10 127 | 1567.548 240 128 | 263.598 270 129 | 1423.568 290 130 | 715.678 220 131 | 777.237 230 132 | 509.43 220 133 | 964.11 240 134 | 583.627 260 135 | 923.373 170 136 | 344.392 130 137 | 1095.578 270 138 | 100.025 140 139 | 30.425 60 140 | 1080.342 210 141 | 799.899 210 142 | 1071.752 240 143 | 893.355 210 144 | 283.161 200 145 | 917.017 140 146 | 234.568 90 147 | 456.897 120 148 | 206.973 100 149 | 1294.099 360 150 | 826.859 180 151 | 564.158 150 152 | 192.607 110 153 | 10.652 90 154 | 45.689 160 155 | 42.568 230 156 | 20.456 40 157 | 635.192 60 158 | 1002.273 230 159 | 1177.047 230 160 | 507.638 120 161 | 215.689 150 162 | 526.48 120 163 | 26.895 60 164 | 883.877 280 165 | 9.104 120 166 | 103.568 230 167 | 169.583 230 168 | 429.504 40 169 | 223.639 140 170 | 145.585 360 171 | 985.968 210 172 | 500.922 260 173 | 226.652 250 174 | 1051.168 200 175 | 68.093 150 176 | 1547.159 250 177 | 393.774 100 178 | 804.282 260 179 | 801.577 210 180 | 
450.562 290 181 | 26.598 220 182 | 179.061 70 183 | 345.687 110 184 | 295.84 250 185 | 2271.86 320 186 | 1134.575 300 187 | 601.434 180 188 | 45.298 180 189 | 759.518 200 190 | 832.869 320 191 | 56.894 140 192 | 709.399 100 193 | 56.895 120 194 | 767.134 230 195 | 503.172 150 196 | 700.929 250 197 | 910.851 190 198 | 888.569 240 199 | 800.615 250 200 | 1500 230 201 | 785.694 110 202 | -------------------------------------------------------------------------------- /linear regression/Album Sales 2.dat: -------------------------------------------------------------------------------- 1 | adverts sales airplay attract 2 | 10.256 330 43 10 3 | 985.685 120 28 7 4 | 1445.563 360 35 7 5 | 1188.193 270 33 7 6 | 574.513 220 44 5 7 | 568.954 170 19 5 8 | 471.814 70 20 1 9 | 537.352 210 22 9 10 | 514.068 200 21 7 11 | 174.093 300 40 7 12 | 1720.806 290 32 7 13 | 611.479 70 20 2 14 | 251.192 150 24 8 15 | 97.972 190 38 6 16 | 406.814 240 24 7 17 | 265.398 100 25 5 18 | 1323.287 250 35 5 19 | 196.65 210 36 8 20 | 1326.598 280 27 8 21 | 1380.689 230 33 8 22 | 792.345 210 33 7 23 | 957.167 230 28 6 24 | 1789.659 320 30 9 25 | 656.137 210 34 7 26 | 613.697 230 49 7 27 | 313.362 250 40 8 28 | 336.51 60 20 4 29 | 1544.899 330 42 7 30 | 68.954 150 35 8 31 | 785.692 150 8 6 32 | 125.628 180 49 7 33 | 377.925 80 19 8 34 | 217.994 180 42 6 35 | 759.862 130 6 7 36 | 1163.444 320 36 6 37 | 842.957 280 32 7 38 | 125.179 200 28 6 39 | 236.598 130 25 8 40 | 669.811 190 34 8 41 | 612.234 150 21 6 42 | 922.019 230 34 7 43 | 50 310 63 7 44 | 2000 340 31 7 45 | 1054.027 240 25 7 46 | 385.045 180 42 7 47 | 1507.972 220 37 7 48 | 102.568 40 25 8 49 | 204.568 190 26 7 50 | 1170.918 290 39 7 51 | 689.547 340 46 7 52 | 784.22 250 36 6 53 | 405.913 190 12 4 54 | 179.778 120 2 8 55 | 607.258 230 29 8 56 | 1542.329 190 33 8 57 | 1112.47 210 28 7 58 | 856.985 170 10 6 59 | 836.331 310 38 7 60 | 236.908 90 19 4 61 | 1077.855 140 13 6 62 | 579.321 300 30 7 63 | 1500 340 38 8 64 | 731.364 170 22 8 65 | 25.689 100 23 6 66 | 391.749 200 22 9 67 | 233.999 80 20 7 68 | 275.7 100 18 6 69 | 56.895 70 37 7 70 | 255.117 50 16 8 71 | 566.501 240 32 8 72 | 102.568 160 26 5 73 | 250.568 290 53 9 74 | 68.594 140 28 7 75 | 642.786 210 32 7 76 | 1500 300 24 7 77 | 102.563 230 37 6 78 | 756.984 280 30 8 79 | 51.229 160 19 7 80 | 644.151 200 47 6 81 | 15.313 110 22 5 82 | 243.237 110 10 8 83 | 256.894 70 1 4 84 | 22.464 100 1 6 85 | 45.689 190 39 6 86 | 724.938 70 8 5 87 | 1126.461 360 38 7 88 | 1985.119 360 35 5 89 | 1837.516 300 40 5 90 | 135.986 120 22 7 91 | 237.703 150 27 8 92 | 976.641 220 31 6 93 | 1452.689 280 19 7 94 | 1600 300 24 9 95 | 268.598 140 1 7 96 | 900.889 290 38 8 97 | 982.063 180 26 6 98 | 201.356 140 11 6 99 | 746.024 210 34 6 100 | 1132.877 250 55 7 101 | 1000 250 5 7 102 | 75.896 120 34 6 103 | 1351.254 290 37 9 104 | 202.705 60 13 8 105 | 365.985 140 23 6 106 | 305.268 290 54 6 107 | 263.268 160 18 7 108 | 513.694 100 2 7 109 | 152.609 160 11 6 110 | 35.987 150 30 8 111 | 102.568 140 22 7 112 | 215.368 230 36 6 113 | 426.784 230 37 8 114 | 507.772 30 9 3 115 | 233.291 80 2 7 116 | 1035.433 190 12 8 117 | 102.642 90 5 9 118 | 526.142 120 14 7 119 | 624.538 150 20 5 120 | 912.349 230 57 6 121 | 215.994 150 19 8 122 | 561.963 210 35 7 123 | 474.76 180 22 5 124 | 231.523 140 16 7 125 | 678.596 360 53 7 126 | 70.922 10 4 6 127 | 1567.548 240 29 6 128 | 263.598 270 43 7 129 | 1423.568 290 26 7 130 | 715.678 220 28 7 131 | 777.237 230 37 8 132 | 509.43 220 32 5 133 | 964.11 240 34 7 134 | 583.627 260 30 7 135 | 923.373 170 15 7 
136 | 344.392 130 23 7 137 | 1095.578 270 31 8 138 | 100.025 140 21 5 139 | 30.425 60 28 1 140 | 1080.342 210 18 7 141 | 799.899 210 28 7 142 | 1071.752 240 37 8 143 | 893.355 210 26 6 144 | 283.161 200 30 8 145 | 917.017 140 10 7 146 | 234.568 90 21 7 147 | 456.897 120 18 9 148 | 206.973 100 14 7 149 | 1294.099 360 38 7 150 | 826.859 180 36 6 151 | 564.158 150 32 7 152 | 192.607 110 9 5 153 | 10.652 90 39 5 154 | 45.689 160 24 7 155 | 42.568 230 45 7 156 | 20.456 40 13 8 157 | 635.192 60 17 6 158 | 1002.273 230 32 7 159 | 1177.047 230 23 6 160 | 507.638 120 0 6 161 | 215.689 150 35 5 162 | 526.48 120 26 6 163 | 26.895 60 19 6 164 | 883.877 280 26 7 165 | 9.104 120 53 8 166 | 103.568 230 29 8 167 | 169.583 230 28 7 168 | 429.504 40 17 6 169 | 223.639 140 26 8 170 | 145.585 360 42 8 171 | 985.968 210 17 6 172 | 500.922 260 36 8 173 | 226.652 250 45 7 174 | 1051.168 200 20 7 175 | 68.093 150 15 7 176 | 1547.159 250 28 8 177 | 393.774 100 27 6 178 | 804.282 260 17 8 179 | 801.577 210 32 8 180 | 450.562 290 46 9 181 | 26.598 220 47 8 182 | 179.061 70 19 1 183 | 345.687 110 22 8 184 | 295.84 250 55 9 185 | 2271.86 320 31 5 186 | 1134.575 300 39 8 187 | 601.434 180 21 6 188 | 45.298 180 36 6 189 | 759.518 200 21 7 190 | 832.869 320 44 7 191 | 56.894 140 27 7 192 | 709.399 100 16 6 193 | 56.895 120 33 6 194 | 767.134 230 33 8 195 | 503.172 150 21 7 196 | 700.929 250 35 9 197 | 910.851 190 26 7 198 | 888.569 240 14 6 199 | 800.615 250 34 6 200 | 1500 230 11 8 201 | 785.694 110 20 9 202 | -------------------------------------------------------------------------------- /linear regression/Chapter 7 DSUR Regression.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/linear regression/Chapter 7 DSUR Regression.R -------------------------------------------------------------------------------- /linear regression/Countries3.Rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/linear regression/Countries3.Rdata -------------------------------------------------------------------------------- /linear regression/Linear Regression.R: -------------------------------------------------------------------------------- 1 | # Simple Linear Regression in R 2 | # 3 | # Use the albumSales.dat file for these examples 4 | 5 | setwd("~/OneDrive/MIDS/W203/Data/lr") 6 | album1 <- read.delim("Album Sales 1.dat") 7 | head(album1) 8 | summary(album1) 9 | 10 | # run a regression analysis using a linear model, excluding any NA entries 11 | newModel <- lm(sales ~ adverts, data=album1, na.action=na.exclude) 12 | 13 | summary(newModel) 14 | 15 | # The summary() command gives the output from the linear regression 16 | # It tells us that the value of R^2 is 0.3346, so we can get the Pearson's 17 | # correlation co-efficient by taking the square root of R^2: 18 | newModel.pearsonscc = sqrt(0.3346) # which comes to 0.5784462 19 | 20 | # Another way we can determine the pearsons correlation co-efficient is 21 | # to run a correlation test 22 | cor(album1$adverts, album1$sales, use = "complete.obs", method="pearson") 23 | 24 | # Which results in the same value of 0.5784 25 | 26 | # The R^2 value tells us that the adverts accounts for 33.5% of the variation 27 | # in album sales. Therefore there must be other variables that account for 28 | # the remaining 66.5% of variation in album sales. 
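# The same numbers can be read straight off the fitted model object instead of
# being typed in by hand (just another way to get the values quoted above):
summary(newModel)$r.squared           # R^2, approximately 0.3346
sqrt(summary(newModel)$r.squared)     # Pearson's r for a one-predictor model
1 - summary(newModel)$r.squared       # share of the variation left unexplained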
29 | 30 | # Examining the F-statistic from the summary output we have: 31 | # F-statistic: 99.59 on 1 and 198 DF, p-value: < 2.2e-16 32 | 33 | # This tells us that the model is a significant improvement over the mean-only 34 | # model, with p < .001 35 | 36 | # The regression coefficients are given as 37 | # intercept -> 134.1, gradient -> 0.09612 38 | # so we can say that with no additional change in adverts we will have 134100 39 | # albums sold, and that for every additional 1000 spent in adverts we will 40 | # sell an additional 96 albums (not the best investment in the world) 41 | # Both the intercept and gradient have t-test p-values < .001, so we 42 | # can reject the null hypothesis that these values are zero; they represent 43 | # a genuine effect in the population. 44 | 45 | # We can examine the individual residuals from the model on the actual data 46 | # and look for any residuals that seem particularly large 47 | resid(newModel) 48 | 49 | # examine the standardized residuals. 95% of z-scores should lie between +/- 1.96 50 | # and 99% between +/-2.58. Anything over 3 sd should be suspect, or values outside 51 | # +/- 3.29 52 | rstandard(newModel) 53 | 54 | # Examine for residuals that have undue influence on the model, using the studentized residuals 55 | rstudent(newModel) 56 | 57 | # We can calculate the Pearson residual as 58 | PearsonResidual <- (resid(newModel)-mean(resid(newModel)))/sd(resid(newModel)) 59 | 60 | 61 | 62 | 63 | 64 | # Multiple linear regression - we extend the model to multiple predictors 65 | # first get the album2 data 66 | album2<-read.delim("Album Sales 2.dat", header = TRUE) 67 | head(album2) 68 | summary(album2) 69 | 70 | # run the multiple regression model 71 | albumSales.2<-lm(sales ~ adverts, data = album2) 72 | albumSales.3<-lm(sales ~ adverts + airplay + attract, data = album2) 73 | summary(albumSales.2) 74 | summary(albumSales.3) 75 | 76 | # We can see from the R^2 that if the first model (albumSales.2) has multiple R^2 77 | # of 0.3346 and the second model adds two more predictors to get an R^2 of .6647 78 | # that we have accounted for an additional 33% of album sales by the inclusion of the 79 | # other two predictors.
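# A useful supplementary check when adding predictors is adjusted R^2, which
# penalizes the extra terms; both values can be read off the fitted models:
summary(albumSales.2)$adj.r.squared
summary(albumSales.3)$adj.r.squared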
80 | 81 | 82 | 83 | 84 | #---We can obtain standardized parameter estimates with the lm.beta() function--- 85 | library(car) 86 | library(QuantPsyc) 87 | library(boot) 88 | 89 | # the lm.beta() gives us the standardized parameters, such that if parameter 1 90 | # varies by 1 sd then the outcome variable changes by beta sd's 91 | lm.beta(albumSales.3) 92 | sd(album2$sales) 93 | sd(album2$adverts) 94 | 95 | # sd of sales = 80.698, sd of adverts = 485.655 (these are in 1000's), so 96 | # 80,698 and 485,655, respectively 97 | # the lm.beta shows 0.511 on adverts, so an increase of 1 sd in adverts results in 98 | # an increase of 0.511 sd's of sales -> increase of 485,655 in adverts results in 99 | # 0.511 * 80,698 in sales = 41,240 100 | 101 | # the larger the beta values the more important the predictor 102 | 103 | #---Confidence intervals are obtained with the confint() function---- 104 | confint(albumSales.3) 105 | 106 | # compare the two models with anova 107 | anova(albumSales.2, albumSales.3) 108 | 109 | # The anova gives us an F=96.447 and p < .001, so the updated model is an 110 | # improvement over the first model 111 | 112 | # Model Diagnostics 113 | #----Obtain casewise diagnostics and add them to the original data file.--- 114 | 115 | album2$residuals<-resid(albumSales.3) 116 | album2$standardized.residuals <- rstandard(albumSales.3) 117 | album2$studentized.residuals <- rstudent(albumSales.3) 118 | album2$cooks.distance<-cooks.distance(albumSales.3) 119 | album2$dfbeta <- dfbeta(albumSales.3) 120 | album2$dffit <- dffits(albumSales.3) 121 | album2$leverage <- hatvalues(albumSales.3) 122 | album2$covariance.ratios <- covratio(albumSales.3) 123 | 124 | #----List of standardized residuals greater than 2-------------- 125 | album2$standardized.residuals>2| album2$standardized.residuals < -2 126 | 127 | #---Create a variable called large.residual, which is TRUE (or 1) if the residual is greater than 2, or less than -2.---------- 128 | album2$large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 129 | 130 | #---Count the number of large residuals------------- 131 | sum(album2$large.residual) 132 | 133 | 134 | #---Display the value of sales, airplay, attract, adverts, and the standardized residual, for those cases which have a residual greater than 2 or less than -2.------------- 135 | album2[album2$large.residual,c("sales", "airplay", "attract", "adverts", "standardized.residuals")] 136 | 137 | #-----Cook's distance, leverage and covariance ratio for cases with large residuals.--------- 138 | album2[album2$large.residual , c("cooks.distance", "leverage", "covariance.ratios")] 139 | 140 | 141 | #----The Durbin-Watson test is obtained with either dwt() or durbinWatsonTest()--- 142 | durbinWatsonTest(albumSales.3) 143 | dwt(albumSales.3) 144 | 145 | #----Obtaining the VIF--- 146 | vif(albumSales.3) 147 | 148 | #----The tolerance is 1/VIF--- 149 | 1/vif(albumSales.3) 150 | 151 | #----The mean VIF--- 152 | mean(vif(albumSales.3)) 153 | 154 | 155 | #---Histogram of studentized residuals--- 156 | 157 | hist(album2$studentized.residuals) 158 | hist(rstudent(albumSales.3)) 159 | 160 | #--Plot of residuals against fitted (predicted) values, with a flat line at the mean-- 161 | plot(albumSales.3$fitted.values,rstandard(albumSales.3)) 162 | abline(0, 0) 163 | 164 | #same as above 165 | plot(albumSales.3) 166 | 167 | -------------------------------------------------------------------------------- /linear regression/simple_regression.R: 
-------------------------------------------------------------------------------- 1 | ### A demonstration of simple OLS regression in R 2 | 3 | # 1. Setting up a regression 4 | 5 | # Use car for scatterplots 6 | library(car) 7 | 8 | # Load the Countries dataset 9 | load("Countries3.Rdata") 10 | summary(Countries) 11 | 12 | # Let's look at the number of internet users as predicted by 13 | # a country's GDP. As we'll see, this model isn't very well 14 | # chosen, but it will give us a chance to see some of the problems 15 | # that often come up with linear regression. 16 | 17 | # First, check the scatterplot to get a sense of the underlying 18 | # relationship. 19 | scatterplot(Countries$gdp, Countries$internet_users_2011) 20 | 21 | # That doesn't look too linear, so we already know that our 22 | # regression is somewhat problematic. 23 | 24 | # Set the rownames so that they appear in the diagnostic plots 25 | rownames(Countries) = Countries$Country 26 | 27 | # Create the linear model, and look at a summary 28 | model = lm(internet_users_2011 ~ gdp, data = Countries) 29 | summary(model) 30 | 31 | # 2. Regression diagnostics 32 | 33 | # use the plot command for common regression diagnostics 34 | plot(model) 35 | # This command pulls up a series of plots that can be used to 36 | # check the standard OLS assumptions 37 | # 38 | # plot 1: residuals versus fitted values. check for 39 | # heteroskedasticity. This means that the spread of points 40 | # on the graph changes from the left side to the right side. 41 | # We can also look for non-linear relationships. That would 42 | # mean that the band of points moves up and down as you look 43 | # from the left to the right of the plot. This particular 44 | # regression seems to suffer from both problems. 45 | # 46 | # plot 2: qqplot of standardized residuals vs. normal curve. 47 | # Check to see if the errors are normally distributed. That 48 | # means that the points should follow the diagonal line closely. 49 | # In this regression, we don't seem to have normal residuals, 50 | # but it's a large sample size, so we can use the central limit 51 | # theorem and we don't have to worry about this. 52 | # 53 | # plot 3: scale-location. This is a variation on the 54 | # residuals vs fitted values plot. Since we take the absolute 55 | # value of the resuduals, all points appear above the x-axis, 56 | # and more variance appears as higher points. Heteroskedasticity 57 | # can then be detected if the mass of points moves up and down 58 | # as you look from the left to the right of the graph. 59 | # Curiously, this regression doesn't seem to have a problem on 60 | # this plot. It seems the non-linearity is actually obscuring 61 | # the problem with heteroskedasticity. 62 | # 63 | # plot 4: residuals vs leverage. Look for outliers that 64 | # may be biasing the model. 65 | # In this regression, no points have Cook's distance > 1, so 66 | # we don't need to worry. 67 | 68 | # Here's an explicit way to check for points with large residuals 69 | outlierTest(model) 70 | 71 | # Could use the Durbin-Watson test to check for autocorrelation 72 | # It's really not necessary here since the order of Countries 73 | # is alphabetical, and there's no reason to suspect that alphabetical 74 | # proximity influences residuals. 75 | dwt(model) 76 | 77 | # We suspect heteroskedasticity here, and we could check 78 | # with a Breusch-Pagan test. 
Since we are testing a null 79 | # hypothesis of homoskedasticity, this test will tend to 80 | # be significant for large sample sizes, so interpret the 81 | # results with caution. 82 | library(lmtest) 83 | bptest(model) 84 | # In this case, the test is negative. Breusch-Pagan is based 85 | # on squaring the residuals, so again, it could be that the 86 | # non-linear relationship is obscuring the heteroskedasticity. 87 | 88 | # Just in case, let's check the heteroskedasticity-robust errors 89 | # We are keeping our regression line the same, just adjusting 90 | # our estimates of significance. 91 | library(sandwich) 92 | coeftest(model, vcov = vcovHC) 93 | 94 | # As an exercise, you could try the log of gdp instead 95 | # to see what really nice regression output looks like -------------------------------------------------------------------------------- /linear regression/simple_regression_transformation.R: -------------------------------------------------------------------------------- 1 | # We're going to use a similar script to the simple_regression script and 2 | # the Countries3.Rdata file to take a look at Internet Users as a function 3 | # of GDP. However, this time we'll transform the GDP data by taking the 4 | # log(GDP) and performing the regression with that to help deal with the 5 | # heteroskedasticity issues we saw in the non-transformed data. 6 | # 7 | # 8 | # Set the working directory 9 | setwd("~/Documents/R/lr") 10 | # 11 | # Load the car library for the scatterplot function 12 | library(car) 13 | # 14 | # Load the countries dataset 15 | load("Countries3.Rdata") 16 | # 17 | # Take a look at the Countries data 18 | summary(Countries) 19 | # 20 | # Use the country name as the row name as a convenience 21 | rownames(Countries) <- Countries$country 22 | # 23 | # Look at the number of internet users per country by gdp 24 | scatterplot(Countries$gdp, Countries$internet_users_2011) 25 | # 26 | # This looks like there will be some problems with linear regression 27 | # assumptions, and in the previous version of the script we saw 28 | # heteroskedasticity issues. 29 | # So let's try transforming gdp by taking its natural log, log(gdp) 30 | # 31 | Countries$loggdp <- log(Countries$gdp) 32 | # 33 | scatterplot(Countries$loggdp, Countries$internet_users_2011) 34 | # 35 | # Now we see a little better scatterplot 36 | # Let's go ahead and construct our linear regression model 37 | model = lm(internet_users_2011 ~ loggdp, data = Countries) 38 | summary(model) 39 | # 40 | # the coefficients are highly significant as we saw before but 41 | # let's take a look at the plots of the model to see how they look 42 | plot(model) 43 | # 44 | # plot 1: Residuals vs. Fitted 45 | # We still see some variation of the band of residuals but it's much better 46 | # than before the transformation. There's a better overall uniformity to the 47 | # band. 48 | # 49 | # plot 2: QQ Plot 50 | # Again we see that the ends of the plot move away from the diagonal, but it 51 | # is much better than before the transformation. The lower left of the plot 52 | # is still away from the diagonal; however, normality is probably not as much 53 | # of an issue here because of the Central Limit Theorem and sample size. 54 | # 55 | # plot 3: Scale Location 56 | # This alternative homoskedasticity plot looks much better than before the 57 | # transformation as well. 58 | # 59 | # plot 4: Residuals vs. Leverage 60 | # There's nothing to worry about on this plot.
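# To back up the visual check in plot 4 with numbers, we can also list the
# largest Cook's distances directly; values approaching 1 would deserve a closer look.
sort(cooks.distance(model), decreasing = TRUE)[1:5]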
61 | # 62 | library(lmtest) 63 | bptest(model) 64 | # Again the Beusch-Pagan test doesn't really show anything for heterskedacity 65 | # 66 | # So if we look at the summary of the model again, we should be able to 67 | # convert the gdp coefficients back to something that makes sense instead 68 | # of logarithm. 69 | summary(model) 70 | # 71 | # The intercept is -66.518 but the beta co-efficient is 13.89. 72 | # That would make an equation of Y = 13.89*loggdp - 66.518 73 | # So to convert the beta co-efficent we take e^beta 74 | exp(13.89) 75 | # 76 | # e^13.89 = 1077334 77 | # for every 1077334 change in gdp we have 1 unit change in internet users. -------------------------------------------------------------------------------- /logistic regression/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab= "Monthly Sunspot Numbers") 12 | pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("~/Documents/R/logistic regression") 53 | eelData <- read.delim("eel.dat", header = TRUE) 54 | str(eelData) 55 | head(eelData) 56 | eelData$Cured <- relevel(eelData$Cured, "Not Cured") 57 | eelData$Intervention <- relevel(eelData$Intervention, "No Treatment") 58 | str(eelData) 59 | eelModel.1 <- glm(Cured ~ Intervention, data = eelData, family = binomial(link = "logit")) 60 | eelModel.2 <- glm(Cured ~ Interention + Duration, data = eelData, family = binomial(link = "logit")) 61 | eelModel.2 <- glm(Cured ~ Intervention + Duration, data = eelData, family = binomial(link = "logit")) 62 | summary(eelModel.1) 63 | summary(eelModel.2) 64 | chisq.test((154.08 - 144.16)) 65 | chisq.prob <- 1 - pchisq(eelModel.1$null.deviance - eelModel.1$deviance, eelModel.1$df.null - eelModel.1$df.residual) 66 | chisq.prob 67 | print(154.08-144.16) 68 | print(sqrt((3.074^2-(2*1))/154.08)) 69 | str(eelModel.1) 70 | eelModel.1 71 | eelModel.1$z 72 | eelModel.1$z.statistic 73 | eelModel.1$z.value 74 | eelModel.1$effects 75 | coef(summary(eelModel.1))[, 3] 76 | summary(eelModel.1)$coefficients[-1,3] 77 | coef(summary(eelModel.1))[-1, 3] 78 | z.stat <- coef(summary(eelModel.1))[-1, 3] 79 | eelModel.1.z <- coef(summary(eelModel.1))[-1, 3] 80 | eelModel.1.R <- sqrt((eelModel.1.z^2 - 
2*(eelModel.1$df.null - eelModel.1$df.residual))/eelModel.1$null.deviance) 81 | eelModel.1.R 82 | summary(eelModel.1) 83 | eelModel.1$residual.deviance 84 | eelModel.1$deviance 85 | eelModel.1.chisq <- eelModel.1$null.deviance - eelModel.1$deviance 86 | eelModel.1.R2L <- eelModel.1.chisq / eelModel.1$null.deviance 87 | eelModel.1.R2L 88 | eelModel.1.R 89 | eelModel.1.RCS <- 1 - exp((eelModel.1$deviance - eelModel.1$null.deviance) / (eelModel.1$df.null + 1)) 90 | eelModel.1.RCS 91 | eelModel.1.RCN <- eelModel.1.RCS / (1 - (exp(-(eelModel.1$null.deviance / (eelModel.1$df.null + 1))))) 92 | eelModel.1.RCN 93 | eelModel.1.RCS <- 1 - exp((eelModel.1$deviance - eelModel.1$null.deviance) / length(eelModel.1$fitted.values)) 94 | eelModel.1.RCN <- eelModel.1.RCS / (1 - (exp(-(eelModel.1$null.deviance / length(eelModel.1$fitted.values))))) 95 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients) 96 | eelModel.1.oddsRatio 97 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[ , 1]) 98 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[1]) 99 | eelModel.1.oddsRatio 100 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[2]) 101 | eelModel.1.oddsRatio 102 | exp(confint(eel.Model.1)) 103 | exp(confint(eelModel.1)) 104 | eelModel.2.chisq <- eelModel.1$deviance - eelModel.2$deviance # deviance between the 2 models 105 | chisq.prob <- 1 - pchisq(eelModel.2.chisq, eelModel.1$df.residual - eelModel.2$df.residual) 106 | chisq.prob 107 | eelModel.2.chisq 108 | anova(eelModel.1, eelModel.2) 109 | eelData$predicted.probabilities <- fitted(eelModel.1) 110 | eelData$standardized.residuals <- rstandard(eelModel.1) 111 | eelData$studentized.residuals <- rstudent(eelModel.1) 112 | eelData$dfbeta <- dfbeta(eelModel.1) 113 | eelData$dffit <- dffit(eelModel.1) 114 | eelData$dffit <- dffits(eelModel.1) 115 | eelData$leverage <- hatvalues(eelModel.1) 116 | head(eelData[, c("Cured", "Intervantion", "Duration", "predicted.probabilities")]) 117 | head(eelData[, c("Cured", "Intervention", "Duration", "predicted.probabilities")]) 118 | eelData[, c("leverage", "studentized.residuals", "dfbeta")] 119 | penaltyData <- read.delim("penalty.day", header = TRUE) 120 | penaltyData <- read.delim("penalty.dat", header = TRUE) 121 | head(penaltyData) 122 | str(penaltyData) 123 | penaltyModel.1 = glm(Scored ~ Previous, data = penaltyData, family = binomial(link = "logit")) 124 | summary(penatlyModel.1) 125 | summary(penaltyModel.1) 126 | penaltyModel.1.chisq <- penaltyModel.1$null.deviance - penaltyModel.1$deviance 127 | penaltyModel.1.chisqProbability <- pchisq(penalyModel.1.chisq, penaltyModel.1$df.null - penaltyModel.1$df.residual) 128 | penaltyModel.1.chisqProbability <- pchisq(penaltyModel.1.chisq, penaltyModel.1$df.null - penaltyModel.1$df.residual) 129 | penaltyModel.1.chisq 130 | penaltyModel.1.chisqProbability 131 | penaltyModel.1.chisqProbability <- pchisq(penaltyModel.1.chisq, 1) 132 | penaltyModel.1.chisqProbability 133 | pchisq(penaltyModel.1.chisq, 1) 134 | penaltyModel.1$null.deviance 135 | penaltyModel.1$deviance 136 | penaltyModel.1$null.deviance - penaltyModel.1$deviance 137 | pchisq(42.466, 1) 138 | penaltyModel.1.z <- coef(summary(penaltyModel.1))[-1, 3] 139 | penaltyModel.1.z 140 | penaltyModel.1.R <- sqrt((penaltyModel.1.z^2 - 2*(penaltyModel.1$df.null - penaltyModel.1$df.residual))/penaltyModel.1$null.deviance) 141 | penaltyModel.1.R 142 | penaltyModel.1.R2L <- penaltyModel.1.chisq / penaltyModel.1$null.deviance 143 | penaltyModel.1.R2L 144 | penaltyModel.1.RCS <- 1 - exp((penaltyModel.1$deviance - 
penaltyModel.1$null.deviance) / length(penaltyModel.1$fitted.values)) 145 | penaltyModel.1.RCS 146 | penaltyModel.1.RCN <- penaltyModel.1.RCS / (1 - (exp(-(penaltyModel.1$null.deviance / length(penaltyModel.1$fitted.values))))) 147 | penaltyModel.1.RCN 148 | penaltyModel.1.oddsRatio <- exp(penaltyModel.1$coefficients[2]) 149 | penaltyModel.1.oddsRatio 150 | exp(confint(penaltyModel.1)) 151 | penaltyModel.2 <- glm(Scored ~ Previous + PSWQ, data = penaltyData, family = binomial(link = "logit")) 152 | summary(penaltyModel.2) 153 | penaltyModel.2.chisq <- penaltyModel.1$deviance - penaltyModel.2$deviance 154 | penaltyModel.2.chisqProbability <- pchisq(penaltyModel.2.chisq, penaltyModel.1$df.residual - penaltyModel.2$df.residual) 155 | penaltyModel.2.chisq 156 | penaltyModel.2.chisqProbability 157 | penaltyModel.2.z <- coef(summary(penaltyModel.2))[-1, 3] 158 | penaltyModel.2.R <- sqrt((penaltyModel.2.z^2 - 2*(penaltyModel.1$df.residual - penaltyModel.2$df.residual))/penaltyModel.1$deviance) 159 | penaltyModel.2.R 160 | penaltyModel.2.R2L <- penaltyModel.2.chisq / penaltyModel.1$deviance 161 | penaltyModel.2.R2L 162 | penaltyModel.2.RCS <- e - exp((penaltyModel.2$deviance - penaltyModel.1$deviance) / length(penaltyModel.2$fitted.values)) 163 | penaltyModel.2.RCS <- 1 - exp((penaltyModel.2$deviance - penaltyModel.1$deviance) / length(penaltyModel.2$fitted.values)) 164 | penaltyModel.2.RCS 165 | penaltyModel.2.RCN <- penaltyModel.2.RCS / (1 - (exp(-(penaltyModel.1$deviance / length(penaltyModel.2$fitted.values))))) 166 | penaltyModel.2.RCN 167 | penaltyModel.2.oddsRatio <- exp(penaltyModel.2$coefficents[2]) 168 | penaltyModel.3 <- glm(Scored ~ Previous + PSWQ + Anxious, data = penalty.dat, family = binomial()) 169 | penaltyModel.3 <- glm(Scored ~ Previous + PSWQ + Anxious, data = penaltyData, family = binomial()) 170 | summary(penalyModel.3) 171 | summary(penaltyModel.3) 172 | vif(penaltyModel.3) 173 | library(car) 174 | vif(penaltyModel.3) 175 | 1/vif(penaltyModel.3) 176 | library(polycor) 177 | cor(penaltyData$Previous, penaltyData$PSWQ, penaltyData$Anxious) 178 | penaltyVars <- penaltyData[, c("Previous", "PSWQ", "Anxious")] 179 | cor(penaltyVars) 180 | cor.test(penaltyData$Previous, penaltyData$Anxious) 181 | cor.test(penaltyData$Previous, penaltyData$PSWQ) 182 | cor.test(penaltyData$Anxious, penaltyData$PSWQ) 183 | penaltyData$logPSWQInt <- log(penaltyData$PSWQ)*penaltyData$PSWQ 184 | penaltyData$logPreviousInt <- log(penaltyData$Previous)*penaltyData$Previous 185 | penaltyData$logAnxiousInt <- log(penaltyData$Anxious)*penaltyData$Anxious 186 | penaltyData$logPSWQInt <- log(penaltyData$PSWQ)*penaltyData$PSWQ 187 | penaltyModel.test <- glm(Scored ~ Previous + PSWQ + Anxious + logPreviousInt + logPSWQInt + logAnxiousInt, data = penaltyData, family = binomial()) 188 | summary(penaltyModel.test) 189 | -------------------------------------------------------------------------------- /logistic regression/Lacourse et al. 
(2001) Females.dat: -------------------------------------------------------------------------------- 1 | Age Age_Group Drug_Use Father_Negligence Gender Isolation Marital_Status Meaninglessness Metal Mother_Negligence Normlessness Self_Estrangement Suicide_Risk Vicarious Worshipping 2 | 15.83 14-16 Years Old 8 17 Female 6 Together 10 4.83762518844781 10 6 15 Non-Suicidal 5 4 3 | 14.92 14-16 Years Old 9 23 Female 8 Together 26 6 12 8 20 Non-Suicidal 4 6 4 | 15.33 14-16 Years Old 5 15 Female 18 Together 19 6 16 7 17 Non-Suicidal 6 3 5 | 15.83 14-16 Years Old 11 11 Female 9 Separated or Divorced 13 4 10 5 12 Non-Suicidal 3 3 6 | 14.92 14-16 Years Old 7 13 Female 5 Together 13 8 16 3 6 Non-Suicidal 3 9 7 | 14.58 14-16 Years Old 4 29 Female 15 Separated or Divorced 18 7 18 5 15 Suicidal 2 4 8 | 14.5 14-16 Years Old 5 10 Female 8 Together 12 8 9 6 10 Non-Suicidal 3 4 9 | 15.67 14-16 Years Old 7 27 Female 6 Separated or Divorced 18 4 12 7 12 Non-Suicidal 3 4 10 | 14.92 14-16 Years Old 5 23 Female 10 Together 29 14 21 4 28 Non-Suicidal 8 9 11 | 15 14-16 Years Old 4 12 Female 5 Together 22 4 15 7 7 Non-Suicidal 5 9 12 | 15.17 14-16 Years Old 3 19 Female 12 Together 20 8 11 8 19 Non-Suicidal 5 7 13 | 17.17 16-19 Years Old 8 30 Female 14 Separated or Divorced 13 10 29 6 12 Suicidal 6 12 14 | 16.83 16-19 Years Old 11 12 Female 7 Together 22 4 22 7 19 Non-Suicidal 6 7 15 | 16.75 16-19 Years Old 6 16 Female 14 Together 25 7 9 11 10 Non-Suicidal 5 3 16 | 16.58 16-19 Years Old 8.43970783004555 20 Female 5 Together 16 4 15 4 9 Non-Suicidal 8 4 17 | 16.5 16-19 Years Old 5 17 Female 12 Together 17 7 9 6 19 Non-Suicidal 3 7 18 | 16.5 16-19 Years Old 3 24 Female 13 Separated or Divorced 21 7 24 7 16 Non-Suicidal 4 6 19 | 16.75 16-19 Years Old 3 22 Female 16 Together 30 4 31 7 11 Non-Suicidal 4 5 20 | 17 16-19 Years Old 5 20 Female 12 Together 13 4 14 4 12 Non-Suicidal 3 3 21 | 17.25 16-19 Years Old 5 31 Female 16 Separated or Divorced 23 5 21 6 22 Suicidal 5 7 22 | 17.17 16-19 Years Old 3 26 Female 15 Together 16 5 19.5824136069868 4 24 Non-Suicidal 5 9 23 | 17.42 16-19 Years Old 7.15478004110386 23 Female 15 Together 18 5 36 9 14 Non-Suicidal 6 3 24 | 17.08 16-19 Years Old 5 16 Female 10.9559005734461 Together 21 6.53961748140933 13 8 15 Non-Suicidal 6 4 25 | 17.92 16-19 Years Old 11 16 Female 6 Together 17 10 13 12 16 Suicidal 7 6 26 | 17.25 16-19 Years Old 7.87460242195194 26 Female 13 Together 24 6 10 8 10 Non-Suicidal 8 7 27 | 18.5 16-19 Years Old 6 33 Female 6 Separated or Divorced 25 4 9 6 14 Non-Suicidal 4 3 28 | 16.92 16-19 Years Old 5 27 Female 23 Together 10 12 19 5 22 Non-Suicidal 5 4 29 | 17.33 16-19 Years Old 12 14 Female 10 Together 10 6 15 7 16 Suicidal 3 5 30 | 17.42 16-19 Years Old 7 16 Female 6 Together 22 5 13 7 18 Non-Suicidal 5 4 31 | 17.17 16-19 Years Old 7 10 Female 5 Together 11 6 9 5 12 Suicidal 7 4 32 | 16.58 16-19 Years Old 8 12 Female 6 Together 13 9 12 6 11 Non-Suicidal 5 10 33 | 16.58 16-19 Years Old 5 21 Female 8 Together 15 4 18 7 14 Non-Suicidal 6 7 34 | 17.42 16-19 Years Old 6 30 Female 7 Together 25 9 24 4 14 Non-Suicidal 5 3 35 | 16.75 16-19 Years Old 11 24 Female 9 Together 15 7 15 8 12 Non-Suicidal 4 4 36 | 15 14-16 Years Old 11 29 Female 7 Together 26 11 10 6 7 Suicidal 5 10 37 | 16.5 16-19 Years Old 9 10 Female 4 Together 10 4 9 7 5 Non-Suicidal 3 4 38 | 16.58 16-19 Years Old 5 16 Female 13 Together 13 7 16 4 17 Non-Suicidal 5 4 39 | 16.67 16-19 Years Old 6 18 Female 11 Together 19 4 14 3 10 Non-Suicidal 5 3 40 | 17 16-19 Years Old 5 16 Female 8 Together 14 
8.52820197005175 12 5 5 Suicidal 3 3 41 | 17.25 16-19 Years Old 7 12 Female 4 Together 8 4 16 10 10 Suicidal 2 3 42 | 14.58 14-16 Years Old 5 9 Female 4 Separated or Divorced 8 11 12 6 8 Non-Suicidal 8 6 43 | 14.75 14-16 Years Old 6 15 Female 6 Together 17 5 19 4 17 Non-Suicidal 5 5 44 | 14.5 14-16 Years Old 11 29 Female 10 Together 15 6.91536148045185 21 9 13 Non-Suicidal 2 3 45 | 14.75 14-16 Years Old 3 9 Female 4 Together 13 5 9 3 8 Non-Suicidal 3 9 46 | 14.58 14-16 Years Old 14 22 Female 14 Together 27 4 15 12 18 Suicidal 5 10 47 | 14.92 14-16 Years Old 7 13 Female 6 Separated or Divorced 14 4 13 5 11 Non-Suicidal 3 9 48 | 14.5 14-16 Years Old 14 27 Female 14 Together 25 7 18 8 16 Non-Suicidal 6 11 49 | 16.5 16-19 Years Old 9 10 Female 18 Separated or Divorced 15 6 24 6 26 Suicidal 6 5 50 | 14.67 14-16 Years Old 9 24 Female 6 Together 24 13 26 9 21 Suicidal 3 3 51 | 16.83 16-19 Years Old 13 22 Female 9 Separated or Divorced 15 4 9 9 14 Non-Suicidal 5 3 52 | 15.08 14-16 Years Old 8 29 Female 8 Separated or Divorced 30 9 21 17 29 Suicidal 7 10 53 | 15.33 14-16 Years Old 13 14 Female 5 Separated or Divorced 21 12 10 9 22 Suicidal 6 6 54 | 14.67 14-16 Years Old 9 11 Female 8 Together 19 6 10 7 18 Non-Suicidal 3 7 55 | 15.17 14-16 Years Old 11 22 Female 18 Together 20 12 13 8 22 Suicidal 8 9 56 | 16.17 14-16 Years Old 3 15 Female 18 Together 17 12 32 4 27 Non-Suicidal 7 3 57 | 16.08 14-16 Years Old 7 12 Female 5 Together 13.6292687605901 6 9 8 18 Suicidal 4 4 58 | 16.42 16-19 Years Old 3 13 Female 10 Together 14 6 20 3 10 Non-Suicidal 2 9 59 | 15.83 14-16 Years Old 5 11 Female 5 Together 16 5 11 4 8 Non-Suicidal 3 5 60 | 15.92 14-16 Years Old 10 35 Female 12 Separated or Divorced 25 4 17 8 14 Suicidal 7 5 61 | 16.25 16-19 Years Old 5 13 Female 8 Together 14 5 19 5 14 Non-Suicidal 6 6 62 | 17.75 16-19 Years Old 11 9 Female 9 Together 19 6 9 6 21 Suicidal 3 6 63 | 16 14-16 Years Old 6 14 Female 11 Together 17 9 18 11 17 Suicidal 5 10 64 | 15.42 14-16 Years Old 3 17 Female 16 Together 23 14 13 6 18 Non-Suicidal 3 3 65 | 16 14-16 Years Old 11 13 Female 6 Together 30 15 36 8 11 Non-Suicidal 8 6 66 | 15.17 14-16 Years Old 6 12 Female 12 Together 11 6 15 6 18 Non-Suicidal 4 5 67 | 15 14-16 Years Old 7 17 Female 11 Together 16 4 29 9 27 Non-Suicidal 8 4 68 | 14.5 14-16 Years Old 3 21 Female 5 Together 12 5 14 3 6 Non-Suicidal 8 12 69 | 15.08 14-16 Years Old 13 13 Female 7 Separated or Divorced 21 4 12 12 17 Non-Suicidal 8 9 70 | 15 14-16 Years Old 13 17 Female 9 Separated or Divorced 26 6 24 6 24 Non-Suicidal 8 9 71 | 15.08 14-16 Years Old 6 9 Female 7 Separated or Divorced 16 5 16 8 16 Non-Suicidal 4 3 72 | 15.25 14-16 Years Old 5 24 Female 18 Separated or Divorced 23.9099750687457 4 17 15 24 Non-Suicidal 2 4 73 | 15 14-16 Years Old 7 13 Female 11 Together 13 6 9 13 17 Suicidal 4 8 74 | 15.42 14-16 Years Old 13 22 Female 7 Together 18 5 13 5 28 Suicidal 8 6 75 | 14.67 14-16 Years Old 5 13 Female 8 Together 23 6 12 5 18 Non-Suicidal 5 7 76 | 15.42 14-16 Years Old 11 21 Female 6 Together 19 5 20 6 16 Non-Suicidal 5 4 77 | 15.17 14-16 Years Old 15 34 Female 22 Separated or Divorced 27 20 17 12 29 Suicidal 8 10 78 | 15.67 14-16 Years Old 6.36189616889847 11 Female 10 Together 11 5 9 8 15 Non-Suicidal 6 7 79 | 16.42 16-19 Years Old 6 10 Female 18 Together 21 6 14 10 21 Non-Suicidal 3 4 80 | 14.67 14-16 Years Old 4 11 Female 7 Together 11 10 9 4 11 Non-Suicidal 2 10 81 | 16.17 14-16 Years Old 7.35574617962391 15 Female 10 Together 14 7 14 5 20 Non-Suicidal 4 3 82 | 16.25 16-19 Years Old 5 12 Female 5 
Together 16 4 13 5 14 Non-Suicidal 2 3 83 | 15.75 14-16 Years Old 13 14 Female 4 Together 12 7 15 12 9 Non-Suicidal 7 9 84 | 16.33 16-19 Years Old 4 15 Female 12 Together 14 5 13 4 10 Non-Suicidal 4 4 85 | 16.42 16-19 Years Old 12 25 Female 5 Together 18 5 16 6 20 Suicidal 3 5 86 | 17.08 16-19 Years Old 10 31 Female 16 Separated or Divorced 20 11 23 7 23 Suicidal 8 5 87 | 16.67 16-19 Years Old 7 15 Female 8 Together 21 6 10 3 11 Non-Suicidal 2 3 88 | 17.67 16-19 Years Old 4 10 Female 10 Together 22 16 10 5 15 Non-Suicidal 4 3 89 | 17.08 16-19 Years Old 7 25 Female 7 Separated or Divorced 13 6 13 5 13 Non-Suicidal 5 3 90 | 16.83 16-19 Years Old 5 17 Female 12 Separated or Divorced 13 4 20 4 14 Non-Suicidal 5 5 91 | 17 16-19 Years Old 5 20 Female 6 Separated or Divorced 11 6 12 5 19 Non-Suicidal 6 3 92 | 17.25 16-19 Years Old 11 27 Female 10 Separated or Divorced 13 12 12 8 17 Suicidal 7 7 93 | 17.25 16-19 Years Old 9 20 Female 9 Together 30 18 9 15 27 Suicidal 8 10 94 | 16.83 16-19 Years Old 11 31 Female 7 Separated or Divorced 21 7 14 13 16 Suicidal 3 5 95 | 16.42 16-19 Years Old 3 14 Female 17 Together 11 4 17 4 12 Non-Suicidal 6 4 96 | 17.42 16-19 Years Old 3 17 Female 12 Together 12 5 14 5 14 Non-Suicidal 6 3 97 | 16.42 16-19 Years Old 5 12 Female 4 Together 15 4 12 4 12 Non-Suicidal 5 5 98 | 16.75 16-19 Years Old 3 19 Female 11 Separated or Divorced 21 6 18 6 13 Non-Suicidal 5 3 99 | 17.25 16-19 Years Old 3 19 Female 8 Together 10 6 23 3 12 Non-Suicidal 2 3 100 | 17.08 16-19 Years Old 11 11 Female 5 Together 14.433093821544 8 10 6 8 Non-Suicidal 2 5 101 | 17.25 16-19 Years Old 7 16 Female 9 Together 23 6 14 10 10 Suicidal 4 3 102 | 17.33 16-19 Years Old 6 13 Female 4 Together 22 8 11 6 9 Non-Suicidal 4 3 103 | 17.08 16-19 Years Old 6 10 Female 10 Together 10 6 9 5 10 Non-Suicidal 5 4 104 | 17.25 16-19 Years Old 3 19 Female 4 Together 14 12 16 3 9 Non-Suicidal 4 3 105 | 17.42 16-19 Years Old 3 11 Female 9 Together 12 5 10 3 12 Non-Suicidal 2 8 106 | 17.42 16-19 Years Old 5 12 Female 11 Together 26 7 9 5 18 Non-Suicidal 3 4 107 | 15.75 14-16 Years Old 13 13 Female 18 Together 18 14 9 8 21 Suicidal 4 7 108 | 16 14-16 Years Old 5 29 Female 8 Separated or Divorced 19 6 11 7 18 Non-Suicidal 6 6 109 | 16.42 16-19 Years Old 5 10 Female 9 Together 17 4 11.8560541141899 4 17 Non-Suicidal 6 3 110 | 15.58 14-16 Years Old 6 22 Female 18 Together 20 5.06898320036459 15.7517495914531 12 17 Non-Suicidal 6 7 111 | 15.83 14-16 Years Old 5 12 Female 6 Together 13 6 14 3 14 Non-Suicidal 3 4 112 | 15.58 14-16 Years Old 11 21 Female 10 Together 20 4 16 9 21 Suicidal 5 3 113 | 15.83 14-16 Years Old 3 18 Female 9 Together 18 10 14 5 10 Non-Suicidal 3 6 114 | 15.5 14-16 Years Old 3 13 Female 5 Together 19 5 15 6 11 Non-Suicidal 3 4 115 | 15.58 14-16 Years Old 3 10 Female 4 Together 25 4 9 3 14 Non-Suicidal 2 4 116 | 16.33 16-19 Years Old 3 17 Female 5 Together 18 13 13 5 14 Non-Suicidal 4 4 117 | 15.83 14-16 Years Old 11 26 Female 8 Separated or Divorced 22 6 17 9 7 Non-Suicidal 5 4 118 | 15.5 14-16 Years Old 11 12 Female 4 Together 14 18 12 14 15 Non-Suicidal 7 8 119 | 15.58 14-16 Years Old 5 16 Female 14 Together 29 4 12 13 14 Non-Suicidal 5 6 120 | 15.58 14-16 Years Old 4 13 Female 8 Together 15 5 13 8 16 Non-Suicidal 5 3 121 | 15.67 14-16 Years Old 7 20 Female 6 Together 14 6 18 4 11 Non-Suicidal 7 4 122 | 16 14-16 Years Old 5 9 Female 4 Separated or Divorced 11 4 9 4 10 Non-Suicidal 2 3 123 | -------------------------------------------------------------------------------- /logistic regression/eel.dat: 
-------------------------------------------------------------------------------- 1 | Cured Intervention Duration 2 | Not Cured No Treatment 7 3 | Not Cured No Treatment 7 4 | Not Cured No Treatment 6 5 | Cured No Treatment 8 6 | Cured Intervention 7 7 | Cured No Treatment 6 8 | Not Cured Intervention 7 9 | Cured Intervention 7 10 | Cured No Treatment 8 11 | Not Cured No Treatment 7 12 | Cured Intervention 7 13 | Cured No Treatment 7 14 | Cured No Treatment 5 15 | Not Cured Intervention 9 16 | Not Cured No Treatment 6 17 | Not Cured No Treatment 7 18 | Cured Intervention 8 19 | Not Cured Intervention 7 20 | Cured Intervention 7 21 | Cured Intervention 9 22 | Not Cured No Treatment 7 23 | Cured Intervention 9 24 | Cured Intervention 8 25 | Not Cured No Treatment 7 26 | Cured Intervention 6 27 | Cured Intervention 8 28 | Cured No Treatment 7 29 | Cured No Treatment 6 30 | Cured Intervention 7 31 | Cured Intervention 6 32 | Not Cured No Treatment 7 33 | Cured No Treatment 6 34 | Cured Intervention 5 35 | Not Cured Intervention 6 36 | Cured Intervention 7 37 | Cured No Treatment 7 38 | Cured No Treatment 8 39 | Not Cured Intervention 7 40 | Cured No Treatment 5 41 | Cured Intervention 7 42 | Cured No Treatment 9 43 | Not Cured Intervention 10 44 | Cured Intervention 7 45 | Not Cured Intervention 8 46 | Not Cured No Treatment 5 47 | Not Cured No Treatment 8 48 | Not Cured No Treatment 4 49 | Not Cured Intervention 7 50 | Not Cured Intervention 8 51 | Cured No Treatment 6 52 | Cured Intervention 6 53 | Cured Intervention 6 54 | Cured Intervention 7 55 | Cured No Treatment 7 56 | Not Cured No Treatment 8 57 | Cured Intervention 7 58 | Cured Intervention 7 59 | Not Cured No Treatment 7 60 | Cured Intervention 7 61 | Cured Intervention 8 62 | Cured No Treatment 7 63 | Not Cured No Treatment 9 64 | Cured No Treatment 7 65 | Not Cured Intervention 8 66 | Cured Intervention 8 67 | Not Cured No Treatment 7 68 | Not Cured No Treatment 7 69 | Cured No Treatment 7 70 | Not Cured No Treatment 8 71 | Not Cured No Treatment 7 72 | Cured Intervention 8 73 | Not Cured No Treatment 7 74 | Not Cured Intervention 8 75 | Cured Intervention 8 76 | Cured Intervention 9 77 | Cured No Treatment 7 78 | Cured Intervention 10 79 | Not Cured Intervention 5 80 | Cured No Treatment 7 81 | Not Cured Intervention 8 82 | Cured No Treatment 9 83 | Not Cured No Treatment 5 84 | Cured Intervention 10 85 | Cured Intervention 8 86 | Not Cured Intervention 7 87 | Cured Intervention 6 88 | Cured No Treatment 5 89 | Not Cured Intervention 6 90 | Cured Intervention 7 91 | Not Cured No Treatment 7 92 | Cured Intervention 7 93 | Not Cured No Treatment 7 94 | Cured Intervention 7 95 | Cured Intervention 7 96 | Not Cured Intervention 8 97 | Cured Intervention 5 98 | Cured No Treatment 6 99 | Not Cured No Treatment 7 100 | Not Cured No Treatment 6 101 | Cured Intervention 7 102 | Not Cured No Treatment 7 103 | Not Cured No Treatment 9 104 | Cured Intervention 6 105 | Cured No Treatment 6 106 | Not Cured No Treatment 7 107 | Cured No Treatment 7 108 | Not Cured No Treatment 6 109 | Not Cured No Treatment 7 110 | Cured Intervention 8 111 | Cured Intervention 9 112 | Not Cured No Treatment 4 113 | Not Cured No Treatment 6 114 | Cured Intervention 9 115 | -------------------------------------------------------------------------------- /logistic regression/logistic regression.R: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # 3 
| # W203 Week 14 - Logistic Regression 4 | # 5 | # Exercises from Andy Field's Discovering Statistics With R, Chapter 8 6 | # 7 | ############################################################################ 8 | # 9 | # Eel fun, using the eel.dat data set from http://www.sagepub.com/dsur/study/default.htm 10 | # 11 | setwd("~/Documents/R/logistic regression") 12 | # 13 | # Get the eel data set 14 | eelData <- read.delim("eel.dat", header = TRUE) 15 | # 16 | str(eelData) 17 | head(eelData) 18 | # 19 | # reset the factor levels for the dichotomous variables "Intervention" and 20 | # "Cured" to the opposite of what R defaulted them to be 21 | eelData$Cured <- relevel(eelData$Cured, "Not Cured") 22 | eelData$Intervention <- relevel(eelData$Intervention, "No Treatment") 23 | str(eelData) 24 | # 25 | # OK, that looks better. Let's get on with some logistic regression 26 | # Use the glm() method, and use the Intervention variable as a predictor of 27 | # the Cured outcome. In addition, specify the family of distributions to use as 28 | # the binomial distribution with the default logit link function. 29 | # 30 | eelModel.1 <- glm(Cured ~ Intervention, data = eelData, family = binomial(link = "logit")) 31 | # 32 | # Now let's add Duration to the model and create a second model 33 | eelModel.2 <- glm(Cured ~ Intervention + Duration, data = eelData, family = binomial(link = "logit")) 34 | # 35 | # Let's see what we've got 36 | summary(eelModel.1) 37 | summary(eelModel.2) 38 | # 39 | # Based on the model summaries, Intervention is statistically significantly different 40 | # from zero, p < .003, but Duration is not. 41 | # The chi square statistic on the first model is the difference between the two 42 | # deviances (null deviance and residual deviance) = 154.08 - 144.16 43 | print(154.08-144.16) # 9.92 (9.93 using the unrounded deviances) 44 | # 45 | # Let's compute the chi square from the model itself 46 | # 47 | eelModel.1.chisq <- eelModel.1$null.deviance - eelModel.1$deviance 48 | # 49 | # We can calculate the probability of the chi square statistic with the following 50 | chisq.prob <- 1 - pchisq(eelModel.1$null.deviance - eelModel.1$deviance, eelModel.1$df.null - eelModel.1$df.residual) 51 | # 52 | # p < .002 based on the above calculation, so we can reject the null hypothesis 53 | # that the model is no better than chance at predicting the outcome. 54 | # Therefore: including Intervention in the model produced a significant improvement 55 | # in the fit of the model, chi-sq(1) = 9.93, p = .002. 56 | #
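# An equivalent form of the same calculation, using the eelModel.1.chisq object created
# above: pchisq() can return the upper tail directly, which avoids the "1 minus" step.
pchisq(eelModel.1.chisq, eelModel.1$df.null - eelModel.1$df.residual, lower.tail = FALSE)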
57 | # If we examine the coefficients for model 1, we see that Intervention is a 58 | # significant predictor of being cured, b = 1.23, z = 3.07, p = .002 59 | # 60 | # Calculating the various R values for this model (model 1) 61 | # 62 | # First, the standard R 63 | # sqrt((z^2 - 2*degFreedom)/null deviance) 64 | print(sqrt((3.074^2-(2*1))/154.08)) # this is pulling data from the summary display 65 | # 66 | # but let's do it directly from the model. First we extract the z-statistic to make 67 | # it a little easier to deal with, then we compute R 68 | eelModel.1.z <- coef(summary(eelModel.1))[-1, 3] 69 | eelModel.1.R <- sqrt((eelModel.1.z^2 - 2*(eelModel.1$df.null - eelModel.1$df.residual))/eelModel.1$null.deviance) 70 | # 0.22 71 | # 72 | # R^2 L (Hosmer & Lemeshow) is chi-square / -2LL(baseline) 73 | eelModel.1.R2L <- eelModel.1.chisq / eelModel.1$null.deviance # .0644 74 | # 75 | # Cox and Snell R^2 76 | eelModel.1.RCS <- 1 - exp((eelModel.1$deviance - eelModel.1$null.deviance) / length(eelModel.1$fitted.values)) 77 | # 0.0841 78 | # 79 | # Nagelkerke's R^2 80 | eelModel.1.RCN <- eelModel.1.RCS / (1 - (exp(-(eelModel.1$null.deviance / length(eelModel.1$fitted.values))))) 81 | # 0.113 82 | # 83 | # The book goes into a huge calculation using probabilities to calculate the odds ratio 84 | # when all you need to do is take the exponential of the predictor coefficient. 85 | eelModel.1.oddsRatio <- exp(eelModel.1$coefficients[2]) # 3.42 86 | # 87 | # Confidence Intervals 88 | exp(confint(eelModel.1)) 89 | # 90 | # Both ends of the confidence interval are greater than 1, which means that as the predictor 91 | # increases, so do the odds of being cured. Values less than one would mean that as the predictor 92 | # increases the odds of being cured decrease. Since we observed an 93 | # increase in odds with an increase in the predictor variable, this corresponds to and supports that 94 | # observation. Furthermore, since the confidence interval doesn't dip below 1, it doesn't 95 | # include the possibility that in the population the direction of the relationship is opposite to 96 | # what we observed. 97 | # 98 | # 99 | # Model #2 Analysis - Adding Duration 100 | # 101 | # The b value is very small and not significant, p = 0.964. The residual deviance is essentially 102 | # the same in both models and the AIC is slightly higher in model 2, so model 1 is a better model than 103 | # model 2. 104 | #
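# The AIC values referred to above can be pulled out directly, and the deviance comparison
# done in the next section can also be obtained in one call by asking anova() for a
# chi-square test (a quick cross-check, assuming eelModel.1 and eelModel.2 as fitted above):
AIC(eelModel.1)
AIC(eelModel.2)
anova(eelModel.1, eelModel.2, test = "Chisq")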
105 | # Comparing the models - manual calculations vs. anova 106 | # Manual -- 107 | # 108 | eelModel.2.chisq <- eelModel.1$deviance - eelModel.2$deviance # deviance between the 2 models 109 | chisq.prob <- 1 - pchisq(eelModel.2.chisq, eelModel.1$df.residual - eelModel.2$df.residual) 110 | # confirms p = 0.9644, chi squared is 0.002 111 | # 112 | # Use anova 113 | anova(eelModel.1, eelModel.2) # gives same output of deviance (chi sq) = 0.002, df = 1 114 | # 115 | # Residuals 116 | # 117 | eelData$predicted.probabilities <- fitted(eelModel.1) 118 | eelData$standardized.residuals <- rstandard(eelModel.1) 119 | eelData$studentized.residuals <- rstudent(eelModel.1) 120 | eelData$dfbeta <- dfbeta(eelModel.1) 121 | eelData$dffit <- dffits(eelModel.1) 122 | eelData$leverage <- hatvalues(eelModel.1) 123 | # 124 | # 125 | head(eelData[, c("Cured", "Intervention", "Duration", "predicted.probabilities")]) 126 | # 127 | # from the output we see that the Cured, No Treatment -> 43% probability 128 | # and that Cured, Intervention -> 72% probability, which helps to validate the model 129 | # 130 | # Let's see if there are points for which the model is a poor fit, or which have undue leverage 131 | eelData[, c("leverage", "studentized.residuals", "dfbeta")] 132 | # 133 | # Leverage -> expected value = (k + 1)/N = 2/113 = 0.018 134 | # All the leverage values in the output are very close to this value, so that's good 135 | #
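# A quick check on that benchmark (using the leverage column added above): the hat values
# of a fitted model sum to the number of parameters, so their mean is exactly (k + 1)/N.
mean(eelData$leverage)   # 2/113, about 0.018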
136 | # Residuals (standard or studentized) should have only 5% outside +/- 1.96, 1% outside +/-2.58 137 | # All the values in the output look very good 138 | # 139 | # dfBeta for the constant and for Intervention should be less than 1 -> all look OK. 140 | # 141 | # Soccer Penalty Data 142 | penaltyData <- read.delim("penalty.dat", header = TRUE) 143 | head(penaltyData) 144 | str(penaltyData) 145 | # 146 | # data looks ok, let's start a hierarchical logistic regression model setup, 147 | # starting with Scored ~ Previous 148 | penaltyModel.1 = glm(Scored ~ Previous, data = penaltyData, family = binomial(link = "logit")) 149 | summary(penaltyModel.1) 150 | # 151 | # highly significant coefficient of 0.0964, p < .001 152 | penaltyModel.1.chisq <- penaltyModel.1$null.deviance - penaltyModel.1$deviance 153 | penaltyModel.1.chisqProbability <- 1 - pchisq(penaltyModel.1.chisq, penaltyModel.1$df.null - penaltyModel.1$df.residual) 154 | # 155 | # We end up with a chi squared of 42.466, p < .001 -> we can reject the null hypothesis 156 | # that the model is no better than chance 157 | # 158 | penaltyModel.1.z <- coef(summary(penaltyModel.1))[-1, 3] 159 | penaltyModel.1.R <- sqrt((penaltyModel.1.z^2 - 2*(penaltyModel.1$df.null - penaltyModel.1$df.residual))/penaltyModel.1$null.deviance) 160 | # 161 | # R = 0.427 162 | penaltyModel.1.R2L <- penaltyModel.1.chisq / penaltyModel.1$null.deviance 163 | # 164 | # R^2 L = 0.41 165 | penaltyModel.1.RCS <- 1 - exp((penaltyModel.1$deviance - penaltyModel.1$null.deviance) / length(penaltyModel.1$fitted.values)) 166 | # R^2 CS = 0.432 167 | # 168 | penaltyModel.1.RCN <- penaltyModel.1.RCS / (1 - (exp(-(penaltyModel.1$null.deviance / length(penaltyModel.1$fitted.values))))) 169 | # R^2 N = 0.577 170 | # All the R and R^2 values are very high 171 | # 172 | # odds ratio 173 | penaltyModel.1.oddsRatio <- exp(penaltyModel.1$coefficients[2]) # 1.10 174 | # confidence intervals 175 | exp(confint(penaltyModel.1)) 176 | # 177 | # Move on to add PSWQ to the model 178 | penaltyModel.2 <- glm(Scored ~ Previous + PSWQ, data = penaltyData, family = binomial(link = "logit")) 179 | summary(penaltyModel.2) 180 | # 181 | # b(Previous) = 0.0648, p = .0034 182 | # b(PSWQ) = -0.23009, p = .00395 183 | penaltyModel.2.chisq <- penaltyModel.1$deviance - penaltyModel.2$deviance 184 | penaltyModel.2.chisqProbability <- 1 - pchisq(penaltyModel.2.chisq, penaltyModel.1$df.residual - penaltyModel.2$df.residual) 185 | # chi squared = 12.51, p < .001 186 | penaltyModel.2.z <- coef(summary(penaltyModel.2))[-1, 3] 187 | penaltyModel.2.R <- sqrt((penaltyModel.2.z^2 - 2*(penaltyModel.1$df.residual - penaltyModel.2$df.residual))/penaltyModel.1$deviance) 188 | # R values have come down to ~0.33 and 0.32, respectively 189 | penaltyModel.2.R2L <- penaltyModel.2.chisq / penaltyModel.1$deviance 190 | penaltyModel.2.RCS <- 1 - exp((penaltyModel.2$deviance - penaltyModel.1$deviance) / length(penaltyModel.2$fitted.values)) 191 | penaltyModel.2.RCN <- penaltyModel.2.RCS / (1 - (exp(-(penaltyModel.1$deviance / length(penaltyModel.2$fitted.values))))) 192 | # R^2 values also show improvement 193 | # 194 | # add Anxious to the penalty model 195 | penaltyModel.3 <- glm(Scored ~ Previous + PSWQ + Anxious, data = penaltyData, family = binomial()) 196 | summary(penaltyModel.3) 197 | # 198 | # testing assumptions 199 | library(car) 200 | vif(penaltyModel.3) 201 | 1/vif(penaltyModel.3) 202 | # Previous and Anxious have VIF 35.5, way over 10. 203 | # 204 | # Check Pearson's correlation between predictor variables 205 | library(polycor) 206 | penaltyVars <- penaltyData[, c("Previous", "PSWQ", "Anxious")] 207 | cor(penaltyVars) 208 | # extremely high correlation between Anxious and Previous, and a high correlation between Anxious and PSWQ 209 | cor.test(penaltyData$Previous, penaltyData$Anxious) # highly significant, cor = -0.9929 210 | cor.test(penaltyData$Previous, penaltyData$PSWQ) # highly significant, cor = -0.644 211 | cor.test(penaltyData$Anxious, penaltyData$PSWQ) # highly significant, cor = 0.65 212 | # 213 | # The better a player's previous score, the lower the anxiety 214 | # Anxiety and PSWQ are correlated with each other and both have an inverse relationship with Previous 215 | # This model is unreliable. 216 | # 217 | # Testing for linearity 218 | # We need to test for linearity of the continuous predictors with the logit function. 219 | # To do this we compute the interaction variable ln(predictor) * predictor for each predictor 220 | # Then we add all the variables and the interaction variables into a new model 221 | # We examine the significance of the interaction variables.
If any are significant 222 | # then we have violated linearity for that variable/predictor 223 | penaltyData$logPSWQInt <- log(penaltyData$PSWQ)*penaltyData$PSWQ 224 | penaltyData$logAnxiousInt <- log(penaltyData$Anxious)*penaltyData$Anxious 225 | penaltyData$logPreviousInt <- log(penaltyData$Previous)*penaltyData$Previous 226 | penaltyModel.test <- glm(Scored ~ Previous + PSWQ + Anxious + logPreviousInt + logPSWQInt + logAnxiousInt, data = penaltyData, family = binomial()) 227 | summary(penaltyModel.test) 228 | # None of the Int variables show significance, so linearity test passes 229 | -------------------------------------------------------------------------------- /logistic regression/penalty.dat: -------------------------------------------------------------------------------- 1 | PSWQ Anxious Previous Scored 2 | 18 21 56 Scored Penalty 3 | 17 32 35 Scored Penalty 4 | 16 34 35 Scored Penalty 5 | 14 40 15 Scored Penalty 6 | 5 24 47 Scored Penalty 7 | 1 15 67 Scored Penalty 8 | 4 10 75 Scored Penalty 9 | 12 19 53 Scored Penalty 10 | 11 29 35 Scored Penalty 11 | 15 14 65 Scored Penalty 12 | 23 5 85 Scored Penalty 13 | 11 6 86 Scored Penalty 14 | 14 34 23 Scored Penalty 15 | 22 27 46 Scored Penalty 16 | 12 26 46 Scored Penalty 17 | 5 16 67 Scored Penalty 18 | 6 14 67 Scored Penalty 19 | 4 5 84 Scored Penalty 20 | 7 10 74 Scored Penalty 21 | 8 13 64 Scored Penalty 22 | 12 19 55 Scored Penalty 23 | 14 26 46 Scored Penalty 24 | 13 27 43 Scored Penalty 25 | 1 29 35 Scored Penalty 26 | 7 30 35 Scored Penalty 27 | 10 31 33 Scored Penalty 28 | 15 26 45 Scored Penalty 29 | 16 28 35 Scored Penalty 30 | 18 10 74 Scored Penalty 31 | 17 8 76 Scored Penalty 32 | 14 12 75 Scored Penalty 33 | 20 24 44 Scored Penalty 34 | 18 23 47 Scored Penalty 35 | 2 16 65 Scored Penalty 36 | 6 14 66 Scored Penalty 37 | 4 19 55 Scored Penalty 38 | 4 18 56 Scored Penalty 39 | 2 17 64 Scored Penalty 40 | 4 20 53 Scored Penalty 41 | 1 23 47 Scored Penalty 42 | 20 26 45 Missed Penalty 43 | 25 29 35 Missed Penalty 44 | 16 21 55 Missed Penalty 45 | 18 34 24 Missed Penalty 46 | 17 38 15 Missed Penalty 47 | 29 39 15 Missed Penalty 48 | 24 46 4 Missed Penalty 49 | 25 49 0 Missed Penalty 50 | 22 47 0 Missed Penalty 51 | 26 42 5 Missed Penalty 52 | 23 41 16 Missed Penalty 53 | 25 40 18 Missed Penalty 54 | 28 35 24 Missed Penalty 55 | 18 36 23 Missed Penalty 56 | 17 31 34 Missed Penalty 57 | 14 35 24 Missed Penalty 58 | 15 42 16 Missed Penalty 59 | 11 20 56 Missed Penalty 60 | 10 24 42 Missed Penalty 61 | 15 19 56 Missed Penalty 62 | 14 18 56 Missed Penalty 63 | 16 34 24 Missed Penalty 64 | 17 35 24 Missed Penalty 65 | 25 36 26 Missed Penalty 66 | 24 32 33 Missed Penalty 67 | 17 35 24 Missed Penalty 68 | 27 39 16 Missed Penalty 69 | 28 38 15 Missed Penalty 70 | 27 41 14 Missed Penalty 71 | 25 42 14 Missed Penalty 72 | 26 45 8 Missed Penalty 73 | 22 48 0 Missed Penalty 74 | 28 49 0 Missed Penalty 75 | 28 46 4 Missed Penalty 76 | 24 26 45 Missed Penalty 77 | -------------------------------------------------------------------------------- /multiple regression/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab= 
"Monthly Sunspot Numbers") 12 | pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("~/OneDrive/MIDS/W203/Data/multiple regression") 53 | album2 <- read.delim("Album Sales 2.dat", header = TRUE) 54 | summary(album2) 55 | library(psych) 56 | scatterplot(sales, adverts) 57 | library(car) 58 | scatterplot(sales, adverts) 59 | scatterplot(album2$sales, album2$adverts) 60 | cor.test(album2$sales, album2$adverts) 61 | albumSales.2 <- lm(sales ~ adverts, data = album2) 62 | summary(albumSales.2) 63 | albmuSales.3 <- lm(sales ~ adverts + airplay, data = album2) 64 | summary(albumSales.3) 65 | albumSales.3 <- lm(sales ~ adverts + airplay, data = album2) 66 | summary(albumSales.3) 67 | plot(albumSales.2) 68 | plot(albumSales.3) 69 | albumSales.4 <- lm(sales ~ adverts + airplay + attract, data = album2) 70 | summary(albumSales.4) 71 | plot(albumSales.4) 72 | library(QuantPsych) 73 | library(QuantPsyc) 74 | library(QuantPsyc) 75 | library(boot) 76 | lm.beta(albumSales.4) 77 | confint(albumSales.4) 78 | anova(albumSales.2, albumSales.3) 79 | anova(albumSales.3, albumSales.4) 80 | album2$standardize.residuals <- rstandard(albumSales.4) 81 | album2$studentized.residuals <- rstudent(albumSales.4) 82 | album2$cooks.distance <- cooks.distance(albumSales.4) 83 | album2$dfbeta <- dfbeta(albumSales.4) 84 | album2$dffit <- dffits(albumSales.4) 85 | album2$leverage <- hatvalues(albumSales.4) 86 | album2$covariance.ratios <- covratio(albumSales.4) 87 | album2 88 | album2.large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 89 | album2$large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 90 | album2$large.residual <- album2$standardized.residuals > 2 | album2$standardized.residuals < -2 91 | album2$large.residual <- (album2$standardized.residuals > 2) | (album2$standardized.residuals < -2) 92 | album2.large.residual 93 | album2$standardized.residuals <- rstandard(albumSales.4) 94 | album2$large.residual <- (album2$standardized.residuals > 2) | (album2$standardized.residuals < -2) 95 | sum(album2$large.residual) 96 | album2[album2$large.residual, c("sales","airplay","attract","adverts","standardized.residuals")] 97 | album2[album2$large.residual, c("cooks.distance", "leverage", "covariance.ratios")] 98 | 1 + 3(3 + 1)/200 99 | 1 + 12/200 100 | 1 - 12/200 101 | dwt(albumSales.4) 102 | vif(albumSales.4) 103 | 1/vif(albumSales.4) 104 | mean(vif(albumSales.4)) 105 | -------------------------------------------------------------------------------- /multiple regression/Album Sales 2.dat: 
-------------------------------------------------------------------------------- 1 | adverts sales airplay attract 2 | 10.256 330 43 10 3 | 985.685 120 28 7 4 | 1445.563 360 35 7 5 | 1188.193 270 33 7 6 | 574.513 220 44 5 7 | 568.954 170 19 5 8 | 471.814 70 20 1 9 | 537.352 210 22 9 10 | 514.068 200 21 7 11 | 174.093 300 40 7 12 | 1720.806 290 32 7 13 | 611.479 70 20 2 14 | 251.192 150 24 8 15 | 97.972 190 38 6 16 | 406.814 240 24 7 17 | 265.398 100 25 5 18 | 1323.287 250 35 5 19 | 196.65 210 36 8 20 | 1326.598 280 27 8 21 | 1380.689 230 33 8 22 | 792.345 210 33 7 23 | 957.167 230 28 6 24 | 1789.659 320 30 9 25 | 656.137 210 34 7 26 | 613.697 230 49 7 27 | 313.362 250 40 8 28 | 336.51 60 20 4 29 | 1544.899 330 42 7 30 | 68.954 150 35 8 31 | 785.692 150 8 6 32 | 125.628 180 49 7 33 | 377.925 80 19 8 34 | 217.994 180 42 6 35 | 759.862 130 6 7 36 | 1163.444 320 36 6 37 | 842.957 280 32 7 38 | 125.179 200 28 6 39 | 236.598 130 25 8 40 | 669.811 190 34 8 41 | 612.234 150 21 6 42 | 922.019 230 34 7 43 | 50 310 63 7 44 | 2000 340 31 7 45 | 1054.027 240 25 7 46 | 385.045 180 42 7 47 | 1507.972 220 37 7 48 | 102.568 40 25 8 49 | 204.568 190 26 7 50 | 1170.918 290 39 7 51 | 689.547 340 46 7 52 | 784.22 250 36 6 53 | 405.913 190 12 4 54 | 179.778 120 2 8 55 | 607.258 230 29 8 56 | 1542.329 190 33 8 57 | 1112.47 210 28 7 58 | 856.985 170 10 6 59 | 836.331 310 38 7 60 | 236.908 90 19 4 61 | 1077.855 140 13 6 62 | 579.321 300 30 7 63 | 1500 340 38 8 64 | 731.364 170 22 8 65 | 25.689 100 23 6 66 | 391.749 200 22 9 67 | 233.999 80 20 7 68 | 275.7 100 18 6 69 | 56.895 70 37 7 70 | 255.117 50 16 8 71 | 566.501 240 32 8 72 | 102.568 160 26 5 73 | 250.568 290 53 9 74 | 68.594 140 28 7 75 | 642.786 210 32 7 76 | 1500 300 24 7 77 | 102.563 230 37 6 78 | 756.984 280 30 8 79 | 51.229 160 19 7 80 | 644.151 200 47 6 81 | 15.313 110 22 5 82 | 243.237 110 10 8 83 | 256.894 70 1 4 84 | 22.464 100 1 6 85 | 45.689 190 39 6 86 | 724.938 70 8 5 87 | 1126.461 360 38 7 88 | 1985.119 360 35 5 89 | 1837.516 300 40 5 90 | 135.986 120 22 7 91 | 237.703 150 27 8 92 | 976.641 220 31 6 93 | 1452.689 280 19 7 94 | 1600 300 24 9 95 | 268.598 140 1 7 96 | 900.889 290 38 8 97 | 982.063 180 26 6 98 | 201.356 140 11 6 99 | 746.024 210 34 6 100 | 1132.877 250 55 7 101 | 1000 250 5 7 102 | 75.896 120 34 6 103 | 1351.254 290 37 9 104 | 202.705 60 13 8 105 | 365.985 140 23 6 106 | 305.268 290 54 6 107 | 263.268 160 18 7 108 | 513.694 100 2 7 109 | 152.609 160 11 6 110 | 35.987 150 30 8 111 | 102.568 140 22 7 112 | 215.368 230 36 6 113 | 426.784 230 37 8 114 | 507.772 30 9 3 115 | 233.291 80 2 7 116 | 1035.433 190 12 8 117 | 102.642 90 5 9 118 | 526.142 120 14 7 119 | 624.538 150 20 5 120 | 912.349 230 57 6 121 | 215.994 150 19 8 122 | 561.963 210 35 7 123 | 474.76 180 22 5 124 | 231.523 140 16 7 125 | 678.596 360 53 7 126 | 70.922 10 4 6 127 | 1567.548 240 29 6 128 | 263.598 270 43 7 129 | 1423.568 290 26 7 130 | 715.678 220 28 7 131 | 777.237 230 37 8 132 | 509.43 220 32 5 133 | 964.11 240 34 7 134 | 583.627 260 30 7 135 | 923.373 170 15 7 136 | 344.392 130 23 7 137 | 1095.578 270 31 8 138 | 100.025 140 21 5 139 | 30.425 60 28 1 140 | 1080.342 210 18 7 141 | 799.899 210 28 7 142 | 1071.752 240 37 8 143 | 893.355 210 26 6 144 | 283.161 200 30 8 145 | 917.017 140 10 7 146 | 234.568 90 21 7 147 | 456.897 120 18 9 148 | 206.973 100 14 7 149 | 1294.099 360 38 7 150 | 826.859 180 36 6 151 | 564.158 150 32 7 152 | 192.607 110 9 5 153 | 10.652 90 39 5 154 | 45.689 160 24 7 155 | 42.568 230 45 7 156 | 20.456 40 13 8 157 | 635.192 60 17 6 158 | 
1002.273 230 32 7 159 | 1177.047 230 23 6 160 | 507.638 120 0 6 161 | 215.689 150 35 5 162 | 526.48 120 26 6 163 | 26.895 60 19 6 164 | 883.877 280 26 7 165 | 9.104 120 53 8 166 | 103.568 230 29 8 167 | 169.583 230 28 7 168 | 429.504 40 17 6 169 | 223.639 140 26 8 170 | 145.585 360 42 8 171 | 985.968 210 17 6 172 | 500.922 260 36 8 173 | 226.652 250 45 7 174 | 1051.168 200 20 7 175 | 68.093 150 15 7 176 | 1547.159 250 28 8 177 | 393.774 100 27 6 178 | 804.282 260 17 8 179 | 801.577 210 32 8 180 | 450.562 290 46 9 181 | 26.598 220 47 8 182 | 179.061 70 19 1 183 | 345.687 110 22 8 184 | 295.84 250 55 9 185 | 2271.86 320 31 5 186 | 1134.575 300 39 8 187 | 601.434 180 21 6 188 | 45.298 180 36 6 189 | 759.518 200 21 7 190 | 832.869 320 44 7 191 | 56.894 140 27 7 192 | 709.399 100 16 6 193 | 56.895 120 33 6 194 | 767.134 230 33 8 195 | 503.172 150 21 7 196 | 700.929 250 35 9 197 | 910.851 190 26 7 198 | 888.569 240 14 6 199 | 800.615 250 34 6 200 | 1500 230 11 8 201 | 785.694 110 20 9 202 | -------------------------------------------------------------------------------- /multiple regression/Chamorro-Premuzic.dat: -------------------------------------------------------------------------------- 1 | Age Gender studentN studentE studentO studentA studentC lectureN lecturE lecturO lecturA lecturC 2 | Female 33 35 26 43 30 -14 3 6 10 3 | Female 18 29 19 31 27 -17 4 19 9 4 | 18 Female 35 29 30 45 26 -30 13 15 25 5 | 18 Female 38 43 17 49 29 -24 2 9 8 6 | Female 30 30 29 56 40 -21 11 21 12 7 | Female 19 31 31 35 22 -22 2 13 8 8 | 19 Female 33 27 33 60 38 -30 10 15 11 9 | Male 30 31 25 60 28 -23 15 15 24 10 | 18 Female 30 23 26 48 27 -25 5 7 18 11 | 18 Male 13 28 34 48 29 -30 10 9 7 12 | 18 Female 37 36 37 45 32 -29 28 25 16 13 | 18 Male 23 25 25 48 34 -13 5 4 14 | 18 Male 21 20 24 38 26 -18 6 13 8 15 | 0 Female 30 28 21 41 25 0 16 22 21 16 | 18 Female 8 27 34 55 40 -26 17 22 29 17 | 18 Female 21 28 38 37 27 -30 20 20 20 18 | 18 Female 40 30 23 37 40 -23 10 1 6 19 | Female 28 36 40 45 29 -5 24 22 19 20 | 18 Female 40 24 20 35 21 -29 8 21 18 21 | 17 Female 30 42 16 51 34 -26 12 24 21 22 | 18 Male 31 13 20 47 27 -30 -4 9 27 23 | Female 41 30 37 51 34 -26 0 14 16 24 | 18 Female 25 30 19 50 31 -28 17 23 17 25 | 18 Female 37 21 35 45 24 -30 28 8 17 26 | 19 Male 14 36 27 34 -27 11 7 14 27 | 18 Male 17 33 32 53 31 -17 6 2 12 28 | 17 Male 10 38 26 52 39 -22 -6 15 8 29 | 18 Female 28 29 25 58 34 -24 6 10 15 30 | 18 Female 19 35 23 46 30 -24 13 18 22 31 | Male 33 31 30 36 28 -24 19 17 14 32 | 18 Male 15 34 37 43 25 -17 11 2 10 33 | 18 Female 20 24 25 57 39 -29 2 26 20 34 | 18 Female 15 34 35 50 34 -24 22 -3 20 35 | 18 Male 25 38 20 47 58 0 0 6 4 36 | Male 28 33 30 46 23 -19 2 3 17 37 | 18 Male 26 26 28 25 -1 5 10 1 38 | 18 Female 26 35 30 51 33 -27 11 20 18 39 | 18 Female 16 29 33 49 34 -30 13 19 11 40 | 18 Female 21 38 36 45 29 -22 15 14 25 41 | 18 Female 57 42 | 18 Female 32 33 27 57 36 -30 9 28 4 43 | 18 Female 24 41 50 32 -24 15 4 10 44 | 19 Female 15 34 37 50 25 -26 9 19 15 45 | 18 Female 15 34 35 50 34 -26 0 14 16 46 | 18 Female 25 38 20 45 58 -24 9 2 23 47 | 19 Male 14 36 27 34 -28 17 23 17 48 | 18 Male 17 33 32 53 31 -27 11 7 14 49 | 17 Male 10 38 26 52 39 -6 17 18 19 50 | 18 Female 28 29 25 58 34 -17 6 2 12 51 | 18 Female 19 35 23 46 30 -22 -6 15 8 52 | Male 33 31 30 36 28 -24 6 10 15 53 | 18 Male 41 30 37 43 34 -24 13 18 22 54 | 18 Female 25 27 31 57 30 -24 19 17 14 55 | 18 Female 25 30 19 50 31 -17 11 2 10 56 | 18 Male 37 21 35 47 24 -29 2 26 20 57 | Male 28 33 30 46 23 -24 22 -3 20 58 | 18 Male 26 26 
28 25 0 0 6 4 59 | 18 Female 24 41 50 32 -1 5 10 1 60 | 21 Female 34 28 36 42 30 -30 16 -5 4 61 | 18 Female 25 27 31 55 30 -24 9 2 23 62 | 18 Female 18 26 22 43 29 -6 17 18 19 63 | 18 Female 12 26 29 45 34 -26 9 19 15 64 | 18 Female 20 24 25 55 39 -24 15 4 10 65 | 18 Female 18 26 22 43 29 -30 28 8 17 66 | 18 Female 12 26 29 45 34 -19 2 3 17 67 | 18 Male 26 26 28 25 -1 5 10 1 68 | 19 Female 33 27 33 60 38 -30 10 15 11 69 | 18 Female 8 27 34 55 40 -26 17 22 29 70 | 18 Female 25 30 19 50 31 -17 11 2 10 71 | 17 Female 30 42 16 51 34 -26 12 24 21 72 | 18 Female 25 30 19 50 31 -28 17 23 17 73 | 20 Female 24 31 23 55 40 -30 1 -4 23 74 | 18 Female 13 36 26 50 31 -30 7 21 23 75 | 18 Female 30 29 40 59 43 -30 26 25 18 76 | 19 Female 27 21 29 49 27 -22 23 19 21 77 | 23 Female 14 32 21 57 35 -15 0 15 23 78 | Female 17 33 28 41 33 -20 9 20 21 79 | 18 Male 19 27 22 51 32 -17 7 12 15 80 | 18 Male 17 34 31 49 29 -21 11 14 81 | 18 Female 26 36 28 41 29 -25 2 8 -1 82 | 17 Female 15 38 33 53 38 -24 12 13 19 83 | 19 Male 31 37 30 51 34 -16 7 5 5 84 | 18 Female 30 33 24 36 23 -27 9 17 15 85 | 18 Female 13 32 30 41 41 -23 6 11 19 86 | 19 Female 22 37 28 55 34 -30 13 17 26 87 | 20 Male 19 31 35 45 26 -3 88 | 16 Male 35 21 23 56 8 8 13 0 89 | 18 Male 10 38 40 50 32 -29 25 19 15 90 | 18 Female 49 -30 20 21 30 91 | 18 Female 20 37 30 41 30 -23 2 10 17 92 | 18 Male 14 24 32 43 24 -17 18 10 7 93 | 18 Female 27 39 24 42 32 -30 19 5 94 | 18 Male 20 21 38 21 -11 10 10 12 95 | 18 Female 18 31 23 50 -16 6 12 8 96 | 18 Female 16 35 26 49 29 -24 5 -1 4 97 | 18 Female 31 37 22 53 32 -30 16 23 24 98 | 17 Female 36 35 31 54 36 -30 6 3 24 99 | 19 Male 15 31 36 45 24 -18 17 1 7 100 | 18 Female 35 31 28 49 30 -25 27 16 28 101 | 19 Female 18 42 35 49 31 -30 19 21 14 102 | 18 Female 18 28 37 51 33 -19 18 5 14 103 | 18 Female 32 27 27 54 24 -5 17 13 11 104 | 18 Female 27 32 21 46 37 -21 1 23 18 105 | 17 Female 21 38 23 49 32 -30 8 1 19 106 | 17 Female 15 32 26 41 25 -20 1 12 5 107 | 19 Male 10 36 23 48 32 -20 17 19 16 108 | 18 Male 8 30 22 45 26 -25 4 -5 8 109 | 18 Female 19 29 35 43 31 -26 21 20 21 110 | 18 Female 25 28 25 38 28 -11 20 22 25 111 | 19 Male 5 31 33 43 41 -28 3 -3 23 112 | 18 Female 17 33 32 48 33 -30 7 -1 22 113 | 18 Female 27 30 32 50 32 -18 9 17 9 114 | 18 Female 16 34 31 46 34 -13 6 17 20 115 | 20 Female 19 35 39 61 39 -24 18 11 20 116 | 18 Female 13 30 25 51 39 -24 1 -14 30 117 | Female 39 31 42 42 23 -20 4 -6 -1 118 | 17 Male 36 23 29 41 23 -24 3 1 15 119 | 19 Female 26 34 38 64 39 -27 24 25 19 120 | 18 Female 14 39 41 57 37 -30 25 -5 28 121 | Female 23 27 27 42 33 -30 16 8 23 122 | Female 34 28 34 38 25 -28 12 6 29 123 | Female 23 26 32 50 34 -24 3 12 -6 124 | 18 Male 4 35 23 43 27 -13 0 4 12 125 | 26 Female 25 28 33 49 35 -24 16 3 16 126 | Female 20 34 29 60 42 -29 20 17 26 127 | 18 Male 19 27 30 51 23 -24 1 9 128 | 18 Female 38 29 46 45 33 -24 14 7 22 129 | 20 Male 6 33 39 34 20 -28 12 -3 8 130 | Female 25 36 29 61 39 -29 19 22 24 131 | 19 Male 7 38 33 62 42 -30 19 12 24 132 | Male 26 32 36 44 28 -6 10 0 13 133 | 18 Male 19 36 39 50 33 -17 15 6 21 134 | 17 Male 36 23 29 41 23 -24 3 1 15 135 | 18 Male 20 21 38 21 -11 10 10 12 136 | 18 Female 18 31 23 50 -16 6 12 8 137 | 17 Female 30 42 16 51 34 -26 12 24 21 138 | Female 39 31 42 42 23 -20 4 -6 -1 139 | 19 Female 27 21 29 49 27 -22 23 19 21 140 | 23 Female 14 32 21 57 35 -15 0 15 23 141 | 142 | 18 Female 28 19 21 48 35 -29 16 -7 10 23 143 | 19 Female 20 28 32 46 28 -10 21 17 26 25 144 | 18 Female 34 22 28 25 7 -6 3 -1 1 10 145 | 24 Female 20 32 32 44 27 -18 8 6 
5 18 146 | 18 Female 29 17 31 52 27 -25 20 8 -2 20 147 | 18 Female 32 29 26 47 33 -29 7 -4 12 28 148 | 18 Male 0 41 32 50 45 -30 9 9 9 16 149 | 18 Female 21 25 25 45 33 -17 11 5 13 16 150 | 22 Female 26 33 32 37 27 -3 7 14 6 12 151 | 17 Female 19 33 27 34 35 -23 11 -8 19 27 152 | 19 Female 20 17 36 54 38 -27 13 -2 1 26 153 | 19 Male 26 35 29 46 18 -27 28 -7 -2 154 | 19 Female 37 17 35 47 18 -16 8 5 9 16 155 | 18 Female 30 34 31 44 32 -20 5 7 12 30 156 | 20 Female 19 36 28 50 36 -19 15 10 -6 15 157 | 18 Female 28 27 37 32 26 -22 12 12 11 5 158 | 19 Female 34 29 30 47 23 -23 12 12 14 30 159 | 19 Male 13 29 27 47 26 -4 3 4 15 16 160 | 19 Female 35 20 23 46 28 -30 16 3 10 14 161 | 18 Female 15 31 23 51 32 -21 8 -3 14 21 162 | 18 Female 14 23 31 54 33 -14 10 1 3 11 163 | 19 Female 32 32 42 36 16 -29 24 27 12 18 164 | 19 Male 10 39 37 44 34 -30 25 12 2 15 165 | 18 Female 23 34 26 45 34 -15 16 4 3 11 166 | 18 Female 26 28 35 50 36 -17 12 10 2 13 167 | 19 Female 24 25 22 63 32 -18 15 5 4 16 168 | 18 Female 23 31 22 58 41 -27 20 18 2 16 169 | 19 Female 34 26 31 58 28 -27 11 -1 13 23 170 | 18 Female 13 35 37 37 24 -30 2 3 -1 26 171 | 18 Female 28 5 28 27 24 -26 9 1 -1 9 172 | 18 Female 20 42 24 46 31 -29 14 2 -7 12 173 | 19 Female 21 37 33 57 43 -30 12 15 20 19 174 | 18 Female 25 24 19 54 39 -24 13 16 18 21 175 | 18 Female 14 32 24 50 36 -17 16 9 20 23 176 | 19 Female 27 23 27 55 26 -28 10 3 14 24 177 | 24 Male 40 19 37 59 41 -19 27 27 -6 15 178 | 20 Male 24 29 35 34 24 -21 12 19 12 19 179 | 18 Female 27 26 34 47 30 -25 7 8 4 16 180 | 18 Female 25 28 33 43 35 -21 4 -5 7 15 181 | 19 Male 24 29 24 52 32 -15 10 7 12 7 182 | 18 Female 14 36 23 51 28 -30 13 10 -7 17 183 | 18 Female 22 33 31 50 41 -20 14 3 -7 24 184 | 18 Female 23 41 14 50 37 -25 25 10 -1 4 185 | 19 Female 43 33 30 43 17 -11 11 3 6 15 186 | 19 Female 35 28 36 50 30 -21 5 7 -3 19 187 | 18 Female 34 30 40 52 23 -20 8 18 22 9 188 | 17 Female 37 33 36 42 21 -23 11 2 10 18 189 | 18 Male 24 29 44 43 24 -21 23 30 26 -8 190 | 18 Female 19 36 26 25 -30 10 -5 1 15 191 | 19 Female 39 24 21 44 36 -22 2 -5 17 16 192 | 18 Female 28 36 30 50 39 -30 27 16 9 20 193 | 23 Male 24 23 37 47 15 -6 -4 -2 15 194 | 18 Female 18 43 29 57 34 -20 20 10 7 20 195 | 18 Male 2 39 40 33 20 -18 22 20 6 6 196 | 20 Female 14 30 41 51 27 -20 18 14 -1 22 197 | 18 Female 22 30 34 51 36 -30 1 6 2 17 198 | 19 Female 29 27 32 38 26 -19 4 4 3 13 199 | 20 Female 28 34 22 47 31 -21 22 19 22 27 200 | 18 Female 15 29 31 54 33 -16 12 11 -2 7 201 | 20 Female 32 25 37 43 17 -30 10 10 -11 9 202 | 18 Female 29 29 29 41 31 -14 14 1 -8 18 203 | 19 Male 16 30 29 38 31 -27 14 6 4 23 204 | 20 Male 30 9 36 35 22 5 16 19 13 10 205 | 21 Male 2 35 29 53 42 -27 11 6 10 24 206 | 19 Female 33 21 15 50 29 -29 8 4 22 27 207 | 29 Female 19 31 30 47 36 -23 10 10 1 12 208 | 18 Female 22 26 30 48 28 -10 3 10 4 10 209 | 20 Male 29 20 39 37 21 -20 6 15 -6 -5 210 | 19 Female 37 18 32 47 31 -30 12 6 13 18 211 | 19 Female 20 26 22 49 32 -26 2 10 7 16 212 | 19 Female 21 31 32 51 39 -27 7 -1 11 26 213 | 18 Male 25 32 37 44 30 -27 18 19 26 3 214 | 19 Female 35 11 32 58 44 -26 9 2 11 215 | 19 Female 40 21 29 50 26 -30 12 6 19 29 216 | 18 Female 33 26 30 40 26 -26 16 12 1 17 217 | 19 Female 41 26 26 56 31 -24 5 1 13 17 218 | 18 Female 20 40 31 58 41 -30 18 5 11 22 219 | 18 Female 11 32 28 45 36 -18 1 14 7 15 220 | 18 Female 21 27 35 46 28 -13 24 18 15 15 221 | 18 Female 29 31 28 53 29 -24 10 -3 0 25 222 | 19 Female 42 34 33 44 30 -25 0 10 9 17 223 | 18 Male 8 32 36 49 28 -24 13 7 15 20 224 | 20 Male 29 23 41 39 20 
-21 -3 3 4 21 225 | 19 Female 39 21 41 45 33 -21 19 24 -3 15 226 | 17 Female 24 33 32 49 30 -29 16 1 11 16 227 | 18 Female 24 33 27 50 37 -28 20 12 1 24 228 | 20 Female 19 20 14 39 30 -25 10 6 17 20 229 | 24 Male 22 38 40 45 23 -30 3 -7 7 18 230 | 18 Female 33 29 27 35 21 -20 19 7 2 8 231 | 18 Male 2 34 38 44 28 -30 10 -1 9 19 232 | 18 Female 23 35 22 47 31 -30 14 7 18 23 233 | 19 Female 13 33 34 37 27 -19 12 3 0 14 234 | 18 Female 35 27 40 47 25 -24 8 25 7 26 235 | 19 Female 25 36 28 54 31 -18 14 10 3 26 236 | 237 | 22 Female 20 25 30 44 32 -29 4 4 17 22 238 | 19 Female 19 38 29 41 33 -23 14 -1 -4 27 239 | 22 Female 27 26 26 51 38 -27 18 8 8 23 240 | 23 Female 13 23 30 60 44 -23 6 5 4 15 241 | 19 Female 19 34 39 55 31 -9 12 6 1 9 242 | 18 Male 23 26 23 51 25 -21 4 11 9 19 243 | 21 Female 11 28 29 54 39 -17 15 9 7 14 244 | 19 Female 16 35 28 43 29 -16 26 20 18 23 245 | 18 Male 22 18 24 41 19 -28 4 -9 0 17 246 | 33 Male 29 26 26 48 33 -27 10 2 -13 6 247 | 22 Female 24 33 27 51 42 -29 13 9 7 21 248 | 25 Female 27 25 33 38 28 -4 5 2 -4 10 249 | 24 30 31 38 33 0 4 -4 7 15 250 | 19 Female 20 31 32 48 33 -24 -3 1 1 12 251 | 40 Female 17 35 30 61 42 -30 6 4 -19 24 252 | 21 Male 19 11 32 27 14 -12 5 10 -6 21 253 | 2 Female 21 25 29 43 29 -27 14 14 2 12 254 | 26 Female 22 38 29 49 37 -23 13 18 1 -4 255 | 43 Female 28 31 27 45 30 -28 17 8 -2 13 256 | 20 Male 35 24 19 44 16 -26 12 4 8 18 257 | 21 Female 13 29 22 48 30 -30 20 16 -14 25 258 | 21 Female 24 33 28 50 34 -30 9 5 9 22 259 | 19 Female 25 31 25 55 34 -17 19 14 15 23 260 | 28 Female 12 34 30 38 35 -30 25 -5 -10 21 261 | 21 Female 15 27 27 44 23 -24 16 4 6 6 262 | 39 Female 10 36 28 50 39 -17 14 8 3 20 263 | 25 Male 30 30 42 41 34 -26 22 9 2 16 264 | 30 Female 37 28 21 40 29 3 10 265 | 19 Female 31 31 26 52 25 -30 -1 13 -5 12 266 | 27 Male 15 31 40 46 36 -19 14 11 -5 13 267 | 27 Female 25 24 22 46 37 -27 10 12 22 26 268 | 22 Male 26 38 29 53 22 -28 7 -2 -6 15 269 | 27 Female 20 33 29 47 31 -26 7 -1 -3 14 270 | 24 Female 27 28 38 45 25 -24 21 14 -4 19 271 | 24 Female 19 31 24 57 42 -22 19 2 13 20 272 | 20 Female 20 24 31 55 28 -30 15 0 -14 29 273 | 19 Female 18 34 22 46 38 -22 12 4 5 4 274 | 20 Female 18 26 39 52 44 -28 17 9 4 23 275 | 21 Female 14 38 36 50 42 -30 4 16 9 16 276 | 19 Female 19 37 25 51 36 -26 17 19 21 24 277 | 22 Female 22 23 25 54 36 278 | 39 Female 31 33 15 51 29 22 12 11 19 27 279 | 18 Male 26 33 19 56 37 -21 20 8 29 13 280 | 18 Female 40 35 31 51 24 -26 4 -10 -4 21 281 | 18 Female 27 45 23 55 36 282 | 18 Female 21 33 31 53 41 -23 9 6 23 25 283 | 21 Male 20 27 26 55 24 10 17 12 19 18 284 | 32 Female 22 27 24 33 24 -6 18 18 17 18 285 | 22 Female 26 25 23 46 -13 14 10 21 21 286 | 20 Female 20 31 25 37 26 -27 16 14 11 26 287 | 28 Male 25 20 22 51 24 4 10 13 1 13 288 | 18 Female 21 18 21 42 24 -30 13 20 5 26 289 | 18 Female 35 16 23 51 38 -22 7 6 22 16 290 | 18 Female 17 29 22 42 19 -18 7 -3 6 5 291 | 18 Female 27 33 27 47 30 -26 16 4 1 27 292 | 24 Female 16 27 30 53 31 -30 17 5 2 21 293 | 20 Female 24 26 28 50 31 -29 -5 -2 13 3 294 | 18 Female 26 18 18 42 24 -8 1 6 0 15 295 | 18 Male 23 19 23 41 24 -14 9 3 4 22 296 | 297 | 21 Male 27 30 24 43 36 -30 13 -2 7 20 298 | 18 Female 36 27 20 43 26 21 6 5 11 299 | 21 Male 3 34 38 49 32 -25 11 9 13 29 300 | 20 Female 9 35 25 52 32 -30 14 7 0 24 301 | 18 Male 17 39 33 32 17 -30 14 8 6 16 302 | 20 Female 28 33 21 47 31 -22 16 -5 -1 15 303 | 18 Male 28 27 20 41 23 -30 10 6 12 21 304 | 19 Male 14 35 34 38 25 -26 5 8 -3 16 305 | 20 Male 17 28 29 31 32 -25 18 3 10 306 | 21 Female 27 31 27 43 30 
-19 5 12 10 9 307 | 21 Female 24 25 22 48 33 -30 15 9 22 26 308 | 18 Female 36 27 30 53 36 -12 24 8 15 18 309 | 20 Female 36 17 28 47 39 -30 26 17 25 30 310 | 19 Male 24 28 18 46 35 -24 1 -9 25 30 311 | 20 Female 19 39 39 47 26 -14 10 5 -3 15 312 | 19 Male 21 19 29 54 42 -30 8 7 21 30 313 | 21 Female 37 28 30 46 29 -24 7 -10 26 24 314 | 20 Female 40 25 26 38 25 -30 6 5 2 26 315 | 19 Female 26 30 35 55 32 -16 11 5 9 23 316 | 20 Male 31 32 21 44 39 -18 10 -1 1 6 317 | 20 Male 14 36 22 33 19 -26 26 3 15 16 318 | 20 Male 10 31 36 35 24 -20 22 14 2 7 319 | 19 Female 16 25 23 39 38 -15 20 0 14 15 320 | 20 Female 24 32 34 49 36 -24 19 3 -5 26 321 | 20 Female 35 16 41 41 28 -29 17 2 2 8 322 | 20 Female 19 29 33 50 37 -30 24 5 23 17 323 | 18 Male 16 39 41 51 41 -30 23 20 0 18 324 | 19 Female 23 30 25 47 36 -13 24 13 16 16 325 | 19 Female 21 39 35 33 23 -19 23 13 5 -1 326 | 19 Female 44 18 28 67 34 -29 14 29 29 27 327 | 20 Female 27 27 30 29 24 -18 8 -3 -3 9 328 | 21 Female 40 23 33 50 40 -21 15 8 26 22 329 | 20 Female 29 33 35 41 30 -15 13 1 2 5 330 | 19 Male 21 30 19 47 36 -23 15 23 22 22 331 | 20 Female 18 42 34 50 35 -13 21 14 4 17 332 | 18 Male 19 25 31 41 30 -30 18 8 -4 23 333 | 23 Male 18 41 41 37 23 -30 17 3 16 26 334 | 21 Male 34 13 20 42 34 -11 20 11 8 12 335 | 18 Female 23 26 25 46 36 -30 13 9 -6 17 336 | 21 Female 27 30 24 39 31 -28 7 1 27 22 337 | 21 Female 8 43 31 52 32 -24 19 15 15 2 338 | 20 Male 8 29 21 56 35 -30 22 21 -3 12 339 | 21 Female 20 38 35 50 35 -29 15 15 15 13 340 | 341 | 21 Female 34 28 24 39 22 -29 0 -2 26 17 342 | 19 Female 36 26 18 34 17 6 2 8 343 | 19 Female 44 23 29 34 14 -19 22 19 20 15 344 | 20 Female 37 37 24 53 38 -30 7 -15 14 20 345 | 19 Female 20 24 24 43 34 -30 17 6 15 28 346 | 20 Female 23 36 24 41 32 -10 19 5 9 14 347 | 19 Male 17 37 22 28 19 -14 7 8 13 3 348 | 20 Female 15 46 18 37 35 -28 18 15 7 16 349 | 20 Female 39 38 26 31 12 -30 10 16 -12 20 350 | 30 Female 36 28 26 38 18 -29 25 23 8 18 351 | 20 Male 28 18 22 46 21 -20 18 13 22 20 352 | 21 Female 42 43 35 44 33 -20 23 13 19 26 353 | 21 Male 19 18 31 39 33 -30 13 8 7 30 354 | 19 Female 20 24 33 35 20 -22 18 7 -9 9 355 | 19 Female 30 25 28 43 30 -30 12 15 -6 20 356 | 19 Male 6 38 27 27 16 -16 11 9 -7 2 357 | 20 Female 12 41 27 36 31 -30 27 5 -2 19 358 | 19 Female 22 31 25 42 28 -30 12 17 0 20 359 | 20 Female 13 43 29 28 27 -24 28 24 14 29 360 | 20 Male 11 35 34 39 31 -30 11 1 -15 20 361 | 20 Male 24 28 36 35 9 -30 23 13 9 30 362 | 24 Male 22 31 30 42 22 -25 3 0 4 17 363 | 19 Female 32 28 30 44 26 -14 13 -9 3 27 364 | 19 Female 26 30 28 43 33 -27 7 -9 1 7 365 | 34 Male 21 33 40 54 35 -26 19 20 1 23 366 | 19 Male 8 33 31 39 25 -9 14 20 -3 -2 367 | 18 Female 34 20 20 48 22 -27 13 3 -6 18 368 | 20 Male 24 34 22 51 33 -24 19 3 7 20 369 | 20 Female 27 30 29 29 -30 26 7 -21 18 370 | 18 Female 31 -22 28 26 22 23 371 | 18 Female 7 37 34 40 33 -30 13 3 -1 4 372 | 19 Female 22 29 27 27 -22 28 16 23 21 373 | 19 Female 15 33 27 26 -23 14 8 -3 2 374 | 23 Female 12 34 36 47 27 -28 13 5 9 13 375 | 19 Female 22 30 23 23 -23 13 -3 9 13 376 | 20 Male 15 35 16 47 38 -22 14 6 22 21 377 | 21 Female 17 38 39 29 -30 8 -3 20 23 378 | 20 Female 25 33 34 41 30 -22 0 -3 3 18 379 | 20 Female 25 31 25 40 28 -23 12 7 14 16 380 | 19 Female 31 33 28 52 29 -20 13 6 12 17 381 | 19 Female 34 20 22 53 36 -29 21 8 12 10 382 | 20 Female 36 38 28 56 35 -30 4 9 16 23 383 | 19 Female 16 31 26 43 33 -17 10 19 17 27 384 | 20 Female 14 41 27 33 25 -27 14 2 13 21 385 | 20 Female 19 31 25 44 30 -25 8 7 10 8 386 | 19 Female 20 31 32 32 26 -30 14 17 
11 16 387 | 20 Female 27 25 21 33 28 -29 6 2 19 26 388 | 19 Female 29 24 21 43 27 -27 25 13 18 10 389 | 20 Female 29 28 33 41 25 -19 18 11 15 15 390 | 25 Female 37 20 32 40 29 -12 15 7 -4 15 391 | 19 Female 27 35 23 49 30 -29 6 2 18 392 | 21 Female 25 26 31 45 34 -30 18 6 6 16 393 | 21 Female 15 33 24 34 25 -30 -4 1 -3 17 394 | 20 Male 32 20 31 45 25 -9 9 8 4 10 395 | 19 Female 24 30 27 50 30 -28 11 5 6 22 396 | 19 Male 39 33 42 40 13 -27 13 18 16 12 397 | 20 Male 10 32 30 46 35 -19 21 22 20 22 398 | 19 Female 30 27 23 51 37 -30 13 2 12 14 399 | 31 Female 28 25 34 29 30 -10 13 13 4 23 400 | 401 | 23 Female 28 28 17 52 21 2 12 12 6 3 402 | 19 Female 24 29 25 36 29 -28 10 18 11 5 403 | 19 Female 30 34 23 45 25 -23 7 7 5 10 404 | 20 Female 28 30 31 41 31 -6 14 19 8 18 405 | 20 Male 26 31 32 44 17 -21 17 18 18 23 406 | 19 Male 24 32 25 46 22 -18 11 8 10 12 407 | 18 Female 36 33 39 44 26 -30 10 -3 5 4 408 | 19 Female 28 37 28 47 28 0 2 -4 0 0 409 | 19 Female 24 27 26 48 31 -25 12 11 12 5 410 | 18 Female 15 33 23 48 35 -24 5 8 10 12 411 | 19 Female 26 22 32 53 22 -30 7 15 14 16 412 | 19 Male 24 14 26 45 22 1 6 2 2 0 413 | 25 Male 26 29 33 43 23 -15 10 6 4 6 414 | 19 Female 13 33 28 39 29 -8 9 0 5 6 415 | 19 Female 20 33 27 48 34 8 15 16 15 16 416 | 22 Male 17 31 24 44 29 -30 14 -3 22 25 417 | 18 Female 28 35 34 39 18 -30 25 20 9 21 418 | 19 Female 25 30 25 36 25 -9 3 1 -5 -7 419 | 19 Female 54 -29 23 15 26 420 | 25 Female 27 25 19 57 35 25 19 25 26 27 421 | 19 Male 21 33 26 54 25 -23 17 16 4 20 422 | 19 Female 24 27 29 49 20 -30 10 -7 17 9 423 | 21 Male 12 40 40 63 32 25 28 27 28 28 424 | 19 Female 54 4 6 2 2 1 425 | 21 Male 27 38 24 58 19 -8 16 14 7 9 426 | 20 Male 21 25 27 44 26 13 15 3 16 4 427 | 20 Male 23 31 21 51 29 -25 19 17 18 24 428 | 22 Female 24 26 23 59 24 0 13 8 12 22 429 | 19 Male 18 21 36 44 26 -12 11 1 7 8 430 | 22 Male 40 32 29 73 24 -18 26 16 19 27 431 | 19 Female 10 37 43 62 39 -23 16 20 -4 22 432 | -------------------------------------------------------------------------------- /multiple regression/multiple regression.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | # 3 | # Multiple Regression in R 4 | # 5 | # From Andy Field's book Discovering Statistics Using R 6 | # 7 | ############################################################# 8 | # 9 | setwd("~/OneDrive/MIDS/W203/Data/multiple regression") 10 | 11 | # Load up the album2.dat file as discussed in 7.8.2.2 12 | album2 <- read.delim("Album Sales 2.dat", header = TRUE) 13 | 14 | summary(album2) 15 | # 16 | # From the Album Sales 2 data set there are 4 vectors in the 17 | # data frame: adverts, sales, airplay and attract 18 | # 19 | # The goal is to create a model that takes into account these 20 | # predictors on the sales as the outcome variable. 21 | # The first thing we can do is load the car library to use 22 | # the handy scatterplot() in that library. 23 | library(car) 24 | scatterplot(album2$sales, album2$adverts) 25 | 26 | # The data show a pretty wide variance for these variables 27 | # but we see that there is a line that may indicate a linear 28 | # relationship exists. 29 | # 30 | # Out of curiousity, let's see what the correlation is between 31 | # these two vectors. 
32 | cor.test(album2$sales, album2$adverts) 33 | 34 | # The correlation t-test shows that we can reject the null 35 | # hypothesis that there is no correlation between adverts and 36 | # album sales and support the hypothesis that there is a 37 | # correlation, p < 2.2e-16 (highly significant). The 38 | # correlation is 0.5785 with a 95% confidence interval of 39 | # 0.4781 - 0.66394. 40 | # 41 | # Performing a linear regression and obtaining the first model 42 | albumSales.2 <- lm(sales ~ adverts, data = album2) 43 | summary(albumSales.2) 44 | 45 | # The model shows that an increase of 1 unit of adverts results in 46 | # a 0.09612 unit increase in album sales, highly significant, with a multiple 47 | # R^2 = 0.3346, and an adjusted R^2 of 0.3313 48 | # 49 | # Let's take a look at the plot output 50 | plot(albumSales.2) 51 | # 52 | # There appears to be a bit of heteroscedasticity in the Residuals 53 | # vs. Fitted graph; not a huge amount, but it's there. 54 | # The QQ Plot actually looks somewhat reasonable. 55 | # Scale-Location plot shows a similar indication of 56 | # heteroscedasticity 57 | # Don't see anything with respect to leverage that is of concern 58 | # 59 | # 60 | # Now, we think that airplay is also a significant predictor of 61 | # album sales, so let's create another model that now includes 62 | # airplay 63 | albumSales.3 <- lm(sales ~ adverts + airplay, data = album2) 64 | summary(albumSales.3) 65 | 66 | # As we expected, airplay has a highly significant non-zero 67 | # coefficient. For each unit increase in airplay, album sales 68 | # increase 3.59 units. The multiple R^2 has increased to 0.6293, 69 | # so our model accounts for almost 63% of the variation in album sales. 70 | # 71 | plot(albumSales.3) 72 | # 73 | # A little better on homoscedasticity than the first model 74 | # QQ Plot still looks reasonable 75 | # Other diagnostics look good 76 | # 77 | # Now let's add in the final variable, attractiveness of the band 78 | albumSales.4 <- lm(sales ~ adverts + airplay + attract, data = album2) 79 | summary(albumSales.4) 80 | # 81 | # Now we have all 3 beta coefficients and all are statistically 82 | # significantly different from zero. 83 | # The R^2 has increased to 0.665 84 | plot(albumSales.4) 85 | # 86 | # Diagnostics from the plot() all look OK 87 | # 88 | # We can examine the standardized beta estimates by loading the 89 | # QuantPsyc library and using lm.beta() 90 | library(QuantPsyc) 91 | library(boot) 92 | 93 | lm.beta(albumSales.4) 94 | # 95 | # The betas are given in terms of standard deviations. So now 96 | # for adverts with a standardized beta of 0.511 we can say that 97 | # as the advert budget increases by 1 SD, album sales increase 98 | # by 0.511 SD. 99 | # Also, every 1 SD increase in airplay results in a 0.512 SD increase 100 | # in album sales. 101 | # Finally, every 1 SD increase in band attractiveness results 102 | # in a 0.192 SD increase in album sales. 103 | # 104 | # Compute the confidence intervals for the model 105 | confint(albumSales.4) 106 | 107 | # We see that our coefficients fall within each of these 108 | # confidence intervals and that the confidence intervals don't 109 | # cross 0.
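# As a cross-check on the standardized betas quoted above (an optional sketch using the
# album2 predictors loaded earlier): refitting the model on z-scored variables gives
# slopes that should match the lm.beta() output.
album2.scaled <- data.frame(scale(album2[, c("sales", "adverts", "airplay", "attract")]))
coef(lm(sales ~ adverts + airplay + attract, data = album2.scaled))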
110 | # 111 | # Compare models using anova 112 | anova(albumSales.2, albumSales.3) 113 | 114 | # Adding airplay significantly improved the model compared to the adverts-only model, 115 | # F(1, 197) = 156.57, p < .001 116 | anova(albumSales.3, albumSales.4) 117 | 118 | # Compared to the previous model, the final model is an improvement, 119 | # F(1, 196) = 20.681, p < .001 120 | 121 | # Compute some useful diagnostic statistics and place them in the 122 | # dataframe 123 | album2$standardized.residuals <- rstandard(albumSales.4) 124 | album2$studentized.residuals <- rstudent(albumSales.4) 125 | album2$cooks.distance <- cooks.distance(albumSales.4) 126 | album2$dfbeta <- dfbeta(albumSales.4) 127 | album2$dffit <- dffits(albumSales.4) 128 | album2$leverage <- hatvalues(albumSales.4) 129 | album2$covariance.ratios <- covratio(albumSales.4) 130 | # 131 | album2$large.residual <- (album2$standardized.residuals > 2) | (album2$standardized.residuals < -2) 132 | # how many residuals do we have outside 2 SD? 133 | sum(album2$large.residual) 134 | # 135 | # There are 12 data points with standardized residuals outside +/- 2 (roughly the 95% range). 136 | # The next command will display those rows and the specified columns 137 | album2[album2$large.residual, c("sales","airplay","attract","adverts","standardized.residuals")] 138 | # 139 | # There's nothing too huge except for data point 169, which is ~ 3.1 140 | # Let's take a look at some of the other diagnostic variables for these 141 | # 12 data points. 142 | album2[album2$large.residual, c("cooks.distance", "leverage", "covariance.ratios")] 143 | # 144 | # We know that the CVR upper bound is 1 + 3(k + 1)/n = 1 + 3(3 + 1)/200 145 | # and the CVR lower bound is 1 - 3(k + 1)/n = 1 - 3(3 + 1)/200 146 | 1 + 12/200 147 | 1 - 12/200 148 | # 1.06 and 0.94, respectively. 149 | # Looking at the covariance ratios, the only point outside these bounds 150 | # is 169, again. However, the Cook's distance value for this point is 151 | # of little concern, so this point is not exercising undue influence 152 | # 153 | # Durbin-Watson test of independence 154 | dwt(albumSales.4) 155 | # 156 | # This shows that we can't reject the null hypothesis of no autocorrelation 157 | # 158 | # Multicollinearity 159 | vif(albumSales.4) 160 | 1/vif(albumSales.4) # Tolerance is 1/VIF 161 | mean(vif(albumSales.4)) 162 | # 163 | # If the largest VIF > 10 then there is cause for concern 164 | # If the average VIF is substantially > 1 then the regression may be biased 165 | # Tolerance < 0.1 indicates a serious issue 166 | # Tolerance < 0.2 indicates a potential issue
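# The casewise diagnostics computed above are also available in a single call from base R;
# summarising the result flags the cases that R marks as potentially influential (a quick
# complementary check on the albumSales.4 model fitted above).
summary(influence.measures(albumSales.4))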
-------------------------------------------------------------------------------- /multiple regression/neste-model.r: -------------------------------------------------------------------------------- 1 | # Today's Agenda 2 | # Using the 1994 GSS you will examine the dependent variable “partyid”, which is “political party identification” 3 | # (i.e., strong democrat to strong republican on a 7-point scale). You will treat this as a metric variable for the purposes of this investigation. 4 | # Your task: 5 | # 0) load the GSS1994_daughters dataset. 6 | # 1) Recode “sex of the first child” (kdsex1) into a dummy variable (daughter1) where female is coded as 1 or ‘true’. 7 | # Examine partyid and turn it into a numeric variable (the as.numeric function should be helpful here). 8 | # 2) Conduct a linear regression with “daughter1” as your independent variable. 9 | # 3) Conduct another linear regression with “daughter1” and one or more additional independent variables. You may use the following function to view the description 10 | # of each variable: 11 | # desc = function(var, df) { 12 | # attr(df,"var.labels")[match(var, colnames(df))] 13 | # } 14 | # For example, typing desc(“partyid”, GSS) will return a description of partyid. You may also use the following command to pull up a table of all variables and descriptions: 15 | # data.frame(names = colnames(GSS), labels = attr(GSS, "var.labels")) 16 | # 4) Assess the improvement, if any, from the first model to the second model. 17 | # 5) Did the coefficient for daughter1 change from the first model to the second model? If so, how do you explain this change? 18 | 19 | 20 | # X (first child) -> Y (Party ID) 21 | 22 | # Z = control, want it to be exogenous to X and Y. 23 | 24 | # Post-treatment bias 25 | 26 | # X -> Z -> Y 27 | 28 | # X -> Y will be downwardly biased. 29 | 30 | 31 | setwd('C:/Users/thomasa/Downloads/MIDS/EAD/Week13') 32 | load('daughters.rdata') 33 | 34 | #1) 35 | #dummy code the daughter variable 36 | GSS$daughter1 = as.numeric(GSS$kdsex1 == 'female') 37 | 38 | #create numeric party variable 39 | GSS$party_num = as.numeric(GSS$partyid) 40 | 41 | #Other party is coded as 8 so recode to NA 42 | GSS$party_num[GSS$party_num==8] = NA 43 | 44 | #check distribution 45 | table(GSS$party_num) 46 | hist(GSS$party_num) 47 | 48 | #2) 49 | mod = lm(party_num ~ daughter1,data=GSS) 50 | mod 51 | summary(mod) 52 | plot(mod) 53 | 54 | #3) 55 | #desc() looks up the human-readable label for a variable, stored in the data frame's "var.labels" attribute 56 | desc = function(var, df) { 57 | attr(df,"var.labels")[match(var, colnames(df))] 58 | } 59 | desc('age',GSS) 60 | 61 | #Try Age 62 | mod1 = lm(party_num ~ daughter1 + age,data=GSS) 63 | mod1 64 | summary(mod1) 65 | plot(mod1) 66 | #not statistically significant - don't include 67 | 68 | #Try Race 69 | #recode to remove iap 70 | GSS$race = factor(as.character(GSS$race),levels=c('black','white','other')) 71 | 72 | #contrasts 73 | contrasts(GSS$race) 74 | 75 | #model 76 | mod2 = lm(party_num ~ daughter1 + race,data=GSS) 77 | mod2 78 | summary(mod2) 79 | plot(mod2) 80 | 81 | 82 | 83 | # 4) 84 | #model comparison base with Age 85 | AIC(mod) 86 | AIC(mod1) 87 | 88 | #anova 89 | anova(mod,mod1) 90 | #no improvement 91 | 92 | #try race 93 | AIC(mod) 94 | AIC(mod2) 95 | anova(mod,mod2) 96 | 97 | # 5) Does the effect of daughter1 change from base model (daughter1) to model2 (daughter1 + race) 98 | # highly overlapping confidence intervals 99 | confint(mod) 100 | confint(mod2) 101 | 102 | 103 | t.test(GSS$party_num[GSS$race=='black'],GSS$party_num[GSS$race=='other']) 104 | 105 | 106 | 107 | #income: peek at the numeric lower bound extracted from each income bracket label 108 | cbind(as.character(GSS$income),as.numeric(gsub('.*\\$| .*','',as.character(GSS$income)))) 109 | 110 | 111 | mod3 = lm(party_num ~ daughter1 + race + polviews,data=GSS) 112 | mod3 113 | summary(mod3) 114 | plot(mod3) 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /plots/.Rapp.history: -------------------------------------------------------------------------------- 1 | metallica 2 | lecturerData 3 | library(Rcmdr) 4 | alchoholPersonality<-subset(lecturerData, alchohol>10, select=c("friends","alchohol","neurotic")) 5 | alchoholPersonality 6 | alchoholPersonalityMatrix<-as.matrix(alchoholPersonality) 7 | alchoholPersonalityMatrix 8 | library(Rcmdr) 9 | sunspots 10 | require(graphics) 11 | plot(sunspots, main = "sunspots data", xlab = "Year", ylab=
pvalue() 13 | pscore() 14 | mu1 <- 2/(sqrt(100)) 15 | mu1 16 | zscore() 17 | (2.5 - 2.0)/2 18 | 2*pnorm(2.5) 19 | 2 * pnorm(-2.5) 20 | -0.5/20 21 | pnorm(0.025) 22 | z <- (2 - 2.5)/2 23 | z 24 | 2*pnorm(-0.25) 25 | a <- 2 26 | s <- 2 27 | n <- 100 28 | xbar <- 2.5 29 | z <- (xbar-a)/(s/sqrt(n)) 30 | z 31 | 2*pnorm(-2.5) 32 | 2*(1-pnorm(xbar,mean=a,sd=s/sqrt(100))) 33 | 10000 - 10300 34 | -300/(1000 / sqrt(10)) 35 | pnorm(-300/(1000/sqrt(10))) 36 | pnorm(300/(1000/sqrt(10))) 37 | 2*(pnorm(-abs(500/(1000/sqrt(10)))) 38 | ) 39 | 2*(pnorm((500/(1000/sqrt(10)))) 40 | ) 41 | 500/1000/sqrt(10) 42 | 1000/sqrt(10) 43 | 500/(1000/sqrt(10)) 44 | pnorm(1.581139) 45 | pnorm(-1.581139) 46 | pnorm(-500/(1000/sqrt(10))) - pnorm(500/(1000/sqrt(10))) 47 | pnorm(10300,mean=10000,sd=1000/sqrt(10)) 48 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) 49 | pnorm(9500,mean=10000,sd=1000/sqrt(10)) 50 | pnorm(10500,mean=10000,sd=1000/sqrt(10)) - pnorm(9500,mean=10000,sd=1000/sqrt(10)) 51 | exit 52 | setwd("/Users/rcordell/Documents/Data/W203/R Book Data") 53 | setwd("/Users/rcordell/Documents/R/W203") 54 | library(ggplot2) 55 | facebookData = read.delim("FacebookNarcissism.dat", header = TRUE) 56 | graph <- ggplot(facebookData, aes(NPQC_R_Total, Rating)) 57 | graph + geom_point() 58 | graph + geom_point(shape = 17) 59 | graph + geom_point(shape = 7) 60 | graph + geom_point(shape = 7, size = 6) 61 | graph + geom_point(shape = 2, size = 6) 62 | graph + geom_point(shape = 3, size = 6) 63 | graph + geom_point(shape = 4, size = 6) 64 | graph + geom_point(shape = 17) 65 | graph + geom_point(shape = 4, size = 6) 66 | graph + geom_point(shape = 5, size = 6) 67 | graph + geom_point(aes(color = Rating_Type)) 68 | graph + geom_point(color = "RED") 69 | graph + geom_point(aes(color = Rating_Type), position = "jitter") 70 | graph + geom_point(aes(shape = Rating_Type), position = "jitter") 71 | examData <- read.delim("Exam.dat", header = TRUE) 72 | examData <- read.delim("Exam Anxiety.dat", header = TRUE) 73 | scatter <- ggplot(examData, aes(Anxiety, Exam)) 74 | scatter + geom_point() 75 | scatter + geom_point() + labs(x = "Exam Anxiety", y = "Exam Performance %") 76 | scatter + geom_point() + geom_smooth() + labs(x = "Exam Anxiety", y = "Exam Performance %") 77 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 78 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED") + labs(x = "Exam Anxiety", y = "Exam Performance %") 79 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED", se = F) + labs(x = "Exam Anxiety", y = "Exam Performance %") 80 | scatter + geom_point() + geom_smooth(method = "lm", alpha = 0.1, fill = "Blue") + labs(x = "Exam Anxiety", y = "Exam Performance %") 81 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 82 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 83 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 84 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 85 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 86 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 87 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 88 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 89 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 90 | 
scatter + geom_point(color = Gender) + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 91 | summary(examData) 92 | scatter <- ggplot(examData, aes(Anxiety, Exam)) 93 | scatter + geom_point(aes(color = Gender) + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 94 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 95 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 96 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 97 | scatter + geom_point() 98 | scatter + geom_point() + geom_smooth(method = "lm") 99 | scatter + geom_point() + geom_smooth(method = "lm", aes(fill = Gender), alpha = 0.1) 100 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 101 | scatter <- ggplot(examData, aes(Anxiety, Exam, color = Gender)) 102 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 103 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 104 | -------------------------------------------------------------------------------- /plots/ChickFlick.dat: -------------------------------------------------------------------------------- 1 | gender film arousal 2 | Male Bridget Jones' Diary 22 3 | Male Bridget Jones' Diary 13 4 | Male Bridget Jones' Diary 16 5 | Male Bridget Jones' Diary 10 6 | Male Bridget Jones' Diary 18 7 | Male Bridget Jones' Diary 24 8 | Male Bridget Jones' Diary 13 9 | Male Bridget Jones' Diary 14 10 | Male Bridget Jones' Diary 19 11 | Male Bridget Jones' Diary 23 12 | Male Memento 37 13 | Male Memento 20 14 | Male Memento 16 15 | Male Memento 28 16 | Male Memento 27 17 | Male Memento 18 18 | Male Memento 32 19 | Male Memento 24 20 | Male Memento 21 21 | Male Memento 35 22 | Female Bridget Jones' Diary 3 23 | Female Bridget Jones' Diary 15 24 | Female Bridget Jones' Diary 5 25 | Female Bridget Jones' Diary 16 26 | Female Bridget Jones' Diary 13 27 | Female Bridget Jones' Diary 20 28 | Female Bridget Jones' Diary 11 29 | Female Bridget Jones' Diary 19 30 | Female Bridget Jones' Diary 15 31 | Female Bridget Jones' Diary 7 32 | Female Memento 30 33 | Female Memento 25 34 | Female Memento 31 35 | Female Memento 36 36 | Female Memento 23 37 | Female Memento 14 38 | Female Memento 21 39 | Female Memento 31 40 | Female Memento 22 41 | Female Memento 14 42 | -------------------------------------------------------------------------------- /plots/DownloadFestival.dat: -------------------------------------------------------------------------------- 1 | ticknumb gender day1 day2 day3 2 | 2111 Male 2.64 1.35 1.61 3 | 2229 Female .97 1.41 .29 4 | 2338 Male .84 5 | 2384 Female 3.03 6 | 2401 Female .88 .08 7 | 2405 Male .85 8 | 2467 Female 1.56 9 | 2478 Female 3.02 10 | 2490 Male 2.29 11 | 2504 Female 1.11 .44 .55 12 | 2509 Male 2.17 13 | 2510 Female .82 .2 .47 14 | 2514 Male 1.41 15 | 2515 Female 1.76 1.64 1.58 16 | 2520 Male 1.38 .02 17 | 2521 Female 2.79 18 | 2529 Male 1.5 19 | 2533 Female 1.91 2.05 20 | 2535 Female 2.32 21 | 2538 Male 2.05 22 | 2549 Male 2.17 .7 .76 23 | 2551 Female 2.05 24 | 2558 Female 1.61 25 | 2562 Female 1.66 .85 26 | 2565 Female 2.3 27 | 2566 Female 2.76 28 | 2568 Female 1.44 29 | 2586 Female 1.06 30 | 2601 Male 3.23 31 | 2602 Male 
.97 .38 .76 32 | 2604 Male 2.57 .11 .02 33 | 2606 Female .26 34 | 2609 Female .47 35 | 2611 Female 1.73 36 | 2612 Male 1.94 .82 1.67 37 | 2616 Male 1.91 38 | 2624 Female 2.08 .91 .96 39 | 2633 Female 1.91 40 | 2642 Female 1.42 41 | 2644 Male 1.5 42 | 2662 Female .11 43 | 2663 Male 1.67 44 | 2664 Male 2.08 45 | 2669 Female 2.05 46 | 2670 Male 2 47 | 2675 Male 1.52 48 | 2676 Female 1.58 49 | 2677 Male 1.28 .38 .14 50 | 2681 Male 1.88 51 | 2685 Female 1.32 52 | 2686 Female 2.09 53 | 2687 Male 2 54 | 2688 Female 2.64 55 | 2689 Male .85 .32 .52 56 | 2692 Female 2.47 .23 .38 57 | 2698 Female 1.79 58 | 2709 Female 1.64 59 | 2710 Male 1.32 60 | 2712 Female 2.97 61 | 2726 Female 1.44 .14 62 | 2727 Female 2.02 63 | 2728 Male 1.79 64 | 2730 Male 1.34 65 | 2731 Female 2.29 1.9 66 | 2732 Female 1.66 67 | 2734 Female .6 68 | 2735 Female 1.76 .76 .29 69 | 2736 Female 1.5 70 | 2737 Female 2.08 .7 71 | 2748 Female 1 .55 72 | 2752 Male 1.73 73 | 2756 Female 1.05 .38 74 | 2758 Male 2.81 75 | 2759 Female 1.52 76 | 2768 Male 1.47 77 | 2769 Female 2.64 78 | 2770 Female 2.2 1.18 79 | 2771 Male .55 .79 80 | 2772 Male 2.29 81 | 2773 Female 2 82 | 2775 Male 2.23 83 | 2779 Female 2.45 84 | 2780 Male 1.2 85 | 2783 Male 2.91 2.08 2.11 86 | 2784 Female 1.14 1 87 | 2788 Male 1.88 88 | 2789 Male .94 89 | 2791 Female 1.85 90 | 2794 Female 2.58 91 | 2796 Female .61 92 | 2799 Male .7 93 | 2806 Male 1.38 94 | 2807 Female 1.94 95 | 2811 Female 2.29 96 | 2812 Male 1.59 97 | 2813 Female 2.46 98 | 2814 Female 1.67 .14 99 | 2825 Female 2.02 .58 .5 100 | 2827 Male 1.5 101 | 2828 Female 2.7 1.7 1.91 102 | 2829 Female 1.61 103 | 2830 Male 2.29 104 | 2831 Female .97 1.06 .76 105 | 2839 Male 1.85 106 | 2842 Female 2.76 107 | 2857 Male 1.64 108 | 2858 Male 1.17 109 | 2859 Male 1.57 110 | 2862 Female 2.23 111 | 2863 Female 2.05 1.58 2.15 112 | 2875 Female 2.05 113 | 2876 Female 2.94 114 | 2883 Female 2.39 115 | 2890 Male 1.94 116 | 2894 Female 2.12 117 | 2895 Female 1.11 118 | 2897 Female .97 119 | 2898 Female 1.35 120 | 2899 Female 2.81 2.08 121 | 2900 Female 2.5 122 | 2901 Male 1.87 123 | 2920 Male 1.33 124 | 2921 Female 1.26 125 | 2922 Female 1.44 126 | 2923 Male .55 127 | 2924 Female 1.75 128 | 2925 Female 2.08 129 | 2932 Male .85 130 | 2933 Female 2.52 131 | 2935 Female 3 132 | 2936 Female 1.41 133 | 2937 Female 1.08 134 | 2938 Male 1.2 1.38 1.5 135 | 2940 Male 1.94 1.44 136 | 2941 Female 2.26 1.73 1.73 137 | 2942 Male 1.41 138 | 2948 Female 2.5 139 | 2952 Male 2.17 140 | 2953 Male 1.82 1.11 141 | 2954 Female 1.44 1.14 142 | 2956 Female 1.66 143 | 2957 Male 1.82 144 | 2958 Male 1.26 145 | 2959 Female 2.67 146 | 2961 Female 1.47 147 | 2962 Female 1.84 148 | 2964 Female 2.58 149 | 2966 Male 1.73 150 | 2967 Male 1.23 151 | 2968 Male 2.32 152 | 2972 Female 2.67 153 | 2974 Female 1.02 154 | 2975 Female 1.66 2.12 2.7 155 | 2976 Female 1.88 156 | 2977 Female 1.91 157 | 2978 Female 1.64 158 | 2979 Male 1.34 159 | 2982 Male 1.85 160 | 2983 Male 2.08 161 | 2984 Male 1.02 162 | 2985 Female 1.79 163 | 2988 Female 1.94 164 | 2989 Female 3.26 1.97 1.67 165 | 2990 Male 1.14 .58 .11 166 | 3008 Female 1.5 .7 .38 167 | 3009 Female 2.03 168 | 3010 Female 2.24 169 | 3013 Female 1.11 170 | 3014 Female 2.21 171 | 3016 Male 1.94 172 | 3017 Male 2.41 173 | 3018 Male .88 174 | 3019 Female 1.17 1.35 175 | 3027 Male 2.23 176 | 3028 Female 1.64 177 | 3029 Female 2.14 178 | 3030 Male .11 .29 179 | 3031 Male 2.17 180 | 3032 Female 1.67 181 | 3033 Female 1 182 | 3034 Female .88 183 | 3035 Male 2.2 184 | 3048 Male 2.17 185 | 3049 Female 2.32 186 | 3051 Male 1.64 
187 | 3052 Female 3 188 | 3053 Female 2.38 .85 189 | 3054 Female 1.6 1.02 190 | 3055 Female 1.58 191 | 3056 Female 2.61 192 | 3057 Male 1.44 .05 .2 193 | 3064 Male 1.57 194 | 3065 Female 2.32 195 | 3068 Female 1.14 196 | 3069 Male 1.93 197 | 3070 Male 2.47 198 | 3072 Female 2.29 199 | 3073 Female 1 200 | 3092 Female 1.58 201 | 3093 Male 2.44 202 | 3094 Female .83 203 | 3095 Male 2.71 .78 .33 204 | 3096 Female 1.73 205 | 3097 Male 1.58 206 | 3098 Male 1.5 207 | 3100 Female 1.05 208 | 3106 Male 2.05 209 | 3107 Female 2.63 210 | 3109 Male 2.55 2.29 211 | 3111 Female 2 212 | 3112 Male 2 213 | 3114 Female 1.32 214 | 3116 Female 3.14 215 | 3118 Male 1.44 216 | 3129 Male 1.85 .23 217 | 3131 Female 1.41 .44 218 | 3132 Female 1.94 219 | 3133 Male 2.91 220 | 3135 Female 1.85 221 | 3136 Male 1.7 222 | 3137 Female 2.23 223 | 3138 Male 1.11 224 | 3139 Female 1.47 225 | 3146 Female 2.2 226 | 3147 Male 1.82 227 | 3148 Female 1.42 228 | 3168 Female 2.44 229 | 3171 Female 2.66 230 | 3172 Male 1.52 231 | 3173 Female 1.35 .47 .73 232 | 3180 Female 1.29 233 | 3182 Female 2.32 234 | 3190 Male .78 235 | 3192 Female 2.84 236 | 3202 Male .97 237 | 3236 Female 1.52 238 | 3245 Female 1.7 239 | 3246 Female .94 1.17 1.29 240 | 3247 Male 1.41 241 | 3248 Male 1.79 242 | 3249 Male 1.08 .44 .44 243 | 3250 Male 1.47 244 | 3251 Female 1.79 .47 245 | 3253 Male 2 246 | 3254 Female .76 247 | 3255 Male 2.2 248 | 3256 Female .94 .17 249 | 3257 Female 1.38 250 | 3258 Female 1.38 .85 251 | 3260 Male .32 252 | 3261 Male 2.58 253 | 3262 Male .51 254 | 3264 Female .32 255 | 3267 Female .91 1.11 1.7 256 | 3273 Male 1.51 257 | 3275 Female 1.47 258 | 3276 Female 2.5 259 | 3277 Female 2.26 260 | 3278 Female 2.81 261 | 3279 Female 1.87 262 | 3281 Female 2 263 | 3282 Female 2.23 .41 1.02 264 | 3284 Male 2 .76 265 | 3290 Female 1.41 266 | 3291 Male 1.64 267 | 3292 Male 1.64 268 | 3296 Male 1.26 269 | 3306 Female 1.52 .55 1.88 270 | 3307 Male 2.44 1.02 .76 271 | 3308 Female 2.18 272 | 3309 Female 3.02 273 | 3310 Female 1.02 274 | 3311 Female 2.88 275 | 3312 Male 1.54 276 | 3313 Female 1.64 277 | 3314 Female 2.44 2.5 1.7 278 | 3315 Female 1.29 279 | 3316 Female 1.61 .32 .26 280 | 3321 Female 1.77 281 | 3325 Male .91 .17 282 | 3326 Female .85 .2 .38 283 | 3327 Male .85 .52 .44 284 | 3328 Female 1.5 285 | 3329 Male 1.05 .23 286 | 3338 Female 3.38 287 | 3340 Female 1.42 .52 2 288 | 3341 Female 1.85 289 | 3348 Female 1.91 .84 290 | 3349 Male .82 .26 291 | 3350 Female 1.32 .76 292 | 3351 Female 2.23 .85 .39 293 | 3352 Female 1.47 1.52 .17 294 | 3363 Female 2.7 295 | 3365 Male 1.58 296 | 3366 Male 1 297 | 3367 Female 1.44 298 | 3368 Female 2 299 | 3369 Male 1.6 300 | 3370 Female 2.32 2.53 1.67 301 | 3371 Female 3.41 302 | 3372 Female 2.02 303 | 3373 Male .64 .52 304 | 3374 Male 3.58 3.35 305 | 3375 Male 1.5 306 | 3376 Male 1.08 307 | 3377 Female 1.52 308 | 3378 Male 1.26 309 | 3379 Female 1.68 310 | 3380 Male 1.47 1.08 .58 311 | 3390 Female 1.47 312 | 3391 Female 1.67 1.55 313 | 3392 Female 2.47 1.97 314 | 3393 Male 1.82 315 | 3394 Female 2.17 316 | 3395 Female 3.21 317 | 3397 Female 1.6 1.38 1.02 318 | 3398 Female .32 319 | 3407 Female .55 320 | 3411 Female 1.42 321 | 3412 Male 1.14 322 | 3413 Female 2.64 323 | 3416 Female 2.58 324 | 3418 Female 2.02 325 | 3419 Male 2 326 | 3420 Female 2.9 327 | 3423 Male 1.82 328 | 3429 Female .5 329 | 3431 Male 1.53 330 | 3449 Female 2.48 331 | 3450 Female 2.05 332 | 3453 Male 2.52 333 | 3454 Female 1.88 334 | 3455 Male 2.73 335 | 3456 Female 2.88 336 | 3457 Female 1.67 337 | 3458 Female 1.93 338 | 3460 Female 
1.67 339 | 3461 Male 1.2 340 | 3464 Female 2.75 341 | 3467 Female 1.94 .97 342 | 3468 Female .59 343 | 3469 Female 1.5 344 | 3477 Male 1.58 .94 .94 345 | 3480 Female 2.23 .11 .17 346 | 3483 Female 2.35 347 | 3490 Female 2.55 .82 .29 348 | 3493 Male 1.55 349 | 3494 Female 2.31 350 | 3495 Male 2.23 351 | 3500 Female .67 .5 352 | 3501 Male 2.51 353 | 3503 Male 1.08 .58 .61 354 | 3510 Female 2.44 355 | 3511 Female .23 .14 356 | 3512 Female 2.17 357 | 3518 Male 1.9 1.17 358 | 3519 Female 1.67 .44 359 | 3521 Female 2 .58 .52 360 | 3522 Female 2.44 361 | 3523 Male 1.44 362 | 3524 Male .82 363 | 3525 Female 2.5 364 | 3526 Male 1.82 365 | 3536 Female 1.97 366 | 3539 Female 2.52 367 | 3540 Female .05 368 | 3542 Female 2.08 369 | 3545 Female 2.39 370 | 3546 Male 1.45 .82 371 | 3551 Male 2.58 372 | 3557 Female 2.12 373 | 3565 Female 2.02 .76 .55 374 | 3567 Male 1.78 1.14 .44 375 | 3568 Male .73 .17 .76 376 | 3569 Female 2.26 .9 1.85 377 | 3570 Female 2.79 378 | 3571 Male .43 .67 .14 379 | 3572 Male .52 .38 .7 380 | 3573 Female 2.32 381 | 3574 Male 2.22 382 | 3575 Male .58 383 | 3576 Male 2 384 | 3577 Female .7 385 | 3586 Male 1 386 | 3587 Male .3 387 | 3588 Male 1.52 388 | 3593 Female 1.58 .35 389 | 3594 Male 2.34 390 | 3595 Female .79 391 | 3596 Female 2.26 392 | 3597 Male 2.35 393 | 3598 Female 1.7 394 | 3599 Female 3.09 395 | 3600 Female 1.52 396 | 3601 Female .35 397 | 3602 Female 2.7 398 | 3603 Female 1.64 399 | 3605 Male .82 400 | 3606 Male 2.73 401 | 3607 Female 2.23 402 | 3609 Female 1.06 403 | 3610 Male 2.05 .2 .35 404 | 3613 Female 1.73 1.44 405 | 3614 Female .93 .91 406 | 3620 Female 2.5 2.44 407 | 3621 Male 1.44 408 | 3622 Female 2.88 409 | 3625 Female .67 .23 .44 410 | 3626 Male 1.85 .35 411 | 3627 Female 1.21 .79 412 | 3628 Male 1.06 .76 .7 413 | 3629 Male .61 .26 .33 414 | 3631 Female 2 415 | 3634 Female 1.17 .73 1.17 416 | 3635 Female 1.48 .79 1.55 417 | 3645 Female 1.55 418 | 3646 Female 3.29 419 | 3647 Male 1.47 420 | 3648 Male .96 421 | 3649 Male 1 1.11 1.2 422 | 3652 Male 1.47 423 | 3653 Female 2.55 2.38 424 | 3654 Female .44 .06 425 | 3655 Female 2.35 2.41 426 | 3656 Female 1.71 .85 427 | 3659 Male 1.84 .58 .7 428 | 3660 Female 1.11 .23 .55 429 | 3667 Female 1.38 430 | 3669 Male .88 431 | 3670 Female .94 432 | 3672 Female 1.91 433 | 3676 Male 2.76 434 | 3677 Male 1.55 .32 .47 435 | 3678 Male 2.67 436 | 3680 Female 1.03 .29 .72 437 | 3696 Female 2.5 438 | 3697 Male 1.64 439 | 3698 Female 2.26 440 | 3709 Female 2.14 441 | 3710 Male .52 442 | 3711 Male 1.08 443 | 3712 Male 1.69 444 | 3713 Male 2.73 445 | 3714 Male 1.91 446 | 3715 Male 1.73 447 | 3716 Female 3.21 448 | 3717 Female 2.11 449 | 3718 Female 2.05 450 | 3719 Male 2.17 451 | 3720 Female 2.17 452 | 3721 Female 2.3 453 | 3722 Female 2.56 454 | 3723 Female 2.11 .41 .47 455 | 3724 Male 1.7 456 | 3725 Female 1.23 457 | 3726 Female 3.2 458 | 3727 Female 2.02 459 | 3728 Female 2.64 460 | 3729 Male 2.52 .14 461 | 3730 Male 1.61 462 | 3731 Male 1.5 1.2 .91 463 | 3734 Male 1.15 .45 .44 464 | 3736 Male 1.82 465 | 3753 Female 1.5 466 | 3754 Female 2.32 467 | 3759 Female 2.92 468 | 3764 Male 1.41 469 | 3769 Male 1.35 470 | 3778 Male .61 .14 471 | 3779 Female .73 472 | 3782 Female 2.23 1.88 473 | 3783 Female 1.32 .91 474 | 3784 Female 2.94 1.79 475 | 3785 Male 1.61 476 | 3786 Female 1 477 | 3787 Female 3.15 3 478 | 3788 Female 2.88 479 | 3791 Female 2.09 1.21 480 | 3792 Female 1.32 1.7 2.29 481 | 3793 Male 1.47 .35 482 | 3794 Male 1.61 483 | 3796 Male 2.2 1.5 1.88 484 | 3797 Female 2.78 485 | 3798 Female 2.06 486 | 3799 Female .47 487 | 
3804 Female 2.87 488 | 3826 Male 1.14 489 | 3828 Female 3.32 3.21 490 | 3831 Female 2.08 1.38 491 | 3836 Female 2.38 2.5 492 | 3837 Female 2.08 493 | 3838 Male 1.85 494 | 3840 Male 1.38 495 | 3844 Male 1.14 496 | 3846 Male 1.58 497 | 3849 Male 1.23 .7 1.02 498 | 3850 Female 2.53 499 | 3851 Male .67 500 | 3854 Female .73 501 | 3855 Female 1.34 502 | 3856 Female 2.14 .7 503 | 3857 Female 1 504 | 3858 Male 1.35 505 | 3859 Male 1.94 .79 506 | 3860 Male .5 507 | 3869 Female 3.08 508 | 3870 Female 2.88 509 | 3871 Female 1.91 510 | 3872 Male 1.41 511 | 3873 Male 2.02 512 | 3874 Female .76 513 | 3875 Male 1.94 514 | 3876 Male .67 .28 515 | 3879 Female 2.41 516 | 3880 Female 2.17 517 | 3882 Female 2.67 .41 518 | 3883 Male 1.94 .64 519 | 3888 Female 2.05 .85 1.7 520 | 3890 Male 2.17 521 | 3909 Male .47 522 | 3912 Male .62 .76 523 | 3913 Female 2 524 | 3914 Male .45 525 | 3916 Female 2.29 .91 1.38 526 | 3917 Female 2.55 527 | 3918 Female .82 528 | 3919 Female 3.12 2.2 529 | 3920 Male 2.5 2.23 .41 530 | 3921 Male 1.79 531 | 3922 Female 2.28 532 | 3923 Male .58 1.05 .58 533 | 3924 Female 2.5 1.29 534 | 3925 Female 1.41 535 | 3926 Female 2.14 536 | 3927 Male .76 .26 537 | 3929 Male 1.79 1.11 538 | 3930 Male 1.02 .35 539 | 3932 Female 2.62 540 | 3942 Male .88 541 | 3944 Male 1.58 .2 .2 542 | 3945 Female 2.2 543 | 3946 Female 1.14 544 | 3947 Male 1.47 .52 .35 545 | 3948 Female 1.41 .23 .38 546 | 3949 Male 1.44 1.76 1.18 547 | 3950 Female 1.23 1.17 548 | 3951 Female 1.82 549 | 3952 Female 2.44 550 | 3954 Female 1.94 1.2 551 | 3955 Male 2.41 552 | 3964 Female 2.27 553 | 3965 Male 1.79 .23 554 | 3966 Male 1.88 555 | 3972 Male 1.85 .64 1.32 556 | 3973 Female 2.21 557 | 3986 Male 1.97 1.94 558 | 3987 Female 2.51 559 | 3988 Female 2.05 560 | 3989 Female 1.29 1 .82 561 | 3990 Female 2.05 562 | 3991 Female 2.23 563 | 3992 Female 1.76 564 | 3993 Male 1.05 565 | 3994 Male 1.79 566 | 3995 Female 1.02 .73 .47 567 | 3996 Female 2.76 1.58 568 | 3997 Female 1.67 .55 569 | 4010 Male 2.85 570 | 4011 Female .23 .84 571 | 4012 Female 1.9 572 | 4013 Male 1.23 .52 573 | 4014 Male 1.97 574 | 4015 Female 1.5 575 | 4016 Female 3.69 576 | 4017 Female .5 577 | 4022 Female 2.18 578 | 4023 Female 2.17 579 | 4024 Female 1.58 580 | 4025 Male 2.88 581 | 4030 Male 2.52 582 | 4032 Female 2.2 583 | 4033 Male 1.73 584 | 4034 Female 2.23 585 | 4079 Male 1.97 586 | 4089 Male 1.2 .67 .91 587 | 4090 Male 2 588 | 4092 Female 1.91 589 | 4093 Male .81 590 | 4095 Female 1.31 591 | 4096 Female .38 592 | 4101 Female 1.97 593 | 4104 Female .38 594 | 4105 Female 2.11 .76 .85 595 | 4106 Female 3.2 596 | 4107 Female .02 597 | 4109 Female 2.56 598 | 4110 Female 2.02 599 | 4111 Female 2.3 600 | 4114 Male 2.02 601 | 4147 Female 2.05 602 | 4148 Female 1.7 603 | 4149 Male 1.61 604 | 4150 Male .73 605 | 4151 Male 2.5 1.64 606 | 4152 Female 2.18 1.75 1.91 607 | 4153 Female 2.46 1.08 1.91 608 | 4154 Female 1.5 .91 609 | 4155 Female 1.73 .94 610 | 4156 Male 1.44 611 | 4157 Male 1.64 .32 .52 612 | 4158 Female 20.02 2.44 613 | 4159 Female 1.2 .17 .2 614 | 4160 Male .38 .02 615 | 4161 Female 1.58 1.54 1.76 616 | 4162 Female 1.67 .5 617 | 4163 Female 1 .48 618 | 4164 Female 2.58 1.35 619 | 4165 Female 2.82 2.61 3.02 620 | 4166 Female 2.29 2.05 621 | 4167 Female 1.14 622 | 4168 Female 1.64 .76 623 | 4171 Female 1.82 .08 624 | 4172 Female 3.32 2.91 3.02 625 | 4193 Male 3.32 626 | 4194 Male 1.85 1 1.5 627 | 4195 Male 2.29 628 | 4196 Female 1.47 .47 1.58 629 | 4197 Male 2.08 .7 .67 630 | 4198 Male 2.2 631 | 4199 Female 1.06 1.45 632 | 4216 Female .97 .14 .02 633 | 4218 
Male 2 634 | 4219 Female 1.67 .38 635 | 4220 Female 2.94 636 | 4221 Female 1.55 637 | 4222 Male .88 .26 638 | 4223 Female 1.35 2.32 639 | 4224 Female .61 .2 .17 640 | 4225 Male 1 641 | 4226 Female 1.52 2.72 .52 642 | 4234 Male 1 643 | 4235 Male 1.76 .41 .88 644 | 4236 Female 2.52 645 | 4237 Female 2 .88 1.44 646 | 4238 Female 2.63 647 | 4239 Female .73 .85 648 | 4240 Male 1.58 .23 1.66 649 | 4241 Female .58 650 | 4245 Female 1.67 651 | 4247 Female 1.47 652 | 4249 Female 1.81 653 | 4256 Male 1.91 654 | 4257 Male 1.06 655 | 4259 Male 1.47 1.23 .94 656 | 4262 Male 2.52 657 | 4263 Female 1.85 .2 658 | 4264 Male 3.44 659 | 4265 Female 1.55 1.32 1.19 660 | 4266 Female 2.29 2.7 661 | 4267 Female 1.76 662 | 4268 Male 1.9 663 | 4269 Male 2.52 2.55 664 | 4271 Female 2.52 665 | 4274 Female 2.82 .17 666 | 4281 Female 2.02 667 | 4284 Female 1.29 668 | 4285 Male 1.26 669 | 4287 Female .94 670 | 4291 Female 2 1.13 .53 671 | 4292 Female .73 672 | 4294 Male 2.26 .79 673 | 4295 Female 2.23 674 | 4297 Male 2.35 675 | 4298 Female .55 .38 676 | 4301 Female 1.85 677 | 4302 Male .67 678 | 4305 Male 1.85 1 679 | 4306 Male 1.23 .2 680 | 4308 Female 2.35 681 | 4309 Male 1.35 682 | 4310 Female 1.94 683 | 4311 Male 1.55 684 | 4312 Female 1.29 685 | 4313 Male 2.17 686 | 4314 Female 1.91 687 | 4333 Female 2.88 688 | 4334 Female 2.36 689 | 4335 Female 2.36 690 | 4336 Female 2.2 691 | 4349 Female 2.17 .47 .38 692 | 4351 Male .52 693 | 4352 Male .32 694 | 4353 Female 1.52 .55 695 | 4354 Male 2 .94 .08 696 | 4355 Male 1.32 1.02 1.2 697 | 4356 Female 2.05 698 | 4357 Female 1.73 699 | 4358 Male 1.94 700 | 4359 Female 1.81 701 | 4360 Male .9 .64 702 | 4361 Male 1.58 .67 703 | 4363 Female 2.29 1.87 .91 704 | 4366 Female 2.57 705 | 4370 Female 1.58 706 | 4382 Female 2.33 .82 707 | 4383 Female 3.15 708 | 4384 Female 2.29 709 | 4385 Female .82 710 | 4386 Female 1.93 711 | 4387 Female 1.82 712 | 4388 Female 1.96 713 | 4389 Female 1.32 .64 .47 714 | 4391 Female 1.02 715 | 4392 Female 1.14 716 | 4393 Female 2.32 717 | 4394 Male 2.16 718 | 4396 Female 2.42 1.7 719 | 4397 Female 1.14 720 | 4398 Male 1.55 .79 1.76 721 | 4404 Female 1.17 722 | 4405 Female 1 .58 .32 723 | 4410 Female 1.05 .11 724 | 4419 Female 1.38 725 | 4428 Female 1.93 2.42 726 | 4430 Female 2.73 727 | 4435 Female 2.02 728 | 4436 Female 2.81 729 | 4437 Female 2.47 730 | 4439 Male 1.35 731 | 4440 Male 2.08 732 | 4441 Female 2.5 733 | 4442 Female 2.45 734 | 4443 Female 2.17 735 | 4444 Male 1.7 0 736 | 4445 Male .7 .23 .45 737 | 4446 Male 1.51 738 | 4447 Female 1.23 739 | 4448 Female 2.14 .85 740 | 4452 Male 1.14 1.14 741 | 4453 Female .96 742 | 4454 Male 1.52 1.14 1.26 743 | 4455 Male .52 744 | 4456 Male 1.56 745 | 4459 Female 3.29 .26 746 | 4464 Male .45 747 | 4467 Female 2.63 748 | 4468 Female 1.7 749 | 4470 Male 3.11 750 | 4479 Female 1.82 751 | 4481 Female 1.58 .14 752 | 4482 Female 2.73 753 | 4485 Female 1.5 1.14 754 | 4486 Male 1.78 1.02 755 | 4488 Female 2.02 756 | 4507 Male .67 .94 .91 757 | 4509 Female 1.41 .55 758 | 4510 Male .9 759 | 4511 Female 1.23 1.11 .76 760 | 4512 Female 2.7 761 | 4514 Female 1.97 762 | 4515 Male .84 763 | 4516 Male 1.79 764 | 4517 Female 2.84 765 | 4518 Male 2.02 766 | 4519 Male 1.64 .7 767 | 4525 Female 1.08 768 | 4533 Female 2.97 769 | 4552 Female .94 770 | 4553 Female 2.97 1.94 771 | 4554 Female .97 772 | 4555 Male 1.47 773 | 4559 Female 2.61 774 | 4563 Female 1.73 .2 .91 775 | 4564 Female 3.38 3.44 3.41 776 | 4569 Male 3.17 1 1.73 777 | 4570 Female 2.2 .91 778 | 4571 Female 2.14 779 | 4582 Male 1.29 1.58 .81 780 | 4590 Female 3.21 
2.85 781 | 4597 Male 2.67 782 | 4598 Male 1.85 .79 783 | 4601 Female 1.35 784 | 4607 Female 2.14 .76 785 | 4611 Male 1.24 .56 786 | 4654 Female 2.02 1.78 2.55 787 | 4655 Female 2.32 788 | 4663 Male 1.08 .23 789 | 4666 Male 1.14 1.35 1.02 790 | 4690 Female 2.14 1.82 791 | 4692 Female 2.88 792 | 4693 Male 1.35 .17 .44 793 | 4694 Male 1 1.7 794 | 4695 Male 2.02 795 | 4696 Female .64 1.32 796 | 4697 Male .29 .14 797 | 4698 Female 1.73 .94 798 | 4699 Female 1.82 1.52 799 | 4704 Female 2.11 800 | 4710 Female 1.23 801 | 4711 Male .64 802 | 4724 Male 2.23 1.41 2.11 803 | 4725 Female 2.44 .32 804 | 4740 Male 1.17 .58 805 | 4744 Female .61 .44 806 | 4749 Female .52 807 | 4756 Female 2.91 .94 808 | 4758 Female 2.61 1.44 809 | 4759 Female 1.47 810 | 4760 Male 1.28 811 | 4765 Female 1.26 812 | -------------------------------------------------------------------------------- /plots/Exam Anxiety.dat: -------------------------------------------------------------------------------- 1 | Code Revise Exam Anxiety Gender 2 | 1 4 40 86.298 Male 3 | 2 11 65 88.716 Female 4 | 3 27 80 70.178 Male 5 | 4 53 80 61.312 Male 6 | 5 4 40 89.522 Male 7 | 6 22 70 60.506 Female 8 | 7 16 20 81.462 Female 9 | 8 21 55 75.82 Female 10 | 9 25 50 69.372 Female 11 | 10 18 40 82.268 Female 12 | 11 18 45 79.044 Male 13 | 12 16 85 80.656 Male 14 | 13 13 70 70.178 Male 15 | 14 18 50 75.014 Female 16 | 15 98 95 34.714 Male 17 | 16 1 70 95.164 Male 18 | 17 14 95 75.82 Male 19 | 18 29 95 79.044 Female 20 | 19 4 50 91.134 Female 21 | 20 23 60 64.536 Male 22 | 21 14 80 80.656 Male 23 | 22 12 75 77.432 Male 24 | 23 22 85 65.342 Female 25 | 24 84 90 .0560000000000116 Female 26 | 25 23 30 71.79 Female 27 | 26 26 60 81.462 Female 28 | 27 24 75 63.73 Male 29 | 28 72 75 27.46 Female 30 | 29 37 27 73.402 Female 31 | 30 10 20 89.522 Male 32 | 31 3 75 89.522 Female 33 | 32 36 90 75.014 Female 34 | 33 43 60 43.58 Male 35 | 34 19 30 82.268 Male 36 | 35 12 80 79.044 Male 37 | 36 9 10 79.044 Female 38 | 37 72 85 37.132 Male 39 | 38 10 7 81.462 Male 40 | 39 12 5 83.074 Female 41 | 40 30 85 50.834 Male 42 | 41 15 20 82.268 Male 43 | 42 8 45 78.238 Female 44 | 43 34 60 72.596 Male 45 | 44 22 70 74.208 Female 46 | 45 21 50 75.82 Female 47 | 46 27 25 70.984 Male 48 | 47 6 50 97.582 Male 49 | 48 18 40 67.76 Male 50 | 49 8 80 75.014 Male 51 | 50 19 50 73.402 Female 52 | 51 0 35 93.552 Female 53 | 52 52 80 58.894 Female 54 | 53 38 50 53.252 Female 55 | 54 19 49 84.686 Male 56 | 55 23 75 89.522 Female 57 | 56 11 25 71.79 Female 58 | 57 27 65 82.268 Male 59 | 58 17 80 69.372 Male 60 | 59 13 50 62.118 Male 61 | 60 42 70 68.566 Female 62 | 61 4 40 93.552 Male 63 | 62 8 80 84.686 Female 64 | 63 6 10 82.268 Male 65 | 64 11 20 81.462 Female 66 | 65 7 40 82.268 Male 67 | 66 15 40 91.134 Male 68 | 67 4 70 91.94 Female 69 | 68 28 52 86.298 Female 70 | 69 22 50 72.596 Male 71 | 70 29 60 63.73 Female 72 | 71 2 80 63.73 Male 73 | 72 16 60 71.79 Female 74 | 73 59 65 57.282 Male 75 | 74 10 15 84.686 Female 76 | 75 13 85 84.686 Male 77 | 76 8 20 77.432 Female 78 | 77 5 80 82.268 Female 79 | 78 2 100 10 Male 80 | 79 38 100 50.834 Female 81 | 80 4 80 87.91 Male 82 | 81 10 10 83.88 Male 83 | 82 6 70 84.686 Female 84 | 83 68 100 20.206 Female 85 | 84 8 70 87.104 Male 86 | 85 1 70 83.88 Female 87 | 86 14 65 67.76 Male 88 | 87 42 75 95.97 Female 89 | 88 13 85 62.118 Female 90 | 89 1 30 84.686 Male 91 | 90 3 5 92.746 Male 92 | 91 5 10 84.686 Female 93 | 92 12 90 83.074 Female 94 | 93 19 70 73.402 Male 95 | 94 2 20 87.91 Female 96 | 95 19 85 71.79 Male 97 | 96 11 35 86.298 Male 98 | 
97 15 30 84.686 Female 99 | 98 23 70 75.82 Male 100 | 99 13 55 70.984 Female 101 | 100 14 75 78.238 Female 102 | 101 1 2 82.268 Male 103 | 102 9 40 79.044 Male 104 | 103 20 50 91.134 Female 105 | -------------------------------------------------------------------------------- /plots/FacebookNarcissism.dat: -------------------------------------------------------------------------------- 1 | id NPQC_R_Total Rating_Type Rating 2 | 1 31 Attractive 2 3 | 1 31 Fashionable 2 4 | 1 31 Glamourous 2 5 | 1 31 Cool 2 6 | 2 37 Attractive 2 7 | 2 37 Fashionable 2 8 | 2 37 Glamourous 2 9 | 2 37 Cool 2 10 | 5 44.47 Attractive 3 11 | 5 44.47 Fashionable 3 12 | 5 44.47 Glamourous 3 13 | 5 44.47 Cool 3 14 | 6 30 Attractive 1 15 | 6 30 Fashionable 3 16 | 6 30 Glamourous 3 17 | 6 30 Cool 3 18 | 7 28 Fashionable 3 19 | 7 28 Cool 3 20 | 7 28 Attractive 4 21 | 7 28 Glamourous 4 22 | 13 42 Attractive 5 23 | 13 42 Fashionable 5 24 | 13 42 Glamourous 5 25 | 13 42 Cool 5 26 | 14 23.47 Attractive 3 27 | 14 23.47 Fashionable 3 28 | 14 23.47 Glamourous 3 29 | 14 23.47 Cool 3 30 | 15 28 Fashionable 1 31 | 15 28 Attractive 2 32 | 15 28 Glamourous 2 33 | 15 28 Cool 2 34 | 18 44 Attractive 3 35 | 18 44 Fashionable 3 36 | 18 44 Glamourous 3 37 | 18 44 Cool 3 38 | 21 40 Cool 2 39 | 21 40 Attractive 4 40 | 21 40 Glamourous 4 41 | 21 40 Fashionable 5 42 | 22 32.46 Attractive 3 43 | 22 32.46 Fashionable 3 44 | 22 32.46 Glamourous 3 45 | 22 32.46 Cool 3 46 | 23 24 Attractive 1 47 | 23 24 Fashionable 1 48 | 23 24 Glamourous 1 49 | 23 24 Cool 5 50 | 24 38 Attractive 3 51 | 24 38 Fashionable 3 52 | 24 38 Cool 3 53 | 24 38 Glamourous 4 54 | 25 19 Attractive 3 55 | 25 19 Fashionable 3 56 | 25 19 Glamourous 3 57 | 25 19 Cool 3 58 | 27 22 Attractive 3 59 | 27 22 Fashionable 3 60 | 27 22 Glamourous 3 61 | 27 22 Cool 3 62 | 28 48 Attractive 3 63 | 28 48 Fashionable 3 64 | 28 48 Glamourous 3 65 | 28 48 Cool 3 66 | 31 22 Attractive 1 67 | 31 22 Fashionable 1 68 | 31 22 Glamourous 1 69 | 31 22 Cool 1 70 | 32 26 Fashionable 2 71 | 32 26 Glamourous 2 72 | 32 26 Cool 2 73 | 32 26 Attractive 3 74 | 33 33 Attractive 3 75 | 33 33 Fashionable 3 76 | 33 33 Glamourous 3 77 | 33 33 Cool 3 78 | 34 26 Attractive 3 79 | 34 26 Fashionable 3 80 | 34 26 Glamourous 3 81 | 34 26 Cool 3 82 | 35 22 Fashionable 2 83 | 35 22 Attractive 3 84 | 35 22 Glamourous 3 85 | 35 22 Cool 5 86 | 36 31 Attractive 3 87 | 36 31 Fashionable 3 88 | 36 31 Glamourous 3 89 | 36 31 Cool 3 90 | 37 50 Attractive 4 91 | 37 50 Glamourous 4 92 | 37 50 Cool 4 93 | 37 50 Fashionable 5 94 | 39 37 Fashionable 3 95 | 39 37 Glamourous 3 96 | 39 37 Cool 3 97 | 39 37 Attractive 4 98 | 45 30 Fashionable 2 99 | 45 30 Glamourous 2 100 | 45 30 Cool 2 101 | 45 30 Attractive 3 102 | 46 33 Fashionable 2 103 | 46 33 Glamourous 3 104 | 46 33 Attractive 4 105 | 46 33 Cool 5 106 | 48 38 Attractive 3 107 | 48 38 Fashionable 3 108 | 48 38 Cool 3 109 | 48 38 Glamourous 4 110 | 50 30 Attractive 3 111 | 50 30 Fashionable 4 112 | 50 30 Glamourous 4 113 | 50 30 Cool 4 114 | 51 45 Attractive 2 115 | 51 45 Fashionable 2 116 | 51 45 Glamourous 2 117 | 51 45 Cool 3 118 | 53 23 Attractive 1 119 | 53 23 Fashionable 1 120 | 53 23 Glamourous 1 121 | 53 23 Cool 1 122 | 54 30 Cool 2 123 | 54 30 Attractive 3 124 | 54 30 Fashionable 3 125 | 54 30 Glamourous 3 126 | 55 39 Attractive 3 127 | 55 39 Fashionable 3 128 | 55 39 Glamourous 3 129 | 55 39 Cool 3 130 | 56 21 Attractive 1 131 | 56 21 Glamourous 1 132 | 56 21 Cool 1 133 | 56 21 Fashionable 2 134 | 59 31 Attractive 3 135 | 59 31 Fashionable 3 136 | 59 31 Glamourous 3 137 | 
59 31 Cool 4 138 | 60 41 Cool 2 139 | 60 41 Attractive 3 140 | 60 41 Fashionable 3 141 | 60 41 Glamourous 3 142 | 62 38 Fashionable 1 143 | 62 38 Glamourous 1 144 | 62 38 Cool 2 145 | 62 38 Attractive 3 146 | 66 35 Fashionable 2 147 | 66 35 Glamourous 2 148 | 66 35 Attractive 3 149 | 66 35 Cool 3 150 | 67 26 Attractive 2 151 | 67 26 Fashionable 2 152 | 67 26 Glamourous 2 153 | 67 26 Cool 2 154 | 68 35 Attractive 3 155 | 68 35 Glamourous 3 156 | 68 35 Fashionable 5 157 | 68 35 Cool 5 158 | 69 48 Fashionable 2 159 | 69 48 Glamourous 3 160 | 69 48 Attractive 4 161 | 69 48 Cool 4 162 | 70 23 Glamourous 1 163 | 70 23 Attractive 2 164 | 70 23 Fashionable 2 165 | 70 23 Cool 3 166 | 71 41 Fashionable 3 167 | 71 41 Glamourous 4 168 | 71 41 Cool 4 169 | 71 41 Attractive 5 170 | 72 36 Attractive 3 171 | 72 36 Fashionable 3 172 | 72 36 Glamourous 3 173 | 72 36 Cool 3 174 | 73 29 Attractive 3 175 | 73 29 Fashionable 3 176 | 73 29 Glamourous 3 177 | 73 29 Cool 3 178 | 74 34 Attractive 2 179 | 74 34 Glamourous 2 180 | 74 34 Fashionable 3 181 | 74 34 Cool 3 182 | 75 50 Attractive 3 183 | 75 50 Glamourous 3 184 | 75 50 Fashionable 4 185 | 75 50 Cool 4 186 | 76 36 Attractive 3 187 | 76 36 Fashionable 3 188 | 76 36 Glamourous 3 189 | 76 36 Cool 4 190 | 77 27 Attractive 3 191 | 77 27 Fashionable 3 192 | 77 27 Glamourous 3 193 | 77 27 Cool 3 194 | 78 31 Attractive 3 195 | 78 31 Fashionable 3 196 | 78 31 Glamourous 3 197 | 78 31 Cool 3 198 | 79 35 Fashionable 3 199 | 79 35 Cool 3 200 | 79 35 Attractive 4 201 | 79 35 Glamourous 4 202 | 80 25 Fashionable 2 203 | 80 25 Glamourous 2 204 | 80 25 Cool 2 205 | 80 25 Attractive 3 206 | 81 23 Attractive 3 207 | 81 23 Fashionable 3 208 | 81 23 Glamourous 3 209 | 81 23 Cool 3 210 | 82 32 Attractive 1 211 | 82 32 Fashionable 1 212 | 82 32 Glamourous 1 213 | 82 32 Cool 3 214 | 83 32 Attractive 3 215 | 83 32 Fashionable 3 216 | 83 32 Glamourous 3 217 | 83 32 Cool 4 218 | 84 32 Attractive 3 219 | 84 32 Fashionable 3 220 | 84 32 Glamourous 3 221 | 84 32 Cool 3 222 | 85 25 Glamourous 2 223 | 85 25 Attractive 3 224 | 85 25 Fashionable 3 225 | 85 25 Cool 3 226 | 86 36 Cool 2 227 | 86 36 Attractive 3 228 | 86 36 Fashionable 3 229 | 86 36 Glamourous 3 230 | 87 37 Attractive 3 231 | 87 37 Fashionable 3 232 | 87 37 Glamourous 3 233 | 87 37 Cool 3 234 | 88 33 Attractive 3 235 | 88 33 Fashionable 3 236 | 88 33 Glamourous 3 237 | 88 33 Cool 3 238 | 89 19 Attractive 2 239 | 89 19 Fashionable 2 240 | 89 19 Glamourous 2 241 | 89 19 Cool 2 242 | 90 25 Fashionable 1 243 | 90 25 Glamourous 1 244 | 90 25 Cool 1 245 | 90 25 Attractive 2 246 | 91 36 Fashionable 1 247 | 91 36 Cool 1 248 | 91 36 Attractive 2 249 | 91 36 Glamourous 2 250 | 92 33 Attractive 3 251 | 92 33 Fashionable 3 252 | 92 33 Glamourous 3 253 | 92 33 Cool 3 254 | 93 29 Attractive 3 255 | 93 29 Fashionable 3 256 | 93 29 Glamourous 3 257 | 93 29 Cool 3 258 | 94 31 Attractive 1 259 | 94 31 Fashionable 1 260 | 94 31 Glamourous 1 261 | 94 31 Cool 1 262 | 98 34 Attractive 3 263 | 98 34 Fashionable 3 264 | 98 34 Glamourous 3 265 | 98 34 Cool 3 266 | 99 34 Glamourous 4 267 | 99 34 Cool 4 268 | 99 34 Attractive 5 269 | 99 34 Fashionable 5 270 | 100 26 Attractive 1 271 | 100 26 Fashionable 1 272 | 100 26 Glamourous 1 273 | 100 26 Cool 1 274 | 101 20 Attractive 1 275 | 101 20 Glamourous 1 276 | 101 20 Fashionable 2 277 | 101 20 Cool 3 278 | 102 36.41 Attractive 2 279 | 102 36.41 Fashionable 3 280 | 102 36.41 Glamourous 3 281 | 102 36.41 Cool 4 282 | 103 28.46 Attractive 5 283 | 103 28.46 Fashionable 5 284 | 103 28.46 Glamourous 5 285 | 103 
28.46 Cool 5 286 | 104 45 Fashionable 3 287 | 104 45 Glamourous 3 288 | 104 45 Attractive 4 289 | 104 45 Cool 5 290 | 105 28 Attractive 2 291 | 105 28 Glamourous 2 292 | 105 28 Fashionable 3 293 | 105 28 Cool 3 294 | 107 29 Fashionable 2 295 | 107 29 Cool 2 296 | 107 29 Attractive 3 297 | 107 29 Glamourous 3 298 | 109 36 Attractive 3 299 | 109 36 Fashionable 3 300 | 109 36 Glamourous 3 301 | 109 36 Cool 3 302 | 110 42 Fashionable 1 303 | 110 42 Glamourous 2 304 | 110 42 Attractive 3 305 | 110 42 Cool 3 306 | 111 27.44 Cool 1 307 | 111 27.44 Attractive 2 308 | 111 27.44 Fashionable 2 309 | 111 27.44 Glamourous 2 310 | 112 23 Glamourous 1 311 | 112 23 Attractive 2 312 | 112 23 Fashionable 2 313 | 112 23 Cool 2 314 | 114 40.52 Fashionable 1 315 | 114 40.52 Glamourous 1 316 | 114 40.52 Attractive 2 317 | 114 40.52 Cool 2 318 | 116 41 Attractive 3 319 | 116 41 Glamourous 3 320 | 116 41 Cool 3 321 | 116 41 Fashionable 4 322 | 117 17 Attractive 1 323 | 117 17 Fashionable 1 324 | 117 17 Glamourous 1 325 | 117 17 Cool 1 326 | 119 39 Glamourous 2 327 | 119 39 Attractive 3 328 | 119 39 Fashionable 3 329 | 119 39 Cool 3 330 | 120 24 Attractive 3 331 | 120 24 Fashionable 3 332 | 120 24 Glamourous 3 333 | 120 24 Cool 3 334 | 121 50 Attractive 3 335 | 121 50 Glamourous 3 336 | 121 50 Fashionable 4 337 | 121 50 Cool 5 338 | 123 35 Attractive 3 339 | 123 35 Fashionable 3 340 | 123 35 Glamourous 3 341 | 123 35 Cool 3 342 | 124 21 Attractive 1 343 | 124 21 Glamourous 1 344 | 124 21 Cool 1 345 | 124 21 Fashionable 2 346 | 125 28 Attractive 2 347 | 125 28 Glamourous 2 348 | 125 28 Cool 2 349 | 125 28 Fashionable 3 350 | 126 22 Attractive 3 351 | 126 22 Fashionable 3 352 | 126 22 Glamourous 3 353 | 126 22 Cool 3 354 | 127 35 Attractive 3 355 | 127 35 Fashionable 3 356 | 127 35 Glamourous 3 357 | 127 35 Cool 3 358 | 129 29 Attractive 4 359 | 129 29 Fashionable 4 360 | 129 29 Glamourous 4 361 | 129 29 Cool 4 362 | 131 42 Attractive 4 363 | 131 42 Fashionable 4 364 | 131 42 Glamourous 4 365 | 131 42 Cool 4 366 | 132 27 Cool 2 367 | 132 27 Attractive 3 368 | 132 27 Fashionable 3 369 | 132 27 Glamourous 3 370 | 133 27 Attractive 3 371 | 133 27 Fashionable 3 372 | 133 27 Glamourous 3 373 | 133 27 Cool 3 374 | 134 36 Cool 3 375 | 134 36 Attractive 4 376 | 134 36 Fashionable 5 377 | 134 36 Glamourous 5 378 | 136 40 Glamourous 2 379 | 136 40 Attractive 3 380 | 136 40 Fashionable 3 381 | 136 40 Cool 3 382 | 137 25 Cool 1 383 | 137 25 Fashionable 2 384 | 137 25 Glamourous 2 385 | 137 25 Attractive 3 386 | 138 42 Attractive 3 387 | 138 42 Fashionable 3 388 | 138 42 Glamourous 3 389 | 138 42 Cool 3 390 | 140 25 Attractive 1 391 | 140 25 Fashionable 1 392 | 140 25 Glamourous 3 393 | 140 25 Cool 5 394 | 141 33 Attractive 3 395 | 141 33 Fashionable 3 396 | 141 33 Glamourous 3 397 | 141 33 Cool 3 398 | 142 36 Attractive 5 399 | 142 36 Fashionable 5 400 | 142 36 Glamourous 5 401 | 142 36 Cool 5 402 | 143 45 Attractive 5 403 | 143 45 Fashionable 5 404 | 143 45 Glamourous 5 405 | 143 45 Cool 5 406 | 147 31 Attractive 3 407 | 147 31 Fashionable 3 408 | 147 31 Glamourous 3 409 | 147 31 Cool 3 410 | 148 23 Fashionable 1 411 | 148 23 Glamourous 1 412 | 148 23 Attractive 2 413 | 148 23 Cool 2 414 | 150 28 Fashionable 2 415 | 150 28 Glamourous 2 416 | 150 28 Cool 2 417 | 150 28 Attractive 3 418 | 152 26 Attractive 3 419 | 152 26 Fashionable 3 420 | 152 26 Glamourous 3 421 | 152 26 Cool 3 422 | 153 37.06 Fashionable 3 423 | 153 37.06 Cool 3 424 | 153 37.06 Attractive 4 425 | 153 37.06 Glamourous 4 426 | 154 38 Attractive 3 427 | 154 38 
Fashionable 3 428 | 154 38 Glamourous 3 429 | 154 38 Cool 3 430 | 157 23 Attractive 2 431 | 157 23 Fashionable 2 432 | 157 23 Glamourous 2 433 | 157 23 Cool 2 434 | 158 40 Fashionable 4 435 | 158 40 Glamourous 4 436 | 158 40 Attractive 5 437 | 158 40 Cool 5 438 | 159 41 Fashionable 2 439 | 159 41 Cool 2 440 | 159 41 Glamourous 3 441 | 159 41 Attractive 4 442 | 161 28 Attractive 1 443 | 161 28 Fashionable 1 444 | 161 28 Glamourous 3 445 | 161 28 Cool 5 446 | 162 47 Attractive 5 447 | 162 47 Fashionable 5 448 | 162 47 Glamourous 5 449 | 162 47 Cool 5 450 | 163 46 Attractive 3 451 | 163 46 Fashionable 3 452 | 163 46 Glamourous 3 453 | 163 46 Cool 3 454 | 165 30 Attractive 3 455 | 165 30 Fashionable 3 456 | 165 30 Glamourous 3 457 | 165 30 Cool 3 458 | 167 41 Attractive 3 459 | 167 41 Fashionable 3 460 | 167 41 Glamourous 3 461 | 167 41 Cool 4 462 | 168 34 Glamourous 2 463 | 168 34 Fashionable 3 464 | 168 34 Attractive 4 465 | 168 34 Cool 5 466 | 169 26 Attractive 3 467 | 169 26 Fashionable 3 468 | 169 26 Glamourous 3 469 | 169 26 Cool 3 470 | 170 35 Cool 4 471 | 170 35 Attractive 5 472 | 170 35 Fashionable 5 473 | 170 35 Glamourous 5 474 | 173 36 Fashionable 1 475 | 173 36 Glamourous 1 476 | 173 36 Attractive 2 477 | 173 36 Cool 4 478 | 174 30 Attractive 3 479 | 174 30 Fashionable 3 480 | 174 30 Glamourous 3 481 | 174 30 Cool 3 482 | 175 31 Attractive 3 483 | 175 31 Fashionable 3 484 | 175 31 Glamourous 3 485 | 175 31 Cool 3 486 | 178 38 Attractive 3 487 | 178 38 Fashionable 4 488 | 178 38 Glamourous 4 489 | 178 38 Cool 5 490 | 179 44 Attractive 3 491 | 179 44 Fashionable 3 492 | 179 44 Glamourous 3 493 | 179 44 Cool 3 494 | 180 24.52 Attractive 3 495 | 180 24.52 Fashionable 3 496 | 180 24.52 Glamourous 3 497 | 180 24.52 Cool 3 498 | 183 36 Attractive 3 499 | 183 36 Fashionable 3 500 | 183 36 Glamourous 3 501 | 183 36 Cool 3 502 | 184 36 Attractive 1 503 | 184 36 Fashionable 1 504 | 184 36 Glamourous 1 505 | 184 36 Cool 1 506 | 186 37 Attractive 2 507 | 186 37 Fashionable 2 508 | 186 37 Glamourous 2 509 | 186 37 Cool 2 510 | 187 52 Attractive 5 511 | 187 52 Fashionable 5 512 | 187 52 Glamourous 5 513 | 187 52 Cool 5 514 | 188 33 Attractive 3 515 | 188 33 Fashionable 3 516 | 188 33 Glamourous 3 517 | 188 33 Cool 3 518 | 189 31 Attractive 3 519 | 189 31 Glamourous 3 520 | 189 31 Fashionable 4 521 | 189 31 Cool 4 522 | 190 39 Attractive 3 523 | 190 39 Fashionable 3 524 | 190 39 Glamourous 3 525 | 190 39 Cool 3 526 | 191 27 Attractive 1 527 | 191 27 Glamourous 2 528 | 191 27 Fashionable 3 529 | 191 27 Cool 3 530 | 192 35 Attractive 2 531 | 192 35 Cool 2 532 | 192 35 Fashionable 3 533 | 192 35 Glamourous 3 534 | 193 33 Attractive 3 535 | 193 33 Fashionable 3 536 | 193 33 Glamourous 3 537 | 193 33 Cool 3 538 | 194 29 Attractive 2 539 | 194 29 Fashionable 2 540 | 194 29 Glamourous 2 541 | 194 29 Cool 2 542 | 195 38 Attractive 3 543 | 195 38 Fashionable 3 544 | 195 38 Glamourous 3 545 | 195 38 Cool 3 546 | 196 34 Fashionable 2 547 | 196 34 Glamourous 2 548 | 196 34 Attractive 3 549 | 196 34 Cool 3 550 | 198 41 Attractive 4 551 | 198 41 Fashionable 4 552 | 198 41 Glamourous 4 553 | 198 41 Cool 4 554 | 199 38 Attractive 1 555 | 199 38 Fashionable 1 556 | 199 38 Glamourous 1 557 | 199 38 Cool 1 558 | 200 32 Fashionable 3 559 | 200 32 Glamourous 3 560 | 200 32 Cool 3 561 | 200 32 Attractive 5 562 | 201 21 Attractive 3 563 | 201 21 Fashionable 3 564 | 201 21 Glamourous 3 565 | 201 21 Cool 3 566 | 202 37 Attractive 1 567 | 202 37 Fashionable 1 568 | 202 37 Glamourous 1 569 | 202 37 Cool 1 570 | 203 36 
Attractive 3 571 | 203 36 Fashionable 3 572 | 203 36 Glamourous 3 573 | 203 36 Cool 3 574 | 205 45 Attractive 3 575 | 205 45 Fashionable 3 576 | 205 45 Glamourous 3 577 | 205 45 Cool 3 578 | 207 19 Attractive 3 579 | 207 19 Fashionable 3 580 | 207 19 Glamourous 3 581 | 207 19 Cool 3 582 | 208 31 Attractive 4 583 | 208 31 Fashionable 4 584 | 208 31 Glamourous 4 585 | 208 31 Cool 4 586 | 209 35 Attractive 2 587 | 209 35 Fashionable 2 588 | 209 35 Glamourous 2 589 | 209 35 Cool 5 590 | 210 23 Attractive 2 591 | 210 23 Fashionable 2 592 | 210 23 Glamourous 2 593 | 210 23 Cool 3 594 | 211 31.06 Fashionable 2 595 | 211 31.06 Glamourous 2 596 | 211 31.06 Cool 2 597 | 211 31.06 Attractive 3 598 | 214 38 Glamourous 4 599 | 214 38 Attractive 5 600 | 214 38 Fashionable 5 601 | 214 38 Cool 5 602 | 215 30 Attractive 2 603 | 215 30 Fashionable 2 604 | 215 30 Glamourous 2 605 | 215 30 Cool 5 606 | 217 39 Attractive 3 607 | 217 39 Fashionable 3 608 | 217 39 Glamourous 3 609 | 217 39 Cool 3 610 | 218 34 Attractive 1 611 | 218 34 Fashionable 1 612 | 218 34 Glamourous 1 613 | 218 34 Cool 1 614 | 220 38 Attractive 3 615 | 220 38 Fashionable 3 616 | 220 38 Glamourous 3 617 | 220 38 Cool 3 618 | 221 40 Attractive 3 619 | 221 40 Glamourous 3 620 | 221 40 Cool 3 621 | 221 40 Fashionable 4 622 | 222 27 Attractive 2 623 | 222 27 Fashionable 2 624 | 222 27 Glamourous 2 625 | 222 27 Cool 3 626 | 224 33 Fashionable 2 627 | 224 33 Cool 2 628 | 224 33 Attractive 3 629 | 224 33 Glamourous 3 630 | 226 39.47 Attractive 3 631 | 226 39.47 Fashionable 3 632 | 226 39.47 Glamourous 3 633 | 226 39.47 Cool 3 634 | 227 31 Attractive 2 635 | 227 31 Fashionable 2 636 | 227 31 Glamourous 2 637 | 227 31 Cool 2 638 | 228 24 Attractive 3 639 | 228 24 Fashionable 3 640 | 228 24 Glamourous 3 641 | 228 24 Cool 3 642 | 229 49 Fashionable 3 643 | 229 49 Glamourous 3 644 | 229 49 Cool 4 645 | 229 49 Attractive 5 646 | 230 30 Cool 2 647 | 230 30 Glamourous 3 648 | 230 30 Attractive 5 649 | 230 30 Fashionable 5 650 | 231 34 Attractive 4 651 | 231 34 Fashionable 4 652 | 231 34 Glamourous 4 653 | 231 34 Cool 4 654 | 232 32 Attractive 1 655 | 232 32 Fashionable 1 656 | 232 32 Glamourous 1 657 | 232 32 Cool 1 658 | 234 44 Attractive 3 659 | 234 44 Fashionable 3 660 | 234 44 Glamourous 3 661 | 234 44 Cool 3 662 | 235 35 Attractive 2 663 | 235 35 Fashionable 2 664 | 235 35 Glamourous 2 665 | 235 35 Cool 4 666 | 236 24 Attractive 3 667 | 236 24 Fashionable 3 668 | 236 24 Glamourous 3 669 | 236 24 Cool 3 670 | 237 32 Fashionable 2 671 | 237 32 Glamourous 2 672 | 237 32 Cool 2 673 | 237 32 Attractive 3 674 | 238 45 Attractive 5 675 | 238 45 Fashionable 5 676 | 238 45 Glamourous 5 677 | 238 45 Cool 5 678 | 240 14 Attractive 2 679 | 240 14 Fashionable 2 680 | 240 14 Glamourous 2 681 | 240 14 Cool 2 682 | 242 36 Fashionable 2 683 | 242 36 Glamourous 2 684 | 242 36 Attractive 3 685 | 242 36 Cool 4 686 | 243 41 Attractive 3 687 | 243 41 Fashionable 3 688 | 243 41 Glamourous 3 689 | 243 41 Cool 3 690 | 245 31 Attractive 3 691 | 245 31 Glamourous 3 692 | 245 31 Fashionable 4 693 | 245 31 Cool 4 694 | 247 26 Attractive 2 695 | 247 26 Fashionable 2 696 | 247 26 Glamourous 2 697 | 247 26 Cool 2 698 | 248 47 Attractive 3 699 | 248 47 Fashionable 3 700 | 248 47 Glamourous 3 701 | 248 47 Cool 3 702 | 250 29 Attractive 3 703 | 250 29 Fashionable 3 704 | 250 29 Glamourous 3 705 | 250 29 Cool 3 706 | 251 22 Attractive 3 707 | 251 22 Fashionable 3 708 | 251 22 Glamourous 3 709 | 251 22 Cool 3 710 | 255 25 Attractive 2 711 | 255 25 Fashionable 2 712 | 255 25 Glamourous 2 
713 | 255 25 Cool 2 714 | 256 23 Attractive 2 715 | 256 23 Fashionable 2 716 | 256 23 Glamourous 2 717 | 256 23 Cool 2 718 | 257 22 Glamourous 2 719 | 257 22 Cool 2 720 | 257 22 Attractive 3 721 | 257 22 Fashionable 3 722 | 258 51 Glamourous 3 723 | 258 51 Attractive 4 724 | 258 51 Fashionable 5 725 | 258 51 Cool 5 726 | 260 30 Cool 3 727 | 260 30 Fashionable 4 728 | 260 30 Glamourous 4 729 | 260 30 Attractive 5 730 | 264 26 Attractive 2 731 | 264 26 Fashionable 2 732 | 264 26 Glamourous 3 733 | 264 26 Cool 3 734 | 265 34 Attractive 2 735 | 265 34 Fashionable 2 736 | 265 34 Glamourous 2 737 | 265 34 Cool 2 738 | 266 41.39 Attractive 4 739 | 266 41.39 Fashionable 4 740 | 266 41.39 Glamourous 4 741 | 266 41.39 Cool 5 742 | 267 42 Attractive 3 743 | 267 42 Fashionable 3 744 | 267 42 Glamourous 3 745 | 267 42 Cool 4 746 | 268 33 Fashionable 2 747 | 268 33 Glamourous 3 748 | 268 33 Cool 3 749 | 268 33 Attractive 4 750 | 269 36 Attractive 3 751 | 269 36 Fashionable 3 752 | 269 36 Glamourous 3 753 | 269 36 Cool 3 754 | 270 37 Attractive 5 755 | 270 37 Fashionable 5 756 | 270 37 Glamourous 5 757 | 270 37 Cool 5 758 | 271 30 Attractive 3 759 | 271 30 Glamourous 3 760 | 271 30 Fashionable 4 761 | 271 30 Cool 4 762 | 272 36 Attractive 3 763 | 272 36 Fashionable 3 764 | 272 36 Glamourous 3 765 | 272 36 Cool 3 766 | 273 36 Attractive 3 767 | 273 36 Fashionable 3 768 | 273 36 Glamourous 3 769 | 273 36 Cool 3 770 | 274 38 Attractive 3 771 | 274 38 Fashionable 4 772 | 274 38 Glamourous 4 773 | 274 38 Cool 5 774 | 275 42 Attractive 4 775 | 275 42 Fashionable 4 776 | 275 42 Glamourous 4 777 | 275 42 Cool 4 778 | -------------------------------------------------------------------------------- /plots/Fertility.csv: -------------------------------------------------------------------------------- 1 | Country,fertility_rate,contraception,region Afghanistan,6.9,NA,Asia Albania,2.6,NA,Europe Algeria,3.81,52,Africa American-Samoa,NA,NA,Oceania Andorra,NA,NA,Europe Angola,6.69,NA,Africa Antigua,NA,53,Americas Argentina,2.62,NA,Americas Armenia,1.7,22,Europe Australia,1.89,76,Oceania Austria,1.42,71,Europe Azerbaijan,2.3,17,Asia Bahamas,1.95,62,Americas Bahrain,2.97,53,Asia Bangladesh,3.14,49,Asia Barbados,1.73,55,Americas Belarus,1.4,50,Europe Belgium,1.62,79,Europe Belize,3.66,47,Americas Benin,5.83,16,Africa Bhutan,5.89,19,Asia Bolivia,4.36,45,Americas Bosnia,1.4,NA,Europe Botswana,4.45,33,Africa Brazil,2.17,74,Americas Brunei,2.7,NA,Asia Bulgaria,1.45,NA,Europe Burkina-Faso,6.57,8,Africa Burundi,6.28,9,Africa Cambodia,4.5,NA,Asia Cameroon,5.3,16,Africa Canada,1.61,66,Americas Cape-Verde,3.56,NA,Africa Central-African-Rep,4.95,24,Africa Chad,5.51,NA,Africa Chile,2.44,30,Americas China,1.8,83,Asia Colombia,2.69,72,Americas Comoros,5.51,21,Africa Congo,5.87,NA,Africa Cook-Islands,3.5,38,Oceania Costa-Rica,2.95,75,Americas Croatia,1.6,NA,Europe Cuba,1.55,70,Americas Cyprus,2.31,NA,Europe Czech-Republic,1.4,69,Europe Dem-Rep-of-the-Congo,6.24,8,Africa Denmark,1.82,78,Europe Djibouti,5.39,NA,Africa Dominica,NA,50,Americas Dominican-Republic,2.8,64,Americas East-Timor,4.32,NA,Oceania Ecuador,3.1,57,Americas Egypt,3.4,47,Africa El-Salvador,3.09,53,Americas Equatorial-Guinea,5.51,NA,Africa Eritrea,5.34,5,Africa Estonia,1.3,70,Europe Ethiopia,7,4,Africa Fiji,2.76,40,Oceania Finland,1.83,NA,Europe France,1.63,77,Europe French-Guiana,NA,NA,Americas French-Polynesia,2.85,NA,Oceania Gabon,5.4,NA,Africa Gambia,5.2,12,Africa Gaza-Strip,8,NA,Asia Georgia,1.9,17,Europe Germany,1.3,75,Europe Ghana,5.28,20,Africa 
Greece,1.38,NA,Europe Grenada,NA,54,Americas Guadeloupe,2.1,NA,Americas Guam,3.04,NA,Oceania Guatemala,4.9,31,Americas Guinea,6.61,2,Africa Guinea-Bissau,5.42,NA,Africa Guyana,2.32,NA,Americas Haiti,4.6,18,Americas Honduras,4.3,47,Americas Hong-Kong,1.32,86,Asia Hungary,1.4,73,Europe Iceland,2.19,NA,Europe India,3.07,41,Asia Indonesia,2.63,55,Oceania Iran,4.77,65,Asia Iraq,5.25,14,Asia Ireland,1.8,NA,Europe Israel,2.75,NA,Asia Italy,1.19,NA,Europe Ivory-Coast,5.1,11,Africa Jamaica,2.44,62,Americas Japan,1.48,59,Oceania Jordan,5.13,35,Asia Kazakhstan,2.3,59,Asia Kenya,4.85,33,Africa Kiribati,3.8,37,Oceania Korea-Dem-Peoples-Rep,2.1,62,Asia Korea-Republic-of,1.65,79,Asia Kuwait,2.77,35,Asia Kyrgyzstan,3.21,31,Asia Laos,6.69,19,Asia Latvia,1.4,47,Europe Lebanon,2.75,NA,Asia Lesotho,4.86,23,Africa Liberia,6.33,6,Africa Libya,5.92,40,Africa Liechtenstein,1.45,NA,Europe Lithuania,1.5,59,Europe Luxembourg,1.76,NA,Europe Macau,1.6,NA,Asia Macedonia,1.9,NA,Europe Madagascar,5.65,17,Africa Malawi,6.69,22,Africa Malaysia,3.24,48,Oceania Maldives,6.8,NA,Asia Mali,6.6,7,Africa Malta,2.1,NA,Europe Marshall-Islands,4.49,27,Oceania Martinique,2,NA,Americas Mauritania,5.03,3,Africa Mauritius,2.28,75,Africa Mexico,2.75,53,Americas Micronesia,5.6,NA,Oceania Moldova,1.8,22,Europe Monaco,NA,NA,Europe Mongolia,3.27,61,Asia Morocco,3.1,50,Africa Mozambique,6.06,NA,Africa Myanmar,3.3,17,Asia Namibia,4.9,29,Africa Nepal,4.95,29,Asia Netherlands,1.55,78,Europe Netherlands-Antilles,2.1,NA,Americas New-Caledonia,2.53,25,Oceania New-Zealand,2.02,69,Oceania Nicaragua,3.85,49,Americas Niger,7.1,4,Africa Nigeria,5.97,6,Africa Northern-Mariana-Islands,5.11,NA,Oceania Norway,1.88,76,Europe Oman,7.2,9,Asia Pakistan,5.02,18,Asia Palau,3,NA,Oceania Panama,2.63,64,Americas Papua-New-Guinea,4.65,NA,Oceania Paraguay,4.17,56,Americas Peru,2.98,64,Americas Philippines,3.62,40,Oceania Poland,1.65,NA,Europe Portugal,1.48,66,Europe Puerto-Rico,2.1,64,Americas Qatar,3.77,32,Asia Reunion,2.1,67,Africa Romania,1.4,57,Europe Russia,1.35,32,Europe Rwanda,6,21,Africa Saint-Kitts,2.63,41,Americas Saint-Lucia,3.82,47,Americas Samoa,3.8,34,Oceania San-Marino,NA,NA,Europe Sao-Tome,NA,NA,Africa Saudi-Arabia,5.9,NA,Asia Senegal,5.62,13,Africa Seychelles,2.59,NA,Africa Sierra-Leone,6.06,NA,Africa Singapore,1.79,74,Oceania Slovakia,1.5,74,Europe Slovenia,1.3,NA,Europe Solomon-Islands,4.98,NA,Oceania Somalia,7,NA,Africa South-Africa,3.81,50,Africa Spain,1.22,59,Europe Sri-Lanka,2.1,66,Asia St-Vincent,3.86,58,Americas Sudan,4.61,8,Africa Suriname,2.39,NA,Americas Swaziland,4.46,20,Africa Sweden,1.8,78,Europe Switzerland,1.46,71,Europe Syria,4,36,Asia Tajikistan,3.93,21,Asia Tanzania,5.48,18,Africa Thailand,1.74,74,Asia Togo,6.08,12,Africa Tonga,4.02,74,Oceania Trinidad-and-Tobago,2.1,53,Americas Tunisia,2.92,60,Africa Turkey,2.5,63,Asia Turkmenistan,3.58,20,Asia Tuvalu,NA,NA,Oceania Uganda,7.1,15,Africa Ukraine,1.38,23,Europe United-Arab-Emirates,3.46,NA,Asia United-Kingdom,1.72,82,Europe United-States,1.96,71,Americas Uruguay,2.25,NA,Americas Uzbekistan,3.48,56,Asia Vanuatu,4.36,15,Oceania Venezuela,2.98,52,Americas Viet-Nam,2.97,65,Asia Virgin-Islands,3.03,NA,Americas Western-Sahara,3.98,NA,Africa Yemen,7.6,7,Asia Yugoslavia,1.8,NA,Europe Zambia,5.49,25,Africa Zimbabwe,4.68,48,Africa -------------------------------------------------------------------------------- /plots/InternetUsers.RData: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rocket-ron/R/8fd6b08be1f5b74823ebefff77d7955597bd7681/plots/InternetUsers.RData -------------------------------------------------------------------------------- /plots/ggplot_scripts.R: -------------------------------------------------------------------------------- 1 | # Set the working directory 2 | setwd("/Users/rcordell/Documents/R/W203") 3 | 4 | # Load the ggplot2 library 5 | library(ggplot2) 6 | 7 | # Load the Facebook data into a dataframe 8 | facebookData = read.delim("FacebookNarcissism.dat", header = TRUE) 9 | 10 | # experiment with different graph elements 11 | graph <- ggplot(facebookData, aes(NPQC_R_Total, Rating)) 12 | # simple scatterplot 13 | graph + geom_point() 14 | # scatterplot with solid triangles instead of circles 15 | graph + geom_point(shape = 17) 16 | # scatterplot with squares with x's in them 17 | graph + geom_point(shape = 7) 18 | # scatterplot with big squares with x's in them 19 | graph + geom_point(shape = 7, size = 6) 20 | # scatterplot with large right-side up triangles 21 | graph + geom_point(shape = 2, size = 6) 22 | # scatterplot with large + signs 23 | graph + geom_point(shape = 3, size = 6) 24 | # scatterplot with large X's 25 | graph + geom_point(shape = 4, size = 6) 26 | # scatterplot with large diamonds 27 | graph + geom_point(shape = 5, size = 6) 28 | # scatterplot with default shape and size, but vary color by the variable "Rating_Type" 29 | # Rating_Type is a 4-level factor. A legend for the colors is shown on the right 30 | graph + geom_point(aes(color = Rating_Type)) 31 | # scatterplot with red dots 32 | graph + geom_point(color = "RED") 33 | # scatterplot that uses jitter to spread the points out so they're not on top of one another 34 | # and vary color by Rating_Type 35 | graph + geom_point(aes(color = Rating_Type), position = "jitter") 36 | # vary shape by "Rating_Type".
A legend for shapes is shown on the right 37 | graph + geom_point(aes(shape = Rating_Type), position = "jitter") 38 | # 39 | # 40 | # Let's play with a different data set, Exam Anxiety 41 | examData <- read.delim("Exam Anxiety.dat", header = TRUE) 42 | summary(examData) 43 | # scatter plot of Exam data 44 | scatter <- ggplot(examData, aes(Anxiety, Exam)) 45 | scatter + geom_point() 46 | # custom labels for the scatter plot 47 | scatter + geom_point() + labs(x = "Exam Anxiety", y = "Exam Performance %") 48 | # add a smoothed curve that follows the trend of the data 49 | scatter + geom_point() + geom_smooth() + labs(x = "Exam Anxiety", y = "Exam Performance %") 50 | # add a line that follows a linear model of the data 51 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 52 | # change the color of the line to RED 53 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED") + labs(x = "Exam Anxiety", y = "Exam Performance %") 54 | # remove the envelope from the line 55 | scatter + geom_point() + geom_smooth(method = "lm", colour = "RED", se = F) + labs(x = "Exam Anxiety", y = "Exam Performance %") 56 | # change the color of the envelope and line to blue and make it transparent 57 | scatter + geom_point() + geom_smooth(method = "lm", alpha = 0.1, fill = "Blue") + labs(x = "Exam Anxiety", y = "Exam Performance %") 58 | # color the scatterplot points by gender - this doesn't work because an aesthetic mapping has to go inside aes(), not be passed as a separate argument to ggplot() 59 | scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 60 | scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 61 | # scatter + geom_point() + geom_smooth(method = "lm") + labs(x = "Exam Anxiety", y = "Exam Performance %") 62 | # scatter <- ggplot(examData, aes(Anxiety, Exam), color = Gender) 63 | 64 | scatter + geom_point() 65 | scatter + geom_point() + geom_smooth(method = "lm") 66 | scatter + geom_point() + geom_smooth(method = "lm", aes(fill = Gender), alpha = 0.1) 67 | # fit a separate regression line for each Gender and color the envelope accordingly 68 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) 69 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 70 | scatter <- ggplot(examData, aes(Anxiety, Exam, color = Gender)) 71 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 72 | scatter + geom_point() + geom_smooth(method = "lm", aes(color = Gender, fill = Gender), alpha = 0.1) + labs(x = "Exam Anxiety", y = "Exam Performance %") 73 | 74 | # Now let's graph some of the Festival Hygiene data (note: the graph + ... lines that follow still use the Facebook plot object) 75 | festivalData <- read.delim("DownloadFestival.dat", header = TRUE) 76 | 77 | graph + geom_point() 78 | graph + geom_point() + geom_smooth(method = "lm") 79 | graph + geom_point() + geom_smooth(method = "lm", se = F) 80 | graph + geom_point() + geom_smooth(method = "lm", aes(color = Rating_Type)) 81 | graph + geom_point() + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) 82 | graph + geom_point(aes(color = Rating_Type)) + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) 83 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) 84 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color
= Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 85 | graph + geom_point(aes(color = Rating_Type)) + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 86 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 87 | graph + geom_smooth(aes(color = Rating_Type)) + labs(x = "Total Score", y = "Narcissism Rating") 88 | 89 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Total Score", y = "Narcissism Rating") 90 | graph + geom_point(aes(color = Rating_Type), position = "jitter") + geom_smooth(method = "lm", aes(color = Rating_Type), se = F) + labs(x = "Narcissism (NPQC)", y = "Facebook Picture Rating") 91 | 92 | # Create a histogram of festival data 93 | festivalHistogram <- ggplot(festivalData, aes(day1)) 94 | festivalHistogram + geom_histogram() 95 | 96 | # Adjust bin width 97 | festivalHistogram + geom_histogram(binwidth = 0.4) 98 | festivalHistogram + geom_histogram(binwidth = 0.4) + labs(x = "Hygiene (Day 1)", y = "Frequency") 99 | 100 | festivalHistogram <- ggplot(festivalData, aes(day1), theme(legend.position = "none")) 101 | festivalHistogram + geom_histogram(binwidth = 0.4) + labs(x = "Hygiene (Day 1)", y = "Frequency") 102 | festivalHistogram <- ggplot(festivalData, aes(day1)) + theme(legend.position = "none") 103 | festivalHistogram + geom_histogram(binwidth = 0.4) + labs(x = "Hygiene (Day 1)", y = "Frequency") 104 | festivalHistogram <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none") 105 | festivalBoxplot <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none") 106 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)") 107 | 108 | # Order the data to look for outliers 109 | festivalData <- festivalData[order(festivalData$day1),] 110 | festivalData 111 | 112 | # Use R Commander to edit the data to correct the error 113 | library(Rcmdr) 114 | 115 | festivalBoxplot <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none") 116 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)") 117 | festivalHistogram <- ggplot(festivalData, aes(day1)) + theme(legend.position = "none") 118 | festivalHistogram + geom_histogram() 119 | 120 | festivalHistogram + geom_histogram(binwidth = 0.4) 121 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)") 122 | festivalBoxplot <- ggplot(festivalData, aes(gender, day2)) + theme(legend.position = "none") 123 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 2 of Festival)") 124 | 125 | festivalBoxplot <- ggplot(festivalData, aes(gender, day3)) + theme(legend.position = "none") 126 | festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 3 of Festival)") 127 | 128 | density <- ggplot(festivalData, aes(day1)) 129 | density + geom_density() 130 | 131 | # Create the function outlierSummary as given by the book 132 | outlierSummary <- function(variable, digits = 2){ 133 | 134 | zvariable<-(variable-mean(variable, na.rm = TRUE))/sd(variable, na.rm = TRUE) 135 | 136 | outlier95<-abs(zvariable) >= 1.96 137 | outlier99<-abs(zvariable) >= 2.58 138 | outlier999<-abs(zvariable) >= 3.29 139 | 140 | ncases<-length(na.omit(zvariable)) 141 | 142 |
# Re-draw the plots after correcting the outlier
festivalBoxplot <- ggplot(festivalData, aes(gender, day1)) + theme(legend.position = "none")
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")
festivalHistogram <- ggplot(festivalData, aes(day1)) + theme(legend.position = "none")
festivalHistogram + geom_histogram()

festivalHistogram + geom_histogram(binwidth = 0.4)
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 1 of Festival)")
festivalBoxplot <- ggplot(festivalData, aes(gender, day2)) + theme(legend.position = "none")
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 2 of Festival)")

festivalBoxplot <- ggplot(festivalData, aes(gender, day3)) + theme(legend.position = "none")
festivalBoxplot + geom_boxplot() + labs(x = "Gender", y = "Hygiene (Day 3 of Festival)")

# Density plot of day 1 hygiene
density <- ggplot(festivalData, aes(day1))
density + geom_density()

# Create the function outlierSummary as given by the book
outlierSummary <- function(variable, digits = 2){

  zvariable <- (variable - mean(variable, na.rm = TRUE))/sd(variable, na.rm = TRUE)

  outlier95 <- abs(zvariable) >= 1.96
  outlier99 <- abs(zvariable) >= 2.58
  outlier999 <- abs(zvariable) >= 3.29

  ncases <- length(na.omit(zvariable))

  percent95 <- round(100*length(subset(outlier95, outlier95 == TRUE))/ncases, digits)
  percent99 <- round(100*length(subset(outlier99, outlier99 == TRUE))/ncases, digits)
  percent999 <- round(100*length(subset(outlier999, outlier999 == TRUE))/ncases, digits)

  cat("Absolute z-score greater than 1.96 = ", percent95, "%", "\n")
  cat("Absolute z-score greater than 2.58 = ", percent99, "%", "\n")
  cat("Absolute z-score greater than 3.29 = ", percent999, "%", "\n")
}

outlierSummary(festivalData$day2)

# Now let's use the ChickFlick data
chickFlick <- read.delim("ChickFlick.dat", header = TRUE)

# Bar chart of mean arousal per film, with point-range confidence intervals
bar <- ggplot(chickFlick, aes(film, arousal))
bar + stat_summary(fun.y = mean, geom = "bar", fill = "White", color = "Black")
bar + stat_summary(fun.y = mean, geom = "bar", fill = "White", color = "Black") + stat_summary(fun.data = mean_cl_normal, geom = "pointrange")
bar + stat_summary(fun.y = mean, geom = "bar", fill = "White", color = "Black") + stat_summary(fun.data = mean_cl_boot, geom = "pointrange")

# Grouped bars: fill by gender, dodge the bars, and add normal-theory error bars
bar <- ggplot(chickFlick, aes(film, arousal, fill = gender))
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2)
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2) + labs(x = "Film", y = "Mean Arousal", fill = "Gender")

# Fill by film instead
bar <- ggplot(chickFlick, aes(film, arousal, fill = film))
bar + stat_summary(fun.y = mean, geom = "bar")
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2)

# Let's see how facets work
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender)
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender) + labs(x = "Film", y = "Mean Arousal") + theme(legend.position = "none")

# Change the colour of the bars (note: the value names must match the levels of
# the fill aesthetic, which here is film, not gender)
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender) + labs(x = "Film", y = "Mean Arousal") + theme(legend.position = "none") + scale_fill_manual(values = c("Female" = "Blue", "Male" = "Green"))

# Remove the legend
bar + stat_summary(fun.y = mean, geom = "bar") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + facet_wrap( ~ gender) + labs(x = "Film", y = "Mean Arousal") + theme(legend.position = "none")

bar <- ggplot(chickFlick, aes(film, arousal, fill = gender))
# scale_fill_manual() passed as an argument to ggplot() is ignored; add it with + instead
bar <- ggplot(chickFlick, aes(film, arousal, fill = gender), scale_fill_manual("Gender", values = c("Female" = "Blue", "Male" = "Green")))
bar + stat_summary(fun.y = mean, geom = "bar")
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge")
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + scale_fill_manual("Gender", values = c("Female" = "Blue", "Male" = "Green"))
bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633"))
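# --- Added note (not part of the original notes) ---------------------------------
# mean_cl_normal and mean_cl_boot are wrappers that ggplot2 provides around
# functions from the Hmisc package, so Hmisc must be installed for those
# stat_summary() calls to work. Recent ggplot2 releases also renamed the fun.y
# argument to fun (the old spelling still runs, with a deprecation warning).
# A minimal restatement of the grouped bar chart with those points in mind:
# install.packages("Hmisc")   # uncomment if Hmisc is not installed
ggplot(chickFlick, aes(film, arousal, fill = gender)) +
  stat_summary(fun = mean, geom = "bar", position = "dodge") +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar",
               position = position_dodge(width = 0.90), width = 0.2) +
  labs(x = "Film", y = "Mean Arousal", fill = "Gender")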
"dodge") + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633")) 184 | 185 | bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", width = 0.2) + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633")) 186 | bar + stat_summary(fun.y = mean, geom = "bar", position = "dodge") + stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.90)) + scale_fill_manual("Gender", values = c("Female" = "#3366FF", "Male" = "#336633")) 187 | 188 | # From Section 4 class - working with z-values and normal distrubutions 189 | # Population mean = 10,000, Standard error = 1000/sqrt(10) where 10 = sample size and 1000 is population standard deviation, and 10,300 is the sample mean we're testing 190 | # This will give P(sample mean < 10,300) 191 | pnorm(10300,10000,316.22) 192 | [1] 0.8286151 193 | 194 | # This will give P(sample mean > 10,300) 195 | 1 - pnorm(10300,10000,316.22) 196 | 197 | # Checking the Standard Error value 198 | 1000/sqrt(10) 199 | 200 | # Now check to see what is probability of sample mean between 10,500 and 9,500 201 | # First find P(sample mean < 10500) 202 | pnorm(10500, 10000, 316.22) 203 | 204 | # Hmmmm - it's going to be the same for 9,500 - so if we take the probability of the two tails, so to speak 205 | 2 * (1 - pnorm(10500, 10000, 316.22)) 206 | 207 | # I wonder if it is mor accurate to put in the SE computation 208 | 2 * (1 - pnorm(10500, 10000, 1000/sqrt(10))) 209 | 210 | # Actually we want 1 - P 211 | 1 - 2 * (1 - pnorm(10500, 10000, 1000/sqrt(10))) 212 | # Here's a better way to do it using the z-values (need to look at the pnorm function in more detail) 213 | pnorm(-1.58) - pnorm(1.58) 214 | 215 | # And another way 216 | 1 - 2 * pnorm(1.58, lower.tail = FALSE) 217 | --------------------------------------------------------------------------------