├── Edition1
│   ├── CorrectedOdds_Ed1.pdf
│   ├── ErrataEdition1.pdf
│   └── README.md
├── Edition2
│   ├── Chapters
│   │   ├── Table3.1.pdf
│   │   └── Table5.1.pdf
│   ├── Data
│   │   ├── Alelager.csv
│   │   ├── Bangladesh.csv
│   │   ├── Beerwings.csv
│   │   ├── BookPrices.csv
│   │   ├── Bushmeat.csv
│   │   ├── Cereals.csv
│   │   ├── Challenger.csv
│   │   ├── ChiMarathonMen.csv
│   │   ├── Cuckoos.csv
│   │   ├── Diving2017.csv
│   │   ├── Fatalities.csv
│   │   ├── FishMercury.csv
│   │   ├── FlightDelays.csv
│   │   ├── GSS2002.csv
│   │   ├── Girls2004.csv
│   │   ├── Groceries.csv
│   │   ├── ILBoys.csv
│   │   ├── IceCream.csv
│   │   ├── Illiteracy.csv
│   │   ├── Lottery.csv
│   │   ├── MathAnxiety.csv
│   │   ├── MathStatsData_Ed2.zip
│   │   ├── Maunaloa.csv
│   │   ├── MnGroundwater.csv
│   │   ├── MobileAds.csv
│   │   ├── NBA1617.csv
│   │   ├── NCBirths2004.csv
│   │   ├── Nasdaq.csv
│   │   ├── Olympics2012.csv
│   │   ├── Phillies2009.csv
│   │   ├── Quakes.csv
│   │   ├── Quetzal.csv
│   │   ├── RangersTwins2016.csv
│   │   ├── Recidivism.csv
│   │   ├── Salaries.csv
│   │   ├── Service.csv
│   │   ├── Skateboard.csv
│   │   ├── Skating2010.csv
│   │   ├── Spruce.csv
│   │   ├── Starcraft.csv
│   │   ├── TV.csv
│   │   ├── TXBirths2004.csv
│   │   ├── Titanic.csv
│   │   ├── Turbine.csv
│   │   ├── Verizon.csv
│   │   ├── Volleyball2009.csv
│   │   ├── Walleye.csv
│   │   ├── Watertable.csv
│   │   └── wafers.csv
│   ├── Errata_Edition2.pdf
│   ├── R
│   │   ├── Chap02EDA.R
│   │   ├── Chap02EDA.Rmd
│   │   ├── Chap02EDA_d.Rmd
│   │   ├── Chap03Testing.R
│   │   ├── Chap03Testing.Rmd
│   │   ├── Chap03Testing_Exer.R
│   │   ├── Chap03Testing_Exer.Rmd
│   │   ├── Chap03Testing_Exer_d.Rmd
│   │   ├── Chap03Testing_d.Rmd
│   │   ├── Chap04SamplingDist.R
│   │   ├── Chap04SamplingDist.Rmd
│   │   ├── Chap04SamplingDist_Exer.R
│   │   ├── Chap04SamplingDist_Exer.Rmd
│   │   ├── Chap04SamplingDist_Exer_d.Rmd
│   │   ├── Chap04SamplingDist_d.Rmd
│   │   ├── Chap05Bootstrap.R
│   │   ├── Chap05Bootstrap.Rmd
│   │   ├── Chap05Bootstrap_Exer.R
│   │   ├── Chap05Bootstrap_Exer.Rmd
│   │   ├── Chap05Bootstrap_Exer_d.Rmd
│   │   ├── Chap05Bootstrap_d.Rmd
│   │   ├── Chap06Estimation.R
│   │   ├── Chap06Estimation.Rmd
│   │   ├── Chap06Estimation_d.Rmd
│   │   ├── Chap07MoreConfIntervals.R
│   │   ├── Chap07MoreConfIntervals.Rmd
│   │   ├── Chap07MoreConfIntervals_Exer.R
│   │   ├── Chap07MoreConfIntervals_Exer.Rmd
│   │   ├── Chap07MoreConfIntervals_Exer_d.Rmd
│   │   ├── Chap07MoreConfIntervals_d.Rmd
│   │   ├── Chap08MoreHypTests.R
│   │   ├── Chap08MoreHypTests.Rmd
│   │   ├── Chap08MoreHypTests_Exer.R
│   │   ├── Chap08MoreHypTests_Exer.Rmd
│   │   ├── Chap08MoreHypTests_Exer_d.Rmd
│   │   ├── Chap08MoreHypTests_d.Rmd
│   │   ├── Chap09Regression.R
│   │   ├── Chap09Regression.Rmd
│   │   ├── Chap09Regression_d.Rmd
│   │   ├── Chap10categorical.R
│   │   ├── Chap10categorical.Rmd
│   │   ├── Chap10categorical_d.Rmd
│   │   ├── Chap11Bayesian.R
│   │   ├── Chap11Bayesian.Rmd
│   │   ├── Chap11Bayesian_d.Rmd
│   │   ├── Chap12ANOVA.R
│   │   ├── Chap12ANOVA.Rmd
│   │   ├── Chap12ANOVA_Exer.Rmd
│   │   ├── Chap12Anova_Exer.R
│   │   ├── Chap13AddTopics.R
│   │   ├── Chap13AddTopics.Rmd
│   │   └── Chap13AddTopics_d.Rmd
│   └── README.md
├── Edition3
│   ├── Chapters
│   │   ├── c01_GSS2018Questions.pdf
│   │   └── c06_Supplement.pdf
│   ├── Data
│   │   ├── Data.zip
│   │   └── Readme.md
│   ├── Errata_Edition3.pdf
│   ├── README.md
│   └── RScripts
│       ├── MobileAds.R
│       ├── c02_RIntroEDA1.R
│       ├── c02_RIntroEDA1.Rmd
│       ├── c02_RIntroEDA1.pdf
│       ├── c02_RIntroEDA2.Rmd
│       ├── c03_PermutationTests.R
│       ├── c03_SolnExercise.R
│       ├── c04_SamplingDistributions.R
│       ├── c05_Bootstrap.R
│       ├── c06_Estimation.R
│       ├── c06_WindTurbine.R
│       ├── c07_MoreConfidenceIntervals.R
│       ├── c08_MoreHypothesisTests.R
│       ├── c09_Regression.R
│       ├── c10_CategoricalData.R
│       ├── c10_PermTestIndependence.R
│       ├── c11_Bayes.R
│       ├── c12_ANOVA.R
│       └── c13_AdditionalTopics.R
├── README.md
└── readme-MathStatsResamplingR.txt
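Note (not part of the repository listing): the datasets under Edition2/Data are plain CSVs, so they can be read straight from GitHub. A minimal sketch, assuming the CSVs are served by the same raw-URL scheme and commit hash that the PDF and zip links later in this dump use:

# Minimal sketch, not repository code: load one Edition 2 dataset from GitHub.
# The commit hash is copied from the raw links below; the CSV path is assumed
# to follow the same scheme.
base <- "https://raw.githubusercontent.com/lchihara/MathStatsResamplingR"
hash <- "605f40aca79f9dadc1465b6af8830ff1253c0dc1"
url  <- paste(base, hash, "Edition2/Data/Beerwings.csv", sep = "/")
Beerwings <- read.csv(url)  # base R, no extra packages needed
str(Beerwings)              # columns: ID, Hotwings, Beer, Gender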
/Edition1/CorrectedOdds_Ed1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition1/CorrectedOdds_Ed1.pdf
--------------------------------------------------------------------------------
/Edition1/ErrataEdition1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition1/ErrataEdition1.pdf
--------------------------------------------------------------------------------
/Edition1/README.md:
--------------------------------------------------------------------------------
# Mathematical Statistics with Resampling and R, 1st edition (2011)

This is an older edition. For the current edition, see
[https://github.com/lchihara/MathStatsResamplingR](https://github.com/lchihara/MathStatsResamplingR)

## First Edition

[Author's website](https://sites.google.com/site/chiharahesterberg/chapter-materials-Ed1)

Available on:

* [Google Books](https://books.google.com/books?id=9KRHFDKDV84C)
* [Amazon](https://www.amazon.com/Mathematical-Statistics-Resampling-Laura-Chihara/dp/1118029852/ref=sr_1_1?ie=UTF8)
--------------------------------------------------------------------------------
/Edition2/Chapters/Table3.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Chapters/Table3.1.pdf
--------------------------------------------------------------------------------
/Edition2/Chapters/Table5.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Chapters/Table5.1.pdf
--------------------------------------------------------------------------------
/Edition2/Data/Alelager.csv:
--------------------------------------------------------------------------------
ID,Type,Alcohol,Calories
1,Ale,5.5,160
2,Ale,5.4,156
3,Ale,4.85,146
4,Ale,4.5,150
5,Ale,5.2,160
6,Ale,5.3,174
7,Ale,5.3,177
8,Ale,5.2,177
9,Ale,5.77,179
10,Ale,4.94,160
11,Ale,5.6,187
12,Ale,5.6,175
13,Ale,6.77,167
14,Lager,5,145
15,Lager,5,150
16,Lager,5,153
17,Lager,4.9,153
18,Lager,4.94,163
19,Lager,4.9,175
20,Lager,4.7,146
21,Lager,4.75,160
22,Lager,4.55,142
23,Lager,4.81,169
24,Lager,4.4,156
25,Lager,5,146
26,Lager,5,147
27,Lager,4.6,138
28,Lager,4.79,153
29,Lager,5,150
30,Lager,5.8,160
31,Lager,4.9,140
--------------------------------------------------------------------------------
/Edition2/Data/Bangladesh.csv:
--------------------------------------------------------------------------------
"Arsenic","Chlorine","Cobalt"
2400,6.2,0.42
6,116,0.45
904,14.8,0.63
321,35.9,0.68
1280,18.9,0.58
151,7.8,0.35
141,56.3,0.46
1050,16,0.59
511,40.4,0.48
688,29.3,0.87
81,31.3,0.6
8,36.9,0.34
37,20.3,0.32
6,1.3,0.41
22,22.3,0.32
43,22.1,0.39
39,25.8,0.38
92,16.2,0.44
253,6.6,0.45
200,1.2,0.46
255,2,0.33
1150,16.4,0.61
1180,16.6,0.97
9,55.5,0.51
107,7.2,0.33
6,12.9,0.31
149,8.7,0.31
6,4.4,0.24
46,70.6,0.38
13,148,0.39
6,44.1,0.1
150,43.2,0.88
6,37.1,0.08
189,5.7,0.33
364,360,0.77
42,17.6,0.28
390,67.7,0.8
6,6.7,0.11
270,390,1.28
248,9.6,0.35
139,59.2,0.63
6,3.5,1.44
82,239,0.34
82,63.8,1.18
256,5.4,0.52
165,5.4,0.37
6,47.3,0.13
180,1240,1.01
86,72,0.3
6,295,0.24
38,157,0.29
262,72.1,0.18
404,23.6,1.01
8,637,0.32
85,133,0.57
98,15.6,0.22
6,68,0.11
22,1090,0.66
6,1290,0.63
6,24.7,0.08
6,74.6,0.1
15,115,0.41
103,72.1,0.25
86,96,0.22
6,324,0.23
46,155,0.22
62,64.3,0.43
43,89,0.45
6,310,0.16
6,310,0.21
55,23.9,0.43
6,1550,0.66
107,61.3,0.26
65,69.5,0.48
276,82.5,1.11
114,11.1,1.5
6,3.7,0.07
6,2.7,0.08
6,238,3.18
65,254,0.41
142,27.1,2.27
194,6.2,0.64
6,129,0.22
54,14.9,0.4
702,414,1.41
6,74,0.23
986,6.3,0.34
153,34.8,0.55
84,7.1,0.3
16,27.6,0.39
1460,9.3,0.71
306,33.9,0.67
49,16.5,0.38
36,13.5,0.59
106,7.2,1.75
6,3.4,0.45
41,127,1.57
84,16.2,0.7
278,3.6,0.42
41,1.8,0.3
123,10.4,0.39
186,56.7,0.95
80,86,0.6
162,2,0.21
398,7.6,0.56
39,5.5,2.1
57,3.8,0.63
6,18.6,0.34
6,2.6,0.65
6,51.9,0.69
18,16,0.37
129,1.8,0.72
245,3,0.59
148,11.5,0.39
28,38.7,0.6
20,79.8,0.65
6,93,1.17
52,195,0.94
6,115,0.45
6,15.8,0.25
6,4.2,0.3
15,4,0.26
73,2.6,0.26
30,8.2,0.45
6,2.6,0.33
128,14.7,0.31
45,8.7,0.29
343,7.2,0.24
109,5.1,0.26
191,3.8,0.32
160,36.6,0.76
51,11.3,0.44
35,16.4,0.73
8,17.7,0.52
11,11,0.84
15,4.8,0.57
6,11.3,0.95
6,13.8,0.39
6,10.5,0.44
6,2.2,0.14
132,7.5,0.42
8,87,0.68
10,17.1,0.34
6,9.6,0.78
6,2,0.85
195,4,0.41
27,67.8,1.06
6,4.8,0.86
24,12.7,1.51
12,4.8,0.5
161,7.5,0.47
9,78.2,1.27
171,4.8,0.38
35,42.5,0.33
742,14.9,0.57
51,18.2,0.44
6,8.7,0.33
6,8.8,0.26
6,328,0.91
6,3.2,0.2
6,7,0.4
6,3.2,0.2
6,19.1,0.25
6,13.4,0.9
6,13.6,0.45
20,6.8,0.34
212,28.5,0.41
6,1.4,0.43
6,1.2,0.31
6,6.9,0.26
6,3.2,0.28
40,70.4,0.45
10,3.4,0.42
6,6.7,0.24
6,22.7,0.84
6,67.3,0.88
6,8,0.25
6,25.1,0.3
6,8.6,0.24
6,2.4,0.13
8,4.4,0.3
67,5.4,0.37
21,11.5,0.51
17,4.6,0.22
6,34.5,0.5
6,13.9,0.09
6,2.6,0.23
6,16.9,0.39
14,4,0.27
6,16.5,0.31
6,51.5,1.19
6,13.2,0.22
6,4.1,0.27
137,3.9,0.43
136,6.2,0.2
12,7.2,0.19
27,59.6,0.34
234,3.4,0.13
6,3,0.79
1520,4.3,0.4
13,105,0.62
9,12.9,0.47
6,4.7,0.21
6,7.2,0.39
6,13.7,0.29
6,10.6,0.76
6,2.8,0.22
6,30.7,0.31
6,11.9,0.45
14,51.1,0.45
6,,0.31
8,64.3,0.63
6,11.6,0.42
88,5,0.59
0.5,2.9,0.18
0.5,5.1,0.14
0.5,3.1,0.13
18.6,14.2,0.68
5.5,1.4,0.52
61.2,43.4,0.42
0.5,18.1,1.3
103,1,0.28
1.5,4.4,0.14
10.7,3.1,0.27
313,1.7,0.64
0.5,5.1,0.75
131,27.5,0.49
157,3.1,0.24
8.9,2.7,0.24
3.9,4.8,0.29
0.5,33.1,1.88
0.5,3.4,0.86
0.5,3.8,0.69
12.1,3.8,0.39
0.5,8.2,0.37
109,12.2,0.41
0.5,2.4,0.98
0.5,223,0.46
2.7,4.4,0.51
0.5,13.3,0.31
0.5,1.9,0.53
0.5,20,0.32
16.8,16.1,0.31
29,9.6,0.16
2340,8.9,0.32
3.8,95.1,0.38
108,2.4,0.3
6.8,3.4,0.14
0.5,5.1,0.15
61.4,20.2,0.13
0.5,1.7,0.47
670,17.5,
287,140,0.58
409,426,0.43
1.2,1.8,0.05
125,16.6,0.35
202,7.7,0.74
30.3,39.8,0.18
0.5,1050,0.32
52.3,342,0.6
109,1360,0.75
80.7,5,0.76
75.6,4.9,0.26
72.9,195,0.68
64.5,892,0.92
36.4,65.7,0.29
34.5,99.5,0.65
88.6,,0.58
9.4,18,0.35
2.1,17.6,0.09
94.8,736,0.23
--------------------------------------------------------------------------------
/Edition2/Data/Beerwings.csv:
--------------------------------------------------------------------------------
ID,Hotwings,Beer,Gender
1,4,24,F
2,5,0,F
3,5,12,F
4,6,12,F
5,7,12,F
6,7,12,F
7,7,24,M
8,8,24,F
9,8,0,M
10,8,12,M
11,9,24,F
12,11,24,F
13,11,24,M
14,12,30,F
15,12,30,F
16,13,24,F
17,13,36,F
18,13,30,M
19,13,30,M
20,14,30,F
21,14,36,F
22,14,48,M
23,16,36,M
24,16,36,M
25,17,36,M
26,17,42,M
27,18,30,M
28,18,30,M
29,21,36,M
30,21,42,M
--------------------------------------------------------------------------------
/Edition2/Data/BookPrices.csv:
--------------------------------------------------------------------------------
"Subject","Area","Price"
"Biology","Math & Science",190.7
"Biology","Math & Science",160
"Biology","Math & Science",117.3
"Biology","Math & Science",115.15
"Chemistry","Math & Science",222.67
"Chemistry","Math & Science",174.95
"Chemistry","Math & Science",197.15
"Chemistry","Math & Science",196.4
"Chemistry","Math & Science",197.5
"Computer Science","Math & Science",157.8
"Computer Science","Math & Science",178.7
"Computer Science","Math & Science",77.95
"Computer Science","Math & Science",128
"Computer Science","Math & Science",138
"Economics","Social Sciences",31.95
"Economics","Social Sciences",209
"Economics","Social Sciences",104
"Economics","Social Sciences",168
"Economics","Social Sciences",168
"Economics","Social Sciences",163.35
"Economics","Social Sciences",178.7
"Educational Studies","Social Sciences",15
"Geology","Math & Science",134.4
"Mathematics","Math & Science",137.35
"Mathematics","Math & Science",222
"Mathematics","Math & Science",138.7
"Mathematics","Math & Science",138.7
"Mathematics","Math & Science",106.65
"Mathematics","Math & Science",174
"Mathematics","Math & Science",172.35
"Physics","Math & Science",149.35
"Physics","Math & Science",200
"Physics","Math & Science",192.7
"Physics","Math & Science",85.35
"Physics","Math & Science",128
"Political Science","Social Sciences",11
"Political Science","Social Sciences",26.95
"Political Science","Social Sciences",17
"Psychology","Social Sciences",138.7
"Psychology","Social Sciences",136
"Psychology","Social Sciences",139.95
"Psychology","Social Sciences",15.95
"SOAN","Social Sciences",139.33
"SOAN","Social Sciences",19.95
--------------------------------------------------------------------------------
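Aside (not part of the dump): Beerwings.csv above is the book's two-sample comparison dataset; the corresponding script is presumably Edition2/R/Chap03Testing.R, which is not reproduced in this dump. As a hedged illustration only, a permutation test for the difference in mean hot wings consumed by gender might look like this, assuming the working directory is the repository root:

# Illustrative sketch only -- not the book's code. Permutation test for the
# difference in mean hot wings consumed (men minus women) in Beerwings.csv.
Beerwings <- read.csv("Edition2/Data/Beerwings.csv")
observed <- mean(Beerwings$Hotwings[Beerwings$Gender == "M"]) -
            mean(Beerwings$Hotwings[Beerwings$Gender == "F"])
N <- 10^4 - 1                      # number of resamples
result <- numeric(N)
m <- sum(Beerwings$Gender == "M")  # group size to permute
set.seed(10)
for (i in 1:N) {
  index <- sample(nrow(Beerwings), m)          # indices relabeled as "M"
  result[i] <- mean(Beerwings$Hotwings[index]) -
               mean(Beerwings$Hotwings[-index])
}
(sum(result >= observed) + 1) / (N + 1)        # one-sided p-value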
/Edition2/Data/Bushmeat.csv:
--------------------------------------------------------------------------------
"Fish","Biomass","Year"
28.6,942.54,1970
34.7,969.77,1971
39.3,999.45,1972
32.4,987.13,1973
31.8,976.31,1974
32.8,944.07,1975
38.4,979.37,1976
33.2,997.86,1977
29.7,994.85,1978
25,936.36,1979
21.8,862.85,1980
20.8,815.67,1981
19.7,756.58,1982
20.8,725.27,1983
21.1,662.65,1984
21.3,625.97,1985
24.3,621.69,1986
27.4,589.83,1987
24.5,548.05,1988
25.2,524.88,1989
25.9,529.41,1990
23,497.37,1991
27.1,476.86,1992
23.4,453.8,1993
18.9,402.7,1994
19.6,365.25,1995
25.3,326.02,1996
22,320.12,1997
21,296.49,1998
23,228.72,1999
--------------------------------------------------------------------------------
/Edition2/Data/Cereals.csv:
--------------------------------------------------------------------------------
"ID","Age","Shelf","Sodiumgram","Proteingram"
1,"adult","bottom",0.007,0.1
2,"children","bottom",0.006666667,0.066666667
3,"children","bottom",0.004666667,0.033333333
4,"children","bottom",0.006969697,0.03030303
5,"adult","bottom",0.007,0.1
6,"children","bottom",0.006,0.033333333
7,"children","bottom",0.006129032,0.032258065
8,"children","bottom",0.00483871,0.032258065
9,"children","bottom",0.001851852,0.074074074
10,"children","middle",0.005517241,0.034482759
11,"children","middle",0.006666667,0.066666667
12,"children","middle",0.0045,0.066666667
13,"children","middle",0.004375,0.03125
14,"children","middle",0.007096774,0.064516129
15,"children","middle",0.007,0.033333333
16,"children","middle",0.006785714,0.107142857
17,"adult","middle",0.004545455,0.090909091
18,"children","middle",0.005,0.09375
19,"children","middle",0.0046875,0.09375
20,"children","middle",0.003833333,0.066666667
21,"children","middle",0.0045,0.066666667
22,"children","middle",0.006666667,0.066666667
23,"children","middle",0.006296296,0.037037037
24,"children","middle",0.007407407,0.037037037
25,"children","middle",0.004375,0.03125
26,"children","middle",0.005333333,0.033333333
27,"children","middle",0.005666667,0.033333333
28,"children","middle",0.004848485,0.060606061
29,"adult","top",0.0022,0.2
30,"children","top",0.007,0.033333333
31,"adult","top",0.0035,0.266666667
32,"adult","top",0.001792453,0.169811321
33,"adult","top",0.0045,0.1
34,"adult","top",0.0028,0.04
35,"adult","top",0.000222222,0.177777778
36,"adult","top",0.001634615,0.25
37,"adult","top",0.0028,0.14
38,"adult","top",0.005818182,0.072727273
39,"adult","top",0.002727273,0.121212121
40,"adult","top",0.0056,0.06
41,"adult","top",0,0.074074074
42,"adult","top",0,0.092592593
43,"adult","top",0.00245283,0.094339623
--------------------------------------------------------------------------------
/Edition2/Data/Challenger.csv:
--------------------------------------------------------------------------------
"Date","Temperature","Incident"
"Apr12.81",66,0
"Nov12.81",70,1
"Mar22.82",69,0
"Nov11.82",68,0
"Apr04.83",67,0
"Jun18.83",72,0
"Aug30.83",73,0
"Nov28.83",70,0
"Feb03.84",57,1
"Apr06.84",63,1
"Aug30.84",70,1
"Oct05.84",78,0
"Nov08.84",67,0
"Jan24.85",53,1
"Apr12.85",67,0
"Apr29.85",75,0
"Jun17.85",70,0
"Jul29.85",81,0 20 | "Aug27.85",76,0 21 | "Oct03.85",79,0 22 | "Oct30.85",75,1 23 | "Nov26.85",76,0 24 | "Jan12.86",58,1 25 | -------------------------------------------------------------------------------- /Edition2/Data/ChiMarathonMen.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Data/ChiMarathonMen.csv -------------------------------------------------------------------------------- /Edition2/Data/Cuckoos.csv: -------------------------------------------------------------------------------- 1 | "Eggs","Bird" 2 | 19.65,"MeadowPipit" 3 | 20.05,"MeadowPipit" 4 | 20.65,"MeadowPipit" 5 | 20.85,"MeadowPipit" 6 | 21.65,"MeadowPipit" 7 | 21.65,"MeadowPipit" 8 | 21.65,"MeadowPipit" 9 | 21.85,"MeadowPipit" 10 | 21.85,"MeadowPipit" 11 | 21.85,"MeadowPipit" 12 | 22.05,"MeadowPipit" 13 | 22.05,"MeadowPipit" 14 | 22.05,"MeadowPipit" 15 | 22.05,"MeadowPipit" 16 | 22.05,"MeadowPipit" 17 | 22.05,"MeadowPipit" 18 | 22.05,"MeadowPipit" 19 | 22.05,"MeadowPipit" 20 | 22.05,"MeadowPipit" 21 | 22.05,"MeadowPipit" 22 | 22.25,"MeadowPipit" 23 | 22.25,"MeadowPipit" 24 | 22.25,"MeadowPipit" 25 | 22.25,"MeadowPipit" 26 | 22.25,"MeadowPipit" 27 | 22.25,"MeadowPipit" 28 | 22.25,"MeadowPipit" 29 | 22.25,"MeadowPipit" 30 | 22.45,"MeadowPipit" 31 | 22.45,"MeadowPipit" 32 | 22.45,"MeadowPipit" 33 | 22.65,"MeadowPipit" 34 | 22.65,"MeadowPipit" 35 | 22.85,"MeadowPipit" 36 | 22.85,"MeadowPipit" 37 | 22.85,"MeadowPipit" 38 | 22.85,"MeadowPipit" 39 | 23.05,"MeadowPipit" 40 | 23.25,"MeadowPipit" 41 | 23.25,"MeadowPipit" 42 | 23.45,"MeadowPipit" 43 | 23.65,"MeadowPipit" 44 | 23.85,"MeadowPipit" 45 | 24.25,"MeadowPipit" 46 | 24.45,"TreePipit" 47 | 21.05,"TreePipit" 48 | 21.85,"TreePipit" 49 | 22.05,"TreePipit" 50 | 22.45,"TreePipit" 51 | 22.65,"TreePipit" 52 | 23.25,"TreePipit" 53 | 23.25,"TreePipit" 54 | 23.25,"TreePipit" 55 | 23.45,"TreePipit" 56 | 23.45,"TreePipit" 57 | 23.65,"TreePipit" 58 | 23.85,"TreePipit" 59 | 24.05,"TreePipit" 60 | 24.05,"TreePipit" 61 | 24.05,"TreePipit" 62 | 20.85,"HedgeSparrow" 63 | 21.65,"HedgeSparrow" 64 | 22.05,"HedgeSparrow" 65 | 22.85,"HedgeSparrow" 66 | 23.05,"HedgeSparrow" 67 | 23.05,"HedgeSparrow" 68 | 23.05,"HedgeSparrow" 69 | 23.05,"HedgeSparrow" 70 | 23.45,"HedgeSparrow" 71 | 23.85,"HedgeSparrow" 72 | 23.85,"HedgeSparrow" 73 | 23.85,"HedgeSparrow" 74 | 24.05,"HedgeSparrow" 75 | 25.05,"HedgeSparrow" 76 | 21.05,"Robin" 77 | 21.85,"Robin" 78 | 22.05,"Robin" 79 | 22.05,"Robin" 80 | 22.05,"Robin" 81 | 22.25,"Robin" 82 | 22.45,"Robin" 83 | 22.45,"Robin" 84 | 22.65,"Robin" 85 | 23.05,"Robin" 86 | 23.05,"Robin" 87 | 23.05,"Robin" 88 | 23.05,"Robin" 89 | 23.05,"Robin" 90 | 23.25,"Robin" 91 | 23.85,"Robin" 92 | 21.05,"PiedWagtail" 93 | 21.85,"PiedWagtail" 94 | 21.85,"PiedWagtail" 95 | 21.85,"PiedWagtail" 96 | 22.05,"PiedWagtail" 97 | 22.45,"PiedWagtail" 98 | 22.65,"PiedWagtail" 99 | 23.05,"PiedWagtail" 100 | 23.05,"PiedWagtail" 101 | 23.25,"PiedWagtail" 102 | 23.45,"PiedWagtail" 103 | 24.05,"PiedWagtail" 104 | 24.05,"PiedWagtail" 105 | 24.05,"PiedWagtail" 106 | 24.85,"PiedWagtail" 107 | 19.85,"Wren" 108 | 20.05,"Wren" 109 | 20.25,"Wren" 110 | 20.85,"Wren" 111 | 20.85,"Wren" 112 | 20.85,"Wren" 113 | 21.05,"Wren" 114 | 21.05,"Wren" 115 | 21.05,"Wren" 116 | 21.25,"Wren" 117 | 21.45,"Wren" 118 | 22.05,"Wren" 119 | 22.05,"Wren" 120 | 22.05,"Wren" 121 | 22.25,"Wren" 122 | -------------------------------------------------------------------------------- 
/Edition2/Data/Diving2017.csv:
--------------------------------------------------------------------------------
Name,Country,Semifinal,Final
CHEONG Jun Hoong,Malaysia,325.50,397.50
SI Yajie,China,382.80,396.00
REN Qian,China,367.50,391.95
KIM Mi Rae,North Korea,346.00,385.55
WU Melissa,Australia,318.70,370.20
KIM Kuk Hyang,North Korea,360.85,360.00
ITAHASHI Minami,Japan,313.70,357.85
BENFEITO Meaghan,Canada,355.15,331.40
PAMG Pandelela,Malaysia,322.75,322.40
CHAMANDY Olivia,Canada,320.55,307.15
PARRATTO Jessica,USA,322.75,302.35
MURILLO URREA Carolina,Colombia,325.75,283.35
--------------------------------------------------------------------------------
/Edition2/Data/Fatalities.csv:
--------------------------------------------------------------------------------
"ID","Alcohol","Age"
1,0,86
2,0,38
3,0,40
4,1,20
5,1,27
6,0,19
7,1,43
8,0,71
9,0,63
10,0,37
11,0,24
12,0,60
13,0,52
14,0,53
15,0,71
16,1,21
17,1,17
18,0,58
19,1,39
20,1,21
21,0,23
22,0,19
23,0,52
24,0,46
25,0,50
26,1,59
27,0,43
28,1,25
29,0,80
30,0,32
31,1,40
32,0,75
33,0,20
34,0,21
35,1,57
36,0,44
37,0,17
38,0,18
39,0,20
40,0,21
41,0,84
42,0,19
43,0,18
44,0,42
45,0,73
46,0,27
47,0,62
48,1,47
49,0,45
50,0,49
51,0,54
52,0,79
53,0,53
54,0,82
55,0,71
56,0,37
57,0,45
58,0,19
59,0,73
60,0,78
61,0,45
62,0,25
63,0,33
64,0,28
65,0,59
66,0,48
67,0,49
68,0,57
69,0,18
70,0,61
71,1,28
72,0,21
73,0,83
74,0,76
75,0,57
76,0,21
77,0,79
78,0,29
79,0,20
80,0,22
81,1,27
82,0,36
83,0,19
84,1,60
85,1,23
86,0,91
87,1,28
88,1,49
89,0,29
90,0,56
91,0,20
92,0,69
93,0,17
94,1,28
95,1,28
96,0,65
97,1,37
98,1,30
99,0,20
100,0,21
--------------------------------------------------------------------------------
/Edition2/Data/FishMercury.csv:
--------------------------------------------------------------------------------
"Mercury"
1.87
0.16
0.088
0.16
0.145
0.099
0.101
0.18
0.187
0.097
0.18
0.132
0.065
0.126
0.107
0.152
0.11
0.076
0.168
0.151
0.048
0.15
0.162
0.118
0.163
0.178
0.076
0.078
0.039
0.09
--------------------------------------------------------------------------------
/Edition2/Data/Girls2004.csv:
--------------------------------------------------------------------------------
"ID","State","MothersAge","Smoker","Weight","Gestation"
1,"WY","15-19","No",3085,40
2,"WY","35-39","No",3515,39
3,"WY","25-29","No",3775,40
4,"WY","20-24","No",3265,39
5,"WY","25-29","No",2970,40
6,"WY","20-24","No",2850,38
7,"WY","20-24","No",2737,38
8,"WY","25-29","No",3515,37
9,"WY","25-29","No",3742,39
10,"WY","35-39","No",3570,40
11,"WY","20-24","No",3834,41
12,"WY","20-24","Yes",3090,39
13,"WY","25-29","Yes",3350,40
14,"WY","30-34","No",3292,37
15,"WY","15-19","No",3317,40
16,"WY","30-34","No",2485,37
17,"WY","20-24","No",3215,39
18,"WY","20-24","No",3230,40
19,"WY","30-34","No",3345,39 21 | 20,"WY","25-29","No",3050,41 22 | 21,"WY","30-34","No",2212,37 23 | 22,"WY","35-39","No",3605,39 24 | 23,"WY","30-34","No",2722,39 25 | 24,"WY","30-34","No",2880,39 26 | 25,"WY","20-24","No",3610,39 27 | 26,"WY","30-34","No",3355,39 28 | 27,"WY","20-24","No",3995,41 29 | 28,"WY","20-24","Yes",2948,39 30 | 29,"WY","35-39","No",3345,41 31 | 30,"WY","30-34","Yes",2892,39 32 | 31,"WY","20-24","No",2466,37 33 | 32,"WY","20-24","Yes",3290,39 34 | 33,"WY","25-29","No",3310,39 35 | 34,"WY","40-44","No",3175,37 36 | 35,"WY","25-29","No",2715,38 37 | 36,"WY","25-29","No",3540,38 38 | 37,"WY","25-29","No",3402,38 39 | 38,"WY","25-29","Yes",3923,39 40 | 39,"WY","20-24","No",3204,37 41 | 40,"WY","15-19","Yes",2495,37 42 | 41,"AK","20-24","No",4337,41 43 | 42,"AK","20-24","No",2948,40 44 | 43,"AK","30-34","No",3269,39 45 | 44,"AK","20-24","No",3608,38 46 | 45,"AK","30-34","No",4016,39 47 | 46,"AK","25-29","No",2919,40 48 | 47,"AK","20-24","No",2608,37 49 | 48,"AK","40-44","No",4309,39 50 | 49,"AK","20-24","No",3288,39 51 | 50,"AK","25-29","No",3742,38 52 | 51,"AK","15-19","No",4394,41 53 | 52,"AK","20-24","No",2182,37 54 | 53,"AK","25-29","No",4592,40 55 | 54,"AK","20-24","No",3090,39 56 | 55,"AK","30-34","No",3770,40 57 | 56,"AK","20-24","No",3977,39 58 | 57,"AK","25-29","No",3153,40 59 | 58,"AK","25-29","No",3458,41 60 | 59,"AK","15-19","No",3912,38 61 | 60,"AK","20-24","Yes",2863,40 62 | 61,"AK","35-39","No",3190,39 63 | 62,"AK","25-29","Yes",3515,38 64 | 63,"AK","25-29","No",3288,39 65 | 64,"AK","15-19","No",3114,40 66 | 65,"AK","30-34","Yes",3543,41 67 | 66,"AK","20-24","No",3825,39 68 | 67,"AK","25-29","No",3458,39 69 | 68,"AK","30-34","No",3698,41 70 | 69,"AK","20-24","No",3572,39 71 | 70,"AK","30-34","Yes",2352,40 72 | 71,"AK","20-24","No",3175,40 73 | 72,"AK","25-29","No",3742,41 74 | 73,"AK","20-24","No",3997,39 75 | 74,"AK","25-29","No",2576,38 76 | 75,"AK","30-34","No",3572,40 77 | 76,"AK","35-39","No",3968,39 78 | 77,"AK","20-24","No",4564,42 79 | 78,"AK","20-24","No",4210,40 80 | 79,"AK","25-29","No",3260,38 81 | 80,"AK","20-24","No",3600,40 82 | -------------------------------------------------------------------------------- /Edition2/Data/Groceries.csv: -------------------------------------------------------------------------------- 1 | Product,Size,Target,Walmart 2 | Kellogg NutriGrain Bars,8 bars,2.50,2.78 3 | Quaker Oats Life Cereal Original ,18oz,3.19,6.01 4 | General Mills Lucky Charms,11.50z,3.19,2.98 5 | Quaker Oats Old Fashioned,18oz,2.82,2.68 6 | Nabisco Oreo Cookies,14.3oz ,2.99,2.98 7 | Nabisco Chips Ahoy,13oz,2.64,1.98 8 | Doritos Nacho Cheese Chips,10oz,3.99,2.5 9 | Cheez-it Original Baked,21oz,4.79,4.79 10 | Swiss Miss Hot Chocolate,10 count,1.49,1.28 11 | Tazo Chai Classic Latte Black Tea,32 oz ,3.49,2.98 12 | Annie's Macaroni & Cheese,6oz,1.79,1.72 13 | Rice A Roni Chicken,6.9oz,1.00,1.00 14 | Zatarain's Jambalaya Rice Mix,8oz,1.62,1.54 15 | SPAM Original Lunch Meat,12oz,2.79,2.64 16 | Campbell's Chicken Noodle Soup,10.75oz,0.99,1.58 17 | Dinty Moore Hearty Meals Beef Stew,15oz,1.99,1.98 18 | Hormel Chili with Beans,15oz,1.94,1.88 19 | Dole Pineapple Chunks,20 oz,1.59,1.47 20 | Skippy Creamy Peanut Butter,16.3oz,2.59,2.58 21 | Smucker's Strawberry Preserve,18oz,2.99,2.84 22 | Heinz Tomato Ketchup,32oz,2.99,2.88 23 | Near East Couscous Toasted Pine Nuts mix,5.6oz,2.12,1.98 24 | Barilla Angel Hair Pasta,16oz,1.42,1.38 25 | Betty Crocker Super Moist Chocolate Fudge Cake Mix,15.25oz,1.22,1.17 26 | Kraft Jet-Puffed Marshmllows,16oz,1.99,1.96 
Dunkin' Donuts Original Blend Medium Roast Ground Coffee,12oz,7.19,6.98
Dove Promises Milk Chocolate,8.87oz,3.19,3.50
Skittles,41oz,7.99,6.98
Vlasic Kosher Dill Pickle Spears,24oz,2.39,2.18
Vlasic Old Fashioned Sauerkraut,32oz,1.99,1.97
--------------------------------------------------------------------------------
/Edition2/Data/ILBoys.csv:
--------------------------------------------------------------------------------
"MothersAge","Weight"
"25-29",3005
"25-29",3686
"25-29",3714
"20-24",2807
"25-29",4054
"15-19",3884
"25-29",3005
"20-24",2920
"25-29",3236
"20-24",3345
"20-24",2948
"25-29",3345
"25-29",3997
"20-24",3025
"20-24",4026
"20-24",4423
"20-24",3487
"20-24",3232
"15-19",3005
"15-19",3300
"20-24",3575
"25-29",3742
"15-19",3232
"20-24",3572
"25-29",4167
"20-24",2870
"20-24",3374
"15-19",3515
"20-24",3232
"20-24",3600
"20-24",3025
"25-29",4139
"20-24",3232
"25-29",3345
"20-24",3837
"15-19",3430
"25-29",3544
"15-19",4030
"20-24",3600
"20-24",3770
"20-24",4394
"25-29",4253
"25-29",3374
"25-29",3487
"15-19",3629
"25-29",3289
"25-29",3535
"15-19",3680
"20-24",3090
"25-29",3997
"25-29",2892
"15-19",2552
"25-29",3487
"20-24",2410
"20-24",2920
"20-24",3260
"25-29",3260
"25-29",3657
"25-29",3515
"20-24",2438
"25-29",3856
"15-19",3317
"20-24",3165
"20-24",3572
"15-19",3884
"20-24",3544
"20-24",3608
"15-19",3487
"25-29",4564
"25-29",4054
"20-24",2336
"20-24",3119
"15-19",2778
"20-24",3741
"20-24",3119
"15-19",3317
"20-24",3260
"20-24",3742
"25-29",3629
"20-24",3033
"25-29",3390
"15-19",3374
"25-29",4335
"20-24",3090
"25-29",3520
"20-24",3657
"20-24",3920
"25-29",3430
"20-24",3742
"15-19",2878
"20-24",3912
"25-29",4082
"25-29",3119
"15-19",3204
"20-24",3430
"20-24",2975
"15-19",2990
"25-29",4200
"20-24",3656
"20-24",2948
"20-24",2948
"20-24",3459
"25-29",3657
"25-29",3930
"25-29",3232
"25-29",2892
"15-19",2580
"25-29",3505
"20-24",3232
"25-29",3345
"20-24",3430
"20-24",3657
"15-19",3459
"15-19",3657
"25-29",3058
"25-29",3771
"25-29",3317
"25-29",3317
"20-24",3033
"20-24",3090
"15-19",3827
"25-29",3175
"25-29",3289
"20-24",3771
"25-29",3317
"15-19",2792
"20-24",3130
"25-29",3175
"20-24",3033
"25-29",3175
"25-29",3459
"25-29",3997
"20-24",3260
"15-19",2955
"25-29",3232
"25-29",3289
"20-24",3175
"25-29",3313
"20-24",3771
"15-19",2000
"20-24",3255
"25-29",2920
"20-24",3572
"25-29",3232
"15-19",3147
"25-29",4026
"20-24",2977
"25-29",3380
"25-29",3033
"20-24",3459
"25-29",3750
"20-24",3600
"15-19",3374
"25-29",3375
"25-29",2778
"25-29",3686
"15-19",3430
"25-29",3714
"25-29",4253
"15-19",2540
"25-29",3827
"20-24",3544
"25-29",3250
"25-29",3317
"20-24",3827
"20-24",3912
"25-29",3289
"25-29",4204 170 | "20-24",3289 171 | "15-19",3204 172 | "15-19",3033 173 | "20-24",3062 174 | "25-29",2807 175 | "20-24",3572 176 | "20-24",3515 177 | "20-24",3487 178 | "20-24",3289 179 | "25-29",4338 180 | "20-24",3250 181 | "20-24",3515 182 | "25-29",3289 183 | "20-24",3430 184 | "20-24",3747 185 | "25-29",3714 186 | "20-24",4005 187 | "25-29",4082 188 | "25-29",3686 189 | "25-29",3515 190 | "20-24",3260 191 | "25-29",3629 192 | "25-29",3296 193 | "20-24",3147 194 | "15-19",2863 195 | "25-29",3175 196 | "25-29",4139 197 | "25-29",3062 198 | "20-24",2523 199 | "25-29",3771 200 | "25-29",3714 201 | "15-19",2905 202 | "15-19",3997 203 | "25-29",3090 204 | "25-29",3575 205 | "20-24",3941 206 | "25-29",2975 207 | "25-29",2977 208 | "15-19",3119 209 | "25-29",3175 210 | "25-29",4423 211 | "25-29",4590 212 | "25-29",3430 213 | "25-29",3657 214 | "25-29",3657 215 | "25-29",2778 216 | "20-24",3515 217 | "20-24",2655 218 | "15-19",3119 219 | "25-29",3340 220 | "15-19",2863 221 | "15-19",3232 222 | "25-29",3317 223 | "25-29",3799 224 | "20-24",3941 225 | "15-19",3175 226 | "20-24",3005 227 | "15-19",2892 228 | "25-29",3374 229 | "25-29",3374 230 | "20-24",4139 231 | "25-29",3487 232 | "25-29",3260 233 | "20-24",3090 234 | "15-19",2920 235 | "25-29",3204 236 | "15-19",3515 237 | "25-29",3260 238 | "25-29",3970 239 | "20-24",3430 240 | "25-29",3969 241 | "15-19",3033 242 | "20-24",3107 243 | -------------------------------------------------------------------------------- /Edition2/Data/IceCream.csv: -------------------------------------------------------------------------------- 1 | "Brand","VanillaCalories","VanillaFat","VanillaSugar","ChocolateCalories","ChocolateFat","ChocolateSugar" 2 | "Baskin Robbins",260,16,26,260,14,31 3 | "Ben & Jerry's",240,16,19,260,16,22 4 | "Blue Bunny",140,7,12,130,7,14 5 | "Breyers",140,7,13,140,8,16 6 | "Brigham's",190,12,17,200,12,18 7 | "Bulla",234,13.5,21.8,266,15,22.6 8 | "Carvel",240,14,21,250,13,25 9 | "Cass-Clay",130,7,11,150,7,16 10 | "Chapman's",120,6,11,120,5,12 11 | "Cold Stone",270,15.5,23,264,16.2,23.6 12 | "Culver's",222,13,19,205,10,20 13 | "Dairy Queen",140,4.5,19,150,5,17 14 | "Dove",240,15,20,290,17,27 15 | "Dreamery",260,15,24,280,12,33 16 | "Edy's Grand",140,8,13,150,8,15 17 | "Emack & Bolio's",160,9,12,170,9,13 18 | "Good Humor",120,6,12,120,6,14 19 | "Graeter's",260,16,24,260,16,24 20 | "Green and Black",194,11.6,18,227,12.8,22.7 21 | "Green's",150,8,17,140,8,15 22 | "Haagen Dazs",270,18,21,270,18,21 23 | "Hershey's",140,9,14,140,8,13 24 | "Hill Station",226,15.6,16.8,235,14.3,21.2 25 | "Kemp's",130,7,13,140,6,17 26 | "Klein's",130,8,15,140,8,14 27 | "Oberweis Dairy",307,21,23,320,21,19 28 | "Our Family",130,7,11,130,6,15 29 | "Perry's",140,8,15,140,7,15 30 | "Ronnybrook Farm",240,16,20,260,19,21 31 | "Ruggles",150,8,12,150,8,16 32 | "Sara Lee",242,15.5,21.5,234,14.4,20.9 33 | "Schwan's",140,7,12,140,7,12 34 | "Sheer Bliss",300,19,27,320,19,29 35 | "Smith's",150,8,13,150,8,13 36 | "Stonyfield Farm",240,16,20,250,17,20 37 | "Tillamook",160,9,10,170,9,13 38 | "Turkey Hill",140,8,16,150,8,19 39 | "Value Choice",130,6,12,130,6,15 40 | "Whitey's",250,14,23,250,13,25 41 | -------------------------------------------------------------------------------- /Edition2/Data/Illiteracy.csv: -------------------------------------------------------------------------------- 1 | ID,Country,Illit,Births 2 | 1,Albania,20.5,1.78 3 | 2,Algeria,39.1,2.44 4 | 3,Bahrain,15,2.34 5 | 4,Belize,5.9,2.97 6 | 5,Benin,73.5,5.6 7 | 6,Bolivia,18.5,3.65 8 | 7,Botswana,17.6,3.03 9 
8,Brazil,11.9,2.29
9,Brunei,11.5,2.38
10,Burkina Faso,83.4,5.9
11,Burma,18.1,2.23
12,Burundi,54.8,6.8
13,Cambodia,39.8,3.89
14,Cape Verde,30.9,3.53
15,Central African Republic,60.1,4.73
16,Chad,60.7,6.3
17,China,19.6,1.81
18,Colombia,7.5,2.4
19,Comoros,50.7,3.76
20,"Congo, Democratic Republic of the",44.9,6.7
21,"Congo, Republic of the",21.6,5.6
22,Cote d'Ivoire,59.1,4.7
23,Djibouti,41.6,4.74
24,Dominican Republic,15.2,2.73
25,Ecuador,9.1,2.67
26,Egypt,53.1,3.1
27,El Salvador,22.3,2.76
28,Equatorial Guinea,21.6,5.89
29,Eritrea,52.4,5.24
30,Ethiopia,64.8,5.32
31,Fiji,8.1,2.79
32,"Gambia, The",66.9,4.4
33,Ghana,32.7,4.06
34,Guatemala,36.8,4.33
35,Guinea-Bissau,72.4,7.08
36,Haiti,48.8,3.75
37,Honduras,23,3.47
38,Hong Kong,9.5,0.97
39,India,51.7,2.84
40,Indonesia,15.9,2.27
41,Iran,27.4,2.07
42,Israel,6.2,2.82
43,Jamaica,8.3,2.38
44,Jordan,13.4,3.29
45,Kenya,20.3,4.98
46,Kuwait,18.3,2.39
47,Laos,43.4,4.5
48,Lebanon,17.8,2.25
49,Lesotho,5.5,3.4
50,Liberia,59.5,6.78
51,Libya,28.1,2.85
52,Macau,8,0.88
53,Madagascar,37.5,5.04
54,Malawi,50.3,5.84
55,Malaysia,14.7,2.74
56,Mali,82,6.72
57,Malta,6.4,1.37
58,Mauritania,68.1,5.59
59,Mauritius,17.4,1.98
60,Mexico,9.8,2.11
61,Mozambique,67.3,5.3
62,Namibia,16.3,3.66
63,Nicaragua,32.2,3.08
64,Niger,90.2,7.67
65,Nigeria,39,5.5
66,Oman,32.8,3.44
67,Pakistan,69.4,4.12
68,Panama,8.1,2.62
69,Papua New Guinea,40.6,3.8
70,Paraguay,7,3.67
71,Peru,13.3,2.74
72,Portugal,8.8,1.4
73,Puerto Rico,5.5,1.8
74,Qatar,15,2.89
75,Rwanda,35.2,5.8
76,Saudi Arabia,29.3,3.83
77,Senegal,69.2,4.9
78,Singapore,10.4,1.24
79,South Africa,14.3,2.78
80,Sri Lanka,10.1,1.91
81,Sudan,49.5,4.15
82,Swaziland,19.2,3.91
83,Syria,36.1,3.24
84,Tanzania,29.4,5.2
85,Thailand,5.4,1.89
86,Togo,53.1,5.03
87,Tunisia,35.6,2.04
88,Turkey,21.4,2.19
89,United Arab Emirates,18.5,2.43
90,Venezuela,6.9,2.65
91,Vietnam,8.5,1.78
92,Yemen,69.8,5.87
93,Zambia,25.2,5.4
94,Zimbabwe,12.9,3.34
--------------------------------------------------------------------------------
/Edition2/Data/Lottery.csv:
--------------------------------------------------------------------------------
"Win"
25
30
32
16
17
23
28
1
36
10
26
15
22
7
21
8
22
14
23
5
19
31
27
15
19
35
20
20
10
35
19
38
36
12
12
16
17
2
13
31
37
36
25
23
16
32
7
38
31
31
11
1
25
11
10
6
1
15
37
6
34
5
31
13
32
36
36
24
38
34
32
9
14
20
29
29
34
13
24
39
7
35
17
18
23
15
38
23
8
30
5
20
33
17
34
35
32
21
32
19
33
19
4
9
28
1
16
36
13
36
18
26
34
36
30
20
39
38
10
23
2
13
39
26
22
18
1
7
2
20
16
8
35
8
18
35
30
17
24
4
26
39
2
32
15
27
29
10
33
13
4
20
15
2
28
24
23
21
36
5
27
9
24
11
5
31
20
16
9
30
25
31
3
10
26
13
35
10
8
31
18
8
11
37
11
4
22
39
32
22
8
39
18
20
32
21
31
27
23
26
23
29
32
8
14
1
28
31
36
29
22
26
37
36
39
9
20
35
4
10
22
36
24
5
31
20
7
3
25
24
37
33
29
4
2
1
22
23
28
6
8
4
33
20
16
34
27
34
5
22
38
23
20
20
27
27
27
9
20
7
11
7
34
31
19
11
36
35
8
30
3
30
15
11
15
12
12
11
23
22
30
6
36
33
18
24
37
29
38
20
34
21
32
5
10
33
3
12
33
24
15
36
21
11
22
17
18
10
3
4
29
11
14
34
34
18
23
20
32
17
37
2
31
15
13
6
27
37
5
2
26
5
39
16
10
26
37
27
12
14
18
18
14
35
18
8
17
32
39
10
9
18
28
24
24
16
38
6
36
23
25
11
37
12
32
1
11
21
15
14
24
28
8
38
4
21
17
28
16
4
31
16
10
1
24
21
22
22
22
19
31
10
3
6
29
5
36
38
39
24
26
6
6
37
32
24
3
15
23
38
33
1
35
28
32
34
28
33
29
39
3
27
36
2
13
28
14
39
8
31
27
25
11
1
28
11
14
18
23
33
10
18
4
1
17
13
24
12
36
9
9
26
32
14
9
23
32
11
28
11
22
36
33
12
2
30
39
24
13
5
26
38
39
3
5
39
9
35
7
23
5
29
12
26
33
11
35
38
24
3
31
28
12
26
35
36
18
6
35
5
--------------------------------------------------------------------------------
/Edition2/Data/MathStatsData_Ed2.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Data/MathStatsData_Ed2.zip
--------------------------------------------------------------------------------
/Edition2/Data/Maunaloa.csv:
--------------------------------------------------------------------------------
ID,Year,Level
1,1990,357.08
2,1991,359
3,1992,359.45
4,1993,360.07
5,1994,361.48
6,1995,363.62
7,1996,364.93
8,1997,366.47
9,1998,369.33
10,1999,370.66
11,2000,371.32
12,2001,373.77
13,2002,375.58
14,2003,378.54
15,2004,380.62
16,2005,382.45
17,2006,384.94
18,2007,386.53
19,2008,388.5
20,2009,390.18
21,2010,392.94
--------------------------------------------------------------------------------
/Edition2/Data/NBA1617.csv:
--------------------------------------------------------------------------------
Name,Position,Team,Games,Minutes,PercFG,Perc3P,Perc2P,PercFT,OffReb,DefReb,Assists,Blocks
Quincy Acy,PF,Brooklyn,32,510,42.5,43.4,41.4,75.4,18,89,18,15
Anthony Bennett,PF,Brooklyn,23,264,41.3,27.1,56.8,72.2,25,53,12,3
Bojan Bogdanovic,SF,Brooklyn,55,1482,44,35.7,50.9,87.4,23,174,90,3
Trevor Booker,PF,Brooklyn,71,1754,51.6,32.1,54.6,67.3,143,428,138,28
Spencer Dinwiddie,PG,Brooklyn,59,1334,44.4,37.6,47.8,79.2,27,137,185,23
Yogi Ferrell,PG,Brooklyn,10,151,36.7,29.6,45.5,62.5,4,8,17,2
Randy Foye,SG,Brooklyn,69,1284,36.3,33,41.8,85.7,9,146,135,9
Archie Goodwin,SG,Brooklyn,12,184,55.7,30.8,62.5,71.9,7,21,23,4
Justin Hamilton,C,Brooklyn,64,1177,45.9,30.6,59.8,75,71,191,55,43
Joe Harris,SG,Brooklyn,52,1138,42.5,38.5,48.9,71.4,16,131,54,8
Rondae Hollis-Jefferson,SF,Brooklyn,78,1761,43.4,22.4,46.3,75.1,96,356,154,44
Sean Kilpatrick,SG,Brooklyn,70,1754,41.5,34.1,46.8,84.3,22,258,157,6
Caris LeVert,SF,Brooklyn,57,1237,45,32.1,57.1,72,23,165,110,8
Jeremy Lin,PG,Brooklyn,36,883,43.8,37.2,48,81.6,11,124,184,14
Brook Lopez,C,Brooklyn,75,2222,47.4,34.6,53.6,81,121,282,176,124
K.J. McDaniels,SF,Brooklyn,20,293,45.5,28.2,56.5,82.1,9,43,9,10
Andrew Nicholson,PF,Brooklyn,10,111,38.2,18.2,47.8,100,4,23,3,0
Luis Scola,PF,Brooklyn,36,461,47,34,53.5,67.6,51,88,37,4
Isaiah Whitehead,PG,Brooklyn,73,1643,40.2,29.5,44.6,80.5,32,152,192,36
Nicolas Batum,SG,Charlotte,77,2617,40.3,33.3,45.3,85.6,46,435,456,29
Marco Belinelli,SG,Charlotte,74,1778,42.9,36,48.8,89.3,14,164,147,9
Treveon Graham,SG,Charlotte,27,189,47.5,60,40,66.7,5,17,6,1
Spencer Hawes,PF,Charlotte,35,626,47.7,29.1,54,88.2,32,114,62,26
Roy Hibbert,C,Charlotte,42,671,54.2,NA,54.2,81.3,53,97,20,44
Frank Kaminsky,C,Charlotte,75,1954,39.9,32.8,45.5,75.6,57,279,162,34
Michael Kidd-Gilchrist,SF,Charlotte,81,2349,47.7,11.1,48.3,78.4,156,409,114,77
Jeremy Lamb,SG,Charlotte,62,1143,46,28.1,53.6,85.3,30,234,75,23
Miles Plumlee,C,Charlotte,13,174,58.3,NA,58.3,75,15,27,3,4
Brian Roberts,PG,Charlotte,41,416,37.7,38.6,37.2,84.6,5,34,52,1
Ramon Sessions,PG,Charlotte,50,811,38,33.9,39.3,77.1,11,62,129,3
Kemba Walker,PG,Charlotte,79,2739,44.4,39.9,47.6,84.7,45,263,434,22
Briante Weber,PG,Charlotte,13,159,43.5,14.3,48.7,69.2,8,14,16,0
Marvin Williams,PF,Charlotte,76,2295,42.2,35,49.4,87.3,89,411,106,53
Christian Wood,PF,Charlotte,13,107,52.2,0,66.7,73.3,14,15,2,6
Cody Zeller,PF,Charlotte,62,1725,57.1,0,57.2,67.9,135,270,99,58
Chris Andersen,C,Cleveland,12,114,40.9,0,47.4,71.4,9,22,5,7
Mike Dunleavy,SF,Cleveland,23,366,40,35.1,48.5,73.7,5,42,20,2
Kay Felder,PG,Cleveland,42,386,39.2,31.8,40.4,71.4,3,38,58,7
Channing Frye,C,Cleveland,74,1398,45.8,40.9,54.6,85.1,37,253,45,37
Kyrie Irving,PG,Cleveland,72,2525,47.3,40.1,50.5,90.5,52,178,418,24
LeBron James,SF,Cleveland,74,2794,54.8,36.3,61.1,67.4,97,542,646,44
Richard Jefferson,SF,Cleveland,79,1614,44.6,33.3,58,74.1,28,175,78,10
James Jones,SF,Cleveland,48,381,47.8,47,50,65,3,34,14,10
Kyle Korver,SG,Cleveland,35,859,48.7,48.5,49.3,93.3,7,90,35,8
DeAndre Liggins,SG,Cleveland,61,752,38.2,37.8,38.5,62.2,17,84,54,14
Kevin Love,PF,Cleveland,60,1885,42.7,37.3,47.1,87.1,148,518,116,21
Jordan McRae,SG,Cleveland,37,384,38.7,35.3,40.4,79.4,6,35,19,6
Iman Shumpert,SG,Cleveland,76,1937,41.1,36,46.9,78.9,39,179,109,27
J.R. Smith,SG,Cleveland,41,1187,34.6,35.1,32.9,66.7,17,96,62,11
Tristan Thompson,C,Cleveland,78,2336,60,0,60.4,49.8,286,429,77,84
Deron Williams,PG,Cleveland,24,486,46.3,41.5,48.9,84,1,44,86,6
Derrick Williams,PF,Cleveland,25,427,50.5,40.4,60,69.2,3,54,14,2
LaMarcus Aldridge,PF,San Antonio,72,2335,47.7,41.1,48,81.2,172,351,139,88
Kyle Anderson,SG,San Antonio,72,1020,44.5,37.5,46.2,78.9,33,175,91,26
Joel Anthony,C,San Antonio,19,122,62.5,NA,62.5,62.5,8,23,3,6
Davis Bertans,PF,San Antonio,67,808,44,39.9,55.7,82.4,22,76,46,28
Dewayne Dedmon,C,San Antonio,76,1330,62.2,NA,62.2,69.9,129,367,44,61
Bryn Forbes,SG,San Antonio,36,285,36.4,32.1,41.3,83.3,2,21,23,0
Pau Gasol,C,San Antonio,64,1627,50.2,53.8,49.4,70.7,107,394,150,70
Manu Ginobili,SG,San Antonio,69,1291,39,39.2,38.7,80.4,28,129,183,16
Danny Green,SG,San Antonio,68,1807,39.2,37.9,42,84.4,31,193,124,57
Nicolas Laprovittola,PG,San Antonio,18,174,42.6,37,50,100,1,9,28,1
David Lee,PF,San Antonio,79,1477,59,NA,59,70.8,149,292,124,40
Kawhi Leonard,SF,San Antonio,74,2474,48.5,38,52.9,88,80,350,260,55
Patty Mills,PG,San Antonio,80,1754,44,41.4,47.4,82.5,24,117,280,3
Dejounte Murray,PG,San Antonio,38,322,43.1,39.1,44.1,70,6,36,48,6
Tony Parker,PG,San Antonio,63,1587,46.6,33.3,48.4,72.6,9,104,285,2
Jonathon Simmons,SG,San Antonio,78,1392,42,29.4,46.1,75,20,140,126,25
--------------------------------------------------------------------------------
/Edition2/Data/Nasdaq.csv:
--------------------------------------------------------------------------------
"Symbol","Open","Close","Volume"
"LQDT",5.75,5.8,58900
"FEUZ",43.55,43.45,5400
"PRIM",28,27.89,206600
"OMED",4.84,4.9,202100
"AGND",43.34,43.29,100400
"WEBK",27.8,27.9,2200
"MNDO",2.68,2.63,10800
"RTRX",22.39,21.8,222400
"VRNS",51,52.35,426700
"NFEC",1.03,1.02,18400
"HLG",41.36,41.75,44300
"CCD",20.51,20.44,28200
"PGLC",2.8,2.81,54100
"FHB",29.27,29.62,249500
"ZIONW",17.75,17.71,121500
"USAU",1.4,1.44,211100
"PTH",71.29,71.02,15700
"FAT",9.25,9.29,26700
"ROCK",32.9,33.05,218900
"WEB",23,22.7,242600
"GRVY",75.88,74.26,26100
"ENFC",29,29.15,8900
"CHRS",9,8.8,318100
"LUNA",2.31,2.31,453800
"RUTH",21.4,21.4,218900
"UPL",9.63,9.45,2725400
"HAYN",32.09,31.67,76900
"LIVE",14.71,14.98,7600
"CCXI",6.48,6.85,79200
"QTNT",4.87,4.78,75000
"HPT",29.98,29.91,459300
"ZUMZ",20.9,19.95,2652900
"FTGC",20.65,20.65,42900
"FLEX",18.06,18.01,4869200
"FLKS",4.23,4.18,16800
"IPCI",0.845,0.85,786600
"MRBK",18.365,18.35,3000
"VDSI",13.35,12.95,154000
"MLVF",26.3,26.7,4300
"OCLR",7.07,7.01,5764000
"FLL",3.68,3.68,115000
"CDXC",6.84,6.59,660100
"WNEB",10.6,10.65,35300
"NEON",0.83,0.81,95700
"EBAYL",26.95,27.01,26900
"DRAD",2.3,2.35,94000
"CXSE",83.28,82.72,39100
"DORM",68.11,67.1,392800
"GRFS",22.41,22.2,527200
"ACGL",94.63,94.86,633400
--------------------------------------------------------------------------------
/Edition2/Data/Olympics2012.csv:
--------------------------------------------------------------------------------
Name,Country,Age,Sex,Height,Weight,Sport
Chiara Cainero,Italy,34,F,67,165,Shooting
Ciara Michel,Great Britain,27,F,76,154,Volleyball
Claudette Mukasakindi,Rwanda,29,F,63,110,Archery
Claudia Wurzel,Italy,25,F,71,146,Rowing
Daria Korczynska,Poland,31,F,66,132,Track/Field
Elizabeth Beisel,United States of America,19,F,66,146,Swimming
Evelyn Yesenia Garcia Marroquin,El Salvador,29,F,64,119,Cycling - Road
Giulia Rambaldi,Italy,25,F,70,170,Water Polo
Holley Mangold,United States of America,22,F,68,340,Weightlifting
Joanne Morgan,Great Britain,28,F,67,132,Volleyball
Joyce Sombroek,Netherlands,21,F,70,141,Hockey
Kim Conley,United States of America,26,F,63,108,Track/Field
Liu Ying Goh,Malaysia,23,F,65,110,Badminton
Maria Alexandra Escobar Guerrero,Ecuador,32,F,62,126,Weightlifting
Maria Vasco,Spain,36,F,62,104,Track/Field
Michelle Vittese,United States of America,22,F,63,128,Hockey
Paola Croce,Italy,34,F,66,115,Volleyball
Sara Hendershot,United States of America,24,F,71,165,Rowing
Sara Winther,New Zealand,30,F,65,148,Sailing
Sheilla Castro,Brazil,29,F,74,148,Volleyball
Shereefa Lloyd,Jamaica,29,F,66,134,Track/Field
Urszula Sadkowska,Poland,28,F,76,172,Judo
Valerie Vermeersch,Belgium,26,F,69,148,Hockey
Willy Kanis,Netherlands,28,F,68,176,Cycling-Track
Xiang Wei Jasmine Ser,Singapore,21,F,61,106,Shooting
Xuerui Li,People's Republic of China,21,F,67,132,Badminton
Christopher Duenas,Guam,20,M,73,185,Swimming
Emmanuel Dyen,France,33,M,71,154,Sailing
Gregory Wathelet,Belgium,31,M,75,187,Equestrian
Hiroshi Hoketsu,Japan,71,M,66,134,Equestrian
Kazuya Kaneda,Japan,24,M,67,146,Swimming
Marco Fortes,Portugal,29,M,74,298,Track/FIeld
Mickael Gelabale,France,29,M,79,198,Basketball
Minwoo Kim,Republic of Korea,22,M,68,152,Football
Nahom Mesfin,Ethiopia,23,M,71,137,Track/Field
Ramunas Navardauskas,Lithuania,24,M,75,172,Cycling - Road
Ruslan Ismailov,Kyrgyzstan,25,M,68,132,Shooting
Timothy Kitum,Kenya,17,M,67,132,Track/Field
Tonci Stipanovic,Croatia,26,M,70,183,Sailing
Victor Minibaev,Russian Federation,21,M,68,139,Diving
Youcef Abdi,Australia,34,M,70,146,Track/Field
Yu-Cheng Chen,Taipei (Chinese Taipei),19,M,71,159,Archery
--------------------------------------------------------------------------------
/Edition2/Data/Phillies2009.csv:
--------------------------------------------------------------------------------
Date,Location,Outcome,Hits,Doubles,Homeruns,StrikeOuts
5-Apr,Home,Lose,4,2,0,6
7-Apr,Home,Lose,6,1,0,3
8-Apr,Home,Win,11,3,1,6
10-Apr,Away,Lose,7,2,1,3
11-Apr,Away,Win,15,3,1,6
12-Apr,Away,Win,13,3,2,4
13-Apr,Away,Win,10,3,3,7
16-Apr,Away,Lose,5,1,0,3
17-Apr,Home,Lose,14,3,1,5
18-Apr,Home,Lose,8,2,3,7
19-Apr,Home,Win,9,1,3,5
21-Apr,Home,Win,13,4,1,8
22-Apr,Home,Lose,8,0,1,4
23-Apr,Home,Lose,2,0,1,4
24-Apr,Away,Win,8,2,2,12
25-Apr,Away,Win,9,0,2,8
26-Apr,Away,Win,12,2,0,7
27-Apr,Home,Win,14,5,2,7
28-Apr,Home,Win,11,0,4,5
29-Apr,Home,Lose,7,0,1,8
1-May,Home,Lose,8,0,1,4
2-May,Home,Win,9,3,1,6
4-May,Away,Win,8,1,2,11
5-May,Away,Win,15,4,2,7
6-May,Away,Lose,3,2,0,11
7-May,Away,Lose,10,2,2,0
8-May,Home,Win,8,3,3,7
9-May,Home,Lose,5,2,2,8
10-May,Home,Lose,6,3,0,8
12-May,Home,Win,6,1,0,6
13-May,Home,Lose,5,2,2,9
14-May,Home,Lose,5,2,0,13
15-May,Away,Win,16,1,1,9
May 16 (1),Away,Win,14,2,3,5
May 16 (2),Away,Win,9,3,2,3
17-May,Away,Win,10,3,0,6
19-May,Away,Win,7,2,1,5
20-May,Away,Lose,4,3,1,10
21-May,Away,Win,14,5,4,11
22-May,Away,Win,14,0,4,9
23-May,Away,Lose,6,1,2,5
24-May,Away,Win,11,3,0,7
25-May,Home,Lose,6,2,2,8
26-May,Home,Win,10,5,0,4
27-May,Home,Lose,8,2,2,7
29-May,Home,Win,16,6,0,5
30-May,Home,Win,10,4,2,5
31-May,Home,Win,7,1,1,10
1-Jun,Away,Win,12,4,2,9
2-Jun,Away,Win,11,2,3,6
3-Jun,Away,Win,10,2,1,3
4-Jun,Away,Win,7,3,0,7
5-Jun,Away,Lose,11,2,0,8
6-Jun,Away,Lose,6,1,0,8
7-Jun,Away,Win,9,1,3,6
9-Jun,Away,Lose,10,1,4,4
10-Jun,Away,Win,9,0,2,3
11-Jun,Away,Win,9,2,1,9
12-Jun,Home,Lose,5,2,1,20
13-Jun,Home,Lose,13,2,3,8
14-Jun,Home,Win,14,3,1,9
16-Jun,Home,Lose,8,1,1,10
17-Jun,Home,Lose,6,0,1,12
18-Jun,Home,Lose,12,1,4,9
19-Jun,Home,Lose,6,3,0,7
20-Jun,Home,Lose,8,2,1,4
21-Jun,Home,Lose,4,1,1,4
23-Jun,Away,Win,10,2,2,7
24-Jun,Away,Lose,4,0,1,7
25-Jun,Away,Lose,8,2,0,10
26-Jun,Away,Lose,3,0,0,8
27-Jun,Away,Win,14,3,3,9
28-Jun,Away,Win,10,1,0,9
30-Jun,Away,Lose,10,2,3,6
1-Jul,Away,Lose,2,0,0,7
2-Jul,Away,Lose,8,3,0,7
3-Jul,Home,Win,11,3,1,6
4-Jul,Home,Win,9,1,0,5
5-Jul,Home,Win,3,0,2,4
6-Jul,Home,Win,21,6,4,9
7-Jul,Home,Lose,10,2,2,8
8-Jul,Home,Win,9,0,1,11
9-Jul,Home,Win,12,2,2,5
10-Jul,Home,Win,5,3,0,6
11-Jul,Home,Win,13,3,2,8
12-Jul,Home,Win,6,2,1,7
16-Jul,Away,Win,10,2,3,10
17-Jul,Away,Win,12,2,1,10
19-Jul,Away,Win,8,2,0,7
20-Jul,Home,Win,9,2,3,10
21-Jul,Home,Win,6,0,2,12
22-Jul,Home,Lose,11,0,0,8
23-Jul,Home,Win,14,4,1,8
24-Jul,Home,Lose,8,2,0,6
25-Jul,Home,Win,14,3,2,5
26-Jul,Home,Win,13,1,4,6
27-Jul,Away,Win,6,1,1,6
28-Jul,Away,Win,7,2,1,9
29-Jul,Away,Lose,7,2,0,12
30-Jul,Away,Lose,5,3,1,9
31-Jul,Away,Win,8,2,1,6
1-Aug,Away,Lose,7,0,0,8
2-Aug,Away,Lose,9,1,1,6
4-Aug,Home,Lose,9,2,1,9
5-Aug,Home,Win,11,5,3,5
6-Aug,Home,Win,7,1,1,3
7-Aug,Home,Lose,4,1,1,9
8-Aug,Home,Lose,11,3,0,6
9-Aug,Home,Lose,6,0,1,9
11-Aug,Away,Win,3,0,2,9
12-Aug,Away,Win,14,3,3,9
13-Aug,Away,Win,10,2,2,9
14-Aug,Away,Win,7,2,2,9
15-Aug,Away,Lose,7,3,1,4
16-Aug,Away,Win,7,1,2,8
18-Aug,Home,Win,12,1,2,6
19-Aug,Home,Win,13,2,4,10
20-Aug,Home,Win,12,1,3,7
21-Aug,Away,Lose,10,1,0,10
22-Aug,Away,Win,7,4,1,6
23-Aug,Away,Win,10,1,2,6
24-Aug,Away,Win,7,2,2,9
25-Aug,Away,Lose,9,5,2,8
26-Aug,Away,Win,9,4,2,6
27-Aug,Away,Lose,6,1,1,9
28-Aug,Home,Win,9,2,2,8
29-Aug,Home,Lose,11,1,1,7
30-Aug,Home,Win,8,3,1,4
1-Sep,Home,Win,5,2,0,10
2-Sep,Home,Lose,5,0,0,4
3-Sep,Home,Win,4,3,1,12
4-Sep,Away,Lose,8,1,0,9
5-Sep,Away,Lose,10,1,0,9
6-Sep,Away,Lose,9,1,3,8
7-Sep,Away,Lose,7,2,2,7
8-Sep,Away,Win,7,0,5,2
9-Sep,Away,Win,11,3,2,8
10-Sep,Away,Lose,11,1,1,2
11-Sep,Home,Win,12,6,0,6
12-Sep,Home,Lose,12,3,3,7
Sep 13 (1),Home,Win,10,2,2,5
Sep 13 (2),Home,Win,3,1,0,4
15-Sep,Home,Win,10,4,0,3
16-Sep,Home,Win,8,0,1,5
17-Sep,Home,Win,6,1,0,7
18-Sep,Away,Win,10,1,4,4
19-Sep,Away,Lose,8,1,1,8
20-Sep,Away,Win,10,5,0,9
Sep 22 (1),Away,Win,11,1,1,16
Sep 22 (2),Away,Lose,2,1,0,7
23-Sep,Away,Lose,9,3,2,6
24-Sep,Away,Win,14,2,1,8
25-Sep,Away,Lose,9,0,0,8
26-Sep,Away,Lose,8,0,2,5
27-Sep,Away,Win,14,5,1,9
28-Sep,Home,Lose,4,1,0,5
29-Sep,Home,Win,8,1,2,6
30-Sep,Home,Win,9,3,1,2 160 | 1-Oct,Home,Lose,13,3,0,8 161 | 2-Oct,Home,Lose,7,1,1,4 162 | 3-Oct,Home,Lose,6,0,2,4 163 | 4-Oct,Home,Win,12,0,1,13 164 | -------------------------------------------------------------------------------- /Edition2/Data/Quetzal.csv: -------------------------------------------------------------------------------- 1 | "Country","Nest","Snag" 2 | "Guatemala",4.62,6.15 3 | "Guatemala",18.46,24.62 4 | "Guatemala",5.23,6.15 5 | "Guatemala",9.85,12.31 6 | "Guatemala",7.69,9.23 7 | "Guatemala",9.85,15.38 8 | "Guatemala",4,5.85 9 | "Guatemala",10.77,12.31 10 | "Guatemala",13.85,16.92 11 | "Guatemala",10.77,16.92 12 | "Guatemala",24.62,29.23 13 | "Costa Rica",6.9,8.7 14 | "Costa Rica",5.6,7.7 15 | "Costa Rica",4.3,5.2 16 | "Costa Rica",8.3,9.7 17 | "Costa Rica",1.5,1.8 18 | "Costa Rica",6.2,9.6 19 | "Costa Rica",7.4,10.7 20 | "Costa Rica",4.1,7.3 21 | "Costa Rica",10.1,10.8 22 | "Costa Rica",8.4,9.7 23 | -------------------------------------------------------------------------------- /Edition2/Data/RangersTwins2016.csv: -------------------------------------------------------------------------------- 1 | Name,Team,Pos,Age,Games,AtBats,Runs,Hits,Doubles,Triples,HR,RBI,SB,CS,BB,SO,BA 2 | Robinson Chirinos,Rangers,C,32,57,147,21,33,11,0,9,20,0,1,15,44,0.224 3 | Mitch Moreland,Rangers,1B,30,147,460,49,107,21,0,22,60,1,0,35,118,0.233 4 | Rougned Odor,Rangers,2B,22,150,605,89,164,33,4,33,88,14,7,19,135,0.271 5 | Elvis Andrus,Rangers,SS,27,147,506,75,153,31,7,8,69,24,8,47,70,0.302 6 | Adrian Beltre,Rangers,3B,37,153,583,89,175,31,1,32,104,1,1,48,66,0.3 7 | Ryan Rua,Rangers,LF,26,99,240,40,62,8,1,8,22,9,0,21,76,0.258 8 | Ian Desmond,Rangers,CF,30,156,625,107,178,29,3,22,86,21,6,44,160,0.285 9 | Nomar Mazara,Rangers,RF,21,145,516,59,137,13,3,20,64,0,2,39,112,0.266 10 | Prince Fielder,Rangers,DH,32,89,326,29,69,16,0,8,44,0,0,32,63,0.212 11 | Jurickson Profar,Rangers,UT,23,90,272,35,65,6,3,5,20,2,1,30,61,0.239 12 | Carlos Beltran,Rangers,DH,39,52,193,23,54,12,0,7,29,1,0,13,31,0.28 13 | Delino DeShields,Rangers,OF,23,74,182,36,38,7,0,4,13,8,3,15,54,0.209 14 | Kurt Suzuki,Twins,C,32,106,345,34,89,24,1,8,49,0,0,18,48,0.258 15 | Joe Mauer,Twins,1B,33,134,494,68,129,22,4,11,49,2,0,79,93,0.261 16 | Brian Dozier,Twins,2B,29,155,615,104,165,35,5,42,99,18,2,61,138,0.268 17 | Eduardo Escobar,Twins,SS,27,105,352,32,83,14,2,6,37,1,3,21,72,0.236 18 | Trevor Plouffe,Twins,3B,30,84,319,35,83,13,1,12,47,1,0,19,60,0.26 19 | Robbie Grossman,Twins,LF,26,99,332,49,93,19,1,11,37,2,3,55,96,0.28 20 | Byron Buxton,Twins,CF,22,92,298,44,67,19,6,10,38,10,2,23,118,0.225 21 | Max Kepler,Twins,RF,23,113,396,52,93,20,2,17,63,6,2,42,93,0.235 22 | ByungHo Park,Twins,DH,29,62,215,28,41,9,1,12,24,1,0,21,80,0.191 23 | Miguel Sano,Twins,UT,23,116,437,57,103,22,1,25,66,1,0,54,178,0.236 24 | Eduardo Nunez,Twins,IF,29,91,371,49,110,15,1,12,47,27,6,15,58,0.296 25 | Eddie Rosario,Twins,OF,24,92,335,52,90,17,2,10,32,5,2,12,91,0.269 26 | Jorge Polanco,Twins,SS,22,69,245,24,69,15,4,4,27,4,3,17,46,0.282 27 | Danny Santana,Twins,CF,25,75,233,29,56,10,2,2,14,12,9,12,55,0.24 28 | Juan Centeno,Twins,C,26,55,176,16,46,12,1,3,25,0,0,12,38,0.261 29 | -------------------------------------------------------------------------------- /Edition2/Data/Salaries.csv: -------------------------------------------------------------------------------- 1 | League,Salary,Year 2 | National,0.55775,1985 3 | National,0.5187075,1985 4 | National,2.1,2015 5 | American,8,2015 6 | National,0.568905,1985 7 | American,0.5081,2015 8 | American,1.45015,1985 9 | 
American,1.5,2015 10 | National,0.8,2015 11 | American,0.2231,1985 12 | American,0.51,2015 13 | National,1.7848,1985 14 | National,1,2015 15 | National,1.88,2015 16 | National,1.333333,2015 17 | American,0.24541,1985 18 | American,0.501975,1985 19 | National,0.5075525,1985 20 | National,2.0079,1985 21 | American,0.66,2015 22 | American,1.675,2015 23 | National,0.523925,2015 24 | National,0.5125,2015 25 | American,0.517,2015 26 | American,1.710434077,1985 27 | National,1.776730473,1985 28 | American,0.55775,1985 29 | American,0.5109,2015 30 | American,2.95,2015 31 | American,0.5201,2015 32 | National,0.5165,2015 33 | National,2.5,2015 34 | National,0.78085,1985 35 | American,1.29398,1985 36 | American,8.5,2015 37 | American,0.3290725,1985 38 | American,1.153746033,1985 39 | National,0.5057,2015 40 | National,0.5085,2015 41 | American,1.225,2015 42 | American,1.47246,1985 43 | American,2.475,2015 44 | National,0.836625,1985 45 | American,0.26772,1985 46 | National,2.6,2015 47 | National,0.531,2015 48 | American,1.394375,1985 49 | American,2.416915923,1985 50 | National,0.4462,1985 51 | National,0.518,2015 52 | American,7.7,2015 53 | American,0.525415,2015 54 | National,0.5229,2015 55 | National,0.29003,1985 56 | American,0.301185,1985 57 | American,0.5095,2015 58 | American,0.747385,1985 59 | American,1.24936,1985 60 | American,0.603,2015 61 | American,0.5129,2015 62 | American,1.6,2015 63 | National,0.825,2015 64 | National,0.525,2015 65 | National,1.75,2015 66 | National,0.858935,1985 67 | National,6.857143,2015 68 | National,2.536647,1985 69 | American,1.015105,1985 70 | National,0.55775,1985 71 | American,0.5085,2015 72 | -------------------------------------------------------------------------------- /Edition2/Data/Service.csv: -------------------------------------------------------------------------------- 1 | ID,Times 2 | 1,1.1 3 | 2,1.4 4 | 3,0.683333333 5 | 4,0.716666666 6 | 5,0.316666666 7 | 6,0.533333334 8 | 7,0.35 9 | 8,0.7 10 | 9,1.633333333 11 | 10,0.933333334 12 | 11,0.533333334 13 | 12,0.283333333 14 | 13,0.516666666 15 | 14,0.383333334 16 | 15,1.783333333 17 | 16,1.216666667 18 | 17,0.883333333 19 | 18,0.383333334 20 | 19,1.566666667 21 | 20,0.216666667 22 | 21,0.483333333 23 | 22,1.333333334 24 | 23,0.133333333 25 | 24,0.833333334 26 | 25,0.766666667 27 | 26,0.783333333 28 | 27,0.3 29 | 28,0.683333333 30 | 29,0.566666667 31 | 30,0.683333333 32 | 31,0.866666667 33 | 32,0.7 34 | 33,0.333333333 35 | 34,0.983333334 36 | 35,0.616666667 37 | 36,0.383333333 38 | 37,0.35 39 | 38,0.283333333 40 | 39,0.566666667 41 | 40,1.766666667 42 | 41,1.6 43 | 42,0.783333334 44 | 43,0.316666666 45 | 44,0.616666667 46 | 45,0.316666666 47 | 46,0.15 48 | 47,0.516666667 49 | 48,0.7 50 | 49,0.233333333 51 | 50,0.833333333 52 | 51,0.65 53 | 52,0.916666667 54 | 53,1.333333334 55 | 54,0.25 56 | 55,0.133333333 57 | 56,0.383333333 58 | 57,2.2 59 | 58,0.35 60 | 59,0.55 61 | 60,0.933333334 62 | 61,0.4 63 | 62,0.566666666 64 | 63,0.433333334 65 | 64,1.633333334 66 | 65,1.966666666 67 | 66,0.65 68 | 67,1.133333333 69 | 68,0.5 70 | 69,0.15 71 | 70,1.05 72 | 71,0.75 73 | 72,1 74 | 73,1.133333334 75 | 74,1.15 76 | 75,0.816666667 77 | 76,0.866666667 78 | 77,0.45 79 | 78,0.15 80 | 79,0.383333333 81 | 80,0.55 82 | 81,0.683333333 83 | 82,0.966666667 84 | 83,0.533333333 85 | 84,0.35 86 | 85,0.666666667 87 | 86,0.633333334 88 | 87,0.466666667 89 | 88,0.466666667 90 | 89,0.783333334 91 | 90,0.6 92 | 91,1.483333333 93 | 92,0.733333334 94 | 93,1.4 95 | 94,1.033333333 96 | 95,0.683333333 97 | 96,0.1 98 | 97,0.45 99 | 
98,0.416666667 100 | 99,0.516666666 101 | 100,1.15 102 | 101,0.466666667 103 | 102,0.183333334 104 | 103,0.433333333 105 | 104,0.3 106 | 105,0.666666666 107 | 106,1.15 108 | 107,0.55 109 | 108,0.733333334 110 | 109,0.9 111 | 110,0.95 112 | 111,0.583333333 113 | 112,1.3 114 | 113,0.316666666 115 | 114,0.733333334 116 | 115,0.433333334 117 | 116,0.283333333 118 | 117,0.316666667 119 | 118,0.416666666 120 | 119,0.933333334 121 | 120,0.8 122 | 121,0.45 123 | 122,0.6 124 | 123,0.25 125 | 124,1.783333334 126 | 125,0.5 127 | 126,0.5 128 | 127,0.233333333 129 | 128,0.216666667 130 | 129,1.033333333 131 | 130,0.516666667 132 | 131,0.7 133 | 132,0.216666667 134 | 133,0.833333333 135 | 134,1.183333333 136 | 135,1.116666667 137 | 136,0.433333333 138 | 137,0.283333334 139 | 138,0.35 140 | 139,0.716666667 141 | 140,0.4 142 | 141,0.333333334 143 | 142,0.216666667 144 | 143,0.433333333 145 | 144,0.3 146 | 145,0.35 147 | 146,1.666666666 148 | 147,0.983333334 149 | 148,0.316666667 150 | 149,1.416666667 151 | 150,1.033333333 152 | 151,1.116666667 153 | 152,0.55 154 | 153,0.466666667 155 | 154,0.566666666 156 | 155,0.55 157 | 156,0.983333333 158 | 157,0.8 159 | 158,0.533333333 160 | 159,0.3 161 | 160,0.3 162 | 161,1.183333333 163 | 162,0.166666666 164 | 163,0.366666667 165 | 164,0.366666667 166 | 165,0.15 167 | 166,0.183333333 168 | 167,0.283333334 169 | 168,0.633333334 170 | 169,0.566666667 171 | 170,0.45 172 | 171,0.983333333 173 | 172,1.433333333 174 | 173,1.816666667 175 | 174,1.183333333 176 | -------------------------------------------------------------------------------- /Edition2/Data/Skateboard.csv: -------------------------------------------------------------------------------- 1 | "Age","Experimenter","Testosterone" 2 | 18,"Female",206 3 | 18,"Female",197 4 | 18,"Female",135.8 5 | 18,"Female",170.2 6 | 19,"Female",107.3 7 | 19,"Female",351.6 8 | 18,"Female",282.6 9 | 18,"Female",257 10 | 18,"Female",117.8 11 | 19,"Female",342.4 12 | 34,"Female",129.6 13 | 26,"Female",208.6 14 | 19,"Female",253.8 15 | 22,"Female",213.6 16 | 25,"Female",344 17 | 25,"Female",127.7 18 | 29,"Female",351.6 19 | 19,"Female",179.6 20 | 21,"Female",469.6 21 | 19,"Female",411.4 22 | 25,"Female",267.2 23 | 20,"Female",267.4 24 | 19,"Female",308.8 25 | 33,"Female",568.2 26 | 23,"Female",293.8 27 | 27,"Female",495 28 | 20,"Female",408 29 | 21,"Female",644.8 30 | 18,"Female",206.6 31 | 18,"Female",472 32 | 18,"Female",369.8 33 | 23,"Female",286.4 34 | 22,"Female",246.4 35 | 18,"Female",232 36 | 18,"Female",126.4 37 | 32,"Female",106.2 38 | 22,"Female",160.6 39 | 19,"Female",146.8 40 | 21,"Female",361.2 41 | 29,"Female",196.4 42 | 23,"Female",307.4 43 | 24,"Female",625 44 | 23,"Female",209 45 | 19,"Female",502 46 | 19,"Female",236.6 47 | 19,"Female",183.8 48 | 20,"Female",320 49 | 27,"Female",544 50 | 21,"Female",552.6 51 | 19,"Male",127.2 52 | 21,"Male",143.2 53 | 18,"Male",273 54 | 25,"Male",131.6 55 | 20,"Male",190 56 | 29,"Male",193.2 57 | 19,"Male",112 58 | 24,"Male",457 59 | 26,"Male",155.4 60 | 20,"Male",277.8 61 | 22,"Male",139.6 62 | 20,"Male",420.6 63 | 20,"Male",206 64 | 18,"Male",411 65 | 22,"Male",151.2 66 | 18,"Male",271.6 67 | 22,"Male",202 68 | 35,"Male",204 69 | 19,"Male",227 70 | 21,"Male",141.2 71 | 20,"Male",124.4 72 | 19,"Male",124.4 73 | -------------------------------------------------------------------------------- /Edition2/Data/Skating2010.csv: -------------------------------------------------------------------------------- 1 | "Name","Country","Short","Free","Total" 2 | "LYSACEK Evan","United 
States",90.3,167.37,257.67 3 | "PLUSHENKO Evgeni","Russian Federation",90.85,165.51,256.36 4 | "TAKAHASHI Daisuke","Japan",90.25,156.98,247.23 5 | "LAMBIEL Stephane","Switzerland",84.63,162.09,246.72 6 | "CHAN Patrick","Canada",81.12,160.3,241.42 7 | "WEIR Johnny","United States",82.1,156.77,238.87 8 | "ODA Nobunari","Japan",84.85,153.69,238.54 9 | "KOZUKA Takahiko","Japan",79.59,151.6,231.19 10 | "ABBOTT Jeremy","United States",69.4,149.56,218.96 11 | "BREZINA Michal","Czech Republic",78.8,137.93,216.73 12 | "TEN Denis","Kazakhstan",76.24,135.01,211.25 13 | "AMODIO Florent","France",75.35,134.95,210.3 14 | "BORODULIN Artem","Russian Federation",72.24,137.92,210.16 15 | "FERNANDEZ Javier","Spain",68.69,137.99,206.68 16 | "SCHULTHEISS Adrian","Sweden",63.13,137.31,200.44 17 | "JOUBERT Brian","France",68,132.22,200.22 18 | "van der PERREN Kevin","Belgium",72.9,116.94,189.84 19 | "CONTESTI Samuel","Italy",70.6,116.9,187.5 20 | "VERNER Tomas","Czech Republic",65.32,119.42,184.74 21 | "BACCHINI Paolo","Italy",64.42,112.79,177.21 22 | "PFEIFER Viktor","Austria",60.88,115.05,175.93 23 | "LINDEMANN Stefan","Germany",68.5,103.48,171.98 24 | "CHIPEUR Vaughn","Canada",57.22,113.7,170.92 25 | "KOVALEVSKI Anton","Ukraine",63.81,102.09,165.9 26 | -------------------------------------------------------------------------------- /Edition2/Data/Spruce.csv: -------------------------------------------------------------------------------- 1 | "Tree","Competition","Fertilizer","Height0","Height5","Diameter0","Diameter5","Ht.change","Di.change" 2 | 1,"NC","F",15,60,1.984375,7.4,45,5.415625 3 | 2,"NC","F",9,45.2,1.190625,5.2,36.2,4.009375 4 | 3,"NC","F",12,42,1.7859375,5.7,30,3.9140625 5 | 4,"NC","F",13.7,49.5,1.5875,6.4,35.8,4.8125 6 | 5,"NC","F",12,47.3,1.5875,6.2,35.3,4.612500000000001 7 | 6,"NC","F",12,56.4,1.5875,7.4,44.4,5.8125 8 | 7,"NC","NF",16.8,43.5,1.984375,4.9,26.7,2.9156250000000004 9 | 8,"NC","NF",14.6,49.2,1.984375,5.4,34.6,3.4156250000000004 10 | 9,"NC","NF",16,54,1.984375,7.1,38,5.115625 11 | 10,"NC","NF",15.4,45,1.984375,5.1,29.6,3.1156249999999996 12 | 11,"NC","NF",11.7,38,1.3890625,4.1,26.3,2.7109374999999996 13 | 12,"NC","NF",15,60.5,1.5875,7.3,45.5,5.7125 14 | 13,"C","F",13.1,45.4,1.984375,6.3,32.3,4.315625 15 | 14,"C","F",11,50,1.5875,6.2,39,4.612500000000001 16 | 15,"C","F",16,53,2.1828125,5.6,37,3.4171875 17 | 16,"C","F",13.5,54,1.5875,6.3,40.5,4.7125 18 | 17,"C","F",11.6,39,1.5875,4.5,27.4,2.9125 19 | 18,"C","F",13.5,54.2,1.984375,6.5,40.7,4.515625 20 | 19,"C","NF",13.2,28.3,1.5875,3.4,15.100000000000001,1.8125 21 | 20,"C","NF",15.8,29.5,2.38125,3.4,13.7,1.0187499999999998 22 | 21,"C","NF",13.5,42,1.7859375,4.6,28.5,2.8140624999999995 23 | 22,"C","NF",13.4,29,1.984375,4.2,15.6,2.215625 24 | 23,"C","NF",12.5,31,1.984375,3.5,18.5,1.515625 25 | 24,"C","NF",14.7,38,1.984375,4.7,23.3,2.715625 26 | 25,"NC","F",11.5,63,1.984375,8.7,51.5,6.715624999999999 27 | 26,"NC","F",13.7,64.5,1.7859375,8.4,50.8,6.6140625 28 | 27,"NC","F",18.7,58.3,1.984375,7.1,39.599999999999994,5.115625 29 | 28,"NC","F",15.8,66.2,1.5875,9.1,50.400000000000006,7.512499999999999 30 | 29,"NC","F",17.5,62.2,1.984375,8,44.7,6.015625 31 | 30,"NC","F",15,63,1.7859375,8.9,48,7.1140625 32 | 31,"NC","NF",17,53,1.984375,6.9,36,4.915625 33 | 32,"NC","NF",14.2,46.2,1.5875,4.9,32,3.3125000000000004 34 | 33,"NC","NF",11.2,41.4,1.5875,4.3,30.2,2.7125 35 | 34,"NC","NF",16.7,36.2,1.984375,4.8,19.500000000000004,2.815625 36 | 35,"NC","NF",12.5,46.9,1.5875,5.2,34.4,3.6125000000000003 37 | 36,"NC","NF",15.2,43.5,1.984375,5.8,28.3,3.815625 38 
| 37,"C","F",15.5,43,1.984375,7.1,27.5,5.115625 39 | 38,"C","F",13.7,43.2,1.984375,5.9,29.500000000000004,3.9156250000000004 40 | 39,"C","F",17.8,48,1.984375,6.4,30.2,4.415625 41 | 40,"C","F",12.8,41,1.5875,6.3,28.2,4.7125 42 | 41,"C","F",15,46,2.38125,5.7,31,3.31875 43 | 42,"C","F",15,45.5,1.984375,6,30.5,4.015625 44 | 43,"C","NF",14,40,1.7859375,4.3,26,2.5140624999999996 45 | 44,"C","NF",15.7,24,1.7859375,3.7,8.3,1.9140625000000002 46 | 45,"C","NF",15.1,37.1,1.5875,4.3,22,2.7125 47 | 46,"C","NF",14,30,1.984375,4.3,16,2.315625 48 | 47,"C","NF",14.6,35,1.984375,3.9,20.4,1.915625 49 | 48,"C","NF",16,37,1.984375,4.1,21,2.1156249999999996 50 | 49,"NC","F",17,68,2.38125,11.3,51,8.918750000000001 51 | 50,"NC","F",17.3,56,1.7859375,9.4,38.7,7.6140625 52 | 51,"NC","F",18.2,68,2.38125,8.9,49.8,6.518750000000001 53 | 52,"NC","F",15,55.4,1.984375,8.7,40.4,6.715624999999999 54 | 53,"NC","F",15.3,62,2.1828125,8.7,46.7,6.5171874999999995 55 | 54,"NC","F",17,48.6,2.38125,8.1,31.6,5.71875 56 | 55,"NC","NF",16,45,1.984375,6.5,29,4.515625 57 | 56,"NC","NF",16.4,43.5,1.984375,5.1,27.1,3.1156249999999996 58 | 57,"NC","NF",14.8,37.7,2.38125,4.3,22.900000000000002,1.9187499999999997 59 | 58,"NC","NF",12,40,1.7859375,4.7,28,2.9140625 60 | 59,"NC","NF",14.5,40.5,1.7859375,5.2,26,3.4140625 61 | 60,"NC","NF",17.1,35,2.38125,4.9,17.9,2.5187500000000003 62 | 61,"C","F",14.3,52,2.38125,6.7,37.7,4.31875 63 | 62,"C","F",12.5,64,1.984375,9,51.5,7.015625 64 | 63,"C","F",14.7,50,2.38125,7,35.3,4.61875 65 | 64,"C","F",16.3,46.2,2.1828125,6.8,29.900000000000002,4.6171875 66 | 65,"C","F",16.2,47,2.38125,7.8,30.8,5.418749999999999 67 | 66,"C","F",17.5,47,2.38125,6.6,29.5,4.21875 68 | 67,"C","NF",16.2,24.7,1.984375,3.3,8.5,1.3156249999999998 69 | 68,"C","NF",11.3,26.4,1.5875,2.7,15.099999999999998,1.1125000000000003 70 | 69,"C","NF",17.5,36,1.984375,3.8,18.5,1.8156249999999998 71 | 70,"C","NF",13.3,24.4,1.7859375,3.5,11.099999999999998,1.7140625 72 | 71,"C","NF",11,27.2,2.38125,4,16.2,1.61875 73 | 72,"C","NF",14.6,33.6,2.38125,4.5,19,2.11875 74 | -------------------------------------------------------------------------------- /Edition2/Data/Starcraft.csv: -------------------------------------------------------------------------------- 1 | ID,Race,Age,Wins 2 | 1,Protoss,20,29 3 | 2,Protoss,19,27 4 | 3,Protoss,19,26 5 | 4,Protoss,18,19 6 | 5,Protoss,22,23 7 | 6,Protoss,18,25 8 | 7,Protoss,24,19 9 | 8,Protoss,20,20 10 | 9,Protoss,21,19 11 | 10,Protoss,18,21 12 | 11,Protoss,22,16 13 | 12,Protoss,23,21 14 | 13,Protoss,21,18 15 | 14,Protoss,19,18 16 | 15,Protoss,24,17 17 | 16,Terran,18,26 18 | 17,Terran,16,21 19 | 18,Terran,20,25 20 | 19,Terran,18,24 21 | 20,Terran,20,23 22 | 21,Terran,18,26 23 | 22,Terran,17,22 24 | 23,Terran,21,21 25 | 24,Terran,21,21 26 | 25,Terran,21,20 27 | 26,Terran,18,22 28 | 27,Terran,23,20 29 | 28,Terran,18,22 30 | 29,Terran,19,21 31 | 30,Terran,17,24 32 | 31,Zerg,18,28 33 | 32,Zerg,20,24 34 | 33,Zerg,20,20 35 | 34,Zerg,21,23 36 | 35,Zerg,22,17 37 | 36,Zerg,18,20 38 | 37,Zerg,20,15 39 | 38,Zerg,24,17 40 | 39,Zerg,23,14 41 | 40,Zerg,21,18 42 | 41,Zerg,23,12 43 | 42,Zerg,21,15 44 | 43,Zerg,24,16 45 | 44,Zerg,18,14 46 | 45,Zerg,23,11 47 | -------------------------------------------------------------------------------- /Edition2/Data/TV.csv: -------------------------------------------------------------------------------- 1 | ID,Times,Cable 2 | 1,7,Basic 3 | 2,10,Basic 4 | 3,10.6,Basic 5 | 4,10.2,Basic 6 | 5,8.6,Basic 7 | 6,7.6,Basic 8 | 7,8.2,Basic 9 | 8,10.4,Basic 10 | 9,11,Basic 11 | 10,8.5,Basic 12 | 
11,3.4,Extended 13 | 12,7.8,Extended 14 | 13,9.4,Extended 15 | 14,4.7,Extended 16 | 15,5.4,Extended 17 | 16,7.6,Extended 18 | 17,5,Extended 19 | 18,8,Extended 20 | 19,7.8,Extended 21 | 20,9.6,Extended 22 | -------------------------------------------------------------------------------- /Edition2/Data/Turbine.csv: -------------------------------------------------------------------------------- 1 | "Date2010","AveKW","AveSpeed","Production" 2 | "Feb 14",547.9,7.8,13146 3 | "Feb 15",776,8.9,18626 4 | "Feb 16",944.4,9.7,22667 5 | "Feb 17",506.2,7.7,12148 6 | "Feb 18",322.9,6.4,7742 7 | "Feb 19",67.9,3.1,1585 8 | "Feb 20",79.9,3.9,1876 9 | "Feb 21",123.6,4.5,2936 10 | "Feb 22",273.3,6.5,6559 11 | "Feb 23",626.8,7.8,15041 12 | "Feb 24",242.2,5.8,5800 13 | "Feb 25",2.2,2.5,6 14 | "Feb 26",124.6,3.8,2940 15 | "Feb 27",494.6,7.7,11871 16 | "Feb 28",187.2,5.8,4481 17 | "Mar 01",303.5,5.9,7258 18 | "Mar 02",74.6,3.5,1743 19 | "Mar 03",148.6,5.1,3543 20 | "Mar 04",120.2,4.2,2848 21 | "Mar 05",581.9,8,13965 22 | "Mar 06",503.6,7.6,12087 23 | "Mar 07",89.4,3.5,2099 24 | "Mar 08",210.1,5.9,5037 25 | "Mar 09",347.9,7.1,8348 26 | "Mar 10",594.9,8.4,14279 27 | "Mar 11",611.5,8.2,14674 28 | "Mar 12",35.3,3.1,793 29 | "Mar 13",675,8.7,16202 30 | "Mar 14",317,6.7,7607 31 | "Mar 15",334.3,6.7,8019 32 | "Mar 16",201.5,5.8,4833 33 | "Mar 17",255.7,6,6125 34 | "Mar 18",454.2,6.7,10870 35 | "Mar 19",564.1,8.6,13768 36 | "Mar 20",278.4,6.4,6678 37 | "Mar 21",72.4,3.7,1692 38 | "Mar 22",405.6,7.6,9708 39 | "Mar 23",304.1,6.7,7226 40 | "Mar 24",176.2,6.3,4190 41 | "Mar 25",736.1,8.5,17666 42 | "Mar 26",1072,10.2,25729 43 | "Mar 27",601.5,7.1,14420 44 | "Mar 28",448.3,7,10752 45 | "Mar 29",849.6,9.2,20386 46 | "Mar 30",841.2,13.2,20146 47 | "Mar 31",271.4,8.4,6485 48 | "Apr 01",867.2,9.9,20811 49 | "Apr 02",945.6,11.3,22663 50 | "Apr 03",824.2,9.1,19781 51 | "Apr 04",716.9,8.7,17201 52 | "Apr 05",318.5,5.9,7618 53 | "Apr 06",971.4,10,23315 54 | "Apr 07",884.4,9.4,21225 55 | "Apr 08",309.3,6.5,7418 56 | "Apr 09",438.9,6.2,10502 57 | "Apr 10",276.8,5.4,6613 58 | "Apr 11",160.1,4.9,3814 59 | "Apr 12",755.8,9,18139 60 | "Apr 13",1149,10.9,27572 61 | "Apr 14",315,9.6,7514 62 | "Apr 15",898.3,9.3,21554 63 | "Apr 16",1142.6,10.6,27422 64 | "Apr 17",434.2,7.1,10411 65 | "Apr 18",44.7,4.3,1047 66 | "Apr 19",148.4,4.8,3530 67 | "Apr 20",175.9,4.3,4169 68 | "Apr 21",356.4,7,8552 69 | "Apr 22",80.6,3.7,1889 70 | "Apr 23",1041,10.5,24985 71 | "Apr 24",623.1,8.2,14952 72 | "Apr 25",981,10,23546 73 | "Apr 26",218.1,5.2,5208 74 | "Apr 27",233.7,5.7,5589 75 | "Apr 28",614.5,8,14744 76 | "Apr 29",1285.6,12.2,30854 77 | "Apr 30",556.3,7.6,13338 78 | "May 01",1111.7,11.1,26680 79 | "May 02",781.7,9.2,18762 80 | "May 03",339.2,6.4,8127 81 | "May 04",727.3,8.5,17443 82 | "May 05",1254.4,12.9,30096 83 | "May 06",190.2,4.5,4522 84 | "May 07",164.6,5.1,3925 85 | "May 08",732.7,8.7,17584 86 | "May 09",88.8,3.8,2091 87 | "May 10",1210.8,11,29059 88 | "May 11",660,8.3,15841 89 | "May 12",432.7,7.2,10386 90 | "May 13",490,6.9,11744 91 | "May 14",881.8,9.4,21164 92 | "May 15",77.5,4.6,1830 93 | "May 16",286.5,6.6,6870 94 | "May 17",149.8,5.1,3568 95 | "May 18",100.2,4.8,2370 96 | "May 19",166.2,5,3959 97 | "May 20",426.4,6.9,10224 98 | "May 21",170.2,5.1,4060 99 | "May 22",1005.6,11.2,24132 100 | "May 23",1139.4,11,27343 101 | "May 24",690,10.6,16531 102 | "May 25",360,6,8606 103 | "May 26",195.3,5.6,4669 104 | "May 27",257.5,6.5,6179 105 | "May 28",347.5,6.9,8334 106 | "May 29",765.5,9.3,18370 107 | "May 30",643.5,8.2,15432 108 | "May 
31",144.3,4.2,3410 109 | "Jun 01",571.9,7.8,13718 110 | "Jun 02",101.6,4.4,2405 111 | "Jun 03",136.2,4.4,3222 112 | "Jun 04",400.2,6.8,995 113 | "Jun 05",142.6,5.2,3399 114 | "Jun 06",338.1,6.3,8096 115 | "Jun 07",55.1,3.4,1261 116 | "Jun 08",633.3,8.3,15200 117 | "Jun 09",869.4,9.6,20866 118 | "Jun 10",383.9,7.1,9212 119 | "Jun 11",624.9,8.3,14984 120 | "Jun 12",170.3,5.4,4073 121 | "Jun 13",14.9,3.3,309 122 | "Jun 14",259.5,6.1,6218 123 | "Jun 15",282,6.4,6762 124 | "Jun 16",184.4,4.8,4384 125 | "Jun 17",1079.3,12.6,25896 126 | "Jun 18",687.2,8.8,16490 127 | "Jun 19",602.9,7.7,14453 128 | "Jun 20",30.1,3.7,674 129 | "Jun 21",127,4.9,3017 130 | "Jun 22",408.2,7,9785 131 | "Jun 23",531,7.5,12731 132 | "Jun 24",156.6,4.3,3713 133 | "Jun 25",743.4,9.2,17841 134 | "Jun 26",114,3.7,2681 135 | "Jun 27",356.1,5.6,8511 136 | "Jun 28",676.7,8.5,16241 137 | "Jun 29",76.2,4.2,1787 138 | "Jun 30",171.8,5,4094 139 | "Jul 01",973.7,10.2,23367 140 | "Jul 03",1238.8,11.5,29731 141 | "Jul 04",533.1,7.6,12793 142 | "Jul 05",54.3,3.7,1258 143 | "Jul 06",18.5,2.6,379 144 | "Jul 07",113,4,2671 145 | "Jul 08",144.9,5.5,3470 146 | "Jul 09",162.9,5.5,3899 147 | "Jul 10",564.9,7.9,13549 148 | "Jul 11",462.7,6.9,11090 149 | "Jul 12",102.7,4.3,2417 150 | "Jul 13",442.7,7.5,10617 151 | "Jul 14",953.6,10.2,22882 152 | "Jul 15",439.2,7.6,10542 153 | "Jul 16",382.5,7,9177 154 | "Jul 17",385.1,6.2,9214 155 | "Jul 18",459.7,7.6,11034 156 | "Jul 19",59.1,3.1,1349 157 | "Jul 20",129.6,5.3,3096 158 | "Jul 21",123,4.4,2910 159 | "Jul 22",444,6.5,10630 160 | "Jul 23",120.2,4.6,2848 161 | "Jul 24",311.7,7,7470 162 | "Jul 25",32.6,3.3,720 163 | "Jul 26",259.9,6.5,6235 164 | "Jul 27",696.8,9,16722 165 | "Jul 28",248.4,6.4,5959 166 | "Jul 29",2.2,2.2,0 167 | "Jul 30",140.3,5.2,3340 168 | "Jul 31",27.1,3.5,595 169 | "Aug 01",231.7,6,5548 170 | -------------------------------------------------------------------------------- /Edition2/Data/Volleyball2009.csv: -------------------------------------------------------------------------------- 1 | "Team","HitPercent","Assts","Kills" 2 | "Penn St.",38.1,13.64,14.62 3 | "Texas",33.800000000000004,13.37,14.59 4 | "Hawaii",30.5,13.56,14.58 5 | "Florida St.",30,12.6,13.75 6 | "Florida",29.099999999999998,13.21,14.47 7 | "Washington",28.799999999999997,13.35,14.25 8 | "Md.-East. Shore",28.799999999999997,11.87,12.76 9 | "Middle Tenn.",28.1,12.78,13.65 10 | "St. Mary's (CA)",27.900000000000002,13.31,14.24 11 | "Kentucky",27.900000000000002,13.63,14.74 12 | "Ohio",27.3,12.77,14.06 13 | "California",27.1,12.89,13.95 14 | "LSU",26.900000000000002,13.22,13.99 15 | "Stanford",26.8,13.03,13.85 16 | "Ohio St.",26.8,12.26,13.32 17 | "UNI",26.700000000000003,13.65,14.83 18 | "Oregon",26.6,13.83,14.69 19 | "Lipscomb",26.3,13.79,14.75 20 | "Tulsa",26.200000000000003,13.48,14.43 21 | "Western Ky.",26.1,11.94,13.06 22 | "St. 
Louis",26.1,12.73,14.04 23 | "Clemson",25.8,12.14,13.36 24 | "Nebraska",25.6,13.63,14.65 25 | "Yale",25.5,12.8,14.02 26 | "Duke",25.5,12.9,13.83 27 | "Minnesota",25.4,12.88,13.86 28 | "FIU",25.4,13.42,14.52 29 | "Louisville",25.4,11.92,12.99 30 | "Notre Dame",25.3,12.55,13.68 31 | "Pepperdine",25,12.9,13.93 32 | -------------------------------------------------------------------------------- /Edition2/Data/Walleye.csv: -------------------------------------------------------------------------------- 1 | "Length","Weight" 2 | 11.1,0.4 3 | 16.1,1.39 4 | 20.7,2.8 5 | 14.3,1.03 6 | 11.5,0.5 7 | 15.7,0.9 8 | 12.5,0.6 9 | 15.3,1.3 10 | 26.6,7.5 11 | 17.8,2.2 12 | 15.2,1.2 13 | 15,1 14 | 20.8,3.5 15 | 11,0.4 16 | 21,3.1 17 | 14.1,1.2 18 | 12.5,0.6 19 | 13.5,0.8 20 | 16.4,1.2 21 | 29.3,10.3 22 | 12.7,0.8 23 | 12.6,0.6 24 | 11,0.4 25 | 20,2 26 | 14.6,1.1 27 | 20.5,3 28 | 23.3,5.4 29 | 16.2,1.5 30 | 9.2,0.4 31 | 25.5,5.5 32 | 15.9,1.4 33 | 17.6,1.6 34 | 20.8,3 35 | 11,0.4 36 | 17.2,1.9 37 | 14.7,1.08 38 | 20.5,3.1 39 | 9.1,0.3 40 | 10.3,0.3 41 | 19.4,2.66 42 | 13.6,0.81 43 | 20.3,2.8 44 | 16,1.4 45 | 22,4 46 | 12.5,0.8 47 | 14.2,1 48 | 17.5,2.2 49 | 16.1,1.47 50 | 13.5,0.7 51 | 24.2,4.5 52 | 12.8,0.68 53 | 20.6,3.1 54 | 10.5,0.4 55 | 27,6.4 56 | 14,1.1 57 | 10.9,0.4 58 | 17.6,1.8 59 | 16.1,1.7 60 | 23.6,4.5 61 | 15.8,1.2 62 | -------------------------------------------------------------------------------- /Edition2/Data/Watertable.csv: -------------------------------------------------------------------------------- 1 | "Depth","Alive" 2 | 50,1 3 | 43,1 4 | 50,1 5 | 46,1 6 | 25,1 7 | 19,1 8 | 30,1 9 | 35,1 10 | 8,1 11 | 8,0 12 | 11,1 13 | 9,1 14 | 30,1 15 | 46,1 16 | 38,1 17 | 34,1 18 | 24,1 19 | 30,1 20 | 34,1 21 | 36,1 22 | 12,1 23 | 18,1 24 | 20,1 25 | 22,1 26 | 25,1 27 | 18,1 28 | 30,1 29 | 23,1 30 | 6,1 31 | 4,1 32 | 6,1 33 | 6,1 34 | 4,1 35 | 3,0 36 | 10,1 37 | 4,0 38 | 36,1 39 | 42,1 40 | 27,1 41 | 42,1 42 | 47,1 43 | 56,1 44 | 50,1 45 | 51,1 46 | 48,1 47 | 48,1 48 | 53,1 49 | 55,1 50 | 30,1 51 | 29,1 52 | 28,1 53 | 25,1 54 | 28,1 55 | 25,1 56 | 27,1 57 | 27,1 58 | 24,1 59 | 20,1 60 | 26,1 61 | 22,1 62 | 2,0 63 | 6,0 64 | 8,1 65 | 9,0 66 | 9,1 67 | 1,0 68 | 8,0 69 | 8,0 70 | 1,0 71 | 4,0 72 | 3,0 73 | 8,0 74 | 26,1 75 | 31,1 76 | 31,1 77 | 32,1 78 | 30,1 79 | 24,1 80 | 30,1 81 | 28,1 82 | 15,1 83 | 19,1 84 | 20,1 85 | 24,1 86 | 19,1 87 | 15,1 88 | 17,1 89 | 20,1 90 | 5,0 91 | 9,1 92 | 7,1 93 | 11,0 94 | 7,1 95 | 9,1 96 | 8,1 97 | 9,1 98 | 33,1 99 | 37,1 100 | 36,1 101 | 36,1 102 | 46,1 103 | 41,1 104 | 44,1 105 | 47,1 106 | 25,1 107 | 24,1 108 | 24,1 109 | 21,1 110 | 20,1 111 | 24,1 112 | 23,1 113 | 22,1 114 | 2,0 115 | 1,0 116 | 5,0 117 | 3,0 118 | 4,0 119 | 4,0 120 | 7,0 121 | 7,0 122 | 43,1 123 | 43,1 124 | 47,1 125 | 36,1 126 | 32,1 127 | 30,1 128 | 29,1 129 | 33,1 130 | 22,1 131 | 24,1 132 | 23,1 133 | 22,1 134 | 18,1 135 | 18,1 136 | 21,1 137 | 17,1 138 | 14,1 139 | 13,0 140 | 13,1 141 | 14,1 142 | 5,0 143 | 10,0 144 | 13,1 145 | 4,0 146 | 43,1 147 | 40,1 148 | 47,1 149 | 50,1 150 | 39,1 151 | 39,1 152 | 48,1 153 | 50,1 154 | 41,1 155 | 40,1 156 | 39,1 157 | 38,1 158 | 20,1 159 | 18,1 160 | 26,1 161 | 18,1 162 | 19,1 163 | 20,1 164 | 18,1 165 | 25,1 166 | 23,1 167 | 19,1 168 | 27,1 169 | 26,1 170 | 9,0 171 | 9,0 172 | 11,0 173 | 10,0 174 | 8,0 175 | 6,0 176 | 14,0 177 | 12,0 178 | 10,1 179 | 13,1 180 | 15,1 181 | 12,1 182 | 41,1 183 | 42,1 184 | 48,1 185 | 45,1 186 | 38,1 187 | 39,1 188 | 38,1 189 | 39,1 190 | 40,1 191 | 36,1 192 | 38,1 193 | 39,1 194 | 17,1 195 | 18,1 196 | 20,1 197 | 
22,1 198 | 18,1 199 | 18,1 200 | 20,1 201 | 19,1 202 | 13,1 203 | 18,1 204 | 22,1 205 | 24,1 206 | 6,0 207 | 10,0 208 | 9,0 209 | 13,0 210 | 7,0 211 | 4,0 212 | 5,0 213 | 7,0 214 | 5,0 215 | 7,0 216 | 4,0 217 | 11,0 218 | 30,1 219 | 31,1 220 | 42,1 221 | 35,1 222 | 30,1 223 | 30,1 224 | 32,1 225 | 50,1 226 | 18,1 227 | 17,1 228 | 30,1 229 | 27,1 230 | 17,1 231 | 18,1 232 | 22,1 233 | 20,1 234 | 13,1 235 | 11,1 236 | 12,1 237 | 11,0 238 | 7,0 239 | 4,0 240 | 7,0 241 | 8,0 242 | 45,1 243 | 37,1 244 | 50,1 245 | 44,1 246 | 18,1 247 | 13,1 248 | 17,1 249 | 19,1 250 | 10,1 251 | 10,1 252 | 11,0 253 | 7,0 254 | 37,1 255 | 36,1 256 | 36,1 257 | 30,1 258 | 30,1 259 | 24,1 260 | 37,1 261 | 32,1 262 | 12,1 263 | 14,1 264 | 23,1 265 | 22,1 266 | 15,1 267 | 24,1 268 | 24,1 269 | 17,1 270 | 12,1 271 | 12,1 272 | 16,1 273 | 16,0 274 | 10,1 275 | 8,0 276 | 9,1 277 | 11,1 278 | 67,1 279 | 69,1 280 | 64,1 281 | 65,1 282 | 35,1 283 | 35,1 284 | 40,1 285 | 40,1 286 | 50,1 287 | 55,1 288 | 50,1 289 | 53,1 290 | 14,1 291 | 16,1 292 | 18,1 293 | 21,1 294 | 28,1 295 | 27,1 296 | 22,1 297 | 30,1 298 | 21,1 299 | 20,1 300 | 28,1 301 | 25,1 302 | 14,1 303 | 12,1 304 | 14,1 305 | 14,1 306 | 10,0 307 | 6,0 308 | 4,0 309 | 8,0 310 | 8,1 311 | 10,0 312 | 11,0 313 | 11,1 314 | 28,1 315 | 27,1 316 | 32,1 317 | 43,1 318 | 27,1 319 | 35,1 320 | 37,1 321 | 33,1 322 | 16,1 323 | 16,1 324 | 20,1 325 | 18,1 326 | 17,1 327 | 15,1 328 | 17,1 329 | 18,1 330 | 14,1 331 | 9,0 332 | 18,1 333 | 15,1 334 | 11,0 335 | 11,0 336 | 12,0 337 | 12,0 338 | 33,1 339 | 30,1 340 | 30,1 341 | 33,1 342 | 14,1 343 | 18,1 344 | 17,1 345 | 16,1 346 | 10,1 347 | 7,1 348 | 10,0 349 | 12,0 350 | 28,1 351 | 21,1 352 | 34,1 353 | 34,1 354 | 20,1 355 | 19,1 356 | 20,1 357 | 27,1 358 | 11,1 359 | 11,1 360 | 11,1 361 | 9,0 362 | -------------------------------------------------------------------------------- /Edition2/Data/wafers.csv: -------------------------------------------------------------------------------- 1 | "Instrument","Resistance" 2 | 1,196.3052 3 | 1,196.124 4 | 1,196.189 5 | 1,196.2569 6 | 1,196.3403 7 | 2,196.3042 8 | 2,196.3825 9 | 2,196.1669 10 | 2,196.3257 11 | 2,196.0422 12 | 3,196.1303 13 | 3,196.2005 14 | 3,196.2889 15 | 3,196.0343 16 | 3,196.1811 17 | 4,196.2795 18 | 4,196.1748 19 | 4,196.1494 20 | 4,196.1485 21 | 4,195.9885 22 | 5,196.2119 23 | 5,196.1051 24 | 5,196.185 25 | 5,196.0052 26 | 5,196.209 27 | -------------------------------------------------------------------------------- /Edition2/Errata_Edition2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Errata_Edition2.pdf -------------------------------------------------------------------------------- /Edition2/R/Chap02EDA.R: -------------------------------------------------------------------------------- 1 | #Chap 2: Exploratory Data Analysis 2 | 3 | 4 | #Section 2.4 5 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36) 6 | qqnorm(x) # plot points 7 | qqline(x) # add straight line 8 | 9 | 10 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv") 11 | 12 | qqnorm(NCBirths$Weight) 13 | qqline(NCBirths$Weight) 14 | 15 | #--------------------------------------------------------------------------- 16 | #Section 2.5 17 | #R Note 18 | x <- c(3, 6, 15, 15, 17, 19, 24) 19 | plot.ecdf(x) 20 | x <- rnorm(25) # random sample of size 25 from N(0,1) 21 | plot.ecdf(x, xlim = c(-4, 4)) # 
adjust x range 22 | curve(pnorm(x), col = "blue", add = TRUE) # impose normal cdf 23 | 24 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 25 | 26 | beerM <- subset(Beerwings, select = Beer, subset = Gender == "M", 27 | drop = T) 28 | beerF <- subset(Beerwings, select = Beer, subset = Gender == "F", 29 | drop = T) 30 | 31 | plot.ecdf(beerM, xlab = "ounces") 32 | plot.ecdf(beerF, col = "blue", pch = 2, add = TRUE) 33 | abline(v = 25, lty = 2) 34 | legend(5, .8, legend = c("Males", "Females"), 35 | col = c("black", "blue"), pch = c(19, 2)) 36 | 37 | #-------------------------- 38 | #Section 2.6 39 | plot(Beer ~ Hotwings, data = Beerwings, xlab = "Hot wings eaten", 40 | ylab = "Beer consumed") 41 | 42 | plot(Beerwings$Hotwings, Beerwings$Beer, xlab = "Hot wings eaten", 43 | ylab = "Beer consumed") 44 | 45 | plot(Beer ~ Hotwings, data = Beerwings, col = Gender, xlab = "Hot wings eaten", 46 | ylab = "Beer consumed") 47 | -------------------------------------------------------------------------------- /Edition2/R/Chap02EDA.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 2 Exploratory Data Analysis" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | 13 | ###Section 2.4 14 | 15 | ```{r} 16 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36) 17 | qqnorm(x) # plot points 18 | qqline(x) # add straight line 19 | 20 | 21 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv") 22 | 23 | qqnorm(NCBirths$Weight) 24 | qqline(NCBirths$Weight) 25 | ``` 26 | 27 | ###Section 2.5 28 | ####R Note 29 | ```{r} 30 | x <- c(3, 6, 15, 15, 17, 19, 24) 31 | plot.ecdf(x) 32 | x <- rnorm(25) # random sample of size 25 from N(0,1) 33 | plot.ecdf(x, xlim = c(-4, 4)) # adjust x range 34 | curve(pnorm(x), col = "blue", add = TRUE) # impose normal cdf 35 | 36 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 37 | 38 | beerM <- subset(Beerwings, select = Beer, subset = Gender == "M", 39 | drop = T) 40 | beerF <- subset(Beerwings, select = Beer, subset = Gender == "F", 41 | drop = T) 42 | 43 | plot.ecdf(beerM, xlab = "ounces") 44 | plot.ecdf(beerF, col = "blue", pch = 2, add = TRUE) 45 | abline(v = 25, lty = 2) 46 | legend(5, .8, legend = c("Males", "Females"), 47 | col = c("black", "blue"), pch = c(19, 2)) 48 | ``` 49 | 50 | ###Section 2.6 51 | ```{r} 52 | plot(Beer ~ Hotwings, data = Beerwings, xlab = "Hot wings eaten", 53 | ylab = "Beer consumed") 54 | 55 | plot(Beerwings$Hotwings, Beerwings$Beer, xlab = "Hot wings eaten", 56 | ylab = "Beer consumed") 57 | 58 | plot(Beer ~ Hotwings, data = Beerwings, col = Gender, xlab = "Hot wings eaten", 59 | ylab = "Beer consumed") 60 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap02EDA_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 2 Exploratory Data Analysis" 3 | author: "Chihara-Hesterberg" 4 | date: "November 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Section 2.4 15 | 16 | ```{r} 17 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36) 18 | df <- 
data.frame(x) 19 | ggplot(df, aes(sample = x)) + stat_qq() + stat_qq_line() 20 | 21 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv") 22 | 23 | ggplot(NCBirths, aes(sample = Weight)) + stat_qq() + stat_qq_line() 24 | 25 | ``` 26 | 27 | ###Section 2.5 28 | ####R Note 29 | ```{r} 30 | x <- c(3, 6, 15, 15, 17, 19, 24) 31 | df <- data.frame(x) 32 | ggplot(df, aes(x)) + stat_ecdf(geom = "step") 33 | 34 | # random sample of size 25 from N(0,1) 35 | df <- data.frame(x = rnorm(25)) 36 | 37 | ggplot(df, aes(x)) + stat_ecdf() + stat_function(fun = pnorm, color = "red") 38 | 39 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 40 | 41 | ggplot(Beerwings, aes(Beer, color = Gender)) + stat_ecdf() 42 | 43 | ``` 44 | 45 | ###Section 2.6 46 | ```{r} 47 | 48 | ggplot(Beerwings, aes(x=Hotwings, y = Beer)) + geom_point() 49 | 50 | ggplot(Beerwings, aes(x = Hotwings, y = Beer, color = Gender)) + geom_point() 51 | 52 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing.R: -------------------------------------------------------------------------------- 1 | #Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 2 | ##------------------------------------- 3 | ##Section 3.3 4 | #Beerwings <- read.csv("https://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 5 | 6 | tapply(Beerwings$Hotwings, Beerwings$Gender, mean) 7 | 8 | observed <- 14.5333 - 9.3333 #store observed mean difference 9 | 10 | #Get hotwings variable 11 | hotwings <- Beerwings$Hotwings 12 | 13 | #Alternative way: 14 | hotwings <- subset(Beerwings, select = Hotwings, drop = TRUE) 15 | #drop = TRUE to convert hotwings to a vector (without this, hotwings will be a 16 | #30x1 data frame) 17 | 18 | #set.seed(0) 19 | N <- 10^5-1 #set number of times to repeat this process 20 | result <- numeric(N) # space to save the random differences 21 | for(i in 1:N) 22 | { 23 | index <- sample(30, size=15, replace = FALSE) # sample of numbers from 1:30 24 | result[i] <- mean(hotwings[index]) - mean(hotwings[-index]) 25 | } 26 | 27 | ##Plot 28 | 29 | hist(result, xlab = "xbarM - xbarF", main = "Permutation distribution for hot wings") 30 | abline(v = observed, col = "blue", lty=5) 31 | 32 | #------------------------- 33 | #Another visualization of distribution 34 | plot.ecdf(result) 35 | abline(v = observed, col = "blue", lty = 5) 36 | 37 | 38 | #Compute P-value 39 | (sum(result >= observed)+1)/(N+ 1) #P-value 40 | 41 | 42 | #---------------------------------------- 43 | #Example 3.4 Verizon 44 | #Permutation test 45 | 46 | Verizon <- read.csv("https://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 47 | 48 | tapply(Verizon$Time, Verizon$Group, mean) 49 | 50 | 51 | Time <- subset(Verizon, select = Time, drop = T) 52 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE) 53 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE) 54 | 55 | observed <- mean(Time.ILEC) - mean(Time.CLEC) 56 | observed 57 | 58 | 59 | N <- 10^4-1 #set number of times to repeat this process 60 | #set.seed(99) 61 | result <- numeric(N) # space to save the random differences 62 | for(i in 1:N) { 63 | index <- sample(1687, size = 1664, replace = FALSE) #sample of numbers from 1:1687 64 | result[i] <- mean(Time[index]) - mean(Time[-index]) 65 | } 66 | 67 | hist(result, xlab = "xbar1 - xbar2", 68 | main = "Permutation Distribution for Verizon repair times") 69 | abline(v = 
observed, col = "blue", lty = 5) 70 | 71 | (sum(result <= observed) + 1)/(N + 1) #P-value 72 | 73 | 74 | #------------------------------------------------------- 75 | #Example 3.6, Verizon cont. 76 | #median, trimmed means 77 | 78 | tapply(Verizon$Time, Verizon$Group, median) 79 | 80 | #Difference in medians 81 | observed <- median(Time.ILEC) - median(Time.CLEC) 82 | observed 83 | 84 | #Difference in trimmed means 85 | observed2 <- mean(Time.ILEC, trim = .25) - mean(Time.CLEC, trim = .25) 86 | observed2 87 | 88 | N <- 10^4-1 #set number of times to repeat this process 89 | #set.seed(99) 90 | result <- numeric(N) # space to save the random differences 91 | result2 <- numeric(N) 92 | for(i in 1:N) { 93 | index <- sample(1687, size=1664, replace = FALSE) #sample of numbers from 1:1687 94 | result[i] <- median(Time[index]) - median(Time[-index]) 95 | result2[i] <- mean(Time[index], trim = .25) - mean(Time[-index], trim = .25) 96 | } 97 | 98 | hist(result, xlab = "median1 - median2", 99 | main = "Permutation Distribution for medians") 100 | abline(v = observed, col = "blue", lty = 5) 101 | 102 | #P-value difference in medians 103 | (sum(result <= observed) + 1)/(N+ 1) 104 | 105 | 106 | hist(result2, xlab = "trimMean1 - trimMean2", 107 | main = "Permutation Distribution for trimmed means") 108 | abline(v = observed2, col = "blue", lty = 5) 109 | 110 | #P-value difference in trimmed means 111 | (sum(result2 <= observed2) + 1)/(N+ 1) 112 | 113 | #------------------------------------------------ 114 | #Example 3.6, Verizon continued 115 | # 116 | #difference in proportion of time > 10 117 | #and ratio of variances 118 | observed3 <- mean(Time.ILEC > 10) - mean(Time.CLEC > 10) 119 | observed3 120 | 121 | #ratio of variances 122 | observed4 <- var(Time.ILEC)/var(Time.CLEC) 123 | observed4 124 | 125 | N <- 10^4-1 #set number of times to repeat this process 126 | #set.seed(99) 127 | result3 <- numeric(N) 128 | result4 <- numeric(N) 129 | 130 | for(i in 1:N) { 131 | index <- sample(1687, size = 1664, replace = FALSE) 132 | result3[i] <- mean(Time[index] > 10) - mean(Time[-index] > 10) 133 | result4[i] <- var(Time[index])/var(Time[-index]) 134 | } 135 | 136 | 137 | 138 | hist(result3, xlab = "Difference in proportions", main = "Repair times > 10 hours") 139 | abline(v = observed3, lty = 5, col = "blue") 140 | #P-value difference in proportions 141 | (sum(result3 <= observed3) + 1)/(N+ 1) #P-value 142 | 143 | 144 | hist(result4, xlab = "variance1/variance2", main = "Ratio of variances") 145 | abline(v = observed4, lty = 5, col = "blue") 146 | 147 | 148 | #P-value ratio of variances 149 | (sum(result4 <= observed4) + 1)/(N+ 1) #P-value 150 | 151 | #-------------------------------------- 152 | #Example 3.8 153 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 154 | 155 | k <- complete.cases(Recidivism$Age25) 156 | Recid2 <- ifelse(Recidivism$Recid[k] == "Yes", 1, 0) 157 | Age25.2 <- Recidivism$Age25[k] 158 | 159 | table(Age25.2) 160 | tapply(Recid2, Age25.2, mean) 161 | observed <- .365 - .306 162 | 163 | N <- 10^4 - 1 164 | result <- numeric(N) 165 | 166 | for (i in 1:N) 167 | { 168 | index <- sample(17019, size = 3077, replace = FALSE) 169 | result[i] <- mean(Recid2[index]) - mean(Recid2[-index]) 170 | } 171 | 172 | 2* (sum(result >= observed) + 1)/(N + 1) 173 | 174 | #--------------------- 175 | #Section 3.4 Matched Pairs 176 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 177 | 178 | Diff <- Diving2017$Final - 
Diving2017$Semifinal 179 | observed <- mean(Diff) 180 | 181 | N <- 10^5 - 1 182 | result <- numeric(N) 183 | 184 | for (i in 1:N) 185 | { 186 | Sign <- sample(c(-1, 1), 12, replace = TRUE) 187 | Diff2 <- Sign*Diff 188 | result[i] <- mean(Diff2) 189 | } 190 | 191 | hist(result) 192 | abline(v = observed, col = "blue") 193 | 194 | 2* (sum(result >= observed) + 1)/(N + 1) 195 | -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 2 | #R Code for exercise 3 | 4 | #----------------- 5 | #Exercise 7 6 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 7 | 8 | N<-10^4-1 9 | 10 | UA.Delay <- subset(FlightDelays, select = Delay, Carrier == "UA", drop = TRUE) 11 | AA.Delay <- subset(FlightDelays, select = Delay, Carrier == "AA", drop = TRUE) 12 | 13 | observedSumUA <- sum(UA.Delay) 14 | observedmeanUA <- mean(UA.Delay) 15 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay) 16 | m <-length(UA.Delay) #number of UA observations 17 | 18 | sumUA<-numeric(N) 19 | meanUA<-numeric(N) 20 | meanDiff<-numeric(N) 21 | 22 | set.seed(0) 23 | for (i in 1:N) 24 | { 25 | index <- sample(4029, m, replace = FALSE) 26 | sumUA[i] <- sum(FlightDelays$Delay[index]) 27 | meanUA[i] <- mean(FlightDelays$Delay[index]) 28 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index]) 29 | 30 | } 31 | 32 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value 33 | 34 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value 35 | 36 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value 37 | 38 | #------------------------------- 39 | -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap03Testing_Exer" 3 | author: "Chihara-Hesterberg" 4 | date: "July 20, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ##Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 13 | ##Exercises 14 | 15 | ###Exercise 7 16 | ```{r} 17 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 18 | 19 | N<-10^4-1 20 | 21 | UA.Delay <- subset(FlightDelays, select = Delay, Carrier == "UA", drop = TRUE) 22 | AA.Delay <- subset(FlightDelays, select = Delay, Carrier == "AA", drop = TRUE) 23 | 24 | observedSumUA <- sum(UA.Delay) 25 | observedmeanUA <- mean(UA.Delay) 26 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay) 27 | m <-length(UA.Delay) #number of UA observations 28 | 29 | sumUA<-numeric(N) 30 | meanUA<-numeric(N) 31 | meanDiff<-numeric(N) 32 | 33 | set.seed(0) 34 | for (i in 1:N) 35 | { 36 | index <- sample(4029, m, replace = FALSE) 37 | sumUA[i] <- sum(FlightDelays$Delay[index]) 38 | meanUA[i] <- mean(FlightDelays$Delay[index]) 39 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index]) 40 | 41 | } 42 | 43 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value 44 | 45 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value 46 | 47 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value 48 | 49 | ``` 50 | -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing_Exer_d.Rmd: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap03Testing_Exer" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(dplyr) 11 | library(ggplot2) 12 | ``` 13 | 14 | ##Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 15 | ##Exercises 16 | 17 | ###Exercise 7 18 | ```{r} 19 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 20 | 21 | N<-10^4-1 22 | UA.Delay <- FlightDelays %>% filter(Carrier == "UA") %>% pull(Delay) 23 | AA.Delay <- FlightDelays %>% filter(Carrier == "AA") %>% pull(Delay) 24 | 25 | observedSumUA <- sum(UA.Delay) 26 | observedmeanUA <- mean(UA.Delay) 27 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay) 28 | m <-length(UA.Delay) #number of UA observations 29 | 30 | sumUA<-numeric(N) 31 | meanUA<-numeric(N) 32 | meanDiff<-numeric(N) 33 | 34 | set.seed(0) 35 | for (i in 1:N) 36 | { 37 | index <- sample(4029, m, replace = FALSE) 38 | sumUA[i] <- sum(FlightDelays$Delay[index]) 39 | meanUA[i] <- mean(FlightDelays$Delay[index]) 40 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index]) 41 | 42 | } 43 | 44 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value 45 | 46 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value 47 | 48 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value 49 | 50 | ``` 51 | -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist.R: -------------------------------------------------------------------------------- 1 | ###Chapter 4: Sampling Distributions 2 | 3 | #--------------------------------------------- 4 | #Example 4.2: Sampling distribution from Exp(1/15) 5 | Xbar <- numeric(1000) 6 | #set.seed(300) 7 | for (i in 1:1000) 8 | { 9 | x <- rexp(100, rate = 1/15) 10 | Xbar[i] <- mean(x) 11 | } 12 | 13 | hist(Xbar, main="Simulated sampling distribution", xlab="means") 14 | 15 | qqnorm(Xbar) 16 | qqline(Xbar) 17 | 18 | mean(Xbar) 19 | sd(Xbar) 20 | 21 | #---------------------------------------------------- 22 | ##Example 4.3: Sampling Dist from Unif[0,1] 23 | 24 | maxY <- numeric(1000) 25 | #set.seed(100) 26 | for (i in 1:1000) 27 | { 28 | y <- runif(12) #draw random sample of size 12 29 | maxY[i] <- max(y) #find max, save in position i 30 | } 31 | 32 | hist(maxY, main = "", xlab = "maximums") 33 | 34 | #To create a histogram with a density curve imposed 35 | #scale bars to have area one with prob=TRUE option 36 | hist(maxY, main = "", xlab = "maximums", prob = TRUE) 37 | 38 | #add pdf to histogram 39 | curve(12*x^{11}, col = "blue", add = TRUE) 40 | 41 | #--------------------------------------------- 42 | #Example 4.6 Sum of Poisson random variables 43 | 44 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5) 45 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12) 46 | W <- X + Y 47 | 48 | hist(W, prob = TRUE) #prob = TRUE, scales hist to 1 49 | lines(2:35, dpois(2:35, 17), type = "b") #overlay pmf for Pois(17) 50 | 51 | mean(W) 52 | var(W) 53 | 54 | #------------------------------------------------ 55 | #Example 4.7 56 | #Sampling distribution simulation 57 | #Sample of size 30 from gamma r=5, lambda=2 58 | 59 | #set.seed(10) 60 | Xbar <- numeric(1000) 61 | for (i in 1:1000) 62 | { 63 | x <- rgamma(30, shape = 5, rate = 2) 64 | Xbar[i] <- mean(x) 65 | } 66 | 67 | hist(Xbar, main = "Distribution of means") 
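#A minimal CLT sanity check (a sketch added for comparison; mu and sigma below
#are derived from the gamma parameters used above, shape = 5 and rate = 2, and
#are not part of the original script): the sampling distribution of Xbar for
#n = 30 should be approximately N(mu, sigma^2/30). Compare these values with
#mean(Xbar), sd(Xbar), and mean(Xbar > 3) computed below.
mu <- 5/2                              # population mean = shape/rate
sigma <- sqrt(5)/2                     # population sd = sqrt(shape)/rate
se <- sigma/sqrt(30)                   # theoretical standard error of Xbar
c(mu, se)
pnorm(3, mu, se, lower.tail = FALSE)   # CLT approximation to P(Xbar > 3)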
68 | 69 | qqnorm(Xbar) 70 | qqline(Xbar) 71 | 72 | mean(Xbar) 73 | sd(Xbar) 74 | sum(Xbar > 3)/1000 75 | #alternatively 76 | mean(Xbar > 3) 77 | 78 | #---------------------------------------------- 79 | #Example 4.11 R Note 80 | dbinom(25, 120, .3) 81 | 82 | pbinom(25, 120, .3) 83 | -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distributions" 3 | author: "Chihara-Hesterberg" 4 | date: "July 20, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Example 4.2: Sampling distribution from Exp(1/15) 13 | ```{r} 14 | Xbar <- numeric(1000) 15 | #set.seed(300) 16 | for (i in 1:1000) 17 | { 18 | x <- rexp(100, rate = 1/15) 19 | Xbar[i] <- mean(x) 20 | } 21 | 22 | hist(Xbar, main="Simulated sampling distribution", xlab="means") 23 | 24 | qqnorm(Xbar) 25 | qqline(Xbar) 26 | 27 | mean(Xbar) 28 | sd(Xbar) 29 | ``` 30 | 31 | ###Example 4.3: Sampling Dist from Unif[0,1] 32 | ```{r} 33 | maxY <- numeric(1000) 34 | #set.seed(100) 35 | for (i in 1:1000) 36 | { 37 | y <- runif(12) #draw random sample of size 12 38 | maxY[i] <- max(y) #find max, save in position i 39 | } 40 | 41 | hist(maxY, main = "", xlab = "maximums") 42 | ``` 43 | 44 | To create a histogram with a density curve imposed, 45 | scale bars to have area one with the `prob=TRUE` argument. 46 | The `curve()` command can then be used to add the density curve. 47 | 48 | ```{r} 49 | hist(maxY, main = "", xlab = "maximums", prob = TRUE) 50 | curve(12*x^{11}, col = "blue", add = TRUE) 51 | ``` 52 | 53 | ###Example 4.6 Sum of Poisson random variables 54 | ```{r} 55 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5) 56 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12) 57 | W <- X + Y 58 | 59 | hist(W, prob = TRUE) #prob = TRUE, scales hist to 1 60 | lines(2:35, dpois(2:35, 17), type = "b") #overlay pmf for Pois(17) 61 | 62 | mean(W) 63 | var(W) 64 | ``` 65 | 66 | ###Example 4.7 67 | Sampling distribution simulation 68 | Sample of size 30 from gamma r=5, lambda=2 69 | 70 | ```{r} 71 | #set.seed(10) 72 | Xbar <- numeric(1000) 73 | for (i in 1:1000) 74 | { 75 | x <- rgamma(30, shape = 5, rate = 2) 76 | Xbar[i] <- mean(x) 77 | } 78 | 79 | hist(Xbar, main = "Distribution of means") 80 | 81 | qqnorm(Xbar) 82 | qqline(Xbar) 83 | 84 | mean(Xbar) 85 | sd(Xbar) 86 | sum(Xbar > 3)/1000 87 | #alternatively 88 | mean(Xbar > 3) 89 | ``` 90 | 91 | ###Example 4.11 R Note 92 | 93 | ```{r} 94 | dbinom(25, 120, .3) 95 | 96 | pbinom(25, 120, .3) 97 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_Exer.R: -------------------------------------------------------------------------------- 1 | ##Chapter Sampling Distributions 2 | ##Exercises 3 | ##R Scripts 4 | ## 5 | ##----------------------------- 6 | #Exercise 4 7 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20) 8 | N <- 10^4 9 | Xbar <- numeric(N) 10 | 11 | for (i in 1:N) 12 | { 13 | samp <- sample(pop, 4, replace = TRUE) 14 | Xbar[i] <- mean(samp) 15 | } 16 | 17 | hist(Xbar) 18 | mean(Xbar < 11) 19 | 20 | #---------------------------------------------- 21 | #Exercise 4.6 22 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 23 | N <- 10^4 24 | phat <- numeric(N) 25 | n <- 25 26 | 27 | for (i in 1:N) 28 | { 29 | samp <- 
sample(Recidivism$Recid, n) 30 | phat[i] <- mean(samp == "Yes") 31 | } 32 | 33 | #c) change n <- 250 34 | 35 | #---------------------------------------------------------------------------- 36 | #Exercise 19 37 | ## X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2) 38 | ## W = mean(X)+mean(Y) 39 | W <- numeric(1000) 40 | set.seed(0) 41 | for (i in 1:1000) 42 | { 43 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2) 44 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2) 45 | W[i] <- mean(x) + mean(y) #save sum of means 46 | } 47 | 48 | hist(W) 49 | 50 | mean(W < 40) 51 | 52 | 53 | #-------------------- 54 | #Exercise 22 55 | 56 | X <- runif(1000, 40, 60) 57 | Y <- runif(1000, 45, 80) 58 | 59 | total <- X + Y 60 | 61 | hist(total) 62 | 63 | #---------------- 64 | #33 Finite pop simulation 65 | 66 | N <- 400 # population size 67 | n <- 5 # sample size 68 | 69 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from 70 | # Exp(1/10) 71 | hist(finpop) # distribution of your finite pop. 72 | mean(finpop) # mean (mu) of your pop. 73 | sd(finpop) # stdev (sigma) of your pop. 74 | sd(finpop)/sqrt(n) # theoretical standard error of sampling 75 | # dist. of mean(x), with replacement 76 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement 77 | 78 | Xbar <- numeric(1000) 79 | for (i in 1:1000) 80 | { 81 | x <- sample(finpop, n) # Random sample of size n (w/o replacement) 82 | Xbar[i] <- mean(x) # Find mean of sample, store in Xbar 83 | } 84 | hist(Xbar) 85 | 86 | qqnorm(Xbar) 87 | qqline(Xbar) 88 | 89 | mean(Xbar) 90 | sd(Xbar) # estimated standard error of sampling 91 | # distribution 92 | 93 | #---------------------------- 94 | #34 95 | W <- numeric(1000) 96 | for (i in 1:1000) 97 | { 98 | x <- rnorm(20, 25, 7) 99 | W[i] <- var(x) 100 | } 101 | mean(W) 102 | var(W) 103 | hist(W) 104 | 105 | qqnorm(W) 106 | qqline(W) 107 | -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distribution-Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 20, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Exercise 4 13 | ```{r} 14 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20) 15 | N <- 10^4 16 | Xbar <- numeric(N) 17 | 18 | for (i in 1:N) 19 | { 20 | samp <- sample(pop, 4, replace = TRUE) 21 | Xbar[i] <- mean(samp) 22 | } 23 | 24 | hist(Xbar) 25 | mean(Xbar < 11) 26 | ``` 27 | 28 | ###Exercise 6 29 | ```{r} 30 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 31 | N <- 10^4 32 | phat <- numeric(N) 33 | n <- 25 34 | 35 | for (i in 1:N) 36 | { 37 | samp <- sample(Recidivism$Recid, n) 38 | phat[i] <- mean(samp == "Yes") 39 | } 40 | 41 | #c) change n <- 250 42 | ``` 43 | 44 | ###Exercise 19 45 | X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2) 46 | W = mean(X) + mean(Y) 47 | ```{r} 48 | 49 | W <- numeric(1000) 50 | set.seed(0) 51 | for (i in 1:1000) 52 | { 53 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2) 54 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2) 55 | W[i] <- mean(x) + mean(y) #save sum of means 56 | } 57 | 58 | hist(W) 59 | 60 | mean(W < 40) 61 | ``` 62 | 63 | 64 | ###Exercise 22 65 | ```{r} 66 | X <- runif(1000, 40, 60) 67 | Y <- runif(1000, 45, 80) 68 | 69 | total <- X + Y 70 | 71 | hist(total) 72 | ``` 73 | 74 | ###Exercise 33 75 | Finite population 
simulation 76 | 77 | ```{r} 78 | N <- 400 # population size 79 | n <- 5 # sample size 80 | 81 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from 82 | # Exp(1/10) 83 | hist(finpop) # distribution of your finite pop. 84 | mean(finpop) # mean (mu) of your pop. 85 | sd(finpop) # stdev (sigma) of your pop. 86 | sd(finpop)/sqrt(n) # theoretical standard error of sampling 87 | # dist. of mean(x), with replacement 88 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement 89 | 90 | Xbar <- numeric(1000) 91 | for (i in 1:1000) 92 | { 93 | x <- sample(finpop, n) # Random sample of size n (w/o replacement) 94 | Xbar[i] <- mean(x) # Find mean of sample, store in Xbar 95 | } 96 | hist(Xbar) 97 | 98 | qqnorm(Xbar) 99 | qqline(Xbar) 100 | 101 | mean(Xbar) 102 | sd(Xbar) # estimated standard error of sampling 103 | # distribution 104 | ``` 105 | 106 | ###Exercise 34 107 | ```{r} 108 | W <- numeric(1000) 109 | for (i in 1:1000) 110 | { 111 | x <- rnorm(20, 25, 7) 112 | W[i] <- var(x) 113 | } 114 | mean(W) 115 | var(W) 116 | hist(W) 117 | 118 | qqnorm(W) 119 | qqline(W) 120 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distribution-Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 4 15 | ```{r} 16 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20) 17 | N <- 10^4 18 | Xbar <- numeric(N) 19 | 20 | for (i in 1:N) 21 | { 22 | samp <- sample(pop, 4, replace = TRUE) 23 | Xbar[i] <- mean(samp) 24 | } 25 | 26 | ggplot() + geom_histogram(aes(Xbar), bins = 10) 27 | 28 | mean(Xbar < 11) 29 | ``` 30 | 31 | ###Exercise 6 32 | ```{r} 33 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 34 | N <- 10^4 35 | phat <- numeric(N) 36 | n <- 25 37 | 38 | for (i in 1:N) 39 | { 40 | samp <- sample(Recidivism$Recid, n) 41 | phat[i] <- mean(samp == "Yes") 42 | } 43 | 44 | #c) change n <- 250 45 | ``` 46 | 47 | ###Exercise 19 48 | X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2) 49 | W = mean(X) + mean(Y) 50 | ```{r} 51 | 52 | W <- numeric(1000) 53 | set.seed(0) 54 | for (i in 1:1000) 55 | { 56 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2) 57 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2) 58 | W[i] <- mean(x) + mean(y) #save sum of means 59 | } 60 | 61 | ggplot() + geom_histogram(aes(W), bins = 12) 62 | 63 | mean(W < 40) 64 | ``` 65 | 66 | 67 | ###Exercise 22 68 | ```{r} 69 | X <- runif(1000, 40, 60) 70 | Y <- runif(1000, 45, 80) 71 | 72 | total <- X + Y 73 | 74 | ggplot() + geom_histogram(aes(total), bins = 12) 75 | ``` 76 | 77 | ###Exercise 33 78 | Finite population simulation 79 | 80 | ```{r} 81 | N <- 400 # population size 82 | n <- 5 # sample size 83 | 84 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from 85 | # Exp(1/10) 86 | ggplot() + geom_histogram(aes(finpop), bins = 12) # distribution of your finite pop. 87 | 88 | mean(finpop) # mean (mu) of your pop. 89 | sd(finpop) # stdev (sigma) of your pop. 90 | sd(finpop)/sqrt(n) # theoretical standard error of sampling 91 | # dist. 
of mean(x), with replacement 92 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement 93 | 94 | Xbar <- numeric(1000) 95 | for (i in 1:1000) 96 | { 97 | x <- sample(finpop, n) # Random sample of size n (w/o replacement) 98 | Xbar[i] <- mean(x) # Find mean of sample, store in my.means 99 | } 100 | 101 | ggplot() + geom_histogram(aes(Xbar), bins = 12) 102 | 103 | df <- data.frame(Xbar) 104 | ggplot(df, aes(sample=Xbar)) + stat_qq() + stat_qq_line() 105 | 106 | mean(Xbar) 107 | sd(Xbar) # estimated standard error of sampling 108 | # distribution 109 | ``` 110 | 111 | ###Exercise 34 112 | ```{r} 113 | W <- numeric(1000) 114 | for (i in 1:1000) 115 | { 116 | x <- rnorm(20, 25, 7) 117 | W[i] <- var(x) 118 | } 119 | mean(W) 120 | var(W) 121 | 122 | ggplot() + geom_histogram(aes(W), bins = 10) 123 | 124 | df <- data.frame(W) 125 | ggplot(df, aes(sample = W)) + stat_qq() + stat_qq_line() 126 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distributions" 3 | author: "Chihara-Hesterberg" 4 | date: "November 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(dplyr) 11 | library(ggplot2) 12 | ``` 13 | 14 | ###Example 4.2: Sampling distribution from Exp(1/15) 15 | ```{r} 16 | Xbar <- numeric(1000) 17 | #set.seed(300) 18 | for (i in 1:1000) 19 | { 20 | x <- rexp(100, rate = 1/15) 21 | Xbar[i] <- mean(x) 22 | } 23 | 24 | ggplot() + geom_histogram(aes(Xbar), bins = 15) + xlab("means") 25 | 26 | df <- data.frame(Xbar) 27 | ggplot(df, aes(sample=Xbar)) + stat_qq() + stat_qq_line() 28 | 29 | mean(Xbar) 30 | sd(Xbar) 31 | ``` 32 | 33 | ###Example 4.3: Sampling Dist from Unif[0,1] 34 | ```{r} 35 | maxY <- numeric(1000) 36 | #set.seed(100) 37 | for (i in 1:1000) 38 | { 39 | y <- runif(12) #draw random sample of size 12 40 | maxY[i] <- max(y) #find max, save in position i 41 | } 42 | 43 | ggplot() + geom_histogram(aes(maxY), binwidth=.05, center=.975) + xlab("maximums") 44 | 45 | ``` 46 | 47 | To create a histogram with a density curve imposed, we will need to create a data frame that holds the 'maxY' variable. We also create a function for the density curve $f(x)=12x^{11}$. 
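(This density comes from the CDF of the maximum: for a random sample $Y_1, \ldots, Y_{12}$ from Unif[0,1], $P(\max Y_i \le x) = x^{12}$ for $0 \le x \le 1$, and differentiating gives $f(x) = 12x^{11}$.)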
48 | 49 | 50 | ```{r} 51 | df <- data.frame(maxY) 52 | myfun <- function(x){12*x^{11}} 53 | 54 | ggplot(df) + geom_histogram(aes(maxY, y = stat(density)), binwidth=.05, center=.975) + xlab("maximums") + stat_function(fun = myfun) 55 | ``` 56 | 57 | ###Example 4.6 Sum of Poisson random variables 58 | ```{r} 59 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5) 60 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12) 61 | W <- X + Y 62 | 63 | df1 <- data.frame(W) 64 | df2 <- data.frame(x=2:35, y = dpois(2:35,17)) 65 | ggplot(df1, aes(W)) + geom_histogram(aes(y=stat(density)), bins=12) + geom_line(data=df2, aes(x=x, y=y), colour = "red") 66 | 67 | mean(W) 68 | var(W) 69 | ``` 70 | 71 | ###Example 4.7 72 | Sampling distribution simulation 73 | Sample of size 30 from gamma r=5, lambda=2 74 | 75 | ```{r} 76 | #set.seed(10) 77 | Xbar <- numeric(1000) 78 | for (i in 1:1000) 79 | { 80 | x <- rgamma(30, shape = 5, rate = 2) 81 | Xbar[i] <- mean(x) 82 | } 83 | 84 | ggplot() + geom_histogram(aes(Xbar), bins=15) + labs(title = "Distribution of means") 85 | 86 | ggplot() + stat_qq(aes(sample = Xbar)) 87 | 88 | #If you want a line, then 89 | df <- data.frame(Xbar) 90 | ggplot(df, aes(sample = Xbar)) + stat_qq() + stat_qq_line() 91 | 92 | mean(Xbar) 93 | sd(Xbar) 94 | sum(Xbar > 3)/1000 95 | #alternatively 96 | mean(Xbar > 3) 97 | ``` 98 | 99 | ###Example 4.11 R Note 100 | 101 | ```{r} 102 | dbinom(25, 120, .3) 103 | 104 | pbinom(25, 120, .3) 105 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap05Bootstrap_Exer.R: -------------------------------------------------------------------------------- 1 | ##Chapter 5 The Bootstrap 2 | 3 | #Exercises 4 | 5 | #------------------------------------------------ 6 | #10 (medians) 7 | ## 8 | ne <- 10000 # n even 9 | no <- 10001 # n odd 10 | 11 | wwe <- rnorm(ne) # draw random sample of size ne 12 | wwo <- rnorm(no) # draw random sample of size no 13 | 14 | N <- 10^4 15 | even.boot <- numeric(N) #save space 16 | odd.boot <- numeric(N) 17 | set.seed(10) 18 | for (i in 1:N) 19 | { 20 | x.even <- sample(wwe, ne, replace = TRUE) 21 | x.odd <- sample(wwo, no, replace = TRUE) 22 | even.boot[i] <- median(x.even) 23 | odd.boot[i] <- median(x.odd) 24 | } 25 | 26 | par(mfrow = c(2, 1)) 27 | hist(even.boot, xlim = c(-1, 1)) #set x range to be 28 | hist(odd.boot, xlim = c(-1, 1)) #same in both plots 29 | par(mfrow = c(1, 1)) #reset to original 30 | 31 | #----------------------------------- 32 | #Exercise 20 33 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 34 | N <- 10^5 35 | result <- numeric(N) 36 | for (i in 1:N) 37 | { 38 | index <- sample(12, replace = TRUE) 39 | Dive.boot <- Diving2017[index, ] 40 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal) 41 | } 42 | 43 | hist(result) 44 | quantile(result, c(0.025, 0.975)) 45 | -------------------------------------------------------------------------------- /Edition2/R/Chap05Bootstrap_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 5 Bootstrap - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Exercise 10 13 | 14 | Simulate bootstrap for medians 15 | ```{r} 16 | ne <- 10000 # n even 17 | no <- 10001 # n odd 18 | 19 | wwe <- rnorm(ne) # draw random sample of size ne 20 | wwo <- 
rnorm(no) # draw random sample of size no 21 | 22 | N <- 10^4 23 | even.boot <- numeric(N) #save space 24 | odd.boot <- numeric(N) 25 | set.seed(10) 26 | for (i in 1:N) 27 | { 28 | x.even <- sample(wwe, ne, replace = TRUE) 29 | x.odd <- sample(wwo, no, replace = TRUE) 30 | even.boot[i] <- median(x.even) 31 | odd.boot[i] <- median(x.odd) 32 | } 33 | 34 | par(mfrow = c(2, 1)) 35 | hist(even.boot, xlim = c(-1, 1)) #set x range to be 36 | hist(odd.boot, xlim = c(-1, 1)) #same in both plots 37 | par(mfrow = c(1, 1)) #reset to original 38 | ``` 39 | 40 | ###Exercise 20 41 | ```{r} 42 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 43 | N <- 10^5 44 | result <- numeric(N) 45 | for (i in 1:N) 46 | { 47 | index <- sample(12, replace = TRUE) 48 | Dive.boot <- Diving2017[index, ] 49 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal) 50 | } 51 | 52 | hist(result) 53 | quantile(result, c(0.025, 0.975)) 54 | ``` 55 | -------------------------------------------------------------------------------- /Edition2/R/Chap05Bootstrap_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 5 Bootstrap - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 10 15 | 16 | Simulate bootstrap for medians 17 | ```{r} 18 | ne <- 10000 # n even 19 | no <- 10001 # n odd 20 | 21 | wwe <- rnorm(ne) # draw random sample of size ne 22 | wwo <- rnorm(no) # draw random sample of size no 23 | 24 | N <- 10^4 25 | even.boot <- numeric(N) #save space 26 | odd.boot <- numeric(N) 27 | #set.seed(10) 28 | for (i in 1:N) 29 | { 30 | x.even <- sample(wwe, ne, replace = TRUE) 31 | x.odd <- sample(wwo, no, replace = TRUE) 32 | even.boot[i] <- median(x.even) 33 | odd.boot[i] <- median(x.odd) 34 | } 35 | 36 | range(even.boot) 37 | range(odd.boot) 38 | p1 <- ggplot() + geom_histogram(aes(even.boot), breaks = seq(-.06, .04, by = .005)) 39 | p2 <- ggplot() + geom_histogram(aes(odd.boot), breaks = seq(-.06, .04, by = .005)) 40 | 41 | library(gridExtra) 42 | grid.arrange(p1,p2) 43 | 44 | ``` 45 | 46 | ###Exercise 20 47 | ```{r} 48 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 49 | N <- 10^5 50 | result <- numeric(N) 51 | for (i in 1:N) 52 | { 53 | index <- sample(12, replace = TRUE) 54 | Dive.boot <- Diving2017[index, ] 55 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal) 56 | } 57 | 58 | ggplot() + geom_histogram(aes(result), bins = 12) 59 | 60 | quantile(result, c(0.025, 0.975)) 61 | ``` 62 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals.R: -------------------------------------------------------------------------------- 1 | #Chapter 7 More Confidence Intervals 2 | 3 | #----------------------------------------------------- 4 | #Section 7.1.1 CI for normal with known sigma 5 | 6 | #set.seed(1) 7 | counter <- 0 # set counter to 0 8 | plot(x = c(22, 28), y = c(1, 100), type = "n", 9 | xlab = "", ylab = "") 10 | abline(v = 25, col="red") # vertical line at mu 11 | for (i in 1:1000) 12 | { 13 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 14 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 15 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 16 | if (L < 25 && 25 < U) # 
check to see if 25 is in interval 17 | counter <- counter + 1 # increase counter by 1 18 | if (i <= 100) #plot first 100 intervals 19 | segments(L, i, U, i) 20 | } 21 | 22 | abline(v = 25, col = "red") #vertical line at mu 23 | 24 | counter/1000 # proportion of times interval contains mu. 25 | 26 | #--------------------------------------------------------------- 27 | # Section 7.1.2 28 | # Simulate distribution of t statistic 29 | N <- 10^4 30 | w <- numeric(N) 31 | n <- 15 #sample size 32 | for (i in 1:N) 33 | { 34 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2) 35 | xbar <- mean(x) 36 | s <- sd(x) 37 | w[i] <- (xbar-25) / (s/sqrt(n)) 38 | } 39 | 40 | hist(w) 41 | 42 | qqnorm(w, pch = ".") 43 | abline(0, 1, col = 2) # y = x line 44 | 45 | #pch = "." is point character. This option says to use . for the points. 46 | 47 | #---------------------------------------------------------- 48 | # Example 7.7 Simulation 95% confidence interval from 49 | # skewed gamma distribution 50 | # set.seed(0) 51 | 52 | tooLow <- 0 #set counter to 0 53 | tooHigh <- 0 #set counter to 0 54 | n <- 20 # sample size 55 | N <- 10^5 56 | for (i in 1:N) 57 | { 58 | x <- rgamma(n, shape=5, rate=2) 59 | xbar <- mean(x) 60 | s <- sd(x) 61 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n) 62 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n) 63 | if (upper < 5/2) tooLow <- tooLow + 1 64 | if (lower > 5/2) tooHigh <- tooHigh + 1 65 | } 66 | tooLow/N 67 | tooHigh/N 68 | 69 | 70 | 71 | #---------------------------------------- 72 | # Example 7.21 One sample bootstrap t confidence interval 73 | 74 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 75 | Arsenic <- subset(Bangladesh, select = Arsenic, drop = TRUE) 76 | 77 | xbar <- mean(Arsenic) 78 | N <- 10^4 79 | n <- length(Arsenic) 80 | Tstar <- numeric(N) 81 | #set.seed(100) 82 | for (i in 1:N) 83 | { 84 | x <- sample(Arsenic, size = n, replace = T) 85 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n)) 86 | } 87 | 88 | quantile(Tstar, c(0.025, 0.975)) 89 | 90 | hist(Tstar, xlab = "T*", main = "Bootstrap distribution of T*") 91 | 92 | dev.new() 93 | qqnorm(Tstar) 94 | qqline(Tstar) 95 | 96 | #------------------------------------------------------- 97 | # Example 7.22 Verizon 98 | # 2-Sample bootstrap t confidence interval 99 | 100 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 101 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE) 102 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE) 103 | 104 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC) 105 | nx <- length(Time.ILEC) #nx=1664 106 | ny <- length(Time.CLEC) #ny=23 107 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny) 108 | 109 | N <- 10000 110 | Tstar <- numeric(N) 111 | set.seed(0) 112 | for(i in 1:N) 113 | { 114 | bootx <- sample(Time.ILEC, nx, replace = TRUE) 115 | booty <- sample(Time.CLEC, ny, replace = TRUE) 116 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) / 117 | sqrt(var(bootx)/nx + var(booty)/ny) 118 | } 119 | 120 | thetahat - quantile(Tstar, c(.975, .025)) * SE 121 | 122 | t.test(Time.ILEC, Time.CLEC)$conf 123 | 124 | #---------------------------------------------------------------- 125 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Section 7.1.1 13 | CI for normal with known sigma 14 | ```{r, out.width="100%"} 15 | #set.seed(1) 16 | counter <- 0 # set counter to 0 17 | plot(x = c(22, 28), y = c(1, 100), type = "n", 18 | xlab = "", ylab = "") 19 | abline(v = 25, col="red") # vertical line at mu 20 | for (i in 1:1000) 21 | { 22 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 23 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 24 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 25 | if (L < 25 && 25 < U) # check to see if 25 is in interval 26 | counter <- counter + 1 # increase counter by 1 27 | if (i <= 100) #plot first 100 intervals 28 | segments(L, i, U, i) 29 | } 30 | 31 | abline(v = 25, col = "red") #vertical line at mu 32 | 33 | counter/1000 # proportion of times interval contains mu. 34 | ``` 35 | 36 | ### Section 7.1.2 37 | Simulate distribution of t statistic 38 | ```{r} 39 | N <- 10^4 40 | w <- numeric(N) 41 | n <- 15 #sample size 42 | for (i in 1:N) 43 | { 44 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2) 45 | xbar <- mean(x) 46 | s <- sd(x) 47 | w[i] <- (xbar-25) / (s/sqrt(n)) 48 | } 49 | 50 | hist(w) 51 | 52 | qqnorm(w, pch = ".") 53 | abline(0, 1, col = 2) # y = x line 54 | ``` 55 | The `pch = "."` argument in the `qqnorm` command sets the point character. Here, the option says to use . for the points. 56 | 57 | 58 | ### Example 7.7 59 | Simulation 95% confidence interval from 60 | skewed gamma distribution 61 | ```{r} 62 | # set.seed(0) 63 | 64 | tooLow <- 0 #set counter to 0 65 | tooHigh <- 0 #set counter to 0 66 | n <- 20 # sample size 67 | N <- 10^5 68 | for (i in 1:N) 69 | { 70 | x <- rgamma(n, shape=5, rate=2) 71 | xbar <- mean(x) 72 | s <- sd(x) 73 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n) 74 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n) 75 | if (upper < 5/2) tooLow <- tooLow + 1 76 | if (lower > 5/2) tooHigh <- tooHigh + 1 77 | } 78 | tooLow/N 79 | tooHigh/N 80 | ``` 81 | 82 | 83 | ### Example 7.21 84 | One sample bootstrap t confidence interval 85 | 86 | ```{r} 87 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 88 | Arsenic <- Bangladesh$Arsenic 89 | #Alternatively 90 | #Arsenic <- subset(Bangladesh, select = Arsenic, drop = TRUE) 91 | 92 | xbar <- mean(Arsenic) 93 | N <- 10^4 94 | n <- length(Arsenic) 95 | Tstar <- numeric(N) 96 | #set.seed(100) 97 | for (i in 1:N) 98 | { 99 | x <- sample(Arsenic, size = n, replace = T) 100 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n)) 101 | } 102 | 103 | quantile(Tstar, c(0.025, 0.975)) 104 | 105 | hist(Tstar, xlab = "T*", main = "Bootstrap distribution of T*") 106 | 107 | qqnorm(Tstar) 108 | qqline(Tstar) 109 | ``` 110 | 111 | ### Example 7.22 Verizon 112 | 2-Sample bootstrap t confidence interval 113 | 114 | ```{r} 115 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 116 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE) 117 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE) 118 | 119 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC) 120 | nx <- length(Time.ILEC) #nx=1664 121 | ny <- length(Time.CLEC) #ny=23 122 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny) 123 | 124 | N <- 10000 125 | Tstar <- numeric(N) 126 | set.seed(0) 127 | for(i in 1:N) 128 | { 129 | bootx <- sample(Time.ILEC, nx, replace = TRUE) 130 | booty <- sample(Time.CLEC, ny, replace = TRUE) 131 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) / 132 | sqrt(var(bootx)/nx + var(booty)/ny) 133 | } 134 | 135 | thetahat - quantile(Tstar, c(.975, .025)) * SE 136 | 137 | t.test(Time.ILEC, Time.CLEC)$conf 138 | ``` 139 | 140 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 7: More confidence intervals 2 | #Exercises 3 | 4 | #Exercise 9 5 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 6 | mu <- mean(FlightDelays$Delay) 7 | 8 | counter <- 0 9 | plot(c(-20, 100), c(1, 100), type = "n") 10 | 11 | for (i in 1:1000) 12 | { 13 | x <- sample(FlightDelays$Delay, 30, replace = FALSE) 14 | L <- t.test(x)$conf.int[1] 15 | U <- t.test(x)$conf.int[2] 16 | 17 | if (L < mu && mu < U) 18 | counter <- counter + 1 19 | if (i <= 100) 20 | segments(L, i, U, i) 21 | } 22 | 23 | abline(v = mu, col = "red") 24 | counter/1000 25 | 26 | #------------------------------------------------------- 27 | #Exercise 22 28 | #Simulation to compare pooled/unpooled t-confidence intervals 29 | 30 | pooled.count <- 0 31 | unpooled.count <- 0 32 | 33 | m <- 20 34 | n <- 10 35 | 36 | B <- 10000 37 | for (i in 1:B) 38 | { 39 | x <- rnorm(m, 8,10) 40 | y <- rnorm(n, 3, 15) 41 | 42 | CI.pooled <- t.test(x,y,var.equal=T)$conf 43 | CI.unpooled <- t.test(x,y)$conf 44 | 45 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2]) 46 | pooled.count <- pooled.count + 1 47 | 48 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2]) 49 | unpooled.count <- unpooled.count + 1 50 | } 51 | 52 | pooled.count/B 53 | 54 | unpooled.count/B 55 | 56 | #----------------- 57 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Exercise 9 13 | ```{r} 14 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 15 | mu <- mean(FlightDelays$Delay) 16 | 17 | counter <- 0 18 | plot(c(-20, 100), c(1, 100), type = "n") 19 | 20 | for (i in 1:1000) 21 | { 22 | x <- sample(FlightDelays$Delay, 30, replace = FALSE) 23 | L <- t.test(x)$conf.int[1] 24 | U <- t.test(x)$conf.int[2] 25 | 26 | if (L < mu && mu < U) 27 | counter <- counter + 1 28 | if (i <= 100) 29 | segments(L, i, U, i) 30 | } 31 | 32 | abline(v = mu, col = "red") 33 | counter/1000 34 | ``` 35 | 36 | ###Exercise 22 37 | Simulation to compare pooled/unpooled t-confidence intervals 38 | ```{r} 39 | pooled.count <- 0 40 | unpooled.count <- 0 41 | 42 | m <- 20 43 | n <- 10 44 | 45 | N <- 10000 46 | for (i in 1:N) 47 | { 48 | x <- rnorm(m, 8,10) 49 | y <- rnorm(n, 3, 15) 50 | 51 | CI.pooled <- t.test(x,y,var.equal=T)$conf 52 | CI.unpooled <- t.test(x,y)$conf 53 | 54 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2]) 55 | pooled.count <- pooled.count + 1 56 | 57 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2]) 58 | unpooled.count <- unpooled.count + 1 59 | } 60 | 61 | pooled.count/N 62 | 63 | unpooled.count/N 64 | 65 | ``` 66 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 9 15 | ```{r} 16 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 17 | mu <- mean(FlightDelays$Delay) 18 | 19 | counter <- 0 20 | 21 | df <- data.frame(x=c(-20,100), y = c(1,100)) 22 | p <- ggplot(df, aes(x = x, y = y)) + geom_vline(xintercept=mu, colour = "red") 23 | 24 | for (i in 1:1000) 25 | { 26 | x <- sample(FlightDelays$Delay, 30, replace = FALSE) 27 | L <- t.test(x)$conf.int[1] 28 | U <- t.test(x)$conf.int[2] 29 | 30 | if (L < mu && mu < U) 31 | counter <- counter + 1 32 | if (i <= 100) 33 | p <- p + annotate("segment", x = L, xend=U, y = i, yend=i ) 34 | 35 | } 36 | 37 | 38 | print(p) 39 | 40 | counter/1000 41 | ``` 42 | 43 | ###Exercise 22 44 | Simulation to compare pooled/unpooled t-confidence intervals 45 | ```{r} 46 | pooled.count <- 0 47 | unpooled.count <- 0 48 | 49 | m <- 20 50 | n <- 10 51 | 52 | N <- 10000 53 | for (i in 1:N) 54 | { 55 | x <- rnorm(m, 8,10) 56 | y <- rnorm(n, 3, 15) 57 | 58 | CI.pooled <- t.test(x,y,var.equal=T)$conf 59 | CI.unpooled <- t.test(x,y)$conf 60 | 61 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2]) 62 | pooled.count <- pooled.count + 1 63 | 64 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2]) 65 | unpooled.count <- unpooled.count + 1 66 | } 67 | 68 | pooled.count/N 69 | 70 | unpooled.count/N 71 | 72 | ``` 73 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Section 7.1.1 15 | CI for normal with known sigma 16 | ```{r, out.width="100%"} 17 | #set.seed(1) 18 | counter <- 0 # set counter to 0 19 | df <- data.frame(x=c(22,28), y = c(1,100)) 20 | p <- ggplot(df, aes(x=x, y = y)) + geom_vline(xintercept=25, colour = "red") 21 | 22 | for (i in 1:1000) 23 | { 24 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 25 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 26 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 27 | if (L < 25 && 25 < U) # check to see if 25 is in interval 28 | counter <- counter + 1 # increase counter by 1 29 | if (i <= 100) #plot first 100 intervals 30 | p <- p + annotate("segment", x = L, xend=U, y = i, yend=i ) 31 | } 32 | 33 | print(p) 34 | 35 | counter/1000 # proportion of times interval contains mu.
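# (Added note) counter/1000 is a Monte Carlo estimate of the true coverage
# probability; treating each of the 1000 intervals as an independent
# Bernoulli trial (an assumption), a rough standard error for it is:
sqrt((counter/1000) * (1 - counter/1000) / 1000)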
36 | ``` 37 | 38 | ### Section 7.1.2 39 | Simulate distribution of t statistic 40 | ```{r} 41 | N <- 10^4 42 | w <- numeric(N) 43 | n <- 15 #sample size 44 | for (i in 1:N) 45 | { 46 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2) 47 | xbar <- mean(x) 48 | s <- sd(x) 49 | w[i] <- (xbar-25) / (s/sqrt(n)) 50 | } 51 | 52 | ggplot() + geom_histogram(aes(w), bins = 12) 53 | 54 | ggplot() + stat_qq(aes(sample = w)) + geom_abline(intercept = 0, slope = 1, colour = "red") 55 | 56 | ``` 57 | 58 | 59 | ### Example 7.7 60 | Simulation 95% confidence interval from 61 | skewed gamma distribution 62 | ```{r} 63 | # set.seed(0) 64 | 65 | tooLow <- 0 #set counter to 0 66 | tooHigh <- 0 #set counter to 0 67 | n <- 20 # sample size 68 | N <- 10^5 69 | for (i in 1:N) 70 | { 71 | x <- rgamma(n, shape=5, rate=2) 72 | xbar <- mean(x) 73 | s <- sd(x) 74 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n) 75 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n) 76 | if (upper < 5/2) tooLow <- tooLow + 1 77 | if (lower > 5/2) tooHigh <- tooHigh + 1 78 | } 79 | tooLow/N 80 | tooHigh/N 81 | ``` 82 | 83 | 84 | ### Example 7.21 85 | One sample bootstrap t confidence interval 86 | 87 | ```{r} 88 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 89 | Arsenic <- pull(Bangladesh, Arsenic) 90 | #Alternatively: Arsenic <- Bangladesh$Arsenic 91 | 92 | xbar <- mean(Arsenic) 93 | N <- 10^4 94 | n <- length(Arsenic) 95 | Tstar <- numeric(N) 96 | #set.seed(100) 97 | for (i in 1:N) 98 | { 99 | x <- sample(Arsenic, size = n, replace = T) 100 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n)) 101 | } 102 | 103 | quantile(Tstar, c(0.025, 0.975)) 104 | 105 | ggplot() + geom_histogram(aes(Tstar), bins = 12) + labs(x= "T*", title = "Bootstrap distribution of T*") 106 | 107 | df <- data.frame(Tstar) 108 | ggplot(df, aes(sample = Tstar)) + stat_qq() + stat_qq_line() 109 | ``` 110 | 111 | ### Example 7.22 Verizon 112 | 2-Sample bootstrap t confidence interval 113 | 114 | ```{r} 115 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 116 | 117 | Time.ILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time) 118 | Time.CLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time) 119 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC) 120 | nx <- length(Time.ILEC) #nx=1664 121 | ny <- length(Time.CLEC) #ny=23 122 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny) 123 | 124 | N <- 10000 125 | Tstar <- numeric(N) 126 | set.seed(0) 127 | for(i in 1:N) 128 | { 129 | bootx <- sample(Time.ILEC, nx, replace = TRUE) 130 | booty <- sample(Time.CLEC, ny, replace = TRUE) 131 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) / 132 | sqrt(var(bootx)/nx + var(booty)/ny) 133 | } 134 | 135 | thetahat - quantile(Tstar, c(.975, .025)) * SE 136 | 137 | t.test(Time.ILEC, Time.CLEC)$conf 138 | ``` 139 | 140 | -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests.R: -------------------------------------------------------------------------------- 1 | #Chap08MoreHypTests 2 | 3 | #Section 8.2 4 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 5 | 6 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater") 7 | 8 | Arsenic <- Bangladesh$Arsenic 9 | N <- 10^5 10 | 11 | observedT <- t.test(Arsenic, mu = 100)$statistic 12 | xbar <- mean(Arsenic) 13 | n <- length(Arsenic) 14 | Tstar <- numeric(N) 15 | for (i in 1:N) 16 | { 17 | bootx <- sample(Arsenic, n , replace = TRUE) 18 | Tstar[i] <- (mean(bootx) - 
xbar)/(sd(bootx)/sqrt(n)) 19 | } 20 | 21 | hist(Tstar) 22 | abline(v = observedT) 23 | 24 | (sum(Tstar >= observedT) + 1)/(N + 1) 25 | -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Section 8.2 13 | ```{r} 14 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 15 | 16 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater") 17 | 18 | Arsenic <- Bangladesh$Arsenic 19 | N <- 10^5 20 | 21 | observedT <- t.test(Arsenic, mu = 100)$statistic 22 | xbar <- mean(Arsenic) 23 | n <- length(Arsenic) 24 | Tstar <- numeric(N) 25 | for (i in 1:N) 26 | { 27 | bootx <- sample(Arsenic, n , replace = TRUE) 28 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n)) 29 | } 30 | 31 | hist(Tstar) 32 | abline(v = observedT) 33 | 34 | (sum(Tstar >= observedT) + 1)/(N + 1) 35 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 8 More Hypothesis Tests 2 | #Exercises 3 | 4 | #Exercise 13 5 | m <- 30 6 | n <- 30 7 | sigma1 <- 5 8 | sigma2 <- 5 9 | 10 | pooled.count <- 0 11 | unpooled.count <- 0 12 | 13 | for (i in 1:10^5) 14 | { 15 | x <- rnorm(m, 30, 5) 16 | y <- rnorm(n, 30, 5) 17 | 18 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value 19 | p.unpooled <- t.test(x, y)$p.value 20 | 21 | pooled.count <- pooled.count + (p.pooled < 0.05) 22 | unpooled.count <- unpooled.count + (p.unpooled < 0.05) 23 | } 24 | 25 | pooled.count/10^5 26 | unpooled.count/10^5 27 | 28 | #------------------------------------------- 29 | #Exercise 21 30 | 31 | n1 <- 100 32 | n2 <- 100 33 | N <- 10^4 34 | p <- 0.1 35 | 36 | x1 <- rbinom(N, size = n1, p) 37 | x2 <- rbinom(N, size = n2, p) 38 | 39 | phat <- (x1 + x2)/(n1 + n2) 40 | propDiff <- x1/n1 - x2/n2 41 | 42 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2)) 43 | 44 | qqnorm(propDiff/SE) 45 | abline(0, 1, col = "lightgray") 46 | 47 | #Exercise 40 48 | 49 | N <- 10^4 50 | tstat <- numeric(N) 51 | for (i in 1:N) 52 | { 53 | w <- rnorm(30, 7, 1) 54 | tstat[i] <- (mean(w) - 5)* sqrt(30) 55 | } 56 | 57 | 58 | hist(tstat, prob = TRUE) 59 | curve(dt(x, df = 29), from = 0, to = 20, add = TRUE) 60 | 61 | curve(dt(x , df = 29, ncp = 10.95), from = 0, to = 20, 62 | col = "blue", add = TRUE) 63 | -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Exercise 13 13 | ```{r} 14 | m <- 30 15 | n <- 30 16 | sigma1 <- 5 17 | sigma2 <- 5 18 | 19 | pooled.count <- 0 20 | unpooled.count <- 0 21 | 22 | for (i in 1:10^5) 23 | { 24 | x <- rnorm(m, 30, 5) 25 | y <- rnorm(n, 30, 5) 26 | 27 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value 28 | p.unpooled <- t.test(x, y)$p.value 29 | 30 | pooled.count <- 
pooled.count + (p.pooled < 0.05) 31 | unpooled.count <- unpooled.count + (p.unpooled < 0.05) 32 | } 33 | 34 | pooled.count/10^5 35 | unpooled.count/10^5 36 | 37 | ``` 38 | ###Exercise 21 39 | 40 | ```{r} 41 | n1 <- 100 42 | n2 <- 100 43 | N <- 10^4 44 | p <- 0.1 45 | 46 | x1 <- rbinom(N, size = n1, p) 47 | x2 <- rbinom(N, size = n2, p) 48 | 49 | phat <- (x1 + x2)/(n1 + n2) 50 | propDiff <- x1/n1 - x2/n2 51 | 52 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2)) 53 | 54 | qqnorm(propDiff/SE) 55 | abline(0, 1, col = "lightgray") 56 | ``` 57 | 58 | ###Exercise 40 59 | ```{r} 60 | N <- 10^4 61 | tstat <- numeric(N) 62 | for (i in 1:N) 63 | { 64 | w <- rnorm(30, 7, 1) 65 | tstat[i] <- (mean(w) - 5)* sqrt(30) 66 | } 67 | 68 | 69 | hist(tstat, prob = TRUE) 70 | curve(dt(x, df = 29), from = 0, to = 20, add = TRUE) 71 | 72 | curve(dt(x , df = 29, ncp = 10.95), from = 0, to = 20, 73 | col = "blue", add = TRUE) 74 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 13 15 | ```{r} 16 | m <- 30 17 | n <- 30 18 | sigma1 <- 5 19 | sigma2 <- 5 20 | 21 | pooled.count <- 0 22 | unpooled.count <- 0 23 | 24 | for (i in 1:10^5) 25 | { 26 | x <- rnorm(m, 30, 5) 27 | y <- rnorm(n, 30, 5) 28 | 29 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value 30 | p.unpooled <- t.test(x, y)$p.value 31 | 32 | pooled.count <- pooled.count + (p.pooled < 0.05) 33 | unpooled.count <- unpooled.count + (p.unpooled < 0.05) 34 | } 35 | 36 | pooled.count/10^5 37 | unpooled.count/10^5 38 | 39 | ``` 40 | ###Exercise 21 41 | 42 | ```{r} 43 | n1 <- 100 44 | n2 <- 100 45 | N <- 10^4 46 | p <- 0.1 47 | 48 | x1 <- rbinom(N, size = n1, p) 49 | x2 <- rbinom(N, size = n2, p) 50 | 51 | phat <- (x1 + x2)/(n1 + n2) 52 | propDiff <- x1/n1 - x2/n2 53 | 54 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2)) 55 | 56 | df <- data.frame(x=propDiff/SE) 57 | 58 | ggplot(df, aes(sample = x)) + stat_qq() + 59 | geom_abline(intercept = 0, slope = 1, colour = "lightgray") 60 | ``` 61 | 62 | ###Exercise 40 63 | ```{r} 64 | N <- 10^4 65 | tstat <- numeric(N) 66 | for (i in 1:N) 67 | { 68 | w <- rnorm(30, 7, 1) 69 | tstat[i] <- (mean(w) - 5)* sqrt(30) 70 | } 71 | 72 | df <- data.frame(x=tstat) 73 | ggplot(df, aes(x)) + geom_histogram(aes(y = stat(density)), bins = 12) + 74 | stat_function(fun=dt, args=list(df=29), colour = "red") + 75 | stat_function(fun=dt, args=list(df=29, ncp = 10.95), colour = "blue") 76 | 77 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Section 8.2 15 | ```{r} 16 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 17 | 18 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater") 19 | 20 | 
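# Bootstrap counterpart of the t test above: resample from the data and
# center each t statistic at the observed mean xbar, so the Tstar values
# below approximate the sampling distribution of T under the null hypothesis.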
Arsenic <- Bangladesh %>% pull(Arsenic) 21 | #Arsenic <- Bangladesh$Arsenic 22 | N <- 10^5 23 | 24 | observedT <- t.test(Arsenic, mu = 100)$statistic 25 | xbar <- mean(Arsenic) 26 | n <- length(Arsenic) 27 | Tstar <- numeric(N) 28 | for (i in 1:N) 29 | { 30 | bootx <- sample(Arsenic, n, replace = TRUE) 31 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n)) 32 | } 33 | 34 | ggplot() + geom_histogram(aes(Tstar), bins = 12) + geom_vline(xintercept = observedT) 35 | 36 | (sum(Tstar >= observedT) + 1)/(N + 1) 37 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap09Regression.R: -------------------------------------------------------------------------------- 1 | #Chapter 9 Regression 2 | # 3 | 4 | #Section 9.2 5 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv") 6 | 7 | plot(Spruce$Di.change, Spruce$Ht.change) 8 | cor(Spruce$Di.change, Spruce$Ht.change) 9 | 10 | plot(Ht.change ~ Di.change, data = Spruce) 11 | 12 | #Example 9.3 13 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce) 14 | spruce.lm 15 | 16 | plot(Spruce$Ht.change, resid(spruce.lm), ylab = "residuals") 17 | abline(h = 0) 18 | lines(smooth.spline(Spruce$Ht.change, resid(spruce.lm), df = 3), col = "blue") 19 | 20 | #Example 9.8 21 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv") 22 | skate.lm <- lm(Free ~ Short, data = Skating2010) 23 | summary(skate.lm) 24 | 25 | #Section 9.5 26 | N <- 10^4 27 | cor.boot <- numeric(N) 28 | beta.boot <- numeric(N) 29 | alpha.boot <- numeric(N) 30 | yPred.boot <- numeric(N) 31 | n <- 24 #number of skaters 32 | for (i in 1:N) 33 | { 34 | index <- sample(n, replace = TRUE) #sample from 1, 2, ... n 35 | Skate.boot <- Skating2010[index, ] 36 | 37 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free) 38 | 39 | #recalculate linear model estimates 40 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot) 41 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept 42 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope 43 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^ 44 | } 45 | 46 | mean(cor.boot) 47 | sd(cor.boot) 48 | quantile(cor.boot, c(0.025, 0.975)) 49 | 50 | hist(cor.boot, main = "Bootstrap distribution of correlation", 51 | xlab = "Correlation") 52 | observed <- cor(Skating2010$Short, Skating2010$Free) 53 | abline(v = observed, col = "blue") #add line at observed cor. 54 | 55 | #------------------------------------------------------- 56 | # Section 9.5.1 Permutation test 57 | 58 | N <- 10^5 - 1 59 | n <- nrow(Skating2010) #number of observations 60 | result <- numeric(N) 61 | observed <- cor(Skating2010$Short, Skating2010$Free) 62 | for (i in 1:N) 63 | { 64 | index <- sample(n , replace = FALSE) 65 | Short.permuted <- Skating2010$Short[index] 66 | result[i] <- cor(Short.permuted, Skating2010$Free) 67 | } 68 | 69 | (sum(observed <= result) + 1)/(N+1) #P-value 70 | 71 | #---------------------------------------------- 72 | #Chapter 9.6.1 Inference for logistic regression 73 | Fatalities <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv") 74 | 75 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial) 76 | data.class(fit) # is a "glm" object, so for help use: 77 | help(glm) 78 | 79 | fit # prints the coefficients and other basic info 80 | coef(fit) # the coefficients as a vector 81 | summary(fit) # gives standard errors for coefficients, etc.
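# (Added sketch) Interpreting the fitted coefficients: they are on the
# log-odds scale, so exponentiating gives odds ratios. confint.default()
# gives Wald intervals (a normal-approximation assumption).
exp(coef(fit))        # for Age: multiplicative change in odds per year of age
confint.default(fit)  # Wald confidence intervals for the coefficients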
82 | 83 | x <- seq(17, 91, length = 500) # vector spanning the age range 84 | # compute predicted probabilities 85 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x)) 86 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x) 87 | 88 | plot(Fatalities$Age, Fatalities$Alcohol, 89 | ylab = "Probability of alcohol") 90 | lines(x, y2) 91 | 92 | # Full bootstrap - slope coefficient, and prediction at age 20 93 | N <- 10^3 94 | n <- nrow(Fatalities) # number of observations 95 | alpha.boot <- numeric(N) 96 | beta.boot <- numeric(N) 97 | pPred.boot <- numeric(N) 98 | 99 | for (i in 1:N) 100 | { 101 | index <- sample(n, replace = TRUE) 102 | Fatal.boot <- Fatalities[index, ] # resampled data 103 | 104 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 105 | family = binomial) 106 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 107 | beta.boot[i] <- coef(fit.boot)[2] # new slope 108 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 109 | } 110 | 111 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals 112 | quantile(pPred.boot, c(.025, .975)) 113 | 114 | par(mfrow=c(2,2)) # set layout 115 | hist(beta.boot, xlab = "beta", main = "") 116 | qqnorm(beta.boot, main = "") 117 | 118 | hist(pPred.boot, xlab = "p^", main = "") 119 | qqnorm(pPred.boot, main = "") 120 | 121 | #-------------------- 122 | help(predict.glm) # for more help on predict 123 | 124 | n <- nrow(Fatalities) # number of observations 125 | x <- seq(17, 91, length = 500) # vector spanning the age range 126 | df.Age <- data.frame(Age = x) # data frame to hold 127 | # explanatory variables, will use this for making predictions 128 | 129 | plot(Fatalities$Age, Fatalities$Alcohol, 130 | ylab = "Probability of alcohol") 131 | for (i in 1:25) 132 | { 133 | index <- sample(n, replace = TRUE) 134 | Fatal.boot <- Fatalities[index, ] # resampled data 135 | 136 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 137 | family = binomial) 138 | pPred <- predict(fit.boot, newdata = df.Age, type = "response") 139 | lines(x, pPred) 140 | } 141 | 142 | #end fatalities 143 | #--------------------- 144 | -------------------------------------------------------------------------------- /Edition2/R/Chap09Regression.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 9 Regression" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Section 9.2 13 | ```{r} 14 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv") 15 | 16 | plot(Spruce$Di.change, Spruce$Ht.change) 17 | cor(Spruce$Di.change, Spruce$Ht.change) 18 | 19 | plot(Ht.change ~ Di.change, data = Spruce) 20 | ``` 21 | 22 | ###Example 9.3 23 | ```{r} 24 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce) 25 | spruce.lm 26 | 27 | plot(Spruce$Ht.change, resid(spruce.lm), ylab = "residuals") 28 | abline(h = 0) 29 | lines(smooth.spline(Spruce$Ht.change, resid(spruce.lm), df = 3), col = "blue") 30 | ``` 31 | 32 | ###Example 9.8 33 | ```{r} 34 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv") 35 | skate.lm <- lm(Free ~ Short, data = Skating2010) 36 | summary(skate.lm) 37 | ``` 38 | 39 | ###Section 9.5 40 | 41 | ```{r} 42 | N <- 10^4 43 | cor.boot <- numeric(N) 44 | beta.boot <- numeric(N) 45 | alpha.boot <- numeric(N) 46 | yPred.boot <- numeric(N) 47 | n <- 24 #number of skaters 48 | for (i in 1:N) 49 | { 50 | index <- sample(n, replace = TRUE) #sample from 1, 2, ... n 51 | Skate.boot <- Skating2010[index, ] 52 | 53 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free) 54 | 55 | #recalculate linear model estimates 56 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot) 57 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept 58 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope 59 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^ 60 | } 61 | 62 | mean(cor.boot) 63 | sd(cor.boot) 64 | quantile(cor.boot, c(0.025, 0.975)) 65 | 66 | hist(cor.boot, main = "Bootstrap distribution of correlation", 67 | xlab = "Correlation") 68 | observed <- cor(Skating2010$Short, Skating2010$Free) 69 | abline(v = observed, col = "blue") #add line at observed cor. 70 | ``` 71 | 72 | ### Section 9.5.1 Permutation test 73 | 74 | ```{r} 75 | N <- 10^5 - 1 76 | n <- nrow(Skating2010) #number of observations 77 | result <- numeric(N) 78 | observed <- cor(Skating2010$Short, Skating2010$Free) 79 | for (i in 1:N) 80 | { 81 | index <- sample(n , replace = FALSE) 82 | Short.permuted <- Skating2010$Short[index] 83 | result[i] <- cor(Short.permuted, Skating2010$Free) 84 | } 85 | 86 | (sum(observed <= result) + 1)/(N+1) #P-value 87 | ``` 88 | 89 | 90 | ###Chapter 9.6.1 Inference for logistic regression 91 | 92 | ```{r} 93 | Fatalities <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv") 94 | 95 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial) 96 | data.class(fit) # is a "glm" object, so for help use: 97 | help(glm) 98 | 99 | fit # prints the coefficients and other basic info 100 | coef(fit) # the coefficients as a vector 101 | summary(fit) # gives standard errors for coefficients, etc. 102 | 103 | x <- seq(17, 91, length = 500) # vector spanning the age range 104 | # compute predicted probabilities 105 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x)) 106 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x) 107 | 108 | plot(Fatalities$Age, Fatalities$Alcohol, 109 | ylab = "Probability of alcohol") 110 | lines(x, y2) 111 | ``` 112 | 113 | 114 | #### Full bootstrap - slope coefficient, and prediction at age 20 115 | ```{r} 116 | N <- 10^3 117 | n <- nrow(Fatalities) # number of observations 118 | alpha.boot <- numeric(N) 119 | beta.boot <- numeric(N) 120 | pPred.boot <- numeric(N) 121 | 122 | for (i in 1:N) 123 | { 124 | index <- sample(n, replace = TRUE) 125 | Fatal.boot <- Fatalities[index, ] # resampled data 126 | 127 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 128 | family = binomial) 129 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 130 | beta.boot[i] <- coef(fit.boot)[2] # new slope 131 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 132 | } 133 | 134 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals 135 | quantile(pPred.boot, c(.025, .975)) 136 | 137 | par(mfrow=c(2,2)) # set layout 138 | hist(beta.boot, xlab = "beta", main = "") 139 | qqnorm(beta.boot, main = "") 140 | 141 | hist(pPred.boot, xlab = "p^", main = "") 142 | qqnorm(pPred.boot, main = "") 143 | ``` 144 | 145 | 146 | ```{r} 147 | n <- nrow(Fatalities) # number of observations 148 | x <- seq(17, 91, length = 500) # vector spanning the age range 149 | df.Age <- data.frame(Age = x) # data frame to hold 150 | # explanatory variables, will use this for making predictions 151 | 152 | plot(Fatalities$Age, Fatalities$Alcohol, 153 | ylab = "Probability of alcohol") 154 | for (i in 1:25) 155 | { 156 | index <- sample(n, replace = TRUE) 157 | Fatal.boot <- Fatalities[index, ] # resampled data 158 | 159 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 160 | family = binomial) 161 | pPred <- predict(fit.boot, newdata = df.Age, type = "response") 162 | lines(x, pPred) 163 | } 164 | ``` 165 | 166 | -------------------------------------------------------------------------------- /Edition2/R/Chap09Regression_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 9 Regression" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | library(dplyr) 11 | library(ggplot2) 12 | ``` 13 | 14 | ###Section 9.2 15 | ```{r} 16 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv") 17 | 18 | ggplot(Spruce, aes(x = Di.change, y = Ht.change)) + geom_point() 19 | 20 | cor(Spruce$Di.change, Spruce$Ht.change) 21 | ``` 22 | 23 | ###Example 9.3 24 | ```{r} 25 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce) 26 | spruce.lm 27 | 28 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point() + 29 | stat_smooth(method="lm", se = FALSE) 30 | ``` 31 | 32 | We introduce a new package `broom` that performs some __tidying__ of the output of base R's `lm` command: 33 | 34 | ```{r} 35 | library(broom) 36 | 37 | fit <- augment(spruce.lm) 38 | head(fit, 3) 39 | ``` 40 | In particular, note that we now have a data set that, in addition to the original variables, also contains a column of the fitted (predicted) values and the residuals. 41 | 42 | To create a residual plot: 43 | 44 | ```{r} 45 | ggplot(fit, aes(x=Ht.change, y = .resid)) + geom_point() + 46 | geom_hline(yintercept = 0) + labs(y = "residuals") 47 | ``` 48 | 49 | To add a __smoother__ line to the residual plot, use the `stat_smooth()` command: 50 | 51 | ```{r} 52 | ggplot(fit, aes(x = Ht.change, y = .resid)) + geom_point() + stat_smooth(method = loess, se = FALSE) + geom_hline(yintercept = 0) 53 | ``` 54 | 55 | ###Example 9.8 56 | ```{r} 57 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv") 58 | skate.lm <- lm(Free ~ Short, data = Skating2010) 59 | summary(skate.lm) 60 | ``` 61 | 62 | ###Section 9.5 63 | 64 | ```{r} 65 | N <- 10^4 66 | cor.boot <- numeric(N) 67 | beta.boot <- numeric(N) 68 | alpha.boot <- numeric(N) 69 | yPred.boot <- numeric(N) 70 | n <- 24 #number of skaters 71 | for (i in 1:N) 72 | { 73 | index <- sample(n, replace = TRUE) #sample from 1, 2, ... 
n 74 | Skate.boot <- Skating2010[index, ] 75 | 76 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free) 77 | 78 | #recalculate linear model estimates 79 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot) 80 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept 81 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope 82 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^ 83 | } 84 | 85 | mean(cor.boot) 86 | sd(cor.boot) 87 | quantile(cor.boot, c(0.025, 0.975)) 88 | 89 | 90 | observed <- cor(Skating2010$Short, Skating2010$Free) 91 | 92 | ggplot() + geom_histogram(aes(cor.boot), bins = 12) + 93 | labs(title = "Bootstrap distribution of correlation", x = "Correlation") + 94 | geom_vline(xintercept = observed, colour = "blue") 95 | ``` 96 | 97 | ### Section 9.5.1 Permutation test 98 | 99 | ```{r} 100 | N <- 10^5 - 1 101 | n <- nrow(Skating2010) #number of observations 102 | result <- numeric(N) 103 | observed <- cor(Skating2010$Short, Skating2010$Free) 104 | for (i in 1:N) 105 | { 106 | index <- sample(n , replace = FALSE) 107 | Short.permuted <- Skating2010$Short[index] 108 | result[i] <- cor(Short.permuted, Skating2010$Free) 109 | } 110 | 111 | (sum(observed <= result) + 1)/(N+1) #P-value 112 | ``` 113 | 114 | 115 | ###Chapter 9.6.1 Inference for logistic regression 116 | 117 | ```{r} 118 | Fatalities <-read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv") 119 | 120 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial) 121 | data.class(fit) # is a "glm" object, so for help use: 122 | help(glm) 123 | 124 | fit # prints the coefficients and other basic info 125 | coef(fit) # the coefficients as a vector 126 | summary(fit) # gives standard errors for coefficients, etc. 127 | 128 | x <- seq(17, 91, length = 500) # vector spanning the age range 129 | # compute predicted probabilities 130 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x)) 131 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x) 132 | 133 | my.fun <- function(x, lm.object){ 134 | plogis(coef(lm.object)[1] + coef(lm.object)[2]*x) 135 | } 136 | 137 | ggplot(Fatalities, aes(x=Age, y = Alcohol)) + geom_point() + 138 | stat_function(fun = my.fun, args=list(lm.object = fit)) 139 | 140 | ``` 141 | 142 | 143 | #### Full bootstrap - slope coefficient, and prediction at age 20 144 | ```{r} 145 | N <- 10^3 146 | n <- nrow(Fatalities) # number of observations 147 | alpha.boot <- numeric(N) 148 | beta.boot <- numeric(N) 149 | pPred.boot <- numeric(N) 150 | 151 | for (i in 1:N) 152 | { 153 | index <- sample(n, replace = TRUE) 154 | Fatal.boot <- Fatalities[index, ] # resampled data 155 | 156 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 157 | family = binomial) 158 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 159 | beta.boot[i] <- coef(fit.boot)[2] # new slope 160 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 161 | } 162 | 163 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals 164 | quantile(pPred.boot, c(.025, .975)) 165 | 166 | library(gridExtra) 167 | 168 | p1 <- ggplot() + geom_histogram(aes(beta.boot), bins = 12) + labs(x = "beta") 169 | p2 <- ggplot() + stat_qq(aes(sample = beta.boot)) 170 | p3 <- ggplot() + geom_histogram(aes(pPred.boot), bins = 12) + labs(x = "p^") 171 | p4 <- ggplot() + stat_qq(aes(sample = pPred.boot)) 172 | grid.arrange(p1, p2, p3, p4) 173 | ``` 174 | 175 | 176 | ```{r} 177 | n <- nrow(Fatalities) # number of observations 178 | x <- seq(17, 91, length = 500) # vector spanning the age range 179 | df.Age <- data.frame(Age 
= x) # data frame to hold 180 | # explanatory variables, will use this for making predictions 181 | 182 | p <- ggplot(Fatalities, aes(x= Age, y = Alcohol)) + geom_point() + 183 | labs(y = "Probability of alcohol") 184 | 185 | for (i in 1:25) 186 | { 187 | index <- sample(n, replace = TRUE) 188 | Fatal.boot <- Fatalities[index, ] # resampled data 189 | 190 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 191 | family = binomial) 192 | df.Age$pPred <- predict(fit.boot, newdata = df.Age, type = "response") 193 | p <- p + geom_line(data = df.Age, aes(x = Age, y = pPred)) 194 | } 195 | 196 | print(p) 197 | ``` 198 | 199 | -------------------------------------------------------------------------------- /Edition2/R/Chap10categorical.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------ 2 | #Chapter 10 Categorical data 3 | #Here is a function that computes the chi-square 4 | #test statistic 5 | 6 | #This function is a bit more enhanced than the code in the textbook 7 | chisq <- function(observed, print = TRUE) { 8 | # Chi-square statistic for independence in a contingency table, 9 | # with related data exploration. 10 | # observed is the observed contingency table 11 | 12 | observedWithTotals <- cbind(observed, total = rowSums(observed)) 13 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals)) 14 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed) 15 | statistic <- sum((observed-expected)^2/expected) 16 | if (print) 17 | { 18 | cat("Observed, with totals:\n") 19 | print(observedWithTotals) 20 | cat("\nRow Fractions:\n") 21 | print(round(observed / rowSums(observed), 3)) 22 | cat("\nColumn Fractions:\n") 23 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3)) 24 | 25 | cat("\nExpected:\n") 26 | print(round(expected, 1)) 27 | cat("\nDifference:\n") 28 | print(round(observed - expected, 1)) 29 | 30 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n") 31 | } 32 | return(invisible(statistic)) 33 | } 34 | 35 | 36 | #------------------------------------------- 37 | #Uncomment below if you haven't imported GSS2002 yet. 
38 | #GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv") 39 | 40 | Education <- GSS2002$Education 41 | DeathPenalty <- GSS2002$DeathPenalty 42 | #Alternatively 43 | #Education <- subset(GSS2002, select=Education, drop = TRUE) 44 | #DeathPenalty <- subset(GSS2002, select=DeathPenalty, drop = TRUE) 45 | 46 | table(Education, DeathPenalty) #note education ordered alphabetically 47 | 48 | Education <- ordered(GSS2002$Education, 49 | levels = c("Left HS", "HS", "Jr Col", "Bachelors", 50 | "Graduate")) 51 | 52 | table(Education, DeathPenalty) 53 | 54 | #Use function created above to calculate chi-square test statistic 55 | observedChi2 <- chisq(table(Education, DeathPenalty)) 56 | observedChi2 57 | 58 | #Find those rows where there is at least one NA 59 | index <- which(is.na(Education) | is.na(DeathPenalty)) 60 | 61 | #Remove those rows from the two variables and define Educ2 and 62 | #DeathPenalty2 to be the new vectors with those rows removed 63 | Educ2 <- Education[-index] 64 | DeathPenalty2 <- DeathPenalty[-index] 65 | 66 | N <- 10^4-1 67 | result<-numeric(N) 68 | 69 | for (i in 1:N) 70 | { 71 | DP.permutation <-sample(DeathPenalty2) 72 | GSS.table <- table(Educ2, DP.permutation) 73 | result[i]<-chisq(GSS.table, print = FALSE) 74 | } 75 | 76 | #Create a histogram 77 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic") 78 | abline(v = observedChi2, col = "blue", lty = 5) 79 | 80 | 81 | #optional: Create a histogram with the density curve 82 | #imposed onto the histogram 83 | #The prob=TRUE option below scales the histogram to have area 1 84 | hist(result, xlab = "chi-square statistic", main="Distribution of chi-square statistic", 85 | ylim = c(0,.2)) 86 | curve(dchisq(x, df = 4), from = 0, to = 25, col = "green", add = TRUE) 87 | 88 | #Compute P-value 89 | (sum(result >= observedChi2) + 1)/(N + 1) 90 | 91 | 92 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1) 93 | mat <- table(Education, DeathPenalty) 94 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4-1) 95 | 96 | #---------------------------------------------------------------- 97 | #Example 10.2 98 | mat <- rbind(c(42, 50), c(30, 87)) 99 | chisq.test(mat) 100 | 101 | #Section 10.3.3 Fisher's Exact Test 102 | fisher.test(mat) 103 | 104 | 105 | 106 | #Section 10.4 Test of Homogeneity 107 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50)) 108 | candy.mat 109 | 110 | chisq.test(candy.mat) 111 | 112 | #Section 10.6 113 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv") 114 | Homeruns <- Phillies2009$Homeruns 115 | #Homeruns <- subset(Phillies2009, select = Homeruns, drop = TRUE) 116 | 117 | 118 | 119 | lambda <- mean(Homeruns) 120 | dpois(0:5, lambda) 121 | table(Homeruns) 122 | 123 | table(Homeruns)/162 124 | -------------------------------------------------------------------------------- /Edition2/R/Chap10categorical.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 10 Categorical Data" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Section 10.2 13 | 14 | Here is a function that computes the chi-square test statistic 15 | 16 | The code below gives a function that is a bit more enhanced than the code in the textbook: 17 | ```{r} 18 | chisq <- function(observed, print = TRUE) 
{ 19 | # Chi-square statistic for independence in a contingency table, 20 | # with related data exploration. 21 | # observed is the observed contingency table 22 | 23 | observedWithTotals <- cbind(observed, total = rowSums(observed)) 24 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals)) 25 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed) 26 | statistic <- sum((observed-expected)^2/expected) 27 | if (print){ 28 | cat("Observed, with totals:\n") 29 | print(observedWithTotals) 30 | cat("\nRow Fractions:\n") 31 | print(round(observed / rowSums(observed), 3)) 32 | cat("\nColumn Fractions:\n") 33 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3)) 34 | 35 | cat("\nExpected:\n") 36 | print(round(expected, 1)) 37 | cat("\nDifference:\n") 38 | print(round(observed - expected, 1)) 39 | 40 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n") 41 | } 42 | return(invisible(statistic)) 43 | } 44 | ``` 45 | Import the General Social Survey data and extract the two variables, `Education` and 46 | `DeathPenalty`. 47 | 48 | `Education` is a factor variable. We use the `ordered` command to *order* the levels. 49 | 50 | ```{r} 51 | GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv") 52 | 53 | Education <- GSS2002$Education 54 | DeathPenalty <- GSS2002$DeathPenalty 55 | 56 | #Alternatively 57 | #Education <- subset(GSS2002, select = Education, drop = TRUE) 58 | #DeathPenalty <- subset(GSS2002, select = DeathPenalty, drop = TRUE) 59 | 60 | table(Education, DeathPenalty) #Education ordered alphabetically 61 | 62 | Education <- ordered(GSS2002$Education, levels = c("Left HS", "HS", "Jr Col", "Bachelors", "Graduate")) 63 | table(Education, DeathPenalty) 64 | ``` 65 | 66 | Use function created above to calculate chi-square test statistic 67 | 68 | ```{r} 69 | observedChi2 <- chisq(table(Education, DeathPenalty)) 70 | observedChi2 71 | ``` 72 | 73 | There are missing values in both variables so we get the row numbers where there is at least one NA. 
We remove those rows from the two variables and create two new vectors `Educ2` and `DeathPenalty2` that hold the non-NA values:
74 | 
75 | ```{r}
76 | str(GSS2002)
77 | 
78 | index <- which(is.na(Education) | is.na(DeathPenalty))
79 | 
80 | Educ2 <- Education[-index]
81 | DeathPenalty2 <- DeathPenalty[-index]
82 | ```
83 | Now run the permutation test:
84 | ```{r}
85 | N <- 10^4 - 1
86 | result <- numeric(N)
87 | 
88 | for (i in 1:N)
89 | {
90 | DP.permutation <- sample(DeathPenalty2)
91 | GSS.table <- table(Educ2, DP.permutation)
92 | result[i] <- chisq(GSS.table, print = FALSE)
93 | }
94 | 
95 | #Create a histogram
96 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic")
97 | abline(v = observedChi2, col = "blue", lty = 5)
98 | 
99 | #Compute P-value
100 | (sum(result >= observedChi2) + 1)/(N + 1)
101 | ```
102 | 
103 | Optional: Create a histogram with the density curve
104 | imposed onto the histogram.
105 | The `prob = TRUE` argument scales the histogram to have area 1:
106 | ```{r}
107 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic", prob = TRUE)
108 | curve(dchisq(x, df = 4), from = 0, to = 25, col = "green", add = TRUE)
109 | ```
110 | 
111 | The `chisq.test` command also has an option that will perform this permutation test:
112 | 
113 | ```{r}
114 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1)
115 | mat <- table(Education, DeathPenalty)
116 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4 - 1)
117 | ```
118 | 
119 | 
120 | 
121 | 
122 | ###Example 10.2
123 | ```{r}
124 | mat <- rbind(c(42, 50), c(30, 87))
125 | chisq.test(mat)
126 | ```
127 | 
128 | ###Section 10.3.3 Fisher's Exact Test
129 | 
130 | ```{r}
131 | fisher.test(mat)
132 | ```
133 | 
134 | ###Section 10.4 Test of Homogeneity
135 | ```{r}
136 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
137 | candy.mat
138 | 
139 | chisq.test(candy.mat)
140 | ```
141 | 
142 | ###Section 10.6
143 | ```{r}
144 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv")
145 | Homeruns <- Phillies2009$Homeruns
146 | #Homeruns <- subset(Phillies2009, select = Homeruns, drop = TRUE)
147 | 
148 | lambda <- mean(Homeruns)
149 | dpois(0:5, lambda)
150 | table(Homeruns)
151 | 
152 | table(Homeruns)/162
153 | ```
-------------------------------------------------------------------------------- /Edition2/R/Chap10categorical_d.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 10 Categorical Data"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 | 
14 | ###Section 10.2
15 | 
16 | Here is a function that computes the chi-square test statistic.
17 | 
18 | The code below gives a function that is a bit more enhanced than the code in the textbook:
19 | ```{r}
20 | chisq <- function(observed, print = TRUE) {
21 | # Chi-square statistic for independence in a contingency table,
22 | # with related data exploration.
23 | # observed is the observed contingency table
24 | 
25 | observedWithTotals <- cbind(observed, total = rowSums(observed))
26 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
27 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
28 | statistic <- sum((observed - expected)^2/expected)
29 | if (print){
30 | cat("Observed, with totals:\n")
31 | print(observedWithTotals)
32 | cat("\nRow Fractions:\n")
33 | print(round(observed / rowSums(observed), 3))
34 | cat("\nColumn Fractions:\n")
35 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
36 | 
37 | cat("\nExpected:\n")
38 | print(round(expected, 1))
39 | cat("\nDifference:\n")
40 | print(round(observed - expected, 1))
41 | 
42 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
43 | }
44 | return(invisible(statistic))
45 | }
46 | ```
47 | Import the General Social Survey data. We are interested in the two variables, `Education` and `DeathPenalty`.
48 | 
49 | Using the `str()` command, we note that these two variables have missing values. We will create a new data frame that contains just the two variables of interest and only the rows without NAs.
50 | 
51 | ```{r}
52 | GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv")
53 | 
54 | str(GSS2002)
55 | 
56 | df <- GSS2002 %>%
57 |   select(Education, DeathPenalty) %>%
58 |   filter(!is.na(Education) & !is.na(DeathPenalty))
59 | 
60 | Education <- pull(df, Education)
61 | DeathPenalty <- pull(df, DeathPenalty)
62 | 
63 | table(Education, DeathPenalty)
64 | ```
65 | 
66 | `Education` is a factor variable and the default ordering of the levels is alphabetical. We use the `ordered` command to *order* the levels.
67 | 
68 | ```{r}
69 | Education <- ordered(Education, levels = c("Left HS", "HS", "Jr Col", "Bachelors", "Graduate"))
70 | table(Education, DeathPenalty)
71 | ```
72 | 
73 | 
74 | Use the function created above to calculate the chi-square test statistic:
75 | 
76 | ```{r}
77 | observedChi2 <- chisq(table(Education, DeathPenalty))
78 | observedChi2
79 | ```
80 | 
81 | Now run the permutation test:
82 | 
83 | ```{r}
84 | N <- 10^4 - 1
85 | result <- numeric(N)
86 | 
87 | for (i in 1:N)
88 | {
89 | DP.permutation <- sample(DeathPenalty)
90 | GSS.table <- table(Education, DP.permutation)
91 | result[i] <- chisq(GSS.table, print = FALSE)
92 | }
93 | 
94 | ggplot() + geom_histogram(aes(result)) +
95 |   labs(title = "Distribution of chi-square statistics", x = "chi-square statistic") +
96 |   geom_vline(xintercept = observedChi2, colour = "blue")
97 | 
98 | (sum(result >= observedChi2) + 1)/(N + 1)
99 | ```
100 | 
101 | 
102 | Optional: Create a histogram with the density curve
103 | imposed onto the histogram. The ggplot() command will require a data frame which contains the variable of interest.
104 | 
105 | ```{r}
106 | df <- data.frame(result)
107 | ggplot(df) + geom_histogram(aes(result, y = stat(density))) +
108 |   labs(title = "Distribution of chi-square statistics", x = "chi-square statistic") +
109 |   geom_vline(xintercept = observedChi2, colour = "blue") +
110 |   stat_function(fun = dchisq, args = list(df = 4), colour = "green")
111 | ```
112 | 
113 | The `chisq.test` command also has an option that will perform this permutation test:
114 | 
115 | ```{r}
116 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1)
117 | mat <- table(Education, DeathPenalty)
118 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4 - 1)
119 | ```
120 | 
121 | ###Example 10.2
122 | ```{r}
123 | mat <- rbind(c(42, 50), c(30, 87))
124 | chisq.test(mat)
125 | ```
126 | 
127 | ###Section 10.3.3 Fisher's Exact Test
128 | 
129 | ```{r}
130 | fisher.test(mat)
131 | ```
132 | 
133 | ###Section 10.4 Test of Homogeneity
134 | ```{r}
135 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
136 | candy.mat
137 | 
138 | chisq.test(candy.mat)
139 | ```
140 | 
141 | ###Section 10.6
142 | ```{r}
143 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv")
144 | Homeruns <- pull(Phillies2009, Homeruns)
145 | 
146 | lambda <- mean(Homeruns)
147 | dpois(0:5, lambda)
148 | table(Homeruns)
149 | 
150 | table(Homeruns)/162
151 | ```
-------------------------------------------------------------------------------- /Edition2/R/Chap11Bayesian.R: --------------------------------------------------------------------------------
1 | #Chapter 11 Bayesian Methods
2 | # R scripts
3 | 
4 | #-----------------------------------
5 | # Example 11.1
6 | theta <- seq(0, 1, by = .1)
7 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
8 | likelihood <- theta * (1 - theta)^2
9 | constant <- sum(prior * likelihood)
10 | posterior <- prior * likelihood / constant
11 | posterior
12 | sum(theta * prior) # prior mean
13 | sum(theta * posterior) # posterior mean
14 | 
15 | #-----------------------
16 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
17 | constant2 <- sum(prior * likelihood2)
18 | posterior2 <- prior * likelihood2 / constant2
19 | posterior2
20 | likelihood3 <- theta^2 * (1 - theta)^3
21 | constant3 <- sum(posterior * likelihood3)
22 | posterior3 <- posterior * likelihood3 / constant3
23 | posterior3 # not shown, matches posterior2
24 | sum(theta*posterior2) # posterior mean
25 | 
26 | plot(theta, prior, type = "b", ylim = c(0, max(posterior3)),
27 | ylab = "probability")
28 | lines(theta, posterior, type = "b", lty = 2)
29 | lines(theta, posterior2, type = "b", lty = 3)
30 | legend("topleft", legend = c("prior", "posterior1", "posterior2"),
31 | lty = 1:3)
32 | 
33 | #-------------------------
34 | # Chapter 11.5 Sequential data
35 | 
36 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
37 | X <- c(52, 41, 55, 49, 39, 39)
38 | alpha <- X # vector of posterior parameters
39 | beta <- n - X # vector of posterior parameters
40 | N <- 10^5 # replications
41 | theta <- matrix(0.0, nrow = N, ncol = 6)
42 | for (j in 1:6)
43 | {
44 | theta[, j] <- rbeta(N, alpha[j], beta[j])
45 | }
46 | probBest <- numeric(6) # vector for results
47 | best <- apply(theta, 1, max) # maximum of each row
48 | for (j in 1:6)
49 | {
50 | probBest[j] <- mean(theta[, j] == best)
51 | }
52 | 
53 | probBest
54 | 
55 | plot(theta[1:10^4, 1], theta[1:10^4, 3], pch = ".")
56 | abline(0, 1)
57 | text(.037, .042, substitute(theta[3] > theta[1]))
58 | text(.042, .037, substitute(theta[1] > theta[3]))
59 | 
-------------------------------------------------------------------------------- /Edition2/R/Chap11Bayesian.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 11 Bayesian Statistics"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 | 
12 | ### Example 11.1
13 | ```{r}
14 | theta <- seq(0, 1, by = .1)
15 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
16 | likelihood <- theta * (1 - theta)^2
17 | constant <- sum(prior * likelihood)
18 | posterior <- prior * likelihood / constant
19 | posterior
20 | sum(theta * prior) # prior mean
21 | sum(theta * posterior) # posterior mean
22 | 
23 | 
24 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
25 | constant2 <- sum(prior * likelihood2)
26 | posterior2 <- prior * likelihood2 / constant2
27 | posterior2
28 | 
29 | likelihood3 <- theta^2 * (1 - theta)^3
30 | constant3 <- sum(posterior * likelihood3)
31 | posterior3 <- posterior * likelihood3 / constant3
32 | posterior3 # not shown, matches posterior2
33 | sum(theta*posterior2) # posterior mean
34 | 
35 | plot(theta, prior, type = "b", ylim = c(0, max(posterior3)),
36 | ylab = "probability")
37 | lines(theta, posterior, type = "b", lty = 2)
38 | lines(theta, posterior2, type = "b", lty = 3)
39 | legend("topleft", legend = c("prior", "posterior1", "posterior2"),
40 | lty = 1:3)
41 | ```
42 | 
43 | ### Chapter 11.5 Sequential data
44 | ```{r}
45 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
46 | X <- c(52, 41, 55, 49, 39, 39)
47 | alpha <- X # vector of posterior parameters
48 | beta <- n - X # vector of posterior parameters
49 | N <- 10^5 # replications
50 | theta <- matrix(0.0, nrow = N, ncol = 6)
51 | for (j in 1:6)
52 | {
53 | theta[, j] <- rbeta(N, alpha[j], beta[j])
54 | }
55 | probBest <- numeric(6) # vector for results
56 | best <- apply(theta, 1, max) # maximum of each row
57 | for (j in 1:6)
58 | {
59 | probBest[j] <- mean(theta[, j] == best)
60 | }
61 | 
62 | probBest
63 | 
64 | plot(theta[1:10^4, 1], theta[1:10^4, 3], pch = ".")
65 | abline(0, 1)
66 | text(.037, .042, substitute(theta[3] > theta[1]))
67 | text(.042, .037, substitute(theta[1] > theta[3]))
68 | ```
-------------------------------------------------------------------------------- /Edition2/R/Chap11Bayesian_d.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 11 Bayesian Statistics"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 | 
14 | ### Example 11.1
15 | ```{r}
16 | theta <- seq(0, 1, by = .1)
17 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
18 | likelihood <- theta * (1 - theta)^2
19 | constant <- sum(prior * likelihood)
20 | posterior <- prior * likelihood / constant
21 | posterior
22 | sum(theta * prior) # prior mean
23 | sum(theta * posterior) # posterior mean
24 | 
25 | 
26 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
27 | constant2 <- sum(prior * likelihood2)
28 | posterior2 <- prior * likelihood2 / constant2
29 | posterior2
30 | 
31 | likelihood3 <- theta^2 * (1 - theta)^3
32 | constant3 <- sum(posterior * likelihood3)
33 | posterior3 <- posterior * likelihood3 / constant3
34 | posterior3 # not shown, matches posterior2
35
| sum(theta*posterior2) # posterior mean 36 | 37 | df <- data.frame(theta, prior, posterior, posterior2) 38 | 39 | ggplot(df) + 40 | geom_line(aes(x = theta, y = prior, colour = "prior")) + 41 | geom_line(aes(x = theta, y = posterior, colour = "posterior")) + 42 | geom_line(aes(x = theta, y = posterior2, colour = "posterior2")) + 43 | scale_colour_manual(name=NULL, 44 | values= c("prior" = "black", "posterior" = "blue", "posterior2" = "red" )) 45 | ``` 46 | 47 | ### Chapter 11.5 Sequential data 48 | ```{r} 49 | n <- c(1874, 1867, 1871, 1868, 1875, 1875) 50 | X <- c(52, 41, 55, 49, 39, 39) 51 | alpha <- X # vector of posterior parameters 52 | beta <- n - X # vector of posterior parameters 53 | N <- 10^5 # replications 54 | theta <- matrix(0.0, nrow = N, ncol = 6) 55 | for (j in 1:6) 56 | { 57 | theta[, j] <- rbeta(N, alpha[j], beta[j]) 58 | } 59 | probBest <- numeric(6) # vector for results 60 | best <- apply(theta, 1, max) # maximum of each row 61 | for (j in 1:6) 62 | { 63 | probBest[j] <- mean(theta[, j] == best) 64 | } 65 | 66 | probBest 67 | 68 | df <- as.data.frame(theta[1:10^4, ]) 69 | names(df) <- paste("x", as.character(1:6), sep = "") 70 | 71 | ggplot(df) + geom_point(aes(x = x1, y = x3), pch = ".") + 72 | geom_abline(slope = 1, intercept = 0) + 73 | annotate("text", x = 0.037, y = 0.042, parse = TRUE, label ="theta[3] > theta[1]") + 74 | annotate("text", x = 0.042, y = 0.037, parse = TRUE, label ="theta[1] > theta[3]") 75 | 76 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap12ANOVA.R: -------------------------------------------------------------------------------- 1 | #Chapter 12 ANOVA 2 | ILBoys <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/ILBoys.csv") 3 | anova(lm(Weight ~ MothersAge, data = ILBoys)) 4 | 5 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 6 | 7 | summary(aov(Weight ~ MothersAge, data = ILBoys)) 8 | 9 | #-------------------------------- 10 | #Section 12.1.2 Permutation test approach 11 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 12 | 13 | n <- length(ILBoys$Weight) 14 | N <- 10^4 - 1 15 | results <- numeric(N) 16 | for (i in 1:N) 17 | { 18 | index <- sample(n) 19 | Weight.perm <- ILBoys$Weight[index] 20 | results[i] <- anova(lm(Weight.perm ~ MothersAge, data = ILBoys))$F[1] 21 | } 22 | 23 | (sum(results >= observed) + 1) / (N + 1) 24 | -------------------------------------------------------------------------------- /Edition2/R/Chap12ANOVA.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 12 ANOVA" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Example 12.1 13 | Illinois baby boys 14 | 15 | ```{r} 16 | 17 | ILBoys <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/ILBoys.csv") 18 | anova(lm(Weight ~ MothersAge, data = ILBoys)) 19 | 20 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 21 | 22 | summary(aov(Weight ~ MothersAge, data = ILBoys)) 23 | ``` 24 | 25 | ###Section 12.1.2 Permutation test approach 26 | ```{r} 27 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 28 | 29 | n <- length(ILBoys$Weight) 30 | N <- 10^4 - 1 31 | results <- numeric(N) 32 | for (i in 1:N) 33 | { 34 | index <- sample(n) 35 | Weight.perm <- ILBoys$Weight[index] 36 | results[i] <- anova(lm(Weight.perm ~ MothersAge, data = ILBoys))$F[1] 37 | 
} 38 | 39 | (sum(results >= observed) + 1) / (N + 1) 40 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap12ANOVA_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 12 ANOVA - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Exercise 6 13 | Simulation 14 | 15 | ```{r} 16 | nA <- 50 # set sample sizes 17 | nB <- 50 18 | nC <- 50 19 | # create groups 20 | Group <- rep(c("A","B","C"), c(nA, nB, nC)) 21 | 22 | counter <- 0 23 | N <- 10^4 24 | 25 | for (i in 1:N) 26 | { 27 | a <- rnorm(nA, 20, 3) # Draw samples 28 | b <- rnorm(nB, 20, 3) 29 | c <- rnorm(nC, 20, 3) 30 | X <- c(a, b, c) # Combine into one vector 31 | 32 | Pvalue <- anova(lm(X ~ Group))$P[1] # Extract P-value 33 | if (Pvalue < 0.05) # Reject H0? 34 | counter <- counter + 1 # If yes, increase counter 35 | 36 | } 37 | 38 | counter/N # proportion of times H0 rejected 39 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap12Anova_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 12 ANOVA 2 | 3 | #Exercise 6 Simulation 4 | 5 | nA <- 50 # set sample sizes 6 | nB <- 50 7 | nC <- 50 8 | # create groups 9 | Group <- rep(c("A","B","C"), c(nA, nB, nC)) 10 | 11 | counter <- 0 12 | N <- 10^4 13 | 14 | for (i in 1:N) 15 | { 16 | a <- rnorm(nA, 20, 3) # Draw samples 17 | b <- rnorm(nB, 20, 3) 18 | c <- rnorm(nC, 20, 3) 19 | X <- c(a, b, c) # Combine into one vector 20 | 21 | Pvalue <- anova(lm(X ~ Group))$P[1] # Extract P-value 22 | if (Pvalue < 0.05) # Reject H0? 23 | counter <- counter + 1 # If yes, increase counter 24 | 25 | } 26 | 27 | counter/N # proportion of times H0 rejected 28 | -------------------------------------------------------------------------------- /Edition2/README.md: -------------------------------------------------------------------------------- 1 | # Mathematical Statistics with Resampling and R, 2nd edition (2018) 2 | 3 | This is an older edition. 
For the current edition, see 4 | [https://github.com/lchihara/MathStatsResamplingR](https://github.com/lchihara/MathStatsResamplingR) 5 | 6 | 7 | ## Second Edition 8 | 9 | [Author's website](https://sites.google.com/site/chiharahesterberg) 10 | 11 | [Publisher's website](https://www.wiley.com/en-us/Mathematical+Statistics+with+Resampling+and+R%2C+2nd+Edition-p-9781119416531) 12 | 13 | Available on: 14 | 15 | * [Google Books](https://books.google.com/books?id=t2hvDwAAQBAJ) 16 | * [Google Play Books](https://play.google.com/store/books/details/Laura_M_Chihara_Mathematical_Statistics_with_Resam?id=t2hvDwAAQBAJ) 17 | * [Amazon](https://www.google.com/url?q=https%3A%2F%2Fwww.amazon.com%2FMathematical-Statistics-Resampling-Laura-Chihara-ebook%2Fdp%2FB07HH3KXRH%2Fref%3Dsr_1_1%3Fs%3Dbooks%26ie%3DUTF8%26qid%3D1539059394%26sr%3D1-1%26keywords%3DChihara%2BHesterberg&sa=D&sntz=1&usg=AOvVaw25Q7F0vZTyz2h7LR3_xTe0) 18 | -------------------------------------------------------------------------------- /Edition3/Chapters/c01_GSS2018Questions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Chapters/c01_GSS2018Questions.pdf -------------------------------------------------------------------------------- /Edition3/Chapters/c06_Supplement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Chapters/c06_Supplement.pdf -------------------------------------------------------------------------------- /Edition3/Data/Data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Data/Data.zip -------------------------------------------------------------------------------- /Edition3/Data/Readme.md: -------------------------------------------------------------------------------- 1 | Zip file contains data sets in csv format. 2 | 3 | Data are also available as an R package (resampledata3) from CRAN. 4 | -------------------------------------------------------------------------------- /Edition3/Errata_Edition3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Errata_Edition3.pdf -------------------------------------------------------------------------------- /Edition3/README.md: -------------------------------------------------------------------------------- 1 | # Mathematical Statistics with Resampling and R, Third Edition (2022) 2 | 3 | 4 | Data sets, R code, supplementary materials for the textbook Mathematical Statistics with Resampling and R 5 | 6 | ## Contents here 7 | 8 | [Chapters](Chapters) 9 | Supplemental material for chapters, including 10 | additional notes about data and advanced topics. 11 | 12 | [Data](Data) data as .csv files (they are also available as an R package, 13 | see below). 14 | 15 | [RScripts](RScripts) R scripts to supplement chapters. 16 | 17 | 18 | ## Data in an R package 19 | 20 | The data are available as an R package 21 | [resampledata3](https://CRAN.R-project.org/package=resampledata3) 22 | on 23 | [CRAN](https://cran.r-project.org/mirrors.html). 
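24 | 
25 | For example, a minimal sketch of installing the package from CRAN and then loading it in R:
26 | 
27 | ```r
28 | install.packages("resampledata3")  # install once from CRAN
29 | library(resampledata3)             # load the package
30 | data(package = "resampledata3")    # list the data sets it provides
31 | ```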
32 | 
33 | 
34 | 
35 | ## Other websites
36 | 
37 | 
38 | The publisher's website is
39 | [Mathematical Statistics with Resampling and R, 3rd Edition](https://www.wiley.com/en-us/Mathematical+Statistics+with+Resampling+and+R%2C+3rd+Edition-p-9781119874034)
40 | 
41 | Available on:
42 | 
43 | * [Google Books](https://books.google.com/books?id=d7CAEAAAQBAJ)
44 | * [Google Play Books](https://play.google.com/store/books/details/Laura_M_Chihara_Mathematical_Statistics_with_Resam?id=d7CAEAAAQBAJ)
45 | * [Amazon](https://www.amazon.com/Mathematical-Statistics-Resampling-Laura-Chihara-ebook/dp/B0B99GCGQQ/ref=sr_1_fkmr2_2)
46 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c02_RIntroEDA1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/RScripts/c02_RIntroEDA1.pdf -------------------------------------------------------------------------------- /Edition3/RScripts/c02_RIntroEDA2.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Introduction to R, part 2"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2022"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(resampledata3)
11 | library(ggplot2)
12 | library(dplyr)
13 | ```
14 | 
15 | 
16 | ### Vectors in R
17 | 
18 | The basic data object in R is the vector.
19 | Even scalars are vectors of length 1.
20 | 
21 | There are several ways to create vectors.
22 | 
23 | The : operator creates sequences incrementing/decrementing
24 | by 1.
25 | 
26 | ```{r}
27 | 1:10
28 | 5:-3
29 | ```
30 | 
31 | The seq function creates sequences also.
32 | ```{r}
33 | seq(0, 3, by = .2)
34 | seq(0, 3, length = 15)
35 | ```
36 | 
37 | To create vectors with no particular pattern, use the
38 | c() function (c for **c**ombine).
39 | 
40 | ```{r}
41 | c(1, 4, 8, 2, 9)
42 | x <- c(2, 0, -4)
43 | x
44 | c(x, 0:5, x)
45 | ```
46 | 
47 | For vectors of characters,
48 | 
49 | ```{r}
50 | c("a", "b", "c", "d")
51 | ```
52 | 
53 | or logical values (note that there are no double quotes):
54 | 
55 | ```{r}
56 | c(TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)
57 | ```
58 | 
59 | The rep command for repeating values:
60 | 
61 | ```{r}
62 | rep("a", 5)
63 | rep(c("a", "b"), 5)
64 | rep(c("a", "b"), c(5, 2))
65 | ```
66 | 
67 | ### The class attribute
68 | 
69 | Use data.class to determine the class attribute of an object.
70 | 
71 | ```{r}
72 | state.name
73 | data.class(state.name)
74 | state.name == "Idaho"
75 | data.class(state.name == "Idaho")
76 | 
77 | head(FlightDelays$Carrier)
78 | data.class(FlightDelays$Carrier)
79 | ```
80 | 
81 | 
82 | ### Basic Arithmetic
83 | 
84 | ```{r}
85 | x <- 1:5
86 | x - 3
87 | x*10
88 | x/10
89 | x^2
90 | 2^x
91 | log(x)
92 | 
93 | w <- 6:10
94 | w
95 | x*w #coordinate-wise multiplication
96 | ```
97 | 
98 | #### Logical expressions
99 | 
100 | ```{r}
101 | x < 3
102 | ```
103 | 
104 | ### Subsetting a vector
105 | 
106 | In many cases, we will want only a portion of a data set. For
107 | subsetting a vector, the basic syntax is vector[*index*].
108 | In particular, note the use of *brackets* to indicate that we are
109 | subsetting.
110 | 
111 | ```{r}
112 | state.name # 50 states (alphabetical order)
113 | state.name[c(1, 25, 50)] # the 1st, 25th, and 50th
114 | state.name[-(1:10)] # remove the first 10.
115 | 
116 | z <- c(8, 3, 0, 9, 9, 2, 1, 3)
117 | z
118 | z[4] # The fourth element of z
119 | z[c(1, 3, 4)] # The first, third, and fourth elements
120 | z[-c(1, 3, 4)] # All elements except the first, third, and fourth
121 | ```
122 | 
123 | To return the values of z less than 4, we first introduce the
124 | which command:
125 | 
126 | ```{r}
127 | which(z < 4) # which positions are z values < 4?
128 | index <- which(z < 4) # store in index
129 | index
130 | z[index] # return z[c(2, 3, 6, 7)]
131 | ```
132 | 
133 | Suppose you want to find those observations where the delay length
134 | was greater than the mean delay length. We'll store this in a vector
135 | called index.
136 | 
137 | ```{r}
138 | delay <- FlightDelays$Delay
139 | index <- which(delay > mean(delay))
140 | head(index)
141 | ```
142 | 
143 | Thus, observations in rows 2, 10, 12, 14, 15, 16 are the first
144 | six that correspond to flights that had delays larger
145 | than the average delay length.
146 | 
147 | ### Extracting parts of a data frame
148 | 
149 | To subset particular rows of a data frame, use the filter command in the **dplyr** package.
150 | 
151 | For example, to create a data frame with just the United Airlines flights:
152 | ```{r}
153 | United <- FlightDelays %>% filter(Carrier == "UA")
154 | ```
155 | The select command in the **dplyr** package allows you to extract just certain variables (columns). For example, to create a data frame containing just the Carrier and Delay variables:
156 | 
157 | ```{r}
158 | FlightDelays2 <- FlightDelays %>% select(Carrier, Delay)
159 | ```
160 | Finally, we can combine these two actions to extract just certain rows and certain columns:
161 | 
162 | ```{r}
163 | United2 <- FlightDelays %>% filter(Carrier == "UA") %>% select(Carrier, Delay)
164 | ```
165 | 
166 | Now, suppose you want to work with a single variable in a data frame.
167 | 
168 | ```{r}
169 | delay <- FlightDelays %>% select(Delay)
170 | head(delay)
171 | mean(delay)
172 | data.class(delay)
173 | ```
174 | The problem is that in the above, the select command returns a data frame, and the mean command operates on vectors.
175 | 
176 | If we just want to extract one variable from a data frame and we want that variable to be a vector, use the pull command.
177 | 178 | ```{r} 179 | delay <- FlightDelays %>% pull(Delay) 180 | mean(delay) 181 | 182 | #Alternatively, we have seen that the $ operator can be used 183 | delay <- FlightDelays$Delay 184 | ``` 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /Edition3/RScripts/c03_PermutationTests.R: -------------------------------------------------------------------------------- 1 | #Chapter 3 Permutation Tests 2 | library(resampledata3) 3 | library(dplyr) 4 | library(ggplot2) 5 | 6 | #Section 3.3 7 | 8 | #Beerwings data set 9 | Beerwings %>% group_by(Gender) %>% summarize(mean(Hotwings)) 10 | observed <- 14.5333 - 9.3333 # store observed mean difference 11 | observed 12 | 13 | hotwings <- Beerwings$Hotwings 14 | # Alternative syntax using the dplyr package: 15 | # hotwings <- Beerwings %>% pull(Hotwings) 16 | 17 | N <- 10^5 - 1 # number of times to repeat this process 18 | result <- numeric(N) # space to save the random differences 19 | for (i in 1:N) 20 | { # sample of size 15, from 1 to 30, without replacement 21 | index <- sample(30, size = 15, replace = FALSE) 22 | result[i] <- mean(hotwings[index]) - mean(hotwings[-index]) 23 | } 24 | 25 | ggplot() + geom_histogram(aes(result), bins = 8) + 26 | geom_vline(xintercept = observed, linetype="dashed") 27 | 28 | (sum(result >= observed) + 1)/(N + 1) # P-value 29 | 30 | #----- 31 | #Verizon data set 32 | 33 | Verizon %>% group_by(Group) %>% summarize(mean(Time)) 34 | Time <- Verizon$Time 35 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time) 36 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time) 37 | 38 | observed <- mean(TimeILEC) - mean(TimeCLEC) 39 | observed 40 | 41 | N <- 10^4-1 42 | result <- numeric(N) 43 | for (i in 1:N) 44 | { 45 | index <- sample(1687, size = 1664, replace = FALSE) 46 | result[i] <- mean(Time[index]) - mean(Time[-index]) 47 | } 48 | 49 | ggplot() + geom_histogram(aes(result), bins = 8) + 50 | geom_vline(xintercept = observed, linetype = "dashed") 51 | 52 | (sum(result <= observed) + 1)/(N + 1) 53 | 54 | #--------- 55 | #Other statistics 56 | #Example 3.6 57 | #median 58 | observed <- median(TimeILEC) - median(TimeCLEC) 59 | N <- 10^4-1 60 | result <- numeric(N) 61 | for (i in 1:N) 62 | { 63 | index <- sample(1687, size = 1664, replace = FALSE) 64 | result[i] <- median(Time[index]) - median(Time[-index]) 65 | } 66 | (sum(result <= observed) + 1)/(N + 1) # P-value 67 | 68 | #trimmed mean 69 | #modifications to above 70 | observed <- (mean(TimeILEC, trim = .25) - 71 | mean(TimeCLEC, trim = .25)) 72 | #within for loop above, change to: 73 | result[i] <- (mean(Time[index], trim = .25) - 74 | mean(Time[-index], trim = .25)) 75 | 76 | 77 | #for proportion of time ILEC times > 10 78 | observed <- mean(TimeILEC > 10) - mean(TimeCLEC > 10) 79 | #and in the for loop, modify to 80 | result[i] <- mean(Time[index] > 10) - mean(Time[-index] > 10) 81 | 82 | #for ratio of variances 83 | observed <- var(TimeILEC) / var(TimeCLEC) 84 | result[i] <- var(Time[index]) / var(Time[-index]) 85 | 86 | #Recidivism case study 87 | #Example 3.8 88 | 89 | library(tidyr) 90 | data <- Recidivism %>% drop_na(Age25) %>% 91 | select(Age25, Recid) 92 | table(data$Age25) 93 | proportions(table(data$Age25, data$Recid), 1) 94 | 95 | Recid <- data$Recid # create vector 96 | observed <- .365 - .306 97 | N <- 10^4 - 1 98 | result <- numeric(N) 99 | for (i in 1:N) 100 | { 101 | index <- sample(17019, size = 3077, replace = FALSE) 102 | result[i] <- mean(Recid[index]=="Yes") - 103 | 
mean(Recid[-index]=="Yes")
104 | }
105 | 2 * (sum(result >= observed) + 1)/(N + 1)
106 | 
107 | #Example 3.9
108 | #Pew Research study on Faith among Black Americans
109 | pooled.data <- rep(c(1,0), c(1068, 1283)) # create vector
110 | observed <- (963/2094) - (105/257) # observed difference
111 | # (Mill-Gen Z)
112 | N <- 10^4 - 1
113 | result <- numeric(N)
114 | 
115 | for (i in 1:N)
116 | {
117 | index <- sample(2351, 2094, replace = FALSE)
118 | result[i] <- mean(pooled.data[index]) -
119 |   mean(pooled.data[-index])
120 | }
121 | 2 * (sum(result >= observed) + 1) / (N + 1)
122 | 
123 | #-----------------------------------------
124 | 
125 | #Section 3.4 Matched pairs
126 | #Diving
127 | Diff <- Diving2017$Final - Diving2017$Semifinal #difference in two scores
128 | observed <- mean(Diff) #mean of difference
129 | 
130 | N <- 10^5 - 1
131 | result <- numeric(N)
132 | 
133 | for (i in 1:N)
134 | {
135 | Sign <- sample(c(-1, 1), 12, replace = TRUE) #random vector of 1's or -1's
136 | Diff2 <- Sign*Diff #random pairs (a-b) -> (b-a)
137 | result[i] <- mean(Diff2) #mean of difference
138 | }
139 | 
140 | ggplot() + geom_histogram(aes(result), bins = 8) +
141 |   geom_vline(xintercept = observed, linetype = "dashed")
142 | 
143 | 2 * (sum(result >= observed) + 1) / (N + 1) #P-value
144 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c03_SolnExercise.R: --------------------------------------------------------------------------------
1 | #Chapter 3: Permutation tests
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 | 
6 | # Exercise 7 Checking different test statistics that will give same P-value
7 | N <- 10^4 - 1
8 | table(FlightDelays$Carrier)
9 | 
10 | FlightDelays %>% group_by(Carrier) %>% summarize(mean(Delay), sum(Delay))
11 | 
12 | #Optionally, using base R
13 | tapply(FlightDelays$Delay, FlightDelays$Carrier, mean)
14 | tapply(FlightDelays$Delay, FlightDelays$Carrier, sum)
15 | 
16 | observedSumUA <- 17949
17 | observedmeanUA <- 15.98308
18 | observedmeanDiff <- 15.98308 - 10.09738
19 | Delay <- FlightDelays$Delay # create the vector used in the loop below
20 | sumUA <- numeric(N)
21 | meanUA <- numeric(N)
22 | meanDiff <- numeric(N)
23 | set.seed(2)
24 | for (i in 1:N) {
25 | index <- sample(4029, 1123, replace = FALSE)
26 | sumUA[i] <- sum(Delay[index])
27 | meanUA[i] <- mean(Delay[index])
28 | meanDiff[i] <- mean(Delay[index]) - mean(Delay[-index])
29 | }
30 | 
31 | 2 * (sum(sumUA >= observedSumUA) + 1) / (N + 1) #P-value
32 | 
33 | 2 * (sum(meanUA >= observedmeanUA) + 1) / (N + 1) #P-value
34 | 
35 | 2 * (sum(meanDiff >= observedmeanDiff) + 1) / (N + 1) #P-value
36 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c04_SamplingDistributions.R: --------------------------------------------------------------------------------
1 | #Chapter 4
2 | #Sampling Distributions
3 | library(ggplot2) #needed for the plots below
4 | #Example 4.2
5 | #Draw 1000 random samples of size 100 from the exponential
6 | #distribution with lambda = 1/15
7 | Xbar <- numeric(1000) # space for results (vector of 0's)
8 | for (i in 1:1000)
9 | {
10 | x <- rexp(100, rate = 1/15) # draw random sample of size 100
11 | Xbar[i] <- mean(x) # compute mean, save in position i
12 | }
13 | 
14 | df <- data.frame(Xbar)
15 | ggplot(df, aes(Xbar)) + geom_histogram(bins = 10)
16 | ggplot(df, aes(sample = Xbar)) + geom_qq() + geom_qq_line()
17 | mean(Xbar)
18 | sd(Xbar)
19 | 
20 | #Example 4.3
21 | #Sampling distribution of max from Unif[0,1]
22 | 
23 | maxY <- numeric(1000)
24 | for (i in 1:1000)
25 | {
26 | y <- runif(12) # draw random sample of size 12
27 | maxY[i] <- max(y) # find max, save in position i
28 | }
29 | df <- data.frame(maxY)
30 | ggplot(df, aes(maxY)) + geom_histogram(bins = 10)
31 | 
32 | #----------------------------------------
33 | #Example 4.6
34 | #Sum of two values drawn from two different Poisson distributions
35 | X <- rpois(10^4, 5) # Draw 10^4 values from Pois(5)
36 | Y <- rpois(10^4, 12) # Draw 10^4 values from Pois(12)
37 | W <- X + Y
38 | 
39 | df1 <- data.frame(W)
40 | df2 <- data.frame(x = 2:35, y = dpois(2:35, 17))
41 | ggplot(df1, aes(W)) +
42 |   geom_histogram(aes(y = stat(density)), color = "white",
43 |     breaks = seq(2, 36, by = 2)) +
44 |   geom_line(data = df2, aes(x = x, y = y)) +
45 |   geom_point(data = df2, aes(x = x, y = y), pch = 1) + xlab("")
46 | 
47 | mean(W) #compare to theoretical, lambda = 17
48 | var(W)
49 | 
50 | #Example 4.7
51 | #Sampling distribution of mean of sample of size 30 from Gamma(5, 2)
52 | Xbar <- numeric(1000)
53 | for (i in 1:1000)
54 | {
55 | x <- rgamma(30, shape = 5, rate = 2)
56 | Xbar[i] <- mean(x)
57 | }
58 | 
59 | df <- data.frame(Xbar)
60 | ggplot(df, aes(x = Xbar)) +
61 |   geom_histogram(aes(y = stat(density)), color = "white", bins = 10) +
62 |   stat_function(fun = dnorm, args = list(mean = 5/2, sd = 0.204)) +
63 |   labs(x = "Means", y = "Density")
64 | ggplot(df, aes(sample = Xbar)) + geom_qq() + geom_qq_line()
65 | mean(Xbar)
66 | sd(Xbar)
67 | 
68 | #----------------------------------------------
69 | #Example 4.10
70 | #
71 | dbinom(25, 120, .3)
72 | pbinom(25, 120, .3)
73 | 
74 | 
75 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c05_Bootstrap.R: --------------------------------------------------------------------------------
1 | ##Chapter 5 Bootstrap
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 | 
6 | #Example 5.2: bootstrap a random sample of size 16 from Gamma(1, 1/2)
7 | gamSample <- rgamma(16, shape = 1, rate = 1/2) #draw the original sample (assumed; the script itself does not define gamSample)
8 | N <- 10^5
9 | mean.boot <- numeric(N)
10 | for (i in 1:N)
11 | {
12 | x <- sample(gamSample, 16, replace = TRUE) # draw resample
13 | mean.boot[i] <- mean(x) # compute mean, store in mean.boot
14 | }
15 | 
16 | mean(mean.boot)
17 | sd(mean.boot)
18 | 
19 | df <- data.frame(mean.boot)
20 | ggplot(df, aes(mean.boot)) +
21 |   geom_histogram(bins = 20, color = "white")
22 | 
23 | #-----------------
24 | #Example 5.3
25 | ggplot(Bangladesh, aes(Arsenic)) +
26 |   geom_histogram(bins = 10, color = "white")
27 | ggplot(Bangladesh, aes(sample = Arsenic)) +
28 |   geom_qq() + geom_qq_line()
29 | 
30 | Arsenic <- Bangladesh$Arsenic
31 | 
32 | n <- length(Arsenic)
33 | N <- 10^4
34 | mean.boot <- numeric(N)
35 | for (i in 1:N)
36 | {
37 | x <- sample(Arsenic, n, replace = TRUE)
38 | mean.boot[i] <- mean(x)
39 | }
40 | 
41 | df <- data.frame(mean.boot)
42 | ggplot(df, aes(mean.boot)) +
43 |   geom_histogram(bins = 15, color = "white") +
44 |   geom_vline(xintercept = mean(mean.boot), color = "red", lty = 2)
45 | ggplot(df, aes(sample = mean.boot)) + geom_qq() + geom_qq_line()
46 | 
47 | mean(mean.boot)
48 | mean(mean.boot) - mean(Arsenic)
49 | sd(mean.boot)
50 | 
51 | quantile(mean.boot, c(0.025, 0.975))
52 | 
53 | #----------------------------------
54 | #Example 5.4 Skateboarders
55 | testF <- Skateboard %>% filter(Experimenter == "Female") %>%
56 |   pull(Testosterone)
57 | testM <- Skateboard %>% filter(Experimenter == "Male") %>%
58 |   pull(Testosterone)
59 | 
60 | observed <- mean(testF) - mean(testM) #observed difference
61 | observed
62 | 
63 | nf <- length(testF) #sample size
64 | nm <- length(testM) #sample size
65 | 
66 | N <- 10^4
67 | mean.boot <- numeric(N)
68 | 
69 | for (i in 1:N)
70 | {
71 | resampleF <- sample(testF, nf, replace = TRUE)
72 | resampleM <- sample(testM, nm, replace = TRUE)
73 | mean.boot[i] <- mean(resampleF) - mean(resampleM)
74 | }
75 | 
76 | df <- data.frame(mean.boot)
77 | ggplot(df, aes(mean.boot)) +
78 |   geom_histogram(bins = 15, color = "white") +
79 |   geom_vline(xintercept = observed, color = "green", lty = 2)
80 | ggplot(df, aes(sample = mean.boot)) + geom_qq() + geom_qq_line()
81 | 
82 | mean(testF) - mean(testM)
83 | mean(mean.boot)
84 | sd(mean.boot)
85 | quantile(mean.boot, c(0.025, 0.975))
86 | mean(mean.boot) - (mean(testF) - mean(testM)) # bias
87 | 
88 | #-------------
89 | #Example 5.6
90 | #Verizon data
91 | 
92 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time)
93 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time)
94 | 
95 | observed <- mean(TimeILEC)/mean(TimeCLEC)
96 | observed
97 | 
98 | nILEC <- length(TimeILEC)
99 | nCLEC <- length(TimeCLEC)
100 | 
101 | N <- 10^4
102 | ratio.boot <- numeric(N)
103 | 
104 | for (i in 1:N)
105 | {
106 | resampleILEC <- sample(TimeILEC, nILEC, replace = TRUE)
107 | resampleCLEC <- sample(TimeCLEC, nCLEC, replace = TRUE)
108 | ratio.boot[i] <- mean(resampleILEC)/mean(resampleCLEC)
109 | }
110 | 
111 | df <- data.frame(ratio.boot)
112 | ggplot(df, aes(ratio.boot)) +
113 |   geom_histogram(bins = 15, color = "white") +
114 |   xlab("Ratio of means") +
115 |   geom_vline(xintercept = observed, lty = 2, color = "red") +
116 |   geom_vline(xintercept = mean(ratio.boot), lty = 3, color = "blue")
117 | 
118 | ggplot(df, aes(sample = ratio.boot)) +
119 |   geom_qq() + geom_qq_line()
120 | 
121 | mean(ratio.boot)
122 | sd(ratio.boot)
123 | quantile(ratio.boot, c(0.025, 0.975))
124 | mean(ratio.boot) - mean(TimeILEC)/mean(TimeCLEC)
125 | 
126 | #Example 5.7 Verizon continued
127 | #modifications to above for proportion of times the ILEC
128 | #delay time was greater than 24 hours
129 | N <- 10^4
130 | 
131 | prop.boot <- numeric(N)
132 | for (i in 1:N)
133 | {
134 | resampleILEC <- sample(TimeILEC, nILEC, replace = TRUE)
135 | prop.boot[i] <- mean(resampleILEC > 24)
136 | }
137 | 
138 | quantile(prop.boot, c(0.025, 0.975))
139 | #--------------------
140 | #Example 5.8
141 | #Faith among Black Americans
142 | genZ <- rep(c(1, 0), c(118, 139))
143 | genX <- rep(c(1, 0), c(965, 1510))
144 | 
145 | observed <- mean(genZ) - mean(genX) # observed diff.
146 | observed
147 | 
148 | N <- 10^4
149 | prop.boot <- numeric(N)
150 | for (i in 1:N)
151 | {
152 | resampleZ <- sample(genZ, 257, replace = TRUE)
153 | resampleX <- sample(genX, 2475, replace = TRUE)
154 | prop.boot[i] <- mean(resampleZ) - mean(resampleX)
155 | }
156 | 
157 | quantile(prop.boot, c(0.025, 0.975))
158 | 
159 | #----------------------------------------
160 | #Example 5.9
161 | #Relative risk
162 | 
163 | highbp <- rep(c(1, 0), c(55, 3283)) #high bp sample
164 | lowbp <- rep(c(1, 0), c(21, 2655)) #low bp sample
165 | 
166 | N <- 10^4
167 | rr.boot <- numeric(N)
168 | 
169 | for (i in 1:N)
170 | {
171 | resampleHigh <- sample(highbp, 3338, replace = TRUE)
172 | resampleLow <- sample(lowbp, 2676, replace = TRUE)
173 | 
174 | rr.boot[i] <- mean(resampleHigh)/mean(resampleLow) #rel.
175 | #risk 176 | } 177 | 178 | quantile(rr.boot, c(0.025, 0.975)) -------------------------------------------------------------------------------- /Edition3/RScripts/c07_MoreConfidenceIntervals.R: -------------------------------------------------------------------------------- 1 | #Chapter 7 Classical confidence intervals 2 | library(resampledata3) 3 | library(dplyr) 4 | library(ggplot2) 5 | 6 | #Example 7.1 7 | #Confidence intervals of mean of samples of size 30 drawn from N(25, 4) 8 | counter <- 0 # set counter to 0 9 | df <- data.frame(x = c(22, 28), y = c(1,100)) 10 | p <- ggplot(df, aes(x = x, y = y)) + geom_vline(xintercept = 25) 11 | 12 | for (i in 1:1000) 13 | { 14 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 15 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 16 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 17 | if (L < 25 && 25 < U) # check if 25 is in interval 18 | counter <- counter + 1 # if yes, increase counter by 1 19 | if (i <= 100) # plot first 100 intervals 20 | p <- p + annotate("segment", x = L, xend = U, y = i, yend = i) 21 | } 22 | 23 | p 24 | counter/1000 # proportion of times interval contains mu. 25 | 26 | #------------------------------------ 27 | #Section 7.1.2 28 | #Confidence intervals for mean of samples drawn from normal 29 | #distribution, mean and variance unknown 30 | N <- 10^4 31 | w <- numeric(N) 32 | n <- 15 # sample size 33 | for (i in 1:N) 34 | { 35 | x <- rnorm(n, 25, 7) # draw 15 from N(25, 7^2) 36 | xbar <- mean(x) 37 | s <- sd(x) 38 | w[i] <- (xbar-25) / (s/sqrt(n)) 39 | } 40 | 41 | df <- data.frame(w) 42 | ggplot(df, aes(sample = w)) + geom_qq(size = .8) + 43 | geom_qq_line() 44 | 45 | #----------------------------------- 46 | #Example 7.5 47 | pt(2.8, 27) 48 | qt(0.95, 27) 49 | 50 | #------------------------------------ 51 | #Example 7.6 52 | girls <- NCBirths2004 %>% filter(Gender == "Female") %>% 53 | pull(Weight) 54 | t.test(girls, conf.level = .99)$conf 55 | 56 | #---------------------------------------------- 57 | #Example 7.7 58 | #Samples from right-skewed Gamma(5,2) 59 | tooLow <- 0 # set counter to 0 60 | tooHigh <- 0 # set counter to 0 61 | n <- 20 # sample size 62 | q <- qt(0.975, n-1) # quantile 63 | N <- 10^5 64 | for (i in 1:N) 65 | { 66 | x <- rgamma(n, shape = 5, rate = 2) 67 | xbar <- mean(x) 68 | s <- sd(x) 69 | L <- xbar - q*s/sqrt(n) 70 | U <- xbar + q*s/sqrt(n) 71 | if (U < 5/2) # Does right endpt miss 5/2? 72 | tooLow <- tooLow + 1 # If yes, increase counter 73 | if (5/2 < L) # Does left endpt miss 5/2? 
74 | tooHigh <- tooHigh + 1 # If yes, increase counter
75 | }
76 | tooLow/N
77 | tooHigh/N
78 | 
79 | #-------------------------------------------
80 | #Example 7.8
81 | t.test(Response ~ Treatment, data = Reading)$conf
82 | 
83 | #------------------------------------------
84 | #Example 7.14
85 | t.test(NCBirths2004$Weight, alt = "greater")$conf
86 | 
87 | #-----------------------------------------
88 | #Example 7.17
89 | prop.test(1385, 2193, conf.level = .9)$conf
90 | 
91 | prop.test(1385, 2193, conf.level = .9, alt = "greater")$conf
92 | 
93 | #----------------------------------------
94 | #Example 7.20
95 | 
96 | prop.test(c(172, 223), c(674, 676))$conf
97 | 
98 | #---------------------------------------
99 | #Example 7.21
100 | #Bootstrap t confidence interval
101 | Arsenic <- Bangladesh$Arsenic
102 | xbar <- mean(Arsenic)
103 | N <- 10^4
104 | n <- length(Arsenic)
105 | Tstar <- numeric(N)
106 | for (i in 1:N)
107 | {
108 | x <- sample(Arsenic, size = n, replace = TRUE)
109 | Tstar[i] <- (mean(x) - xbar) / (sd(x)/sqrt(n))
110 | }
111 | quantile(Tstar, c(0.025, 0.975))
112 | 
113 | xbar - quantile(Tstar, c(0.975, 0.025)) * sd(Arsenic)/sqrt(n)
114 | 
115 | #--------------------------------------------
116 | #Example 7.22
117 | #Bootstrap t CI for difference in means
118 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time)
119 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time)
120 | 
121 | thetahat <- mean(TimeILEC) - mean(TimeCLEC)
122 | nx <- length(TimeILEC) # nx=1664
123 | ny <- length(TimeCLEC) # ny=23
124 | SE <- sqrt(var(TimeILEC)/nx + var(TimeCLEC)/ny)
125 | 
126 | N <- 10^4
127 | Tstar <- numeric(N)
128 | for (i in 1:N)
129 | {
130 | bootx <- sample(TimeILEC, nx, replace = TRUE)
131 | booty <- sample(TimeCLEC, ny, replace = TRUE)
132 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) /
133 |   sqrt(var(bootx)/nx + var(booty)/ny)
134 | }
135 | thetahat - quantile(Tstar, c(.975, .025)) * SE
136 | t.test(TimeILEC, TimeCLEC)$conf # for comparison
137 | 
138 | #---------------------------------------------
139 | #Example 7.23
140 | #Bootstrap t with estimated standard errors (iterated bootstrap)
141 | Arsenic <- Bangladesh$Arsenic
142 | estimate <- mean(Arsenic, trim = 0.25) # 35.95985
143 | 
144 | N <- 10^4 # outer loop
145 | N2 <- 10^2 # inner loop
146 | n <- length(Arsenic)
147 | Tstar <- numeric(N)
148 | estimateStar <- numeric(N)
149 | seStar <- numeric(N)
150 | 
151 | for (i in 1:N)
152 | {
153 | x <- sample(Arsenic, size = n, replace = TRUE)
154 | 
155 | # Inner loop to estimate standard error based on x
156 | estimate2 <- numeric(N2)
157 | for (j in 1:N2)
158 | {
159 | x2 <- sample(x, size = n, replace = TRUE)
160 | estimate2[j] <- mean(x2, trim = 0.25)
161 | }
162 | 
163 | estimateStar[i] <- mean(x, trim = 0.25)
164 | seStar[i] <- sd(estimate2)
165 | Tstar[i] <- (estimateStar[i] - estimate) / seStar[i]
166 | }
167 | 
168 | 
169 | sd(estimateStar) # Standard error
170 | quantile(Tstar, c(0.025, 0.975))
171 | # Bootstrap t interval
172 | estimate - quantile(Tstar, c(.975, .025)) * sd(estimateStar)
173 | 
174 | #Ordinary t interval with bootstrap SE
175 | estimate + qt(c(0.025, 0.975), n-1) * sd(estimateStar)
176 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c08_MoreHypothesisTests.R: --------------------------------------------------------------------------------
1 | #Chapter 8 More Hypothesis Tests
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 | 
6 | #Example 8.4
7 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater")
8 | 
9 | #Bootstrap t test approach
10 | Arsenic <- Bangladesh$Arsenic
11 | observedT <- t.test(Arsenic, mu = 100)$statistic
12 | xbar <- mean(Arsenic)
13 | n <- length(Arsenic)
14 | N <- 10^5
15 | Tstar <- numeric(N)
16 | 
17 | for (i in 1:N)
18 | {
19 | bootx <- sample(Arsenic, n, replace = TRUE)
20 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n))
21 | }
22 | 
23 | (sum(Tstar >= observedT) + 1)/(N + 1)
24 | 
25 | 
26 | #------------------------------------------
27 | #Example 8.5
28 | #Comparing two means
29 | t.test(Weight ~ Smoker, data = NCBirths2004, alt = "greater")
30 | 
31 | #-------------------------------------------
32 | #Example 8.6
33 | prop.test(c(108, 51), c(143, 119))
34 | 
35 | #-------------------------------------------
36 | #Example 8.15
37 | sum(dbinom(5:8, 8, 0.3185))
38 | 1 - pbinom(4, 8, 0.3185) #same
39 | 
40 | #---------------------------------------------
41 | #Example 8.19
42 | binom.test(7, 21, 0.5)
43 | 
44 | pbinom(7, 21, 0.5696755)
45 | 1 - pbinom(6, 21, 0.1458769)
46 | 
47 | #-----------------------------
-------------------------------------------------------------------------------- /Edition3/RScripts/c09_Regression.R: --------------------------------------------------------------------------------
1 | #Chapter 9
2 | #Regression
3 | library(resampledata3)
4 | library(ggplot2)
5 | library(dplyr)
6 | 
7 | #Section 9.2
8 | #base R
9 | cor(Spruce$Ht.change, Spruce$Di.change)
10 | #dplyr package
11 | Spruce %>% summarize(correlation = cor(Ht.change, Di.change))
12 | 
13 | #ggplot2 package
14 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point()
15 | #base R
16 | plot(Di.change ~ Ht.change, data = Spruce)
17 | 
18 | #-------------------------------------------------
19 | #Example 9.3
20 | 
21 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce)
22 | spruce.lm
23 | 
24 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point() +
25 |   geom_smooth(method = lm, se = FALSE)
26 | 
27 | fitted(spruce.lm)
28 | predict(spruce.lm) #same
29 | 
30 | (nrow(Spruce) - 1) * var(Spruce$Ht.change)
31 | 
32 | #-----------------------------------------------
33 | #Section 9.3
34 | Spruce$Residuals <- resid(spruce.lm)
35 | ggplot(Spruce, aes(x = Ht.change, y = Residuals)) +
36 |   geom_point() + geom_hline(yintercept = 0) +
37 |   geom_smooth(method = "loess", se = FALSE, span = 2)
38 | 
39 | #----------------------------------------------
40 | #Example 9.8
41 | skate.lm <- lm(Free ~ Short, data = Skating2010)
42 | summary(skate.lm)
43 | 
44 | #Section 9.5
45 | #Bootstrapping correlation, slope, intercept, and predicted value
46 | 
47 | N <- 10^4
48 | cor.boot <- numeric(N)
49 | beta.boot <- numeric(N)
50 | alpha.boot <- numeric(N)
51 | yPred.boot <- numeric(N)
52 | n <- nrow(Skating2010) # number of skaters = 24
53 | for (i in 1:N)
54 | {
55 | index <- sample(n, replace = TRUE) # sample from 1,2,...,n
56 | Skate.boot <- Skating2010[index, ] # resampled data
57 | 
58 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free)
59 | 
60 | #recalculate linear model estimates
61 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot)
62 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept
63 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope
64 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i]
65 | }
66 | 
67 | mean(cor.boot)
68 | sd(cor.boot)
69 | quantile(cor.boot, c(.025, .975))
70 | 
71 | observed <- cor(Skating2010$Short, Skating2010$Free)
72 | 
73 | df <- data.frame(cor.boot, beta.boot, alpha.boot, yPred.boot)
74 | 
75 | ggplot(df, aes(x = cor.boot)) +
76 |   geom_histogram(bins = 20, color
= "white") + 77 | geom_vline(xintercept = observed, color = "red", lty = 2) 78 | 79 | #-------------------------------------------- 80 | #Section 9.5.1 Permutation Tests 81 | 82 | N <- 9999 83 | n <- nrow(Skating2010) # number of observations 84 | result <- numeric(N) 85 | observed <- cor(Skating2010$Short, Skating2010$Free) 86 | for (i in 1:N) 87 | { 88 | index <- sample(n, replace=FALSE) 89 | Short.permuted <- Skating2010$Short[index] 90 | result[i] <- cor(Short.permuted, Skating2010$Free) 91 | } 92 | (sum(observed <= result) + 1) / (N + 1) # P-value 93 | 94 | #---------------------------------------------- 95 | #Example 9.12 96 | #Fatalities data 97 | glm(Alcohol ~ Age, data = Fatalities, family = binomial) 98 | f <- function(x){exp(-0.123-0.029*x)/(1+exp(-0.123-0.029*x))} 99 | 100 | ggplot(Fatalities, aes(x = Age, y = Alcohol)) + geom_point() + 101 | stat_function(fun = f) 102 | 103 | #alternative way to define f 104 | f <- function(x){plogis(-0.123 - 0.029*x)} 105 | 106 | #------------------------------------------ 107 | #Section 9.6 108 | #Inference for logistic regression 109 | fit <- glm(Alcohol ~ Age, data = Fatalities, 110 | family = binomial) 111 | data.class(fit) # is a "glm" object, so for help use: 112 | help(glm) 113 | 114 | fit # prints the coefficients and other basic info 115 | coef(fit) # the coefficients as a vector 116 | summary(fit) # gives standard errors for coefficients, etc. 117 | 118 | 119 | # Full bootstrap - slope coeff. and prediction at age 20 120 | N <- 10^3 121 | n <- nrow(Fatalities) # number of observations 122 | alpha.boot <- numeric(N) 123 | beta.boot <- numeric(N) 124 | pPred.boot <- numeric(N) 125 | 126 | for (i in 1:N) 127 | { 128 | index <- sample(n, replace = TRUE) 129 | Fatal.boot <- Fatalities[index, ] # resampled data 130 | 131 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 132 | family = binomial) 133 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 134 | beta.boot[i] <- coef(fit.boot)[2] # new slope 135 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 136 | } 137 | 138 | quantile(beta.boot, c(.025, .975)) # 95% percentile CI 139 | df <- data.frame(alpha.boot, beta.boot, pPred.boot) 140 | ggplot(df, aes(x = beta.boot)) + 141 | geom_histogram(bins = 20, color = "white") 142 | ggplot(df, aes(sample = beta.boot)) + geom_qq() + geom_qq_line() 143 | 144 | -------------------------------------------------------------------------------- /Edition3/RScripts/c10_CategoricalData.R: -------------------------------------------------------------------------------- 1 | #Chapter 10 2 | #Categorical data 3 | library(resampledata3) 4 | library(ggplot2) 5 | library(dplyr) 6 | 7 | #Section 10.2 Permutation Test of Independence 8 | chisq.test(GSS2018$Degree, GSS2018$DeathPenalty, simulate.p.value = TRUE, B = 10^5-1) 9 | mat <- table(GSS2018$Degree, GSS2018$DeathPenalty) 10 | chisq.test(mat, simulate.p.value = TRUE, B = 10^5-1) 11 | 12 | #Section 10.3 13 | 1 - pchisq(50.449, 4) 14 | 15 | chisq.test(GSS2018$Degree, GSS2018$DeathPenalty) 16 | 17 | mat <- rbind(c(42, 50), c(30, 87)) 18 | chisq.test(mat) 19 | fisher.test(mat) 20 | 21 | #Section 10.4 Test of Homogeneity 22 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50)) 23 | candy.mat 24 | 25 | chisq.test(candy.mat) 26 | 27 | #Section 10.5 28 | qchisq(c(.2, .4, .6, .8), 10) 29 | 30 | 31 | Homeruns <- Phillies2009$Homeruns 32 | 33 | lambda <- mean(Homeruns) 34 | dpois(0:4, lambda) 35 | table(Homeruns) 36 | 37 | table(Homeruns)/162 38 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c10_PermTestIndependence.R: --------------------------------------------------------------------------------
1 | #------------------------------------------------
2 | #Chapter 10 Categorical data
3 | #Implementation of the permutation test of independence
4 | #This function computes the chi-square
5 | #test statistic
6 | 
7 | library(ggplot2) #needed for the histogram below
8 | chisq <- function(observed, print = TRUE) {
9 | # Chi-square statistic for independence in a contingency table,
10 | # with related data exploration.
11 | # observed is the observed contingency table
12 | 
13 | observedWithTotals <- cbind(observed, total = rowSums(observed))
14 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
15 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
16 | statistic <- sum((observed - expected)^2/expected)
17 | if (print)
18 | {
19 | cat("Observed, with totals:\n")
20 | print(observedWithTotals)
21 | cat("\nRow Fractions:\n")
22 | print(round(observed / rowSums(observed), 3))
23 | cat("\nColumn Fractions:\n")
24 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
25 | 
26 | cat("\nExpected:\n")
27 | print(round(expected, 1))
28 | cat("\nDifference:\n")
29 | print(round(observed - expected, 1))
30 | 
31 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
32 | }
33 | return(invisible(statistic))
34 | }
35 | 
36 | #-------------------------------------------
37 | 
38 | #We use this function on the contingency table for Degree and
39 | #DeathPenalty
40 | #set.seed(200)
41 | library(resampledata3)
42 | observed <- chisq(table(GSS2018$Degree, GSS2018$DeathPenalty))
43 | observed
44 | 
45 | #Now, there were 155 people who declined to respond to the
46 | #death penalty question, so we will remove these observations from our
47 | #analysis.
48 | 
49 | #We will use the drop_na() command from the tidyr package. The
50 | #command below will create a data frame with variables Degree and DeathPenalty,
51 | #removing any rows with an NA in either variable (though in this case, only
52 | #the death penalty variable has missing values).
53 | 
54 | library(tidyr)
55 | df <- drop_na(GSS2018, Degree, DeathPenalty)
56 | #The sample(df$DeathPenalty) command below permutes the
57 | #values in DeathPenalty
58 | N <- 10^5 - 1
59 | result <- numeric(N)
60 | for (i in 1:N)
61 | {
62 | DP.permuted <- sample(df$DeathPenalty)
63 | GSS.table <- table(df$Degree, DP.permuted)
64 | result[i] <- chisq(GSS.table, print = FALSE) #suppress printing inside the loop
65 | }
66 | 
67 | ggplot() + geom_histogram(aes(x = result)) +
68 |   geom_vline(xintercept = observed, lty = 2)
69 | 
70 | #Check the distribution of the test statistics to help in determining
71 | #the direction of the inequality when computing the $P$-value.
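72 | 
73 | #Optional: a minimal sketch of the P-value computation; larger values
74 | #of the chi-square statistic are the more extreme ones here.
75 | (sum(result >= observed) + 1)/(N + 1)
76 | 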
72 |  73 | -------------------------------------------------------------------------------- /Edition3/RScripts/c11_Bayes.R: -------------------------------------------------------------------------------- 1 | #Chapter 11 2 | #Bayesian methods 3 | library(resampledata3) 4 | library(ggplot2) 5 | library(dplyr) 6 | 7 | #Example 11.1 8 | theta <- seq(0, 1, by = .1) 9 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0) 10 | likelihood <- theta * (1 - theta)^2 11 | constant <- sum(prior * likelihood) 12 | posterior <- prior * likelihood / constant 13 | posterior 14 | sum(theta * prior) # prior mean 15 | sum(theta * posterior) # posterior mean 16 | 17 | #continued 18 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 successes, 5 failures 19 | constant2 <- sum(prior * likelihood2) 20 | posterior2 <- prior * likelihood2 / constant2 21 | posterior2 22 | 23 | likelihood3 <- theta^2 * (1 - theta)^3 24 | constant3 <- sum(posterior * likelihood3) 25 | posterior3 <- posterior * likelihood3 / constant3 26 | posterior3 # not shown, same as posterior2 27 | sum(theta * posterior2) # posterior mean 28 | df <- data.frame(theta, prior, posterior, posterior2) # gather the values for plotting 29 | ggplot(df, aes(x = theta, y = prior)) + 30 | geom_point() + geom_line(lty = 1) + 31 | geom_point(aes(y = posterior)) + 32 | geom_line(aes(y = posterior), lty = 2) + 33 | geom_point(aes(y = posterior2)) + 34 | geom_line(aes(y = posterior2), lty = 3) 35 | 36 | #---------------------------------------------------- 37 | #Example 11.3 38 | qbeta(.025, 111, 91) # lower limit of 95% posterior credible interval 39 | qbeta(.975, 111, 91) # upper limit of 95% posterior credible interval 40 | 1 - pbeta(.5, 111, 91) # posterior probability that theta > 0.5 41 | 42 | ggplot(data.frame(x = c(0,1)), aes(x = x)) + 43 | stat_function(fun = dbeta, aes(lty = "2"), 44 | args = list(shape1 = 1, shape2 = 1)) + 45 | stat_function(fun = dbeta, aes(lty = "1"), 46 | args = list(shape1 = 111, shape2 = 91)) + 47 | scale_linetype_manual(values = c("2" = 2, "1" = 1), 48 | labels = c("Posterior", "Prior"), 49 | guide = guide_legend(reverse = TRUE)) + 50 | scale_x_continuous(breaks = seq(0, 1, by = .2)) + 51 | labs(x = "", y = "Density") + 52 | theme(legend.title = element_blank(), 53 | legend.position = c(.1, .85), 54 | legend.key = element_blank()) 55 | 56 | #------------------------------------------- 57 | #Section 11.5 Sequential data 58 | 59 | n <- c(1874, 1867, 1871, 1868, 1875, 1875) 60 | X <- c(52, 41, 55, 49, 39, 39) 61 | alpha <- X # vector of posterior parameters 62 | beta <- n - X # vector of posterior parameters 63 | N <- 10^5 # replications 64 | theta <- matrix(0.0, nrow = N, ncol = 6) 65 | for (j in 1:6) 66 | { 67 | theta[, j] <- rbeta(N, alpha[j], beta[j]) 68 | } 69 | probBest <- numeric(6) # vector for results 70 | best <- apply(theta, 1, max) # maximum of each row 71 | for (j in 1:6) 72 | { 73 | probBest[j] <- mean(theta[, j] == best) 74 | } 75 | 76 | #probBest contains probabilities of each of the six arms 77 | #being best 78 | 79 | df <- data.frame(theta[1:10^4,]) 80 | names(df) 81 | ggplot(df, aes(x = X1, y = X3)) + geom_point(size = .5) + 82 | geom_abline(slope = 1, intercept = 0) + 83 | annotate("text", x = 0.037, y = 0.042, parse = TRUE, 84 | label = "theta[3] > theta[1]") + 85 | annotate("text", x = 0.042, y = 0.037, parse = TRUE, 86 | label = "theta[1] > theta[3]") + 87 | labs(x = expression(theta[1]), y = expression(theta[3])) 88 | 89 | #---------------------------------------- 90 | probBest 91 | # -------------------------------------------------------------------------------- /Edition3/RScripts/c12_ANOVA.R: -------------------------------------------------------------------------------- 1 | #Chapter 12 ANOVA 2 | library(resampledata3) 3 | 
library(ggplot2) 4 | library(dplyr) 5 | 6 | #Example 12.1 7 | anova(lm(Weight ~ MothersAge, data = ILBoys)) 8 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] # extract the F statistic 9 | 10 | summary(aov(Weight ~ MothersAge, data = ILBoys)) # same 11 | 12 | #Section 12.1.2 Permutation Test Approach 13 | #Checking the normality condition 14 | ggplot(ILBoys, aes(sample = Weight)) + geom_qq() + 15 | geom_qq_line() + facet_wrap(. ~ MothersAge) 16 | 17 | #Permutation test 18 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 19 | n <- length(ILBoys$Weight) 20 | N <- 10^4 - 1 21 | results <- numeric(N) 22 | for (i in 1:N) 23 | { 24 | index <- sample(n) 25 | Wt.perm <- ILBoys$Weight[index] 26 | results[i] <- anova(lm(Wt.perm ~ MothersAge, data = ILBoys))$F[1] 27 | } 28 | 29 | (sum(results >= observed) + 1) / (N + 1) # P-value 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mathematical Statistics with Resampling and R 2 | 3 | Data sets, R code, supplementary materials, and errata for the textbook 4 | *Mathematical Statistics with Resampling and R* 5 | by 6 | [Laura Chihara](https://lchihara.people.sites.carleton.edu) 7 | and 8 | [Tim Hesterberg](https://www.timhesterberg.net). 9 | 10 | 11 | Current: [Third Edition (2022)](Edition3) 12 | 13 | 14 | Older: 15 | [Second Edition (2018)](Edition2), 16 | [First Edition (2011)](Edition1). 17 | -------------------------------------------------------------------------------- /readme-MathStatsResamplingR.txt: -------------------------------------------------------------------------------- 1 | PLEASE IGNORE THIS FILE. 2 | It contains working notes by Chihara and Hesterberg. 3 | 4 | Some .pdf and .R files listed here are compiled from .tex or .Rmd files in 5 | MathStatsTextbook/trunk/StudentWebMaterials/ 6 | 7 | Some .R files are copied (and possibly edited) from one of 8 | MathStatsTextbook/trunk/R/ 9 | MathStatsTextbook/trunk/StudentWebMaterials/ 10 | --------------------------------------------------------------------------------