├── Edition1
│   ├── CorrectedOdds_Ed1.pdf
│   ├── ErrataEdition1.pdf
│   └── README.md
├── Edition2
│   ├── Chapters
│   │   ├── Table3.1.pdf
│   │   └── Table5.1.pdf
│   ├── Data
│   │   ├── Alelager.csv
│   │   ├── Bangladesh.csv
│   │   ├── Beerwings.csv
│   │   ├── BookPrices.csv
│   │   ├── Bushmeat.csv
│   │   ├── Cereals.csv
│   │   ├── Challenger.csv
│   │   ├── ChiMarathonMen.csv
│   │   ├── Cuckoos.csv
│   │   ├── Diving2017.csv
│   │   ├── Fatalities.csv
│   │   ├── FishMercury.csv
│   │   ├── FlightDelays.csv
│   │   ├── GSS2002.csv
│   │   ├── Girls2004.csv
│   │   ├── Groceries.csv
│   │   ├── ILBoys.csv
│   │   ├── IceCream.csv
│   │   ├── Illiteracy.csv
│   │   ├── Lottery.csv
│   │   ├── MathAnxiety.csv
│   │   ├── MathStatsData_Ed2.zip
│   │   ├── Maunaloa.csv
│   │   ├── MnGroundwater.csv
│   │   ├── MobileAds.csv
│   │   ├── NBA1617.csv
│   │   ├── NCBirths2004.csv
│   │   ├── Nasdaq.csv
│   │   ├── Olympics2012.csv
│   │   ├── Phillies2009.csv
│   │   ├── Quakes.csv
│   │   ├── Quetzal.csv
│   │   ├── RangersTwins2016.csv
│   │   ├── Recidivism.csv
│   │   ├── Salaries.csv
│   │   ├── Service.csv
│   │   ├── Skateboard.csv
│   │   ├── Skating2010.csv
│   │   ├── Spruce.csv
│   │   ├── Starcraft.csv
│   │   ├── TV.csv
│   │   ├── TXBirths2004.csv
│   │   ├── Titanic.csv
│   │   ├── Turbine.csv
│   │   ├── Verizon.csv
│   │   ├── Volleyball2009.csv
│   │   ├── Walleye.csv
│   │   ├── Watertable.csv
│   │   └── wafers.csv
│   ├── Errata_Edition2.pdf
│   ├── R
│   │   ├── Chap02EDA.R
│   │   ├── Chap02EDA.Rmd
│   │   ├── Chap02EDA_d.Rmd
│   │   ├── Chap03Testing.R
│   │   ├── Chap03Testing.Rmd
│   │   ├── Chap03Testing_Exer.R
│   │   ├── Chap03Testing_Exer.Rmd
│   │   ├── Chap03Testing_Exer_d.Rmd
│   │   ├── Chap03Testing_d.Rmd
│   │   ├── Chap04SamplingDist.R
│   │   ├── Chap04SamplingDist.Rmd
│   │   ├── Chap04SamplingDist_Exer.R
│   │   ├── Chap04SamplingDist_Exer.Rmd
│   │   ├── Chap04SamplingDist_Exer_d.Rmd
│   │   ├── Chap04SamplingDist_d.Rmd
│   │   ├── Chap05Bootstrap.R
│   │   ├── Chap05Bootstrap.Rmd
│   │   ├── Chap05Bootstrap_Exer.R
│   │   ├── Chap05Bootstrap_Exer.Rmd
│   │   ├── Chap05Bootstrap_Exer_d.Rmd
│   │   ├── Chap05Bootstrap_d.Rmd
│   │   ├── Chap06Estimation.R
│   │   ├── Chap06Estimation.Rmd
│   │   ├── Chap06Estimation_d.Rmd
│   │   ├── Chap07MoreConfIntervals.R
│   │   ├── Chap07MoreConfIntervals.Rmd
│   │   ├── Chap07MoreConfIntervals_Exer.R
│   │   ├── Chap07MoreConfIntervals_Exer.Rmd
│   │   ├── Chap07MoreConfIntervals_Exer_d.Rmd
│   │   ├── Chap07MoreConfIntervals_d.Rmd
│   │   ├── Chap08MoreHypTests.R
│   │   ├── Chap08MoreHypTests.Rmd
│   │   ├── Chap08MoreHypTests_Exer.R
│   │   ├── Chap08MoreHypTests_Exer.Rmd
│   │   ├── Chap08MoreHypTests_Exer_d.Rmd
│   │   ├── Chap08MoreHypTests_d.Rmd
│   │   ├── Chap09Regression.R
│   │   ├── Chap09Regression.Rmd
│   │   ├── Chap09Regression_d.Rmd
│   │   ├── Chap10categorical.R
│   │   ├── Chap10categorical.Rmd
│   │   ├── Chap10categorical_d.Rmd
│   │   ├── Chap11Bayesian.R
│   │   ├── Chap11Bayesian.Rmd
│   │   ├── Chap11Bayesian_d.Rmd
│   │   ├── Chap12ANOVA.R
│   │   ├── Chap12ANOVA.Rmd
│   │   ├── Chap12ANOVA_Exer.Rmd
│   │   ├── Chap12Anova_Exer.R
│   │   ├── Chap13AddTopics.R
│   │   ├── Chap13AddTopics.Rmd
│   │   └── Chap13AddTopics_d.Rmd
│   └── README.md
├── Edition3
│   ├── Chapters
│   │   ├── c01_GSS2018Questions.pdf
│   │   └── c06_Supplement.pdf
│   ├── Data
│   │   ├── Data.zip
│   │   └── Readme.md
│   ├── Errata_Edition3.pdf
│   ├── README.md
│   └── RScripts
│       ├── MobileAds.R
│       ├── c02_RIntroEDA1.R
│       ├── c02_RIntroEDA1.Rmd
│       ├── c02_RIntroEDA1.pdf
│       ├── c02_RIntroEDA2.Rmd
│       ├── c03_PermutationTests.R
│       ├── c03_SolnExercise.R
│       ├── c04_SamplingDistributions.R
│       ├── c05_Bootstrap.R
│       ├── c06_Estimation.R
│       ├── c06_WindTurbine.R
│       ├── c07_MoreConfidenceIntervals.R
│       ├── c08_MoreHypothesisTests.R
│       ├── c09_Regression.R
│       ├── c10_CategoricalData.R
│       ├── c10_PermTestIndependence.R
│       ├── c11_Bayes.R
│       ├── c12_ANOVA.R
│       └── c13_AdditionalTopics.R
├── README.md
└── readme-MathStatsResamplingR.txt
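
In the per-file sections below, text files are reproduced inline while binary files (PDF, ZIP) are linked by commit-pinned raw URL. As a minimal sketch of how a dataset could be read straight from those raw URLs in R — the `base` variable and the choice of Beerwings.csv are illustrative, not part of the repository:

```r
# Minimal sketch (not part of the repository): read one Edition 2 dataset
# directly from GitHub, using the same commit-pinned raw-URL prefix that
# appears in the file links below.
base <- paste0("https://raw.githubusercontent.com/lchihara/",
               "MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1")
Beerwings <- read.csv(paste(base, "Edition2/Data/Beerwings.csv", sep = "/"))
str(Beerwings)  # 30 obs. of 4 variables: ID, Hotwings, Beer, Gender
```

Pinning the URL to a commit hash keeps such a script reproducible even if the repository changes later.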
/Edition1/CorrectedOdds_Ed1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition1/CorrectedOdds_Ed1.pdf
--------------------------------------------------------------------------------
/Edition1/ErrataEdition1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition1/ErrataEdition1.pdf
--------------------------------------------------------------------------------
/Edition1/README.md:
--------------------------------------------------------------------------------
1 | # Mathematical Statistics with Resampling and R, 1st edition (2011)
2 |
3 | This is an older edition. For the current edition, see
4 | [https://github.com/lchihara/MathStatsResamplingR](https://github.com/lchihara/MathStatsResamplingR)
5 |
6 |
7 | ## First Edition
8 |
9 | [Author's website](https://sites.google.com/site/chiharahesterberg/chapter-materials-Ed1)
10 |
11 | Available on:
12 |
13 | * [Google Books](https://books.google.com/books?id=9KRHFDKDV84C)
14 | * [Amazon](https://www.amazon.com/Mathematical-Statistics-Resampling-Laura-Chihara/dp/1118029852/ref=sr_1_1?ie=UTF8)
15 |
--------------------------------------------------------------------------------
/Edition2/Chapters/Table3.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Chapters/Table3.1.pdf
--------------------------------------------------------------------------------
/Edition2/Chapters/Table5.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Chapters/Table5.1.pdf
--------------------------------------------------------------------------------
/Edition2/Data/Alelager.csv:
--------------------------------------------------------------------------------
1 | ID,Type,Alcohol,Calories
2 | 1,Ale,5.5,160
3 | 2,Ale,5.4,156
4 | 3,Ale,4.85,146
5 | 4,Ale,4.5,150
6 | 5,Ale,5.2,160
7 | 6,Ale,5.3,174
8 | 7,Ale,5.3,177
9 | 8,Ale,5.2,177
10 | 9,Ale,5.77,179
11 | 10,Ale,4.94,160
12 | 11,Ale,5.6,187
13 | 12,Ale,5.6,175
14 | 13,Ale,6.77,167
15 | 14,Lager,5,145
16 | 15,Lager,5,150
17 | 16,Lager,5,153
18 | 17,Lager,4.9,153
19 | 18,Lager,4.94,163
20 | 19,Lager,4.9,175
21 | 20,Lager,4.7,146
22 | 21,Lager,4.75,160
23 | 22,Lager,4.55,142
24 | 23,Lager,4.81,169
25 | 24,Lager,4.4,156
26 | 25,Lager,5,146
27 | 26,Lager,5,147
28 | 27,Lager,4.6,138
29 | 28,Lager,4.79,153
30 | 29,Lager,5,150
31 | 30,Lager,5.8,160
32 | 31,Lager,4.9,140
33 |
--------------------------------------------------------------------------------
/Edition2/Data/Bangladesh.csv:
--------------------------------------------------------------------------------
1 | "Arsenic","Chlorine","Cobalt"
2 | 2400,6.2,0.42
3 | 6,116,0.45
4 | 904,14.8,0.63
5 | 321,35.9,0.68
6 | 1280,18.9,0.58
7 | 151,7.8,0.35
8 | 141,56.3,0.46
9 | 1050,16,0.59
10 | 511,40.4,0.48
11 | 688,29.3,0.87
12 | 81,31.3,0.6
13 | 8,36.9,0.34
14 | 37,20.3,0.32
15 | 6,1.3,0.41
16 | 22,22.3,0.32
17 | 43,22.1,0.39
18 | 39,25.8,0.38
19 | 92,16.2,0.44
20 | 253,6.6,0.45
21 | 200,1.2,0.46
22 | 255,2,0.33
23 | 1150,16.4,0.61
24 | 1180,16.6,0.97
25 | 9,55.5,0.51
26 | 107,7.2,0.33
27 | 6,12.9,0.31
28 | 149,8.7,0.31
29 | 6,4.4,0.24
30 | 46,70.6,0.38
31 | 13,148,0.39
32 | 6,44.1,0.1
33 | 150,43.2,0.88
34 | 6,37.1,0.08
35 | 189,5.7,0.33
36 | 364,360,0.77
37 | 42,17.6,0.28
38 | 390,67.7,0.8
39 | 6,6.7,0.11
40 | 270,390,1.28
41 | 248,9.6,0.35
42 | 139,59.2,0.63
43 | 6,3.5,1.44
44 | 82,239,0.34
45 | 82,63.8,1.18
46 | 256,5.4,0.52
47 | 165,5.4,0.37
48 | 6,47.3,0.13
49 | 180,1240,1.01
50 | 86,72,0.3
51 | 6,295,0.24
52 | 38,157,0.29
53 | 262,72.1,0.18
54 | 404,23.6,1.01
55 | 8,637,0.32
56 | 85,133,0.57
57 | 98,15.6,0.22
58 | 6,68,0.11
59 | 22,1090,0.66
60 | 6,1290,0.63
61 | 6,24.7,0.08
62 | 6,74.6,0.1
63 | 15,115,0.41
64 | 103,72.1,0.25
65 | 86,96,0.22
66 | 6,324,0.23
67 | 46,155,0.22
68 | 62,64.3,0.43
69 | 43,89,0.45
70 | 6,310,0.16
71 | 6,310,0.21
72 | 55,23.9,0.43
73 | 6,1550,0.66
74 | 107,61.3,0.26
75 | 65,69.5,0.48
76 | 276,82.5,1.11
77 | 114,11.1,1.5
78 | 6,3.7,0.07
79 | 6,2.7,0.08
80 | 6,238,3.18
81 | 65,254,0.41
82 | 142,27.1,2.27
83 | 194,6.2,0.64
84 | 6,129,0.22
85 | 54,14.9,0.4
86 | 702,414,1.41
87 | 6,74,0.23
88 | 986,6.3,0.34
89 | 153,34.8,0.55
90 | 84,7.1,0.3
91 | 16,27.6,0.39
92 | 1460,9.3,0.71
93 | 306,33.9,0.67
94 | 49,16.5,0.38
95 | 36,13.5,0.59
96 | 106,7.2,1.75
97 | 6,3.4,0.45
98 | 41,127,1.57
99 | 84,16.2,0.7
100 | 278,3.6,0.42
101 | 41,1.8,0.3
102 | 123,10.4,0.39
103 | 186,56.7,0.95
104 | 80,86,0.6
105 | 162,2,0.21
106 | 398,7.6,0.56
107 | 39,5.5,2.1
108 | 57,3.8,0.63
109 | 6,18.6,0.34
110 | 6,2.6,0.65
111 | 6,51.9,0.69
112 | 18,16,0.37
113 | 129,1.8,0.72
114 | 245,3,0.59
115 | 148,11.5,0.39
116 | 28,38.7,0.6
117 | 20,79.8,0.65
118 | 6,93,1.17
119 | 52,195,0.94
120 | 6,115,0.45
121 | 6,15.8,0.25
122 | 6,4.2,0.3
123 | 15,4,0.26
124 | 73,2.6,0.26
125 | 30,8.2,0.45
126 | 6,2.6,0.33
127 | 128,14.7,0.31
128 | 45,8.7,0.29
129 | 343,7.2,0.24
130 | 109,5.1,0.26
131 | 191,3.8,0.32
132 | 160,36.6,0.76
133 | 51,11.3,0.44
134 | 35,16.4,0.73
135 | 8,17.7,0.52
136 | 11,11,0.84
137 | 15,4.8,0.57
138 | 6,11.3,0.95
139 | 6,13.8,0.39
140 | 6,10.5,0.44
141 | 6,2.2,0.14
142 | 132,7.5,0.42
143 | 8,87,0.68
144 | 10,17.1,0.34
145 | 6,9.6,0.78
146 | 6,2,0.85
147 | 195,4,0.41
148 | 27,67.8,1.06
149 | 6,4.8,0.86
150 | 24,12.7,1.51
151 | 12,4.8,0.5
152 | 161,7.5,0.47
153 | 9,78.2,1.27
154 | 171,4.8,0.38
155 | 35,42.5,0.33
156 | 742,14.9,0.57
157 | 51,18.2,0.44
158 | 6,8.7,0.33
159 | 6,8.8,0.26
160 | 6,328,0.91
161 | 6,3.2,0.2
162 | 6,7,0.4
163 | 6,3.2,0.2
164 | 6,19.1,0.25
165 | 6,13.4,0.9
166 | 6,13.6,0.45
167 | 20,6.8,0.34
168 | 212,28.5,0.41
169 | 6,1.4,0.43
170 | 6,1.2,0.31
171 | 6,6.9,0.26
172 | 6,3.2,0.28
173 | 40,70.4,0.45
174 | 10,3.4,0.42
175 | 6,6.7,0.24
176 | 6,22.7,0.84
177 | 6,67.3,0.88
178 | 6,8,0.25
179 | 6,25.1,0.3
180 | 6,8.6,0.24
181 | 6,2.4,0.13
182 | 8,4.4,0.3
183 | 67,5.4,0.37
184 | 21,11.5,0.51
185 | 17,4.6,0.22
186 | 6,34.5,0.5
187 | 6,13.9,0.09
188 | 6,2.6,0.23
189 | 6,16.9,0.39
190 | 14,4,0.27
191 | 6,16.5,0.31
192 | 6,51.5,1.19
193 | 6,13.2,0.22
194 | 6,4.1,0.27
195 | 137,3.9,0.43
196 | 136,6.2,0.2
197 | 12,7.2,0.19
198 | 27,59.6,0.34
199 | 234,3.4,0.13
200 | 6,3,0.79
201 | 1520,4.3,0.4
202 | 13,105,0.62
203 | 9,12.9,0.47
204 | 6,4.7,0.21
205 | 6,7.2,0.39
206 | 6,13.7,0.29
207 | 6,10.6,0.76
208 | 6,2.8,0.22
209 | 6,30.7,0.31
210 | 6,11.9,0.45
211 | 14,51.1,0.45
212 | 6,,0.31
213 | 8,64.3,0.63
214 | 6,11.6,0.42
215 | 88,5,0.59
216 | 0.5,2.9,0.18
217 | 0.5,5.1,0.14
218 | 0.5,3.1,0.13
219 | 18.6,14.2,0.68
220 | 5.5,1.4,0.52
221 | 61.2,43.4,0.42
222 | 0.5,18.1,1.3
223 | 103,1,0.28
224 | 1.5,4.4,0.14
225 | 10.7,3.1,0.27
226 | 313,1.7,0.64
227 | 0.5,5.1,0.75
228 | 131,27.5,0.49
229 | 157,3.1,0.24
230 | 8.9,2.7,0.24
231 | 3.9,4.8,0.29
232 | 0.5,33.1,1.88
233 | 0.5,3.4,0.86
234 | 0.5,3.8,0.69
235 | 12.1,3.8,0.39
236 | 0.5,8.2,0.37
237 | 109,12.2,0.41
238 | 0.5,2.4,0.98
239 | 0.5,223,0.46
240 | 2.7,4.4,0.51
241 | 0.5,13.3,0.31
242 | 0.5,1.9,0.53
243 | 0.5,20,0.32
244 | 16.8,16.1,0.31
245 | 29,9.6,0.16
246 | 2340,8.9,0.32
247 | 3.8,95.1,0.38
248 | 108,2.4,0.3
249 | 6.8,3.4,0.14
250 | 0.5,5.1,0.15
251 | 61.4,20.2,0.13
252 | 0.5,1.7,0.47
253 | 670,17.5,
254 | 287,140,0.58
255 | 409,426,0.43
256 | 1.2,1.8,0.05
257 | 125,16.6,0.35
258 | 202,7.7,0.74
259 | 30.3,39.8,0.18
260 | 0.5,1050,0.32
261 | 52.3,342,0.6
262 | 109,1360,0.75
263 | 80.7,5,0.76
264 | 75.6,4.9,0.26
265 | 72.9,195,0.68
266 | 64.5,892,0.92
267 | 36.4,65.7,0.29
268 | 34.5,99.5,0.65
269 | 88.6,,0.58
270 | 9.4,18,0.35
271 | 2.1,17.6,0.09
272 | 94.8,736,0.23
273 |
--------------------------------------------------------------------------------
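
A note on the Bangladesh data above: two fields are blank (one Chlorine value and one Cobalt value), and `read.csv` imports blank numeric fields as `NA`. A minimal sketch of checking and handling this, assuming a local checkout of the repository:

```r
# Sketch, assuming a local checkout: blank numeric CSV fields arrive as NA.
Bangladesh <- read.csv("Edition2/Data/Bangladesh.csv")
colSums(is.na(Bangladesh))               # one NA in Chlorine, one in Cobalt
mean(Bangladesh$Chlorine, na.rm = TRUE)  # drop NAs before summarizing
```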
/Edition2/Data/Beerwings.csv:
--------------------------------------------------------------------------------
1 | ID,Hotwings,Beer,Gender
2 | 1,4,24,F
3 | 2,5,0,F
4 | 3,5,12,F
5 | 4,6,12,F
6 | 5,7,12,F
7 | 6,7,12,F
8 | 7,7,24,M
9 | 8,8,24,F
10 | 9,8,0,M
11 | 10,8,12,M
12 | 11,9,24,F
13 | 12,11,24,F
14 | 13,11,24,M
15 | 14,12,30,F
16 | 15,12,30,F
17 | 16,13,24,F
18 | 17,13,36,F
19 | 18,13,30,M
20 | 19,13,30,M
21 | 20,14,30,F
22 | 21,14,36,F
23 | 22,14,48,M
24 | 23,16,36,M
25 | 24,16,36,M
26 | 25,17,36,M
27 | 26,17,42,M
28 | 27,18,30,M
29 | 28,18,30,M
30 | 29,21,36,M
31 | 30,21,42,M
32 |
--------------------------------------------------------------------------------
/Edition2/Data/BookPrices.csv:
--------------------------------------------------------------------------------
1 | "Subject","Area","Price"
2 | "Biology","Math & Science",190.7
3 | "Biology","Math & Science",160
4 | "Biology","Math & Science",117.3
5 | "Biology","Math & Science",115.15
6 | "Chemistry","Math & Science",222.67
7 | "Chemistry","Math & Science",174.95
8 | "Chemistry","Math & Science",197.15
9 | "Chemistry","Math & Science",196.4
10 | "Chemistry","Math & Science",197.5
11 | "Computer Science","Math & Science",157.8
12 | "Computer Science","Math & Science",178.7
13 | "Computer Science","Math & Science",77.95
14 | "Computer Science","Math & Science",128
15 | "Computer Science","Math & Science",138
16 | "Economics","Social Sciences",31.95
17 | "Economics","Social Sciences",209
18 | "Economics","Social Sciences",104
19 | "Economics","Social Sciences",168
20 | "Economics","Social Sciences",168
21 | "Economics","Social Sciences",163.35
22 | "Economics","Social Sciences",178.7
23 | "Educational Studies","Social Sciences",15
24 | "Geology","Math & Science",134.4
25 | "Mathematics","Math & Science",137.35
26 | "Mathematics","Math & Science",222
27 | "Mathematics","Math & Science",138.7
28 | "Mathematics","Math & Science",138.7
29 | "Mathematics","Math & Science",106.65
30 | "Mathematics","Math & Science",174
31 | "Mathematics","Math & Science",172.35
32 | "Physics","Math & Science",149.35
33 | "Physics","Math & Science",200
34 | "Physics","Math & Science",192.7
35 | "Physics","Math & Science",85.35
36 | "Physics","Math & Science",128
37 | "Political Science","Social Sciences",11
38 | "Political Science","Social Sciences",26.95
39 | "Political Science","Social Sciences",17
40 | "Psychology","Social Sciences",138.7
41 | "Psychology","Social Sciences",136
42 | "Psychology","Social Sciences",139.95
43 | "Psychology","Social Sciences",15.95
44 | "SOAN","Social Sciences",139.33
45 | "SOAN","Social Sciences",19.95
46 |
--------------------------------------------------------------------------------
/Edition2/Data/Bushmeat.csv:
--------------------------------------------------------------------------------
1 | "Fish","Biomass","Year"
2 | 28.6,942.54,1970
3 | 34.7,969.77,1971
4 | 39.3,999.45,1972
5 | 32.4,987.13,1973
6 | 31.8,976.31,1974
7 | 32.8,944.07,1975
8 | 38.4,979.37,1976
9 | 33.2,997.86,1977
10 | 29.7,994.85,1978
11 | 25,936.36,1979
12 | 21.8,862.85,1980
13 | 20.8,815.67,1981
14 | 19.7,756.58,1982
15 | 20.8,725.27,1983
16 | 21.1,662.65,1984
17 | 21.3,625.97,1985
18 | 24.3,621.69,1986
19 | 27.4,589.83,1987
20 | 24.5,548.05,1988
21 | 25.2,524.88,1989
22 | 25.9,529.41,1990
23 | 23,497.37,1991
24 | 27.1,476.86,1992
25 | 23.4,453.8,1993
26 | 18.9,402.7,1994
27 | 19.6,365.25,1995
28 | 25.3,326.02,1996
29 | 22,320.12,1997
30 | 21,296.49,1998
31 | 23,228.72,1999
32 |
--------------------------------------------------------------------------------
/Edition2/Data/Cereals.csv:
--------------------------------------------------------------------------------
1 | "ID","Age","Shelf","Sodiumgram","Proteingram"
2 | 1,"adult","bottom",0.007,0.1
3 | 2,"children","bottom",0.006666667,0.066666667
4 | 3,"children","bottom",0.004666667,0.033333333
5 | 4,"children","bottom",0.006969697,0.03030303
6 | 5,"adult","bottom",0.007,0.1
7 | 6,"children","bottom",0.006,0.033333333
8 | 7,"children","bottom",0.006129032,0.032258065
9 | 8,"children","bottom",0.00483871,0.032258065
10 | 9,"children","bottom",0.001851852,0.074074074
11 | 10,"children","middle",0.005517241,0.034482759
12 | 11,"children","middle",0.006666667,0.066666667
13 | 12,"children","middle",0.0045,0.066666667
14 | 13,"children","middle",0.004375,0.03125
15 | 14,"children","middle",0.007096774,0.064516129
16 | 15,"children","middle",0.007,0.033333333
17 | 16,"children","middle",0.006785714,0.107142857
18 | 17,"adult","middle",0.004545455,0.090909091
19 | 18,"children","middle",0.005,0.09375
20 | 19,"children","middle",0.0046875,0.09375
21 | 20,"children","middle",0.003833333,0.066666667
22 | 21,"children","middle",0.0045,0.066666667
23 | 22,"children","middle",0.006666667,0.066666667
24 | 23,"children","middle",0.006296296,0.037037037
25 | 24,"children","middle",0.007407407,0.037037037
26 | 25,"children","middle",0.004375,0.03125
27 | 26,"children","middle",0.005333333,0.033333333
28 | 27,"children","middle",0.005666667,0.033333333
29 | 28,"children","middle",0.004848485,0.060606061
30 | 29,"adult","top",0.0022,0.2
31 | 30,"children","top",0.007,0.033333333
32 | 31,"adult","top",0.0035,0.266666667
33 | 32,"adult","top",0.001792453,0.169811321
34 | 33,"adult","top",0.0045,0.1
35 | 34,"adult","top",0.0028,0.04
36 | 35,"adult","top",0.000222222,0.177777778
37 | 36,"adult","top",0.001634615,0.25
38 | 37,"adult","top",0.0028,0.14
39 | 38,"adult","top",0.005818182,0.072727273
40 | 39,"adult","top",0.002727273,0.121212121
41 | 40,"adult","top",0.0056,0.06
42 | 41,"adult","top",0,0.074074074
43 | 42,"adult","top",0,0.092592593
44 | 43,"adult","top",0.00245283,0.094339623
45 |
--------------------------------------------------------------------------------
/Edition2/Data/Challenger.csv:
--------------------------------------------------------------------------------
1 | "Date","Temperature","Incident"
2 | "Apr12.81",66,0
3 | "Nov12.81",70,1
4 | "Mar22.82",69,0
5 | "Nov11.82",68,0
6 | "Apr04.83",67,0
7 | "Jun18.83",72,0
8 | "Aug30.83",73,0
9 | "Nov28.83",70,0
10 | "Feb03.84",57,1
11 | "Apr06.84",63,1
12 | "Aug30.84",70,1
13 | "Oct05.84",78,0
14 | "Nov08.84",67,0
15 | "Jan24.85",53,1
16 | "Apr12.85",67,0
17 | "Apr29.85",75,0
18 | "Jun17.85",70,0
19 | "Jul29.85",81,0
20 | "Aug27.85",76,0
21 | "Oct03.85",79,0
22 | "Oct30.85",75,1
23 | "Nov26.85",76,0
24 | "Jan12.86",58,1
25 |
--------------------------------------------------------------------------------
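
Challenger.csv above records, for each shuttle launch before the Challenger disaster, the launch temperature and whether an O-ring incident occurred (Incident = 0/1) — a natural setting for logistic regression. A minimal sketch, again assuming a local checkout:

```r
# Sketch, assuming a local checkout: model incident probability vs. temperature.
Challenger <- read.csv("Edition2/Data/Challenger.csv")
fit <- glm(Incident ~ Temperature, data = Challenger, family = binomial)
summary(fit)  # the fitted Temperature slope is negative: colder launches,
              # higher estimated incident probability
```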
/Edition2/Data/ChiMarathonMen.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Data/ChiMarathonMen.csv
--------------------------------------------------------------------------------
/Edition2/Data/Cuckoos.csv:
--------------------------------------------------------------------------------
1 | "Eggs","Bird"
2 | 19.65,"MeadowPipit"
3 | 20.05,"MeadowPipit"
4 | 20.65,"MeadowPipit"
5 | 20.85,"MeadowPipit"
6 | 21.65,"MeadowPipit"
7 | 21.65,"MeadowPipit"
8 | 21.65,"MeadowPipit"
9 | 21.85,"MeadowPipit"
10 | 21.85,"MeadowPipit"
11 | 21.85,"MeadowPipit"
12 | 22.05,"MeadowPipit"
13 | 22.05,"MeadowPipit"
14 | 22.05,"MeadowPipit"
15 | 22.05,"MeadowPipit"
16 | 22.05,"MeadowPipit"
17 | 22.05,"MeadowPipit"
18 | 22.05,"MeadowPipit"
19 | 22.05,"MeadowPipit"
20 | 22.05,"MeadowPipit"
21 | 22.05,"MeadowPipit"
22 | 22.25,"MeadowPipit"
23 | 22.25,"MeadowPipit"
24 | 22.25,"MeadowPipit"
25 | 22.25,"MeadowPipit"
26 | 22.25,"MeadowPipit"
27 | 22.25,"MeadowPipit"
28 | 22.25,"MeadowPipit"
29 | 22.25,"MeadowPipit"
30 | 22.45,"MeadowPipit"
31 | 22.45,"MeadowPipit"
32 | 22.45,"MeadowPipit"
33 | 22.65,"MeadowPipit"
34 | 22.65,"MeadowPipit"
35 | 22.85,"MeadowPipit"
36 | 22.85,"MeadowPipit"
37 | 22.85,"MeadowPipit"
38 | 22.85,"MeadowPipit"
39 | 23.05,"MeadowPipit"
40 | 23.25,"MeadowPipit"
41 | 23.25,"MeadowPipit"
42 | 23.45,"MeadowPipit"
43 | 23.65,"MeadowPipit"
44 | 23.85,"MeadowPipit"
45 | 24.25,"MeadowPipit"
46 | 24.45,"TreePipit"
47 | 21.05,"TreePipit"
48 | 21.85,"TreePipit"
49 | 22.05,"TreePipit"
50 | 22.45,"TreePipit"
51 | 22.65,"TreePipit"
52 | 23.25,"TreePipit"
53 | 23.25,"TreePipit"
54 | 23.25,"TreePipit"
55 | 23.45,"TreePipit"
56 | 23.45,"TreePipit"
57 | 23.65,"TreePipit"
58 | 23.85,"TreePipit"
59 | 24.05,"TreePipit"
60 | 24.05,"TreePipit"
61 | 24.05,"TreePipit"
62 | 20.85,"HedgeSparrow"
63 | 21.65,"HedgeSparrow"
64 | 22.05,"HedgeSparrow"
65 | 22.85,"HedgeSparrow"
66 | 23.05,"HedgeSparrow"
67 | 23.05,"HedgeSparrow"
68 | 23.05,"HedgeSparrow"
69 | 23.05,"HedgeSparrow"
70 | 23.45,"HedgeSparrow"
71 | 23.85,"HedgeSparrow"
72 | 23.85,"HedgeSparrow"
73 | 23.85,"HedgeSparrow"
74 | 24.05,"HedgeSparrow"
75 | 25.05,"HedgeSparrow"
76 | 21.05,"Robin"
77 | 21.85,"Robin"
78 | 22.05,"Robin"
79 | 22.05,"Robin"
80 | 22.05,"Robin"
81 | 22.25,"Robin"
82 | 22.45,"Robin"
83 | 22.45,"Robin"
84 | 22.65,"Robin"
85 | 23.05,"Robin"
86 | 23.05,"Robin"
87 | 23.05,"Robin"
88 | 23.05,"Robin"
89 | 23.05,"Robin"
90 | 23.25,"Robin"
91 | 23.85,"Robin"
92 | 21.05,"PiedWagtail"
93 | 21.85,"PiedWagtail"
94 | 21.85,"PiedWagtail"
95 | 21.85,"PiedWagtail"
96 | 22.05,"PiedWagtail"
97 | 22.45,"PiedWagtail"
98 | 22.65,"PiedWagtail"
99 | 23.05,"PiedWagtail"
100 | 23.05,"PiedWagtail"
101 | 23.25,"PiedWagtail"
102 | 23.45,"PiedWagtail"
103 | 24.05,"PiedWagtail"
104 | 24.05,"PiedWagtail"
105 | 24.05,"PiedWagtail"
106 | 24.85,"PiedWagtail"
107 | 19.85,"Wren"
108 | 20.05,"Wren"
109 | 20.25,"Wren"
110 | 20.85,"Wren"
111 | 20.85,"Wren"
112 | 20.85,"Wren"
113 | 21.05,"Wren"
114 | 21.05,"Wren"
115 | 21.05,"Wren"
116 | 21.25,"Wren"
117 | 21.45,"Wren"
118 | 22.05,"Wren"
119 | 22.05,"Wren"
120 | 22.05,"Wren"
121 | 22.25,"Wren"
122 |
--------------------------------------------------------------------------------
/Edition2/Data/Diving2017.csv:
--------------------------------------------------------------------------------
1 | Name,Country,Semifinal,Final
2 | CHEONG Jun Hoong,Malaysia,325.50,397.50
3 | SI Yajie,China,382.80,396.00
4 | REN Qian,China,367.50,391.95
5 | KIM Mi Rae,North Korea,346.00,385.55
6 | WU Melissa,Australia,318.70,370.20
7 | KIM Kuk Hyang,North Korea,360.85,360.00
8 | ITAHASHI Minami,Japan,313.70,357.85
9 | BENFEITO Meaghan,Canada,355.15,331.40
10 | PAMG Pandelela,Malaysia,322.75,322.40
11 | CHAMANDY Olivia,Canada,320.55,307.15
12 | PARRATTO Jessica,USA,322.75,302.35
13 | MURILLO URREA Carolina,Colombia,325.75,283.35
14 |
--------------------------------------------------------------------------------
/Edition2/Data/Fatalities.csv:
--------------------------------------------------------------------------------
1 | "ID","Alcohol","Age"
2 | 1,0,86
3 | 2,0,38
4 | 3,0,40
5 | 4,1,20
6 | 5,1,27
7 | 6,0,19
8 | 7,1,43
9 | 8,0,71
10 | 9,0,63
11 | 10,0,37
12 | 11,0,24
13 | 12,0,60
14 | 13,0,52
15 | 14,0,53
16 | 15,0,71
17 | 16,1,21
18 | 17,1,17
19 | 18,0,58
20 | 19,1,39
21 | 20,1,21
22 | 21,0,23
23 | 22,0,19
24 | 23,0,52
25 | 24,0,46
26 | 25,0,50
27 | 26,1,59
28 | 27,0,43
29 | 28,1,25
30 | 29,0,80
31 | 30,0,32
32 | 31,1,40
33 | 32,0,75
34 | 33,0,20
35 | 34,0,21
36 | 35,1,57
37 | 36,0,44
38 | 37,0,17
39 | 38,0,18
40 | 39,0,20
41 | 40,0,21
42 | 41,0,84
43 | 42,0,19
44 | 43,0,18
45 | 44,0,42
46 | 45,0,73
47 | 46,0,27
48 | 47,0,62
49 | 48,1,47
50 | 49,0,45
51 | 50,0,49
52 | 51,0,54
53 | 52,0,79
54 | 53,0,53
55 | 54,0,82
56 | 55,0,71
57 | 56,0,37
58 | 57,0,45
59 | 58,0,19
60 | 59,0,73
61 | 60,0,78
62 | 61,0,45
63 | 62,0,25
64 | 63,0,33
65 | 64,0,28
66 | 65,0,59
67 | 66,0,48
68 | 67,0,49
69 | 68,0,57
70 | 69,0,18
71 | 70,0,61
72 | 71,1,28
73 | 72,0,21
74 | 73,0,83
75 | 74,0,76
76 | 75,0,57
77 | 76,0,21
78 | 77,0,79
79 | 78,0,29
80 | 79,0,20
81 | 80,0,22
82 | 81,1,27
83 | 82,0,36
84 | 83,0,19
85 | 84,1,60
86 | 85,1,23
87 | 86,0,91
88 | 87,1,28
89 | 88,1,49
90 | 89,0,29
91 | 90,0,56
92 | 91,0,20
93 | 92,0,69
94 | 93,0,17
95 | 94,1,28
96 | 95,1,28
97 | 96,0,65
98 | 97,1,37
99 | 98,1,30
100 | 99,0,20
101 | 100,0,21
102 |
--------------------------------------------------------------------------------
/Edition2/Data/FishMercury.csv:
--------------------------------------------------------------------------------
1 | "Mercury"
2 | 1.87
3 | 0.16
4 | 0.088
5 | 0.16
6 | 0.145
7 | 0.099
8 | 0.101
9 | 0.18
10 | 0.187
11 | 0.097
12 | 0.18
13 | 0.132
14 | 0.065
15 | 0.126
16 | 0.107
17 | 0.152
18 | 0.11
19 | 0.076
20 | 0.168
21 | 0.151
22 | 0.048
23 | 0.15
24 | 0.162
25 | 0.118
26 | 0.163
27 | 0.178
28 | 0.076
29 | 0.078
30 | 0.039
31 | 0.09
32 |
--------------------------------------------------------------------------------
/Edition2/Data/Girls2004.csv:
--------------------------------------------------------------------------------
1 | "ID","State","MothersAge","Smoker","Weight","Gestation"
2 | 1,"WY","15-19","No",3085,40
3 | 2,"WY","35-39","No",3515,39
4 | 3,"WY","25-29","No",3775,40
5 | 4,"WY","20-24","No",3265,39
6 | 5,"WY","25-29","No",2970,40
7 | 6,"WY","20-24","No",2850,38
8 | 7,"WY","20-24","No",2737,38
9 | 8,"WY","25-29","No",3515,37
10 | 9,"WY","25-29","No",3742,39
11 | 10,"WY","35-39","No",3570,40
12 | 11,"WY","20-24","No",3834,41
13 | 12,"WY","20-24","Yes",3090,39
14 | 13,"WY","25-29","Yes",3350,40
15 | 14,"WY","30-34","No",3292,37
16 | 15,"WY","15-19","No",3317,40
17 | 16,"WY","30-34","No",2485,37
18 | 17,"WY","20-24","No",3215,39
19 | 18,"WY","20-24","No",3230,40
20 | 19,"WY","30-34","No",3345,39
21 | 20,"WY","25-29","No",3050,41
22 | 21,"WY","30-34","No",2212,37
23 | 22,"WY","35-39","No",3605,39
24 | 23,"WY","30-34","No",2722,39
25 | 24,"WY","30-34","No",2880,39
26 | 25,"WY","20-24","No",3610,39
27 | 26,"WY","30-34","No",3355,39
28 | 27,"WY","20-24","No",3995,41
29 | 28,"WY","20-24","Yes",2948,39
30 | 29,"WY","35-39","No",3345,41
31 | 30,"WY","30-34","Yes",2892,39
32 | 31,"WY","20-24","No",2466,37
33 | 32,"WY","20-24","Yes",3290,39
34 | 33,"WY","25-29","No",3310,39
35 | 34,"WY","40-44","No",3175,37
36 | 35,"WY","25-29","No",2715,38
37 | 36,"WY","25-29","No",3540,38
38 | 37,"WY","25-29","No",3402,38
39 | 38,"WY","25-29","Yes",3923,39
40 | 39,"WY","20-24","No",3204,37
41 | 40,"WY","15-19","Yes",2495,37
42 | 41,"AK","20-24","No",4337,41
43 | 42,"AK","20-24","No",2948,40
44 | 43,"AK","30-34","No",3269,39
45 | 44,"AK","20-24","No",3608,38
46 | 45,"AK","30-34","No",4016,39
47 | 46,"AK","25-29","No",2919,40
48 | 47,"AK","20-24","No",2608,37
49 | 48,"AK","40-44","No",4309,39
50 | 49,"AK","20-24","No",3288,39
51 | 50,"AK","25-29","No",3742,38
52 | 51,"AK","15-19","No",4394,41
53 | 52,"AK","20-24","No",2182,37
54 | 53,"AK","25-29","No",4592,40
55 | 54,"AK","20-24","No",3090,39
56 | 55,"AK","30-34","No",3770,40
57 | 56,"AK","20-24","No",3977,39
58 | 57,"AK","25-29","No",3153,40
59 | 58,"AK","25-29","No",3458,41
60 | 59,"AK","15-19","No",3912,38
61 | 60,"AK","20-24","Yes",2863,40
62 | 61,"AK","35-39","No",3190,39
63 | 62,"AK","25-29","Yes",3515,38
64 | 63,"AK","25-29","No",3288,39
65 | 64,"AK","15-19","No",3114,40
66 | 65,"AK","30-34","Yes",3543,41
67 | 66,"AK","20-24","No",3825,39
68 | 67,"AK","25-29","No",3458,39
69 | 68,"AK","30-34","No",3698,41
70 | 69,"AK","20-24","No",3572,39
71 | 70,"AK","30-34","Yes",2352,40
72 | 71,"AK","20-24","No",3175,40
73 | 72,"AK","25-29","No",3742,41
74 | 73,"AK","20-24","No",3997,39
75 | 74,"AK","25-29","No",2576,38
76 | 75,"AK","30-34","No",3572,40
77 | 76,"AK","35-39","No",3968,39
78 | 77,"AK","20-24","No",4564,42
79 | 78,"AK","20-24","No",4210,40
80 | 79,"AK","25-29","No",3260,38
81 | 80,"AK","20-24","No",3600,40
82 |
--------------------------------------------------------------------------------
/Edition2/Data/Groceries.csv:
--------------------------------------------------------------------------------
1 | Product,Size,Target,Walmart
2 | Kellogg NutriGrain Bars,8 bars,2.50,2.78
3 | Quaker Oats Life Cereal Original ,18oz,3.19,6.01
4 | General Mills Lucky Charms,11.50z,3.19,2.98
5 | Quaker Oats Old Fashioned,18oz,2.82,2.68
6 | Nabisco Oreo Cookies,14.3oz ,2.99,2.98
7 | Nabisco Chips Ahoy,13oz,2.64,1.98
8 | Doritos Nacho Cheese Chips,10oz,3.99,2.5
9 | Cheez-it Original Baked,21oz,4.79,4.79
10 | Swiss Miss Hot Chocolate,10 count,1.49,1.28
11 | Tazo Chai Classic Latte Black Tea,32 oz ,3.49,2.98
12 | Annie's Macaroni & Cheese,6oz,1.79,1.72
13 | Rice A Roni Chicken,6.9oz,1.00,1.00
14 | Zatarain's Jambalaya Rice Mix,8oz,1.62,1.54
15 | SPAM Original Lunch Meat,12oz,2.79,2.64
16 | Campbell's Chicken Noodle Soup,10.75oz,0.99,1.58
17 | Dinty Moore Hearty Meals Beef Stew,15oz,1.99,1.98
18 | Hormel Chili with Beans,15oz,1.94,1.88
19 | Dole Pineapple Chunks,20 oz,1.59,1.47
20 | Skippy Creamy Peanut Butter,16.3oz,2.59,2.58
21 | Smucker's Strawberry Preserve,18oz,2.99,2.84
22 | Heinz Tomato Ketchup,32oz,2.99,2.88
23 | Near East Couscous Toasted Pine Nuts mix,5.6oz,2.12,1.98
24 | Barilla Angel Hair Pasta,16oz,1.42,1.38
25 | Betty Crocker Super Moist Chocolate Fudge Cake Mix,15.25oz,1.22,1.17
26 | Kraft Jet-Puffed Marshmllows,16oz,1.99,1.96
27 | Dunkin' Donuts Original Blend Medium Roast Ground Coffee,12oz,7.19,6.98
28 | Dove Promises Milk Chocolate,8.87oz,3.19,3.50
29 | Skittles,41oz,7.99,6.98
30 | Vlasic Kosher Dill Pickle Spears,24oz,2.39,2.18
31 | Vlasic Old Fashioned Sauerkraut,32oz,1.99,1.97
32 |
--------------------------------------------------------------------------------
/Edition2/Data/ILBoys.csv:
--------------------------------------------------------------------------------
1 | "MothersAge","Weight"
2 | "25-29",3005
3 | "25-29",3686
4 | "25-29",3714
5 | "20-24",2807
6 | "25-29",4054
7 | "15-19",3884
8 | "25-29",3005
9 | "20-24",2920
10 | "25-29",3236
11 | "20-24",3345
12 | "20-24",2948
13 | "25-29",3345
14 | "25-29",3997
15 | "20-24",3025
16 | "20-24",4026
17 | "20-24",4423
18 | "20-24",3487
19 | "20-24",3232
20 | "15-19",3005
21 | "15-19",3300
22 | "20-24",3575
23 | "25-29",3742
24 | "15-19",3232
25 | "20-24",3572
26 | "25-29",4167
27 | "20-24",2870
28 | "20-24",3374
29 | "15-19",3515
30 | "20-24",3232
31 | "20-24",3600
32 | "20-24",3025
33 | "25-29",4139
34 | "20-24",3232
35 | "25-29",3345
36 | "20-24",3837
37 | "15-19",3430
38 | "25-29",3544
39 | "15-19",4030
40 | "20-24",3600
41 | "20-24",3770
42 | "20-24",4394
43 | "25-29",4253
44 | "25-29",3374
45 | "25-29",3487
46 | "15-19",3629
47 | "25-29",3289
48 | "25-29",3535
49 | "15-19",3680
50 | "20-24",3090
51 | "25-29",3997
52 | "25-29",2892
53 | "15-19",2552
54 | "25-29",3487
55 | "20-24",2410
56 | "20-24",2920
57 | "20-24",3260
58 | "25-29",3260
59 | "25-29",3657
60 | "25-29",3515
61 | "20-24",2438
62 | "25-29",3856
63 | "15-19",3317
64 | "20-24",3165
65 | "20-24",3572
66 | "15-19",3884
67 | "20-24",3544
68 | "20-24",3608
69 | "15-19",3487
70 | "25-29",4564
71 | "25-29",4054
72 | "20-24",2336
73 | "20-24",3119
74 | "15-19",2778
75 | "20-24",3741
76 | "20-24",3119
77 | "15-19",3317
78 | "20-24",3260
79 | "20-24",3742
80 | "25-29",3629
81 | "20-24",3033
82 | "25-29",3390
83 | "15-19",3374
84 | "25-29",4335
85 | "20-24",3090
86 | "25-29",3520
87 | "20-24",3657
88 | "20-24",3920
89 | "25-29",3430
90 | "20-24",3742
91 | "15-19",2878
92 | "20-24",3912
93 | "25-29",4082
94 | "25-29",3119
95 | "15-19",3204
96 | "20-24",3430
97 | "20-24",2975
98 | "15-19",2990
99 | "25-29",4200
100 | "20-24",3656
101 | "20-24",2948
102 | "20-24",2948
103 | "20-24",3459
104 | "25-29",3657
105 | "25-29",3930
106 | "25-29",3232
107 | "25-29",2892
108 | "15-19",2580
109 | "25-29",3505
110 | "20-24",3232
111 | "25-29",3345
112 | "20-24",3430
113 | "20-24",3657
114 | "15-19",3459
115 | "15-19",3657
116 | "25-29",3058
117 | "25-29",3771
118 | "25-29",3317
119 | "25-29",3317
120 | "20-24",3033
121 | "20-24",3090
122 | "15-19",3827
123 | "25-29",3175
124 | "25-29",3289
125 | "20-24",3771
126 | "25-29",3317
127 | "15-19",2792
128 | "20-24",3130
129 | "25-29",3175
130 | "20-24",3033
131 | "25-29",3175
132 | "25-29",3459
133 | "25-29",3997
134 | "20-24",3260
135 | "15-19",2955
136 | "25-29",3232
137 | "25-29",3289
138 | "20-24",3175
139 | "25-29",3313
140 | "20-24",3771
141 | "15-19",2000
142 | "20-24",3255
143 | "25-29",2920
144 | "20-24",3572
145 | "25-29",3232
146 | "15-19",3147
147 | "25-29",4026
148 | "20-24",2977
149 | "25-29",3380
150 | "25-29",3033
151 | "20-24",3459
152 | "25-29",3750
153 | "20-24",3600
154 | "15-19",3374
155 | "25-29",3375
156 | "25-29",2778
157 | "25-29",3686
158 | "15-19",3430
159 | "25-29",3714
160 | "25-29",4253
161 | "15-19",2540
162 | "25-29",3827
163 | "20-24",3544
164 | "25-29",3250
165 | "25-29",3317
166 | "20-24",3827
167 | "20-24",3912
168 | "25-29",3289
169 | "25-29",4204
170 | "20-24",3289
171 | "15-19",3204
172 | "15-19",3033
173 | "20-24",3062
174 | "25-29",2807
175 | "20-24",3572
176 | "20-24",3515
177 | "20-24",3487
178 | "20-24",3289
179 | "25-29",4338
180 | "20-24",3250
181 | "20-24",3515
182 | "25-29",3289
183 | "20-24",3430
184 | "20-24",3747
185 | "25-29",3714
186 | "20-24",4005
187 | "25-29",4082
188 | "25-29",3686
189 | "25-29",3515
190 | "20-24",3260
191 | "25-29",3629
192 | "25-29",3296
193 | "20-24",3147
194 | "15-19",2863
195 | "25-29",3175
196 | "25-29",4139
197 | "25-29",3062
198 | "20-24",2523
199 | "25-29",3771
200 | "25-29",3714
201 | "15-19",2905
202 | "15-19",3997
203 | "25-29",3090
204 | "25-29",3575
205 | "20-24",3941
206 | "25-29",2975
207 | "25-29",2977
208 | "15-19",3119
209 | "25-29",3175
210 | "25-29",4423
211 | "25-29",4590
212 | "25-29",3430
213 | "25-29",3657
214 | "25-29",3657
215 | "25-29",2778
216 | "20-24",3515
217 | "20-24",2655
218 | "15-19",3119
219 | "25-29",3340
220 | "15-19",2863
221 | "15-19",3232
222 | "25-29",3317
223 | "25-29",3799
224 | "20-24",3941
225 | "15-19",3175
226 | "20-24",3005
227 | "15-19",2892
228 | "25-29",3374
229 | "25-29",3374
230 | "20-24",4139
231 | "25-29",3487
232 | "25-29",3260
233 | "20-24",3090
234 | "15-19",2920
235 | "25-29",3204
236 | "15-19",3515
237 | "25-29",3260
238 | "25-29",3970
239 | "20-24",3430
240 | "25-29",3969
241 | "15-19",3033
242 | "20-24",3107
243 |
--------------------------------------------------------------------------------
/Edition2/Data/IceCream.csv:
--------------------------------------------------------------------------------
1 | "Brand","VanillaCalories","VanillaFat","VanillaSugar","ChocolateCalories","ChocolateFat","ChocolateSugar"
2 | "Baskin Robbins",260,16,26,260,14,31
3 | "Ben & Jerry's",240,16,19,260,16,22
4 | "Blue Bunny",140,7,12,130,7,14
5 | "Breyers",140,7,13,140,8,16
6 | "Brigham's",190,12,17,200,12,18
7 | "Bulla",234,13.5,21.8,266,15,22.6
8 | "Carvel",240,14,21,250,13,25
9 | "Cass-Clay",130,7,11,150,7,16
10 | "Chapman's",120,6,11,120,5,12
11 | "Cold Stone",270,15.5,23,264,16.2,23.6
12 | "Culver's",222,13,19,205,10,20
13 | "Dairy Queen",140,4.5,19,150,5,17
14 | "Dove",240,15,20,290,17,27
15 | "Dreamery",260,15,24,280,12,33
16 | "Edy's Grand",140,8,13,150,8,15
17 | "Emack & Bolio's",160,9,12,170,9,13
18 | "Good Humor",120,6,12,120,6,14
19 | "Graeter's",260,16,24,260,16,24
20 | "Green and Black",194,11.6,18,227,12.8,22.7
21 | "Green's",150,8,17,140,8,15
22 | "Haagen Dazs",270,18,21,270,18,21
23 | "Hershey's",140,9,14,140,8,13
24 | "Hill Station",226,15.6,16.8,235,14.3,21.2
25 | "Kemp's",130,7,13,140,6,17
26 | "Klein's",130,8,15,140,8,14
27 | "Oberweis Dairy",307,21,23,320,21,19
28 | "Our Family",130,7,11,130,6,15
29 | "Perry's",140,8,15,140,7,15
30 | "Ronnybrook Farm",240,16,20,260,19,21
31 | "Ruggles",150,8,12,150,8,16
32 | "Sara Lee",242,15.5,21.5,234,14.4,20.9
33 | "Schwan's",140,7,12,140,7,12
34 | "Sheer Bliss",300,19,27,320,19,29
35 | "Smith's",150,8,13,150,8,13
36 | "Stonyfield Farm",240,16,20,250,17,20
37 | "Tillamook",160,9,10,170,9,13
38 | "Turkey Hill",140,8,16,150,8,19
39 | "Value Choice",130,6,12,130,6,15
40 | "Whitey's",250,14,23,250,13,25
41 |
--------------------------------------------------------------------------------
/Edition2/Data/Illiteracy.csv:
--------------------------------------------------------------------------------
1 | ID,Country,Illit,Births
2 | 1,Albania,20.5,1.78
3 | 2,Algeria,39.1,2.44
4 | 3,Bahrain,15,2.34
5 | 4,Belize,5.9,2.97
6 | 5,Benin,73.5,5.6
7 | 6,Bolivia,18.5,3.65
8 | 7,Botswana,17.6,3.03
9 | 8,Brazil,11.9,2.29
10 | 9,Brunei,11.5,2.38
11 | 10,Burkina Faso,83.4,5.9
12 | 11,Burma,18.1,2.23
13 | 12,Burundi,54.8,6.8
14 | 13,Cambodia,39.8,3.89
15 | 14,Cape Verde,30.9,3.53
16 | 15,Central African Republic,60.1,4.73
17 | 16,Chad,60.7,6.3
18 | 17,China,19.6,1.81
19 | 18,Colombia,7.5,2.4
20 | 19,Comoros,50.7,3.76
21 | 20,"Congo, Democratic Republic of the",44.9,6.7
22 | 21,"Congo, Republic of the",21.6,5.6
23 | 22,Cote d'Ivoire,59.1,4.7
24 | 23,Djibouti,41.6,4.74
25 | 24,Dominican Republic,15.2,2.73
26 | 25,Ecuador,9.1,2.67
27 | 26,Egypt,53.1,3.1
28 | 27,El Salvador,22.3,2.76
29 | 28,Equatorial Guinea,21.6,5.89
30 | 29,Eritrea,52.4,5.24
31 | 30,Ethiopia,64.8,5.32
32 | 31,Fiji,8.1,2.79
33 | 32,"Gambia, The",66.9,4.4
34 | 33,Ghana,32.7,4.06
35 | 34,Guatemala,36.8,4.33
36 | 35,Guinea-Bissau,72.4,7.08
37 | 36,Haiti,48.8,3.75
38 | 37,Honduras,23,3.47
39 | 38,Hong Kong,9.5,0.97
40 | 39,India,51.7,2.84
41 | 40,Indonesia,15.9,2.27
42 | 41,Iran,27.4,2.07
43 | 42,Israel,6.2,2.82
44 | 43,Jamaica,8.3,2.38
45 | 44,Jordan,13.4,3.29
46 | 45,Kenya,20.3,4.98
47 | 46,Kuwait,18.3,2.39
48 | 47,Laos,43.4,4.5
49 | 48,Lebanon,17.8,2.25
50 | 49,Lesotho,5.5,3.4
51 | 50,Liberia,59.5,6.78
52 | 51,Libya,28.1,2.85
53 | 52,Macau,8,0.88
54 | 53,Madagascar,37.5,5.04
55 | 54,Malawi,50.3,5.84
56 | 55,Malaysia,14.7,2.74
57 | 56,Mali,82,6.72
58 | 57,Malta,6.4,1.37
59 | 58,Mauritania,68.1,5.59
60 | 59,Mauritius,17.4,1.98
61 | 60,Mexico,9.8,2.11
62 | 61,Mozambique,67.3,5.3
63 | 62,Namibia,16.3,3.66
64 | 63,Nicaragua,32.2,3.08
65 | 64,Niger,90.2,7.67
66 | 65,Nigeria,39,5.5
67 | 66,Oman,32.8,3.44
68 | 67,Pakistan,69.4,4.12
69 | 68,Panama,8.1,2.62
70 | 69,Papua New Guinea,40.6,3.8
71 | 70,Paraguay,7,3.67
72 | 71,Peru,13.3,2.74
73 | 72,Portugal,8.8,1.4
74 | 73,Puerto Rico,5.5,1.8
75 | 74,Qatar,15,2.89
76 | 75,Rwanda,35.2,5.8
77 | 76,Saudi Arabia,29.3,3.83
78 | 77,Senegal,69.2,4.9
79 | 78,Singapore,10.4,1.24
80 | 79,South Africa,14.3,2.78
81 | 80,Sri Lanka,10.1,1.91
82 | 81,Sudan,49.5,4.15
83 | 82,Swaziland,19.2,3.91
84 | 83,Syria,36.1,3.24
85 | 84,Tanzania,29.4,5.2
86 | 85,Thailand,5.4,1.89
87 | 86,Togo,53.1,5.03
88 | 87,Tunisia,35.6,2.04
89 | 88,Turkey,21.4,2.19
90 | 89,United Arab Emirates,18.5,2.43
91 | 90,Venezuela,6.9,2.65
92 | 91,Vietnam,8.5,1.78
93 | 92,Yemen,69.8,5.87
94 | 93,Zambia,25.2,5.4
95 | 94,Zimbabwe,12.9,3.34
96 |
--------------------------------------------------------------------------------
/Edition2/Data/Lottery.csv:
--------------------------------------------------------------------------------
1 | "Win"
2 | 25
3 | 30
4 | 32
5 | 16
6 | 17
7 | 23
8 | 28
9 | 1
10 | 36
11 | 10
12 | 26
13 | 15
14 | 22
15 | 7
16 | 21
17 | 8
18 | 22
19 | 14
20 | 23
21 | 5
22 | 19
23 | 31
24 | 27
25 | 15
26 | 19
27 | 35
28 | 20
29 | 20
30 | 10
31 | 35
32 | 19
33 | 38
34 | 36
35 | 12
36 | 12
37 | 16
38 | 17
39 | 2
40 | 13
41 | 31
42 | 37
43 | 36
44 | 25
45 | 23
46 | 16
47 | 32
48 | 7
49 | 38
50 | 31
51 | 31
52 | 11
53 | 1
54 | 25
55 | 11
56 | 10
57 | 6
58 | 1
59 | 15
60 | 37
61 | 6
62 | 34
63 | 5
64 | 31
65 | 13
66 | 32
67 | 36
68 | 36
69 | 24
70 | 38
71 | 34
72 | 32
73 | 9
74 | 14
75 | 20
76 | 29
77 | 29
78 | 34
79 | 13
80 | 24
81 | 39
82 | 7
83 | 35
84 | 17
85 | 18
86 | 23
87 | 15
88 | 38
89 | 23
90 | 8
91 | 30
92 | 5
93 | 20
94 | 33
95 | 17
96 | 34
97 | 35
98 | 32
99 | 21
100 | 32
101 | 19
102 | 33
103 | 19
104 | 4
105 | 9
106 | 28
107 | 1
108 | 16
109 | 36
110 | 13
111 | 36
112 | 18
113 | 26
114 | 34
115 | 36
116 | 30
117 | 20
118 | 39
119 | 38
120 | 10
121 | 23
122 | 2
123 | 13
124 | 39
125 | 26
126 | 22
127 | 18
128 | 1
129 | 7
130 | 2
131 | 20
132 | 16
133 | 8
134 | 35
135 | 8
136 | 18
137 | 35
138 | 30
139 | 17
140 | 24
141 | 4
142 | 26
143 | 39
144 | 2
145 | 32
146 | 15
147 | 27
148 | 29
149 | 10
150 | 33
151 | 13
152 | 4
153 | 20
154 | 15
155 | 2
156 | 28
157 | 24
158 | 23
159 | 21
160 | 36
161 | 5
162 | 27
163 | 9
164 | 24
165 | 11
166 | 5
167 | 31
168 | 20
169 | 16
170 | 9
171 | 30
172 | 25
173 | 31
174 | 3
175 | 10
176 | 26
177 | 13
178 | 35
179 | 10
180 | 8
181 | 31
182 | 18
183 | 8
184 | 11
185 | 37
186 | 11
187 | 4
188 | 22
189 | 39
190 | 32
191 | 22
192 | 8
193 | 39
194 | 18
195 | 20
196 | 32
197 | 21
198 | 31
199 | 27
200 | 23
201 | 26
202 | 23
203 | 29
204 | 32
205 | 8
206 | 14
207 | 1
208 | 28
209 | 31
210 | 36
211 | 29
212 | 22
213 | 26
214 | 37
215 | 36
216 | 39
217 | 9
218 | 20
219 | 35
220 | 4
221 | 10
222 | 22
223 | 36
224 | 24
225 | 5
226 | 31
227 | 20
228 | 7
229 | 3
230 | 25
231 | 24
232 | 37
233 | 33
234 | 29
235 | 4
236 | 2
237 | 1
238 | 22
239 | 23
240 | 28
241 | 6
242 | 8
243 | 4
244 | 33
245 | 20
246 | 16
247 | 34
248 | 27
249 | 34
250 | 5
251 | 22
252 | 38
253 | 23
254 | 20
255 | 20
256 | 27
257 | 27
258 | 27
259 | 9
260 | 20
261 | 7
262 | 11
263 | 7
264 | 34
265 | 31
266 | 19
267 | 11
268 | 36
269 | 35
270 | 8
271 | 30
272 | 3
273 | 30
274 | 15
275 | 11
276 | 15
277 | 12
278 | 12
279 | 11
280 | 23
281 | 22
282 | 30
283 | 6
284 | 36
285 | 33
286 | 18
287 | 24
288 | 37
289 | 29
290 | 38
291 | 20
292 | 34
293 | 21
294 | 32
295 | 5
296 | 10
297 | 33
298 | 3
299 | 12
300 | 33
301 | 24
302 | 15
303 | 36
304 | 21
305 | 11
306 | 22
307 | 17
308 | 18
309 | 10
310 | 3
311 | 4
312 | 29
313 | 11
314 | 14
315 | 34
316 | 34
317 | 18
318 | 23
319 | 20
320 | 32
321 | 17
322 | 37
323 | 2
324 | 31
325 | 15
326 | 13
327 | 6
328 | 27
329 | 37
330 | 5
331 | 2
332 | 26
333 | 5
334 | 39
335 | 16
336 | 10
337 | 26
338 | 37
339 | 27
340 | 12
341 | 14
342 | 18
343 | 18
344 | 14
345 | 35
346 | 18
347 | 8
348 | 17
349 | 32
350 | 39
351 | 10
352 | 9
353 | 18
354 | 28
355 | 24
356 | 24
357 | 16
358 | 38
359 | 6
360 | 36
361 | 23
362 | 25
363 | 11
364 | 37
365 | 12
366 | 32
367 | 1
368 | 11
369 | 21
370 | 15
371 | 14
372 | 24
373 | 28
374 | 8
375 | 38
376 | 4
377 | 21
378 | 17
379 | 28
380 | 16
381 | 4
382 | 31
383 | 16
384 | 10
385 | 1
386 | 24
387 | 21
388 | 22
389 | 22
390 | 22
391 | 19
392 | 31
393 | 10
394 | 3
395 | 6
396 | 29
397 | 5
398 | 36
399 | 38
400 | 39
401 | 24
402 | 26
403 | 6
404 | 6
405 | 37
406 | 32
407 | 24
408 | 3
409 | 15
410 | 23
411 | 38
412 | 33
413 | 1
414 | 35
415 | 28
416 | 32
417 | 34
418 | 28
419 | 33
420 | 29
421 | 39
422 | 3
423 | 27
424 | 36
425 | 2
426 | 13
427 | 28
428 | 14
429 | 39
430 | 8
431 | 31
432 | 27
433 | 25
434 | 11
435 | 1
436 | 28
437 | 11
438 | 14
439 | 18
440 | 23
441 | 33
442 | 10
443 | 18
444 | 4
445 | 1
446 | 17
447 | 13
448 | 24
449 | 12
450 | 36
451 | 9
452 | 9
453 | 26
454 | 32
455 | 14
456 | 9
457 | 23
458 | 32
459 | 11
460 | 28
461 | 11
462 | 22
463 | 36
464 | 33
465 | 12
466 | 2
467 | 30
468 | 39
469 | 24
470 | 13
471 | 5
472 | 26
473 | 38
474 | 39
475 | 3
476 | 5
477 | 39
478 | 9
479 | 35
480 | 7
481 | 23
482 | 5
483 | 29
484 | 12
485 | 26
486 | 33
487 | 11
488 | 35
489 | 38
490 | 24
491 | 3
492 | 31
493 | 28
494 | 12
495 | 26
496 | 35
497 | 36
498 | 18
499 | 6
500 | 35
501 | 5
502 |
--------------------------------------------------------------------------------
/Edition2/Data/MathStatsData_Ed2.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Data/MathStatsData_Ed2.zip
--------------------------------------------------------------------------------
/Edition2/Data/Maunaloa.csv:
--------------------------------------------------------------------------------
1 | ID,Year,Level
2 | 1,1990,357.08
3 | 2,1991,359
4 | 3,1992,359.45
5 | 4,1993,360.07
6 | 5,1994,361.48
7 | 6,1995,363.62
8 | 7,1996,364.93
9 | 8,1997,366.47
10 | 9,1998,369.33
11 | 10,1999,370.66
12 | 11,2000,371.32
13 | 12,2001,373.77
14 | 13,2002,375.58
15 | 14,2003,378.54
16 | 15,2004,380.62
17 | 16,2005,382.45
18 | 17,2006,384.94
19 | 18,2007,386.53
20 | 19,2008,388.5
21 | 20,2009,390.18
22 | 21,2010,392.94
23 |
--------------------------------------------------------------------------------
/Edition2/Data/NBA1617.csv:
--------------------------------------------------------------------------------
1 | Name,Position,Team,Games,Minutes,PercFG,Perc3P,Perc2P,PercFT,OffReb,DefReb,Assists,Blocks
2 | Quincy Acy,PF,Brooklyn,32,510,42.5,43.4,41.4,75.4,18,89,18,15
3 | Anthony Bennett,PF,Brooklyn,23,264,41.3,27.1,56.8,72.2,25,53,12,3
4 | Bojan Bogdanovic,SF,Brooklyn,55,1482,44,35.7,50.9,87.4,23,174,90,3
5 | Trevor Booker,PF,Brooklyn,71,1754,51.6,32.1,54.6,67.3,143,428,138,28
6 | Spencer Dinwiddie,PG,Brooklyn,59,1334,44.4,37.6,47.8,79.2,27,137,185,23
7 | Yogi Ferrell,PG,Brooklyn,10,151,36.7,29.6,45.5,62.5,4,8,17,2
8 | Randy Foye,SG,Brooklyn,69,1284,36.3,33,41.8,85.7,9,146,135,9
9 | Archie Goodwin,SG,Brooklyn,12,184,55.7,30.8,62.5,71.9,7,21,23,4
10 | Justin Hamilton,C,Brooklyn,64,1177,45.9,30.6,59.8,75,71,191,55,43
11 | Joe Harris,SG,Brooklyn,52,1138,42.5,38.5,48.9,71.4,16,131,54,8
12 | Rondae Hollis-Jefferson,SF,Brooklyn,78,1761,43.4,22.4,46.3,75.1,96,356,154,44
13 | Sean Kilpatrick,SG,Brooklyn,70,1754,41.5,34.1,46.8,84.3,22,258,157,6
14 | Caris LeVert,SF,Brooklyn,57,1237,45,32.1,57.1,72,23,165,110,8
15 | Jeremy Lin,PG,Brooklyn,36,883,43.8,37.2,48,81.6,11,124,184,14
16 | Brook Lopez,C,Brooklyn,75,2222,47.4,34.6,53.6,81,121,282,176,124
17 | K.J. McDaniels,SF,Brooklyn,20,293,45.5,28.2,56.5,82.1,9,43,9,10
18 | Andrew Nicholson,PF,Brooklyn,10,111,38.2,18.2,47.8,100,4,23,3,0
19 | Luis Scola,PF,Brooklyn,36,461,47,34,53.5,67.6,51,88,37,4
20 | Isaiah Whitehead,PG,Brooklyn,73,1643,40.2,29.5,44.6,80.5,32,152,192,36
21 | Nicolas Batum,SG,Charlotte,77,2617,40.3,33.3,45.3,85.6,46,435,456,29
22 | Marco Belinelli,SG,Charlotte,74,1778,42.9,36,48.8,89.3,14,164,147,9
23 | Treveon Graham,SG,Charlotte,27,189,47.5,60,40,66.7,5,17,6,1
24 | Spencer Hawes,PF,Charlotte,35,626,47.7,29.1,54,88.2,32,114,62,26
25 | Roy Hibbert,C,Charlotte,42,671,54.2,NA,54.2,81.3,53,97,20,44
26 | Frank Kaminsky,C,Charlotte,75,1954,39.9,32.8,45.5,75.6,57,279,162,34
27 | Michael Kidd-Gilchrist,SF,Charlotte,81,2349,47.7,11.1,48.3,78.4,156,409,114,77
28 | Jeremy Lamb,SG,Charlotte,62,1143,46,28.1,53.6,85.3,30,234,75,23
29 | Miles Plumlee,C,Charlotte,13,174,58.3,NA,58.3,75,15,27,3,4
30 | Brian Roberts,PG,Charlotte,41,416,37.7,38.6,37.2,84.6,5,34,52,1
31 | Ramon Sessions,PG,Charlotte,50,811,38,33.9,39.3,77.1,11,62,129,3
32 | Kemba Walker,PG,Charlotte,79,2739,44.4,39.9,47.6,84.7,45,263,434,22
33 | Briante Weber,PG,Charlotte,13,159,43.5,14.3,48.7,69.2,8,14,16,0
34 | Marvin Williams,PF,Charlotte,76,2295,42.2,35,49.4,87.3,89,411,106,53
35 | Christian Wood,PF,Charlotte,13,107,52.2,0,66.7,73.3,14,15,2,6
36 | Cody Zeller,PF,Charlotte,62,1725,57.1,0,57.2,67.9,135,270,99,58
37 | Chris Andersen,C,Cleveland,12,114,40.9,0,47.4,71.4,9,22,5,7
38 | Mike Dunleavy,SF,Cleveland,23,366,40,35.1,48.5,73.7,5,42,20,2
39 | Kay Felder,PG,Cleveland,42,386,39.2,31.8,40.4,71.4,3,38,58,7
40 | Channing Frye,C,Cleveland,74,1398,45.8,40.9,54.6,85.1,37,253,45,37
41 | Kyrie Irving,PG,Cleveland,72,2525,47.3,40.1,50.5,90.5,52,178,418,24
42 | LeBron James,SF,Cleveland,74,2794,54.8,36.3,61.1,67.4,97,542,646,44
43 | Richard Jefferson,SF,Cleveland,79,1614,44.6,33.3,58,74.1,28,175,78,10
44 | James Jones,SF,Cleveland,48,381,47.8,47,50,65,3,34,14,10
45 | Kyle Korver,SG,Cleveland,35,859,48.7,48.5,49.3,93.3,7,90,35,8
46 | DeAndre Liggins,SG,Cleveland,61,752,38.2,37.8,38.5,62.2,17,84,54,14
47 | Kevin Love,PF,Cleveland,60,1885,42.7,37.3,47.1,87.1,148,518,116,21
48 | Jordan McRae,SG,Cleveland,37,384,38.7,35.3,40.4,79.4,6,35,19,6
49 | Iman Shumpert,SG,Cleveland,76,1937,41.1,36,46.9,78.9,39,179,109,27
50 | J.R. Smith,SG,Cleveland,41,1187,34.6,35.1,32.9,66.7,17,96,62,11
51 | Tristan Thompson,C,Cleveland,78,2336,60,0,60.4,49.8,286,429,77,84
52 | Deron Williams,PG,Cleveland,24,486,46.3,41.5,48.9,84,1,44,86,6
53 | Derrick Williams,PF,Cleveland,25,427,50.5,40.4,60,69.2,3,54,14,2
54 | LaMarcus Aldridge,PF,San Antonio,72,2335,47.7,41.1,48,81.2,172,351,139,88
55 | Kyle Anderson,SG,San Antonio,72,1020,44.5,37.5,46.2,78.9,33,175,91,26
56 | Joel Anthony,C,San Antonio,19,122,62.5,NA,62.5,62.5,8,23,3,6
57 | Davis Bertans,PF,San Antonio,67,808,44,39.9,55.7,82.4,22,76,46,28
58 | Dewayne Dedmon,C,San Antonio,76,1330,62.2,NA,62.2,69.9,129,367,44,61
59 | Bryn Forbes,SG,San Antonio,36,285,36.4,32.1,41.3,83.3,2,21,23,0
60 | Pau Gasol,C,San Antonio,64,1627,50.2,53.8,49.4,70.7,107,394,150,70
61 | Manu Ginobili,SG,San Antonio,69,1291,39,39.2,38.7,80.4,28,129,183,16
62 | Danny Green,SG,San Antonio,68,1807,39.2,37.9,42,84.4,31,193,124,57
63 | Nicolas Laprovittola,PG,San Antonio,18,174,42.6,37,50,100,1,9,28,1
64 | David Lee,PF,San Antonio,79,1477,59,NA,59,70.8,149,292,124,40
65 | Kawhi Leonard,SF,San Antonio,74,2474,48.5,38,52.9,88,80,350,260,55
66 | Patty Mills,PG,San Antonio,80,1754,44,41.4,47.4,82.5,24,117,280,3
67 | Dejounte Murray,PG,San Antonio,38,322,43.1,39.1,44.1,70,6,36,48,6
68 | Tony Parker,PG,San Antonio,63,1587,46.6,33.3,48.4,72.6,9,104,285,2
69 | Jonathon Simmons,SG,San Antonio,78,1392,42,29.4,46.1,75,20,140,126,25
70 |
--------------------------------------------------------------------------------
/Edition2/Data/Nasdaq.csv:
--------------------------------------------------------------------------------
1 | "Symbol","Open","Close","Volume"
2 | "LQDT",5.75,5.8,58900
3 | "FEUZ",43.55,43.45,5400
4 | "PRIM",28,27.89,206600
5 | "OMED",4.84,4.9,202100
6 | "AGND",43.34,43.29,100400
7 | "WEBK",27.8,27.9,2200
8 | "MNDO",2.68,2.63,10800
9 | "RTRX",22.39,21.8,222400
10 | "VRNS",51,52.35,426700
11 | "NFEC",1.03,1.02,18400
12 | "HLG",41.36,41.75,44300
13 | "CCD",20.51,20.44,28200
14 | "PGLC",2.8,2.81,54100
15 | "FHB",29.27,29.62,249500
16 | "ZIONW",17.75,17.71,121500
17 | "USAU",1.4,1.44,211100
18 | "PTH",71.29,71.02,15700
19 | "FAT",9.25,9.29,26700
20 | "ROCK",32.9,33.05,218900
21 | "WEB",23,22.7,242600
22 | "GRVY",75.88,74.26,26100
23 | "ENFC",29,29.15,8900
24 | "CHRS",9,8.8,318100
25 | "LUNA",2.31,2.31,453800
26 | "RUTH",21.4,21.4,218900
27 | "UPL",9.63,9.45,2725400
28 | "HAYN",32.09,31.67,76900
29 | "LIVE",14.71,14.98,7600
30 | "CCXI",6.48,6.85,79200
31 | "QTNT",4.87,4.78,75000
32 | "HPT",29.98,29.91,459300
33 | "ZUMZ",20.9,19.95,2652900
34 | "FTGC",20.65,20.65,42900
35 | "FLEX",18.06,18.01,4869200
36 | "FLKS",4.23,4.18,16800
37 | "IPCI",0.845,0.85,786600
38 | "MRBK",18.365,18.35,3000
39 | "VDSI",13.35,12.95,154000
40 | "MLVF",26.3,26.7,4300
41 | "OCLR",7.07,7.01,5764000
42 | "FLL",3.68,3.68,115000
43 | "CDXC",6.84,6.59,660100
44 | "WNEB",10.6,10.65,35300
45 | "NEON",0.83,0.81,95700
46 | "EBAYL",26.95,27.01,26900
47 | "DRAD",2.3,2.35,94000
48 | "CXSE",83.28,82.72,39100
49 | "DORM",68.11,67.1,392800
50 | "GRFS",22.41,22.2,527200
51 | "ACGL",94.63,94.86,633400
52 |
--------------------------------------------------------------------------------
/Edition2/Data/Olympics2012.csv:
--------------------------------------------------------------------------------
1 | Name,Country,Age,Sex,Height,Weight,Sport
2 | Chiara Cainero,Italy,34,F,67,165,Shooting
3 | Ciara Michel,Great Britain,27,F,76,154,Volleyball
4 | Claudette Mukasakindi,Rwanda,29,F,63,110,Archery
5 | Claudia Wurzel,Italy,25,F,71,146,Rowing
6 | Daria Korczynska,Poland,31,F,66,132,Track/Field
7 | Elizabeth Beisel,United States of America,19,F,66,146,Swimming
8 | Evelyn Yesenia Garcia Marroquin,El Salvador,29,F,64,119,Cycling - Road
9 | Giulia Rambaldi,Italy,25,F,70,170,Water Polo
10 | Holley Mangold,United States of America,22,F,68,340,Weightlifting
11 | Joanne Morgan,Great Britain,28,F,67,132,Volleyball
12 | Joyce Sombroek,Netherlands,21,F,70,141,Hockey
13 | Kim Conley,United States of America,26,F,63,108,Track/Field
14 | Liu Ying Goh,Malaysia,23,F,65,110,Badminton
15 | Maria Alexandra Escobar Guerrero,Ecuador,32,F,62,126,Weightlifting
16 | Maria Vasco,Spain,36,F,62,104,Track/Field
17 | Michelle Vittese,United States of America,22,F,63,128,Hockey
18 | Paola Croce,Italy,34,F,66,115,Volleyball
19 | Sara Hendershot,United States of America,24,F,71,165,Rowing
20 | Sara Winther,New Zealand,30,F,65,148,Sailing
21 | Sheilla Castro,Brazil,29,F,74,148,Volleyball
22 | Shereefa Lloyd,Jamaica,29,F,66,134,Track/Field
23 | Urszula Sadkowska,Poland,28,F,76,172,Judo
24 | Valerie Vermeersch,Belgium,26,F,69,148,Hockey
25 | Willy Kanis,Netherlands,28,F,68,176,Cycling-Track
26 | Xiang Wei Jasmine Ser,Singapore,21,F,61,106,Shooting
27 | Xuerui Li,People's Republic of China,21,F,67,132,Badminton
28 | Christopher Duenas,Guam,20,M,73,185,Swimming
29 | Emmanuel Dyen,France,33,M,71,154,Sailing
30 | Gregory Wathelet,Belgium,31,M,75,187,Equestrian
31 | Hiroshi Hoketsu,Japan,71,M,66,134,Equestrian
32 | Kazuya Kaneda,Japan,24,M,67,146,Swimming
33 | Marco Fortes,Portugal,29,M,74,298,Track/FIeld
34 | Mickael Gelabale,France,29,M,79,198,Basketball
35 | Minwoo Kim,Republic of Korea,22,M,68,152,Football
36 | Nahom Mesfin,Ethiopia,23,M,71,137,Track/Field
37 | Ramunas Navardauskas,Lithuania,24,M,75,172,Cycling - Road
38 | Ruslan Ismailov,Kyrgyzstan,25,M,68,132,Shooting
39 | Timothy Kitum,Kenya,17,M,67,132,Track/Field
40 | Tonci Stipanovic,Croatia,26,M,70,183,Sailing
41 | Victor Minibaev,Russian Federation,21,M,68,139,Diving
42 | Youcef Abdi,Australia,34,M,70,146,Track/Field
43 | Yu-Cheng Chen,Taipei (Chinese Taipei),19,M,71,159,Archery
44 |
--------------------------------------------------------------------------------
/Edition2/Data/Phillies2009.csv:
--------------------------------------------------------------------------------
1 | Date,Location,Outcome,Hits,Doubles,Homeruns,StrikeOuts
2 | 5-Apr,Home,Lose,4,2,0,6
3 | 7-Apr,Home,Lose,6,1,0,3
4 | 8-Apr,Home,Win,11,3,1,6
5 | 10-Apr,Away,Lose,7,2,1,3
6 | 11-Apr,Away,Win,15,3,1,6
7 | 12-Apr,Away,Win,13,3,2,4
8 | 13-Apr,Away,Win,10,3,3,7
9 | 16-Apr,Away,Lose,5,1,0,3
10 | 17-Apr,Home,Lose,14,3,1,5
11 | 18-Apr,Home,Lose,8,2,3,7
12 | 19-Apr,Home,Win,9,1,3,5
13 | 21-Apr,Home,Win,13,4,1,8
14 | 22-Apr,Home,Lose,8,0,1,4
15 | 23-Apr,Home,Lose,2,0,1,4
16 | 24-Apr,Away,Win,8,2,2,12
17 | 25-Apr,Away,Win,9,0,2,8
18 | 26-Apr,Away,Win,12,2,0,7
19 | 27-Apr,Home,Win,14,5,2,7
20 | 28-Apr,Home,Win,11,0,4,5
21 | 29-Apr,Home,Lose,7,0,1,8
22 | 1-May,Home,Lose,8,0,1,4
23 | 2-May,Home,Win,9,3,1,6
24 | 4-May,Away,Win,8,1,2,11
25 | 5-May,Away,Win,15,4,2,7
26 | 6-May,Away,Lose,3,2,0,11
27 | 7-May,Away,Lose,10,2,2,0
28 | 8-May,Home,Win,8,3,3,7
29 | 9-May,Home,Lose,5,2,2,8
30 | 10-May,Home,Lose,6,3,0,8
31 | 12-May,Home,Win,6,1,0,6
32 | 13-May,Home,Lose,5,2,2,9
33 | 14-May,Home,Lose,5,2,0,13
34 | 15-May,Away,Win,16,1,1,9
35 | May 16 (1),Away,Win,14,2,3,5
36 | May 16 (2),Away,Win,9,3,2,3
37 | 17-May,Away,Win,10,3,0,6
38 | 19-May,Away,Win,7,2,1,5
39 | 20-May,Away,Lose,4,3,1,10
40 | 21-May,Away,Win,14,5,4,11
41 | 22-May,Away,Win,14,0,4,9
42 | 23-May,Away,Lose,6,1,2,5
43 | 24-May,Away,Win,11,3,0,7
44 | 25-May,Home,Lose,6,2,2,8
45 | 26-May,Home,Win,10,5,0,4
46 | 27-May,Home,Lose,8,2,2,7
47 | 29-May,Home,Win,16,6,0,5
48 | 30-May,Home,Win,10,4,2,5
49 | 31-May,Home,Win,7,1,1,10
50 | 1-Jun,Away,Win,12,4,2,9
51 | 2-Jun,Away,Win,11,2,3,6
52 | 3-Jun,Away,Win,10,2,1,3
53 | 4-Jun,Away,Win,7,3,0,7
54 | 5-Jun,Away,Lose,11,2,0,8
55 | 6-Jun,Away,Lose,6,1,0,8
56 | 7-Jun,Away,Win,9,1,3,6
57 | 9-Jun,Away,Lose,10,1,4,4
58 | 10-Jun,Away,Win,9,0,2,3
59 | 11-Jun,Away,Win,9,2,1,9
60 | 12-Jun,Home,Lose,5,2,1,20
61 | 13-Jun,Home,Lose,13,2,3,8
62 | 14-Jun,Home,Win,14,3,1,9
63 | 16-Jun,Home,Lose,8,1,1,10
64 | 17-Jun,Home,Lose,6,0,1,12
65 | 18-Jun,Home,Lose,12,1,4,9
66 | 19-Jun,Home,Lose,6,3,0,7
67 | 20-Jun,Home,Lose,8,2,1,4
68 | 21-Jun,Home,Lose,4,1,1,4
69 | 23-Jun,Away,Win,10,2,2,7
70 | 24-Jun,Away,Lose,4,0,1,7
71 | 25-Jun,Away,Lose,8,2,0,10
72 | 26-Jun,Away,Lose,3,0,0,8
73 | 27-Jun,Away,Win,14,3,3,9
74 | 28-Jun,Away,Win,10,1,0,9
75 | 30-Jun,Away,Lose,10,2,3,6
76 | 1-Jul,Away,Lose,2,0,0,7
77 | 2-Jul,Away,Lose,8,3,0,7
78 | 3-Jul,Home,Win,11,3,1,6
79 | 4-Jul,Home,Win,9,1,0,5
80 | 5-Jul,Home,Win,3,0,2,4
81 | 6-Jul,Home,Win,21,6,4,9
82 | 7-Jul,Home,Lose,10,2,2,8
83 | 8-Jul,Home,Win,9,0,1,11
84 | 9-Jul,Home,Win,12,2,2,5
85 | 10-Jul,Home,Win,5,3,0,6
86 | 11-Jul,Home,Win,13,3,2,8
87 | 12-Jul,Home,Win,6,2,1,7
88 | 16-Jul,Away,Win,10,2,3,10
89 | 17-Jul,Away,Win,12,2,1,10
90 | 19-Jul,Away,Win,8,2,0,7
91 | 20-Jul,Home,Win,9,2,3,10
92 | 21-Jul,Home,Win,6,0,2,12
93 | 22-Jul,Home,Lose,11,0,0,8
94 | 23-Jul,Home,Win,14,4,1,8
95 | 24-Jul,Home,Lose,8,2,0,6
96 | 25-Jul,Home,Win,14,3,2,5
97 | 26-Jul,Home,Win,13,1,4,6
98 | 27-Jul,Away,Win,6,1,1,6
99 | 28-Jul,Away,Win,7,2,1,9
100 | 29-Jul,Away,Lose,7,2,0,12
101 | 30-Jul,Away,Lose,5,3,1,9
102 | 31-Jul,Away,Win,8,2,1,6
103 | 1-Aug,Away,Lose,7,0,0,8
104 | 2-Aug,Away,Lose,9,1,1,6
105 | 4-Aug,Home,Lose,9,2,1,9
106 | 5-Aug,Home,Win,11,5,3,5
107 | 6-Aug,Home,Win,7,1,1,3
108 | 7-Aug,Home,Lose,4,1,1,9
109 | 8-Aug,Home,Lose,11,3,0,6
110 | 9-Aug,Home,Lose,6,0,1,9
111 | 11-Aug,Away,Win,3,0,2,9
112 | 12-Aug,Away,Win,14,3,3,9
113 | 13-Aug,Away,Win,10,2,2,9
114 | 14-Aug,Away,Win,7,2,2,9
115 | 15-Aug,Away,Lose,7,3,1,4
116 | 16-Aug,Away,Win,7,1,2,8
117 | 18-Aug,Home,Win,12,1,2,6
118 | 19-Aug,Home,Win,13,2,4,10
119 | 20-Aug,Home,Win,12,1,3,7
120 | 21-Aug,Away,Lose,10,1,0,10
121 | 22-Aug,Away,Win,7,4,1,6
122 | 23-Aug,Away,Win,10,1,2,6
123 | 24-Aug,Away,Win,7,2,2,9
124 | 25-Aug,Away,Lose,9,5,2,8
125 | 26-Aug,Away,Win,9,4,2,6
126 | 27-Aug,Away,Lose,6,1,1,9
127 | 28-Aug,Home,Win,9,2,2,8
128 | 29-Aug,Home,Lose,11,1,1,7
129 | 30-Aug,Home,Win,8,3,1,4
130 | 1-Sep,Home,Win,5,2,0,10
131 | 2-Sep,Home,Lose,5,0,0,4
132 | 3-Sep,Home,Win,4,3,1,12
133 | 4-Sep,Away,Lose,8,1,0,9
134 | 5-Sep,Away,Lose,10,1,0,9
135 | 6-Sep,Away,Lose,9,1,3,8
136 | 7-Sep,Away,Lose,7,2,2,7
137 | 8-Sep,Away,Win,7,0,5,2
138 | 9-Sep,Away,Win,11,3,2,8
139 | 10-Sep,Away,Lose,11,1,1,2
140 | 11-Sep,Home,Win,12,6,0,6
141 | 12-Sep,Home,Lose,12,3,3,7
142 | Sep 13 (1),Home,Win,10,2,2,5
143 | Sep 13 (2),Home,Win,3,1,0,4
144 | 15-Sep,Home,Win,10,4,0,3
145 | 16-Sep,Home,Win,8,0,1,5
146 | 17-Sep,Home,Win,6,1,0,7
147 | 18-Sep,Away,Win,10,1,4,4
148 | 19-Sep,Away,Lose,8,1,1,8
149 | 20-Sep,Away,Win,10,5,0,9
150 | Sep 22 (1),Away,Win,11,1,1,16
151 | Sep 22 (2),Away,Lose,2,1,0,7
152 | 23-Sep,Away,Lose,9,3,2,6
153 | 24-Sep,Away,Win,14,2,1,8
154 | 25-Sep,Away,Lose,9,0,0,8
155 | 26-Sep,Away,Lose,8,0,2,5
156 | 27-Sep,Away,Win,14,5,1,9
157 | 28-Sep,Home,Lose,4,1,0,5
158 | 29-Sep,Home,Win,8,1,2,6
159 | 30-Sep,Home,Win,9,3,1,2
160 | 1-Oct,Home,Lose,13,3,0,8
161 | 2-Oct,Home,Lose,7,1,1,4
162 | 3-Oct,Home,Lose,6,0,2,4
163 | 4-Oct,Home,Win,12,0,1,13
164 |
--------------------------------------------------------------------------------
/Edition2/Data/Quetzal.csv:
--------------------------------------------------------------------------------
1 | "Country","Nest","Snag"
2 | "Guatemala",4.62,6.15
3 | "Guatemala",18.46,24.62
4 | "Guatemala",5.23,6.15
5 | "Guatemala",9.85,12.31
6 | "Guatemala",7.69,9.23
7 | "Guatemala",9.85,15.38
8 | "Guatemala",4,5.85
9 | "Guatemala",10.77,12.31
10 | "Guatemala",13.85,16.92
11 | "Guatemala",10.77,16.92
12 | "Guatemala",24.62,29.23
13 | "Costa Rica",6.9,8.7
14 | "Costa Rica",5.6,7.7
15 | "Costa Rica",4.3,5.2
16 | "Costa Rica",8.3,9.7
17 | "Costa Rica",1.5,1.8
18 | "Costa Rica",6.2,9.6
19 | "Costa Rica",7.4,10.7
20 | "Costa Rica",4.1,7.3
21 | "Costa Rica",10.1,10.8
22 | "Costa Rica",8.4,9.7
23 |
--------------------------------------------------------------------------------
/Edition2/Data/RangersTwins2016.csv:
--------------------------------------------------------------------------------
1 | Name,Team,Pos,Age,Games,AtBats,Runs,Hits,Doubles,Triples,HR,RBI,SB,CS,BB,SO,BA
2 | Robinson Chirinos,Rangers,C,32,57,147,21,33,11,0,9,20,0,1,15,44,0.224
3 | Mitch Moreland,Rangers,1B,30,147,460,49,107,21,0,22,60,1,0,35,118,0.233
4 | Rougned Odor,Rangers,2B,22,150,605,89,164,33,4,33,88,14,7,19,135,0.271
5 | Elvis Andrus,Rangers,SS,27,147,506,75,153,31,7,8,69,24,8,47,70,0.302
6 | Adrian Beltre,Rangers,3B,37,153,583,89,175,31,1,32,104,1,1,48,66,0.3
7 | Ryan Rua,Rangers,LF,26,99,240,40,62,8,1,8,22,9,0,21,76,0.258
8 | Ian Desmond,Rangers,CF,30,156,625,107,178,29,3,22,86,21,6,44,160,0.285
9 | Nomar Mazara,Rangers,RF,21,145,516,59,137,13,3,20,64,0,2,39,112,0.266
10 | Prince Fielder,Rangers,DH,32,89,326,29,69,16,0,8,44,0,0,32,63,0.212
11 | Jurickson Profar,Rangers,UT,23,90,272,35,65,6,3,5,20,2,1,30,61,0.239
12 | Carlos Beltran,Rangers,DH,39,52,193,23,54,12,0,7,29,1,0,13,31,0.28
13 | Delino DeShields,Rangers,OF,23,74,182,36,38,7,0,4,13,8,3,15,54,0.209
14 | Kurt Suzuki,Twins,C,32,106,345,34,89,24,1,8,49,0,0,18,48,0.258
15 | Joe Mauer,Twins,1B,33,134,494,68,129,22,4,11,49,2,0,79,93,0.261
16 | Brian Dozier,Twins,2B,29,155,615,104,165,35,5,42,99,18,2,61,138,0.268
17 | Eduardo Escobar,Twins,SS,27,105,352,32,83,14,2,6,37,1,3,21,72,0.236
18 | Trevor Plouffe,Twins,3B,30,84,319,35,83,13,1,12,47,1,0,19,60,0.26
19 | Robbie Grossman,Twins,LF,26,99,332,49,93,19,1,11,37,2,3,55,96,0.28
20 | Byron Buxton,Twins,CF,22,92,298,44,67,19,6,10,38,10,2,23,118,0.225
21 | Max Kepler,Twins,RF,23,113,396,52,93,20,2,17,63,6,2,42,93,0.235
22 | ByungHo Park,Twins,DH,29,62,215,28,41,9,1,12,24,1,0,21,80,0.191
23 | Miguel Sano,Twins,UT,23,116,437,57,103,22,1,25,66,1,0,54,178,0.236
24 | Eduardo Nunez,Twins,IF,29,91,371,49,110,15,1,12,47,27,6,15,58,0.296
25 | Eddie Rosario,Twins,OF,24,92,335,52,90,17,2,10,32,5,2,12,91,0.269
26 | Jorge Polanco,Twins,SS,22,69,245,24,69,15,4,4,27,4,3,17,46,0.282
27 | Danny Santana,Twins,CF,25,75,233,29,56,10,2,2,14,12,9,12,55,0.24
28 | Juan Centeno,Twins,C,26,55,176,16,46,12,1,3,25,0,0,12,38,0.261
29 |
--------------------------------------------------------------------------------
/Edition2/Data/Salaries.csv:
--------------------------------------------------------------------------------
1 | League,Salary,Year
2 | National,0.55775,1985
3 | National,0.5187075,1985
4 | National,2.1,2015
5 | American,8,2015
6 | National,0.568905,1985
7 | American,0.5081,2015
8 | American,1.45015,1985
9 | American,1.5,2015
10 | National,0.8,2015
11 | American,0.2231,1985
12 | American,0.51,2015
13 | National,1.7848,1985
14 | National,1,2015
15 | National,1.88,2015
16 | National,1.333333,2015
17 | American,0.24541,1985
18 | American,0.501975,1985
19 | National,0.5075525,1985
20 | National,2.0079,1985
21 | American,0.66,2015
22 | American,1.675,2015
23 | National,0.523925,2015
24 | National,0.5125,2015
25 | American,0.517,2015
26 | American,1.710434077,1985
27 | National,1.776730473,1985
28 | American,0.55775,1985
29 | American,0.5109,2015
30 | American,2.95,2015
31 | American,0.5201,2015
32 | National,0.5165,2015
33 | National,2.5,2015
34 | National,0.78085,1985
35 | American,1.29398,1985
36 | American,8.5,2015
37 | American,0.3290725,1985
38 | American,1.153746033,1985
39 | National,0.5057,2015
40 | National,0.5085,2015
41 | American,1.225,2015
42 | American,1.47246,1985
43 | American,2.475,2015
44 | National,0.836625,1985
45 | American,0.26772,1985
46 | National,2.6,2015
47 | National,0.531,2015
48 | American,1.394375,1985
49 | American,2.416915923,1985
50 | National,0.4462,1985
51 | National,0.518,2015
52 | American,7.7,2015
53 | American,0.525415,2015
54 | National,0.5229,2015
55 | National,0.29003,1985
56 | American,0.301185,1985
57 | American,0.5095,2015
58 | American,0.747385,1985
59 | American,1.24936,1985
60 | American,0.603,2015
61 | American,0.5129,2015
62 | American,1.6,2015
63 | National,0.825,2015
64 | National,0.525,2015
65 | National,1.75,2015
66 | National,0.858935,1985
67 | National,6.857143,2015
68 | National,2.536647,1985
69 | American,1.015105,1985
70 | National,0.55775,1985
71 | American,0.5085,2015
72 |
--------------------------------------------------------------------------------
/Edition2/Data/Service.csv:
--------------------------------------------------------------------------------
1 | ID,Times
2 | 1,1.1
3 | 2,1.4
4 | 3,0.683333333
5 | 4,0.716666666
6 | 5,0.316666666
7 | 6,0.533333334
8 | 7,0.35
9 | 8,0.7
10 | 9,1.633333333
11 | 10,0.933333334
12 | 11,0.533333334
13 | 12,0.283333333
14 | 13,0.516666666
15 | 14,0.383333334
16 | 15,1.783333333
17 | 16,1.216666667
18 | 17,0.883333333
19 | 18,0.383333334
20 | 19,1.566666667
21 | 20,0.216666667
22 | 21,0.483333333
23 | 22,1.333333334
24 | 23,0.133333333
25 | 24,0.833333334
26 | 25,0.766666667
27 | 26,0.783333333
28 | 27,0.3
29 | 28,0.683333333
30 | 29,0.566666667
31 | 30,0.683333333
32 | 31,0.866666667
33 | 32,0.7
34 | 33,0.333333333
35 | 34,0.983333334
36 | 35,0.616666667
37 | 36,0.383333333
38 | 37,0.35
39 | 38,0.283333333
40 | 39,0.566666667
41 | 40,1.766666667
42 | 41,1.6
43 | 42,0.783333334
44 | 43,0.316666666
45 | 44,0.616666667
46 | 45,0.316666666
47 | 46,0.15
48 | 47,0.516666667
49 | 48,0.7
50 | 49,0.233333333
51 | 50,0.833333333
52 | 51,0.65
53 | 52,0.916666667
54 | 53,1.333333334
55 | 54,0.25
56 | 55,0.133333333
57 | 56,0.383333333
58 | 57,2.2
59 | 58,0.35
60 | 59,0.55
61 | 60,0.933333334
62 | 61,0.4
63 | 62,0.566666666
64 | 63,0.433333334
65 | 64,1.633333334
66 | 65,1.966666666
67 | 66,0.65
68 | 67,1.133333333
69 | 68,0.5
70 | 69,0.15
71 | 70,1.05
72 | 71,0.75
73 | 72,1
74 | 73,1.133333334
75 | 74,1.15
76 | 75,0.816666667
77 | 76,0.866666667
78 | 77,0.45
79 | 78,0.15
80 | 79,0.383333333
81 | 80,0.55
82 | 81,0.683333333
83 | 82,0.966666667
84 | 83,0.533333333
85 | 84,0.35
86 | 85,0.666666667
87 | 86,0.633333334
88 | 87,0.466666667
89 | 88,0.466666667
90 | 89,0.783333334
91 | 90,0.6
92 | 91,1.483333333
93 | 92,0.733333334
94 | 93,1.4
95 | 94,1.033333333
96 | 95,0.683333333
97 | 96,0.1
98 | 97,0.45
99 | 98,0.416666667
100 | 99,0.516666666
101 | 100,1.15
102 | 101,0.466666667
103 | 102,0.183333334
104 | 103,0.433333333
105 | 104,0.3
106 | 105,0.666666666
107 | 106,1.15
108 | 107,0.55
109 | 108,0.733333334
110 | 109,0.9
111 | 110,0.95
112 | 111,0.583333333
113 | 112,1.3
114 | 113,0.316666666
115 | 114,0.733333334
116 | 115,0.433333334
117 | 116,0.283333333
118 | 117,0.316666667
119 | 118,0.416666666
120 | 119,0.933333334
121 | 120,0.8
122 | 121,0.45
123 | 122,0.6
124 | 123,0.25
125 | 124,1.783333334
126 | 125,0.5
127 | 126,0.5
128 | 127,0.233333333
129 | 128,0.216666667
130 | 129,1.033333333
131 | 130,0.516666667
132 | 131,0.7
133 | 132,0.216666667
134 | 133,0.833333333
135 | 134,1.183333333
136 | 135,1.116666667
137 | 136,0.433333333
138 | 137,0.283333334
139 | 138,0.35
140 | 139,0.716666667
141 | 140,0.4
142 | 141,0.333333334
143 | 142,0.216666667
144 | 143,0.433333333
145 | 144,0.3
146 | 145,0.35
147 | 146,1.666666666
148 | 147,0.983333334
149 | 148,0.316666667
150 | 149,1.416666667
151 | 150,1.033333333
152 | 151,1.116666667
153 | 152,0.55
154 | 153,0.466666667
155 | 154,0.566666666
156 | 155,0.55
157 | 156,0.983333333
158 | 157,0.8
159 | 158,0.533333333
160 | 159,0.3
161 | 160,0.3
162 | 161,1.183333333
163 | 162,0.166666666
164 | 163,0.366666667
165 | 164,0.366666667
166 | 165,0.15
167 | 166,0.183333333
168 | 167,0.283333334
169 | 168,0.633333334
170 | 169,0.566666667
171 | 170,0.45
172 | 171,0.983333333
173 | 172,1.433333333
174 | 173,1.816666667
175 | 174,1.183333333
176 |
--------------------------------------------------------------------------------
/Edition2/Data/Skateboard.csv:
--------------------------------------------------------------------------------
1 | "Age","Experimenter","Testosterone"
2 | 18,"Female",206
3 | 18,"Female",197
4 | 18,"Female",135.8
5 | 18,"Female",170.2
6 | 19,"Female",107.3
7 | 19,"Female",351.6
8 | 18,"Female",282.6
9 | 18,"Female",257
10 | 18,"Female",117.8
11 | 19,"Female",342.4
12 | 34,"Female",129.6
13 | 26,"Female",208.6
14 | 19,"Female",253.8
15 | 22,"Female",213.6
16 | 25,"Female",344
17 | 25,"Female",127.7
18 | 29,"Female",351.6
19 | 19,"Female",179.6
20 | 21,"Female",469.6
21 | 19,"Female",411.4
22 | 25,"Female",267.2
23 | 20,"Female",267.4
24 | 19,"Female",308.8
25 | 33,"Female",568.2
26 | 23,"Female",293.8
27 | 27,"Female",495
28 | 20,"Female",408
29 | 21,"Female",644.8
30 | 18,"Female",206.6
31 | 18,"Female",472
32 | 18,"Female",369.8
33 | 23,"Female",286.4
34 | 22,"Female",246.4
35 | 18,"Female",232
36 | 18,"Female",126.4
37 | 32,"Female",106.2
38 | 22,"Female",160.6
39 | 19,"Female",146.8
40 | 21,"Female",361.2
41 | 29,"Female",196.4
42 | 23,"Female",307.4
43 | 24,"Female",625
44 | 23,"Female",209
45 | 19,"Female",502
46 | 19,"Female",236.6
47 | 19,"Female",183.8
48 | 20,"Female",320
49 | 27,"Female",544
50 | 21,"Female",552.6
51 | 19,"Male",127.2
52 | 21,"Male",143.2
53 | 18,"Male",273
54 | 25,"Male",131.6
55 | 20,"Male",190
56 | 29,"Male",193.2
57 | 19,"Male",112
58 | 24,"Male",457
59 | 26,"Male",155.4
60 | 20,"Male",277.8
61 | 22,"Male",139.6
62 | 20,"Male",420.6
63 | 20,"Male",206
64 | 18,"Male",411
65 | 22,"Male",151.2
66 | 18,"Male",271.6
67 | 22,"Male",202
68 | 35,"Male",204
69 | 19,"Male",227
70 | 21,"Male",141.2
71 | 20,"Male",124.4
72 | 19,"Male",124.4
73 |
--------------------------------------------------------------------------------
/Edition2/Data/Skating2010.csv:
--------------------------------------------------------------------------------
1 | "Name","Country","Short","Free","Total"
2 | "LYSACEK Evan","United States",90.3,167.37,257.67
3 | "PLUSHENKO Evgeni","Russian Federation",90.85,165.51,256.36
4 | "TAKAHASHI Daisuke","Japan",90.25,156.98,247.23
5 | "LAMBIEL Stephane","Switzerland",84.63,162.09,246.72
6 | "CHAN Patrick","Canada",81.12,160.3,241.42
7 | "WEIR Johnny","United States",82.1,156.77,238.87
8 | "ODA Nobunari","Japan",84.85,153.69,238.54
9 | "KOZUKA Takahiko","Japan",79.59,151.6,231.19
10 | "ABBOTT Jeremy","United States",69.4,149.56,218.96
11 | "BREZINA Michal","Czech Republic",78.8,137.93,216.73
12 | "TEN Denis","Kazakhstan",76.24,135.01,211.25
13 | "AMODIO Florent","France",75.35,134.95,210.3
14 | "BORODULIN Artem","Russian Federation",72.24,137.92,210.16
15 | "FERNANDEZ Javier","Spain",68.69,137.99,206.68
16 | "SCHULTHEISS Adrian","Sweden",63.13,137.31,200.44
17 | "JOUBERT Brian","France",68,132.22,200.22
18 | "van der PERREN Kevin","Belgium",72.9,116.94,189.84
19 | "CONTESTI Samuel","Italy",70.6,116.9,187.5
20 | "VERNER Tomas","Czech Republic",65.32,119.42,184.74
21 | "BACCHINI Paolo","Italy",64.42,112.79,177.21
22 | "PFEIFER Viktor","Austria",60.88,115.05,175.93
23 | "LINDEMANN Stefan","Germany",68.5,103.48,171.98
24 | "CHIPEUR Vaughn","Canada",57.22,113.7,170.92
25 | "KOVALEVSKI Anton","Ukraine",63.81,102.09,165.9
26 |
--------------------------------------------------------------------------------
/Edition2/Data/Spruce.csv:
--------------------------------------------------------------------------------
1 | "Tree","Competition","Fertilizer","Height0","Height5","Diameter0","Diameter5","Ht.change","Di.change"
2 | 1,"NC","F",15,60,1.984375,7.4,45,5.415625
3 | 2,"NC","F",9,45.2,1.190625,5.2,36.2,4.009375
4 | 3,"NC","F",12,42,1.7859375,5.7,30,3.9140625
5 | 4,"NC","F",13.7,49.5,1.5875,6.4,35.8,4.8125
6 | 5,"NC","F",12,47.3,1.5875,6.2,35.3,4.612500000000001
7 | 6,"NC","F",12,56.4,1.5875,7.4,44.4,5.8125
8 | 7,"NC","NF",16.8,43.5,1.984375,4.9,26.7,2.9156250000000004
9 | 8,"NC","NF",14.6,49.2,1.984375,5.4,34.6,3.4156250000000004
10 | 9,"NC","NF",16,54,1.984375,7.1,38,5.115625
11 | 10,"NC","NF",15.4,45,1.984375,5.1,29.6,3.1156249999999996
12 | 11,"NC","NF",11.7,38,1.3890625,4.1,26.3,2.7109374999999996
13 | 12,"NC","NF",15,60.5,1.5875,7.3,45.5,5.7125
14 | 13,"C","F",13.1,45.4,1.984375,6.3,32.3,4.315625
15 | 14,"C","F",11,50,1.5875,6.2,39,4.612500000000001
16 | 15,"C","F",16,53,2.1828125,5.6,37,3.4171875
17 | 16,"C","F",13.5,54,1.5875,6.3,40.5,4.7125
18 | 17,"C","F",11.6,39,1.5875,4.5,27.4,2.9125
19 | 18,"C","F",13.5,54.2,1.984375,6.5,40.7,4.515625
20 | 19,"C","NF",13.2,28.3,1.5875,3.4,15.100000000000001,1.8125
21 | 20,"C","NF",15.8,29.5,2.38125,3.4,13.7,1.0187499999999998
22 | 21,"C","NF",13.5,42,1.7859375,4.6,28.5,2.8140624999999995
23 | 22,"C","NF",13.4,29,1.984375,4.2,15.6,2.215625
24 | 23,"C","NF",12.5,31,1.984375,3.5,18.5,1.515625
25 | 24,"C","NF",14.7,38,1.984375,4.7,23.3,2.715625
26 | 25,"NC","F",11.5,63,1.984375,8.7,51.5,6.715624999999999
27 | 26,"NC","F",13.7,64.5,1.7859375,8.4,50.8,6.6140625
28 | 27,"NC","F",18.7,58.3,1.984375,7.1,39.599999999999994,5.115625
29 | 28,"NC","F",15.8,66.2,1.5875,9.1,50.400000000000006,7.512499999999999
30 | 29,"NC","F",17.5,62.2,1.984375,8,44.7,6.015625
31 | 30,"NC","F",15,63,1.7859375,8.9,48,7.1140625
32 | 31,"NC","NF",17,53,1.984375,6.9,36,4.915625
33 | 32,"NC","NF",14.2,46.2,1.5875,4.9,32,3.3125000000000004
34 | 33,"NC","NF",11.2,41.4,1.5875,4.3,30.2,2.7125
35 | 34,"NC","NF",16.7,36.2,1.984375,4.8,19.500000000000004,2.815625
36 | 35,"NC","NF",12.5,46.9,1.5875,5.2,34.4,3.6125000000000003
37 | 36,"NC","NF",15.2,43.5,1.984375,5.8,28.3,3.815625
38 | 37,"C","F",15.5,43,1.984375,7.1,27.5,5.115625
39 | 38,"C","F",13.7,43.2,1.984375,5.9,29.500000000000004,3.9156250000000004
40 | 39,"C","F",17.8,48,1.984375,6.4,30.2,4.415625
41 | 40,"C","F",12.8,41,1.5875,6.3,28.2,4.7125
42 | 41,"C","F",15,46,2.38125,5.7,31,3.31875
43 | 42,"C","F",15,45.5,1.984375,6,30.5,4.015625
44 | 43,"C","NF",14,40,1.7859375,4.3,26,2.5140624999999996
45 | 44,"C","NF",15.7,24,1.7859375,3.7,8.3,1.9140625000000002
46 | 45,"C","NF",15.1,37.1,1.5875,4.3,22,2.7125
47 | 46,"C","NF",14,30,1.984375,4.3,16,2.315625
48 | 47,"C","NF",14.6,35,1.984375,3.9,20.4,1.915625
49 | 48,"C","NF",16,37,1.984375,4.1,21,2.1156249999999996
50 | 49,"NC","F",17,68,2.38125,11.3,51,8.918750000000001
51 | 50,"NC","F",17.3,56,1.7859375,9.4,38.7,7.6140625
52 | 51,"NC","F",18.2,68,2.38125,8.9,49.8,6.518750000000001
53 | 52,"NC","F",15,55.4,1.984375,8.7,40.4,6.715624999999999
54 | 53,"NC","F",15.3,62,2.1828125,8.7,46.7,6.5171874999999995
55 | 54,"NC","F",17,48.6,2.38125,8.1,31.6,5.71875
56 | 55,"NC","NF",16,45,1.984375,6.5,29,4.515625
57 | 56,"NC","NF",16.4,43.5,1.984375,5.1,27.1,3.1156249999999996
58 | 57,"NC","NF",14.8,37.7,2.38125,4.3,22.900000000000002,1.9187499999999997
59 | 58,"NC","NF",12,40,1.7859375,4.7,28,2.9140625
60 | 59,"NC","NF",14.5,40.5,1.7859375,5.2,26,3.4140625
61 | 60,"NC","NF",17.1,35,2.38125,4.9,17.9,2.5187500000000003
62 | 61,"C","F",14.3,52,2.38125,6.7,37.7,4.31875
63 | 62,"C","F",12.5,64,1.984375,9,51.5,7.015625
64 | 63,"C","F",14.7,50,2.38125,7,35.3,4.61875
65 | 64,"C","F",16.3,46.2,2.1828125,6.8,29.900000000000002,4.6171875
66 | 65,"C","F",16.2,47,2.38125,7.8,30.8,5.418749999999999
67 | 66,"C","F",17.5,47,2.38125,6.6,29.5,4.21875
68 | 67,"C","NF",16.2,24.7,1.984375,3.3,8.5,1.3156249999999998
69 | 68,"C","NF",11.3,26.4,1.5875,2.7,15.099999999999998,1.1125000000000003
70 | 69,"C","NF",17.5,36,1.984375,3.8,18.5,1.8156249999999998
71 | 70,"C","NF",13.3,24.4,1.7859375,3.5,11.099999999999998,1.7140625
72 | 71,"C","NF",11,27.2,2.38125,4,16.2,1.61875
73 | 72,"C","NF",14.6,33.6,2.38125,4.5,19,2.11875
74 |
--------------------------------------------------------------------------------
/Edition2/Data/Starcraft.csv:
--------------------------------------------------------------------------------
1 | ID,Race,Age,Wins
2 | 1,Protoss,20,29
3 | 2,Protoss,19,27
4 | 3,Protoss,19,26
5 | 4,Protoss,18,19
6 | 5,Protoss,22,23
7 | 6,Protoss,18,25
8 | 7,Protoss,24,19
9 | 8,Protoss,20,20
10 | 9,Protoss,21,19
11 | 10,Protoss,18,21
12 | 11,Protoss,22,16
13 | 12,Protoss,23,21
14 | 13,Protoss,21,18
15 | 14,Protoss,19,18
16 | 15,Protoss,24,17
17 | 16,Terran,18,26
18 | 17,Terran,16,21
19 | 18,Terran,20,25
20 | 19,Terran,18,24
21 | 20,Terran,20,23
22 | 21,Terran,18,26
23 | 22,Terran,17,22
24 | 23,Terran,21,21
25 | 24,Terran,21,21
26 | 25,Terran,21,20
27 | 26,Terran,18,22
28 | 27,Terran,23,20
29 | 28,Terran,18,22
30 | 29,Terran,19,21
31 | 30,Terran,17,24
32 | 31,Zerg,18,28
33 | 32,Zerg,20,24
34 | 33,Zerg,20,20
35 | 34,Zerg,21,23
36 | 35,Zerg,22,17
37 | 36,Zerg,18,20
38 | 37,Zerg,20,15
39 | 38,Zerg,24,17
40 | 39,Zerg,23,14
41 | 40,Zerg,21,18
42 | 41,Zerg,23,12
43 | 42,Zerg,21,15
44 | 43,Zerg,24,16
45 | 44,Zerg,18,14
46 | 45,Zerg,23,11
47 |
--------------------------------------------------------------------------------
/Edition2/Data/TV.csv:
--------------------------------------------------------------------------------
1 | ID,Times,Cable
2 | 1,7,Basic
3 | 2,10,Basic
4 | 3,10.6,Basic
5 | 4,10.2,Basic
6 | 5,8.6,Basic
7 | 6,7.6,Basic
8 | 7,8.2,Basic
9 | 8,10.4,Basic
10 | 9,11,Basic
11 | 10,8.5,Basic
12 | 11,3.4,Extended
13 | 12,7.8,Extended
14 | 13,9.4,Extended
15 | 14,4.7,Extended
16 | 15,5.4,Extended
17 | 16,7.6,Extended
18 | 17,5,Extended
19 | 18,8,Extended
20 | 19,7.8,Extended
21 | 20,9.6,Extended
22 |
--------------------------------------------------------------------------------
/Edition2/Data/Turbine.csv:
--------------------------------------------------------------------------------
1 | "Date2010","AveKW","AveSpeed","Production"
2 | "Feb 14",547.9,7.8,13146
3 | "Feb 15",776,8.9,18626
4 | "Feb 16",944.4,9.7,22667
5 | "Feb 17",506.2,7.7,12148
6 | "Feb 18",322.9,6.4,7742
7 | "Feb 19",67.9,3.1,1585
8 | "Feb 20",79.9,3.9,1876
9 | "Feb 21",123.6,4.5,2936
10 | "Feb 22",273.3,6.5,6559
11 | "Feb 23",626.8,7.8,15041
12 | "Feb 24",242.2,5.8,5800
13 | "Feb 25",2.2,2.5,6
14 | "Feb 26",124.6,3.8,2940
15 | "Feb 27",494.6,7.7,11871
16 | "Feb 28",187.2,5.8,4481
17 | "Mar 01",303.5,5.9,7258
18 | "Mar 02",74.6,3.5,1743
19 | "Mar 03",148.6,5.1,3543
20 | "Mar 04",120.2,4.2,2848
21 | "Mar 05",581.9,8,13965
22 | "Mar 06",503.6,7.6,12087
23 | "Mar 07",89.4,3.5,2099
24 | "Mar 08",210.1,5.9,5037
25 | "Mar 09",347.9,7.1,8348
26 | "Mar 10",594.9,8.4,14279
27 | "Mar 11",611.5,8.2,14674
28 | "Mar 12",35.3,3.1,793
29 | "Mar 13",675,8.7,16202
30 | "Mar 14",317,6.7,7607
31 | "Mar 15",334.3,6.7,8019
32 | "Mar 16",201.5,5.8,4833
33 | "Mar 17",255.7,6,6125
34 | "Mar 18",454.2,6.7,10870
35 | "Mar 19",564.1,8.6,13768
36 | "Mar 20",278.4,6.4,6678
37 | "Mar 21",72.4,3.7,1692
38 | "Mar 22",405.6,7.6,9708
39 | "Mar 23",304.1,6.7,7226
40 | "Mar 24",176.2,6.3,4190
41 | "Mar 25",736.1,8.5,17666
42 | "Mar 26",1072,10.2,25729
43 | "Mar 27",601.5,7.1,14420
44 | "Mar 28",448.3,7,10752
45 | "Mar 29",849.6,9.2,20386
46 | "Mar 30",841.2,13.2,20146
47 | "Mar 31",271.4,8.4,6485
48 | "Apr 01",867.2,9.9,20811
49 | "Apr 02",945.6,11.3,22663
50 | "Apr 03",824.2,9.1,19781
51 | "Apr 04",716.9,8.7,17201
52 | "Apr 05",318.5,5.9,7618
53 | "Apr 06",971.4,10,23315
54 | "Apr 07",884.4,9.4,21225
55 | "Apr 08",309.3,6.5,7418
56 | "Apr 09",438.9,6.2,10502
57 | "Apr 10",276.8,5.4,6613
58 | "Apr 11",160.1,4.9,3814
59 | "Apr 12",755.8,9,18139
60 | "Apr 13",1149,10.9,27572
61 | "Apr 14",315,9.6,7514
62 | "Apr 15",898.3,9.3,21554
63 | "Apr 16",1142.6,10.6,27422
64 | "Apr 17",434.2,7.1,10411
65 | "Apr 18",44.7,4.3,1047
66 | "Apr 19",148.4,4.8,3530
67 | "Apr 20",175.9,4.3,4169
68 | "Apr 21",356.4,7,8552
69 | "Apr 22",80.6,3.7,1889
70 | "Apr 23",1041,10.5,24985
71 | "Apr 24",623.1,8.2,14952
72 | "Apr 25",981,10,23546
73 | "Apr 26",218.1,5.2,5208
74 | "Apr 27",233.7,5.7,5589
75 | "Apr 28",614.5,8,14744
76 | "Apr 29",1285.6,12.2,30854
77 | "Apr 30",556.3,7.6,13338
78 | "May 01",1111.7,11.1,26680
79 | "May 02",781.7,9.2,18762
80 | "May 03",339.2,6.4,8127
81 | "May 04",727.3,8.5,17443
82 | "May 05",1254.4,12.9,30096
83 | "May 06",190.2,4.5,4522
84 | "May 07",164.6,5.1,3925
85 | "May 08",732.7,8.7,17584
86 | "May 09",88.8,3.8,2091
87 | "May 10",1210.8,11,29059
88 | "May 11",660,8.3,15841
89 | "May 12",432.7,7.2,10386
90 | "May 13",490,6.9,11744
91 | "May 14",881.8,9.4,21164
92 | "May 15",77.5,4.6,1830
93 | "May 16",286.5,6.6,6870
94 | "May 17",149.8,5.1,3568
95 | "May 18",100.2,4.8,2370
96 | "May 19",166.2,5,3959
97 | "May 20",426.4,6.9,10224
98 | "May 21",170.2,5.1,4060
99 | "May 22",1005.6,11.2,24132
100 | "May 23",1139.4,11,27343
101 | "May 24",690,10.6,16531
102 | "May 25",360,6,8606
103 | "May 26",195.3,5.6,4669
104 | "May 27",257.5,6.5,6179
105 | "May 28",347.5,6.9,8334
106 | "May 29",765.5,9.3,18370
107 | "May 30",643.5,8.2,15432
108 | "May 31",144.3,4.2,3410
109 | "Jun 01",571.9,7.8,13718
110 | "Jun 02",101.6,4.4,2405
111 | "Jun 03",136.2,4.4,3222
112 | "Jun 04",400.2,6.8,995
113 | "Jun 05",142.6,5.2,3399
114 | "Jun 06",338.1,6.3,8096
115 | "Jun 07",55.1,3.4,1261
116 | "Jun 08",633.3,8.3,15200
117 | "Jun 09",869.4,9.6,20866
118 | "Jun 10",383.9,7.1,9212
119 | "Jun 11",624.9,8.3,14984
120 | "Jun 12",170.3,5.4,4073
121 | "Jun 13",14.9,3.3,309
122 | "Jun 14",259.5,6.1,6218
123 | "Jun 15",282,6.4,6762
124 | "Jun 16",184.4,4.8,4384
125 | "Jun 17",1079.3,12.6,25896
126 | "Jun 18",687.2,8.8,16490
127 | "Jun 19",602.9,7.7,14453
128 | "Jun 20",30.1,3.7,674
129 | "Jun 21",127,4.9,3017
130 | "Jun 22",408.2,7,9785
131 | "Jun 23",531,7.5,12731
132 | "Jun 24",156.6,4.3,3713
133 | "Jun 25",743.4,9.2,17841
134 | "Jun 26",114,3.7,2681
135 | "Jun 27",356.1,5.6,8511
136 | "Jun 28",676.7,8.5,16241
137 | "Jun 29",76.2,4.2,1787
138 | "Jun 30",171.8,5,4094
139 | "Jul 01",973.7,10.2,23367
140 | "Jul 03",1238.8,11.5,29731
141 | "Jul 04",533.1,7.6,12793
142 | "Jul 05",54.3,3.7,1258
143 | "Jul 06",18.5,2.6,379
144 | "Jul 07",113,4,2671
145 | "Jul 08",144.9,5.5,3470
146 | "Jul 09",162.9,5.5,3899
147 | "Jul 10",564.9,7.9,13549
148 | "Jul 11",462.7,6.9,11090
149 | "Jul 12",102.7,4.3,2417
150 | "Jul 13",442.7,7.5,10617
151 | "Jul 14",953.6,10.2,22882
152 | "Jul 15",439.2,7.6,10542
153 | "Jul 16",382.5,7,9177
154 | "Jul 17",385.1,6.2,9214
155 | "Jul 18",459.7,7.6,11034
156 | "Jul 19",59.1,3.1,1349
157 | "Jul 20",129.6,5.3,3096
158 | "Jul 21",123,4.4,2910
159 | "Jul 22",444,6.5,10630
160 | "Jul 23",120.2,4.6,2848
161 | "Jul 24",311.7,7,7470
162 | "Jul 25",32.6,3.3,720
163 | "Jul 26",259.9,6.5,6235
164 | "Jul 27",696.8,9,16722
165 | "Jul 28",248.4,6.4,5959
166 | "Jul 29",2.2,2.2,0
167 | "Jul 30",140.3,5.2,3340
168 | "Jul 31",27.1,3.5,595
169 | "Aug 01",231.7,6,5548
170 |
--------------------------------------------------------------------------------
/Edition2/Data/Volleyball2009.csv:
--------------------------------------------------------------------------------
1 | "Team","HitPercent","Assts","Kills"
2 | "Penn St.",38.1,13.64,14.62
3 | "Texas",33.800000000000004,13.37,14.59
4 | "Hawaii",30.5,13.56,14.58
5 | "Florida St.",30,12.6,13.75
6 | "Florida",29.099999999999998,13.21,14.47
7 | "Washington",28.799999999999997,13.35,14.25
8 | "Md.-East. Shore",28.799999999999997,11.87,12.76
9 | "Middle Tenn.",28.1,12.78,13.65
10 | "St. Mary's (CA)",27.900000000000002,13.31,14.24
11 | "Kentucky",27.900000000000002,13.63,14.74
12 | "Ohio",27.3,12.77,14.06
13 | "California",27.1,12.89,13.95
14 | "LSU",26.900000000000002,13.22,13.99
15 | "Stanford",26.8,13.03,13.85
16 | "Ohio St.",26.8,12.26,13.32
17 | "UNI",26.700000000000003,13.65,14.83
18 | "Oregon",26.6,13.83,14.69
19 | "Lipscomb",26.3,13.79,14.75
20 | "Tulsa",26.200000000000003,13.48,14.43
21 | "Western Ky.",26.1,11.94,13.06
22 | "St. Louis",26.1,12.73,14.04
23 | "Clemson",25.8,12.14,13.36
24 | "Nebraska",25.6,13.63,14.65
25 | "Yale",25.5,12.8,14.02
26 | "Duke",25.5,12.9,13.83
27 | "Minnesota",25.4,12.88,13.86
28 | "FIU",25.4,13.42,14.52
29 | "Louisville",25.4,11.92,12.99
30 | "Notre Dame",25.3,12.55,13.68
31 | "Pepperdine",25,12.9,13.93
32 |
--------------------------------------------------------------------------------
/Edition2/Data/Walleye.csv:
--------------------------------------------------------------------------------
1 | "Length","Weight"
2 | 11.1,0.4
3 | 16.1,1.39
4 | 20.7,2.8
5 | 14.3,1.03
6 | 11.5,0.5
7 | 15.7,0.9
8 | 12.5,0.6
9 | 15.3,1.3
10 | 26.6,7.5
11 | 17.8,2.2
12 | 15.2,1.2
13 | 15,1
14 | 20.8,3.5
15 | 11,0.4
16 | 21,3.1
17 | 14.1,1.2
18 | 12.5,0.6
19 | 13.5,0.8
20 | 16.4,1.2
21 | 29.3,10.3
22 | 12.7,0.8
23 | 12.6,0.6
24 | 11,0.4
25 | 20,2
26 | 14.6,1.1
27 | 20.5,3
28 | 23.3,5.4
29 | 16.2,1.5
30 | 9.2,0.4
31 | 25.5,5.5
32 | 15.9,1.4
33 | 17.6,1.6
34 | 20.8,3
35 | 11,0.4
36 | 17.2,1.9
37 | 14.7,1.08
38 | 20.5,3.1
39 | 9.1,0.3
40 | 10.3,0.3
41 | 19.4,2.66
42 | 13.6,0.81
43 | 20.3,2.8
44 | 16,1.4
45 | 22,4
46 | 12.5,0.8
47 | 14.2,1
48 | 17.5,2.2
49 | 16.1,1.47
50 | 13.5,0.7
51 | 24.2,4.5
52 | 12.8,0.68
53 | 20.6,3.1
54 | 10.5,0.4
55 | 27,6.4
56 | 14,1.1
57 | 10.9,0.4
58 | 17.6,1.8
59 | 16.1,1.7
60 | 23.6,4.5
61 | 15.8,1.2
62 |
--------------------------------------------------------------------------------
/Edition2/Data/Watertable.csv:
--------------------------------------------------------------------------------
1 | "Depth","Alive"
2 | 50,1
3 | 43,1
4 | 50,1
5 | 46,1
6 | 25,1
7 | 19,1
8 | 30,1
9 | 35,1
10 | 8,1
11 | 8,0
12 | 11,1
13 | 9,1
14 | 30,1
15 | 46,1
16 | 38,1
17 | 34,1
18 | 24,1
19 | 30,1
20 | 34,1
21 | 36,1
22 | 12,1
23 | 18,1
24 | 20,1
25 | 22,1
26 | 25,1
27 | 18,1
28 | 30,1
29 | 23,1
30 | 6,1
31 | 4,1
32 | 6,1
33 | 6,1
34 | 4,1
35 | 3,0
36 | 10,1
37 | 4,0
38 | 36,1
39 | 42,1
40 | 27,1
41 | 42,1
42 | 47,1
43 | 56,1
44 | 50,1
45 | 51,1
46 | 48,1
47 | 48,1
48 | 53,1
49 | 55,1
50 | 30,1
51 | 29,1
52 | 28,1
53 | 25,1
54 | 28,1
55 | 25,1
56 | 27,1
57 | 27,1
58 | 24,1
59 | 20,1
60 | 26,1
61 | 22,1
62 | 2,0
63 | 6,0
64 | 8,1
65 | 9,0
66 | 9,1
67 | 1,0
68 | 8,0
69 | 8,0
70 | 1,0
71 | 4,0
72 | 3,0
73 | 8,0
74 | 26,1
75 | 31,1
76 | 31,1
77 | 32,1
78 | 30,1
79 | 24,1
80 | 30,1
81 | 28,1
82 | 15,1
83 | 19,1
84 | 20,1
85 | 24,1
86 | 19,1
87 | 15,1
88 | 17,1
89 | 20,1
90 | 5,0
91 | 9,1
92 | 7,1
93 | 11,0
94 | 7,1
95 | 9,1
96 | 8,1
97 | 9,1
98 | 33,1
99 | 37,1
100 | 36,1
101 | 36,1
102 | 46,1
103 | 41,1
104 | 44,1
105 | 47,1
106 | 25,1
107 | 24,1
108 | 24,1
109 | 21,1
110 | 20,1
111 | 24,1
112 | 23,1
113 | 22,1
114 | 2,0
115 | 1,0
116 | 5,0
117 | 3,0
118 | 4,0
119 | 4,0
120 | 7,0
121 | 7,0
122 | 43,1
123 | 43,1
124 | 47,1
125 | 36,1
126 | 32,1
127 | 30,1
128 | 29,1
129 | 33,1
130 | 22,1
131 | 24,1
132 | 23,1
133 | 22,1
134 | 18,1
135 | 18,1
136 | 21,1
137 | 17,1
138 | 14,1
139 | 13,0
140 | 13,1
141 | 14,1
142 | 5,0
143 | 10,0
144 | 13,1
145 | 4,0
146 | 43,1
147 | 40,1
148 | 47,1
149 | 50,1
150 | 39,1
151 | 39,1
152 | 48,1
153 | 50,1
154 | 41,1
155 | 40,1
156 | 39,1
157 | 38,1
158 | 20,1
159 | 18,1
160 | 26,1
161 | 18,1
162 | 19,1
163 | 20,1
164 | 18,1
165 | 25,1
166 | 23,1
167 | 19,1
168 | 27,1
169 | 26,1
170 | 9,0
171 | 9,0
172 | 11,0
173 | 10,0
174 | 8,0
175 | 6,0
176 | 14,0
177 | 12,0
178 | 10,1
179 | 13,1
180 | 15,1
181 | 12,1
182 | 41,1
183 | 42,1
184 | 48,1
185 | 45,1
186 | 38,1
187 | 39,1
188 | 38,1
189 | 39,1
190 | 40,1
191 | 36,1
192 | 38,1
193 | 39,1
194 | 17,1
195 | 18,1
196 | 20,1
197 | 22,1
198 | 18,1
199 | 18,1
200 | 20,1
201 | 19,1
202 | 13,1
203 | 18,1
204 | 22,1
205 | 24,1
206 | 6,0
207 | 10,0
208 | 9,0
209 | 13,0
210 | 7,0
211 | 4,0
212 | 5,0
213 | 7,0
214 | 5,0
215 | 7,0
216 | 4,0
217 | 11,0
218 | 30,1
219 | 31,1
220 | 42,1
221 | 35,1
222 | 30,1
223 | 30,1
224 | 32,1
225 | 50,1
226 | 18,1
227 | 17,1
228 | 30,1
229 | 27,1
230 | 17,1
231 | 18,1
232 | 22,1
233 | 20,1
234 | 13,1
235 | 11,1
236 | 12,1
237 | 11,0
238 | 7,0
239 | 4,0
240 | 7,0
241 | 8,0
242 | 45,1
243 | 37,1
244 | 50,1
245 | 44,1
246 | 18,1
247 | 13,1
248 | 17,1
249 | 19,1
250 | 10,1
251 | 10,1
252 | 11,0
253 | 7,0
254 | 37,1
255 | 36,1
256 | 36,1
257 | 30,1
258 | 30,1
259 | 24,1
260 | 37,1
261 | 32,1
262 | 12,1
263 | 14,1
264 | 23,1
265 | 22,1
266 | 15,1
267 | 24,1
268 | 24,1
269 | 17,1
270 | 12,1
271 | 12,1
272 | 16,1
273 | 16,0
274 | 10,1
275 | 8,0
276 | 9,1
277 | 11,1
278 | 67,1
279 | 69,1
280 | 64,1
281 | 65,1
282 | 35,1
283 | 35,1
284 | 40,1
285 | 40,1
286 | 50,1
287 | 55,1
288 | 50,1
289 | 53,1
290 | 14,1
291 | 16,1
292 | 18,1
293 | 21,1
294 | 28,1
295 | 27,1
296 | 22,1
297 | 30,1
298 | 21,1
299 | 20,1
300 | 28,1
301 | 25,1
302 | 14,1
303 | 12,1
304 | 14,1
305 | 14,1
306 | 10,0
307 | 6,0
308 | 4,0
309 | 8,0
310 | 8,1
311 | 10,0
312 | 11,0
313 | 11,1
314 | 28,1
315 | 27,1
316 | 32,1
317 | 43,1
318 | 27,1
319 | 35,1
320 | 37,1
321 | 33,1
322 | 16,1
323 | 16,1
324 | 20,1
325 | 18,1
326 | 17,1
327 | 15,1
328 | 17,1
329 | 18,1
330 | 14,1
331 | 9,0
332 | 18,1
333 | 15,1
334 | 11,0
335 | 11,0
336 | 12,0
337 | 12,0
338 | 33,1
339 | 30,1
340 | 30,1
341 | 33,1
342 | 14,1
343 | 18,1
344 | 17,1
345 | 16,1
346 | 10,1
347 | 7,1
348 | 10,0
349 | 12,0
350 | 28,1
351 | 21,1
352 | 34,1
353 | 34,1
354 | 20,1
355 | 19,1
356 | 20,1
357 | 27,1
358 | 11,1
359 | 11,1
360 | 11,1
361 | 9,0
362 |
--------------------------------------------------------------------------------
/Edition2/Data/wafers.csv:
--------------------------------------------------------------------------------
1 | "Instrument","Resistance"
2 | 1,196.3052
3 | 1,196.124
4 | 1,196.189
5 | 1,196.2569
6 | 1,196.3403
7 | 2,196.3042
8 | 2,196.3825
9 | 2,196.1669
10 | 2,196.3257
11 | 2,196.0422
12 | 3,196.1303
13 | 3,196.2005
14 | 3,196.2889
15 | 3,196.0343
16 | 3,196.1811
17 | 4,196.2795
18 | 4,196.1748
19 | 4,196.1494
20 | 4,196.1485
21 | 4,195.9885
22 | 5,196.2119
23 | 5,196.1051
24 | 5,196.185
25 | 5,196.0052
26 | 5,196.209
27 |
--------------------------------------------------------------------------------
/Edition2/Errata_Edition2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Errata_Edition2.pdf
--------------------------------------------------------------------------------
/Edition2/R/Chap02EDA.R:
--------------------------------------------------------------------------------
1 | #Chap 2: Exploratory Data Analysis
2 |
3 |
4 | #Section 2.4
5 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36)
6 | qqnorm(x) # plot points
7 | qqline(x) # add straight line
8 |
9 |
10 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv")
11 |
12 | qqnorm(NCBirths$Weight)
13 | qqline(NCBirths$Weight)
14 |
15 | #---------------------------------------------------------------------------
16 | #Section 2.5
17 | #R Note
18 | x <- c(3, 6, 15, 15, 17, 19, 24)
19 | plot.ecdf(x)
20 | x <- rnorm(25) # random sample of size 25 from N(0,1)
21 | plot.ecdf(x, xlim = c(-4, 4)) # adjust x range
22 | curve(pnorm(x), col = "blue", add = TRUE) # superimpose normal cdf
23 |
24 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv")
25 |
26 | beerM <- subset(Beerwings, select = Beer, subset = Gender == "M",
27 | drop = T)
28 | beerF <- subset(Beerwings, select = Beer, subset = Gender == "F",
29 | drop = T)
30 |
31 | plot.ecdf(beerM, xlab = "ounces")
32 | plot.ecdf(beerF, col = "blue", pch = 2, add = TRUE)
33 | abline(v = 25, lty = 2)
34 | legend(5, .8, legend = c("Males", "Females"),
35 | col = c("black", "blue"), pch = c(19, 2))
36 |
37 | #--------------------------
38 | #Section 2.6
39 | plot(Beer ~ Hotwings, data = Beerwings, xlab = "Hot wings eaten",
40 | ylab = "Beer consumed")
41 |
42 | plot(Beerwings$Hotwings, Beerwings$Beer, xlab = "Hot wings eaten",
43 | ylab = "Beer consumed")
44 |
45 | plot(Beer ~ Hotwings, data = Beerwings, col = Gender, xlab = "Hot wings eaten",
46 | ylab = "Beer consumed")
47 |
--------------------------------------------------------------------------------
/Edition2/R/Chap02EDA.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 2 Exploratory Data Analysis"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 |
13 | ###Section 2.4
14 |
15 | ```{r}
16 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36)
17 | qqnorm(x) # plot points
18 | qqline(x) # add straight line
19 |
20 |
21 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv")
22 |
23 | qqnorm(NCBirths$Weight)
24 | qqline(NCBirths$Weight)
25 | ```
26 |
27 | ###Section 2.5
28 | ####R Note
29 | ```{r}
30 | x <- c(3, 6, 15, 15, 17, 19, 24)
31 | plot.ecdf(x)
32 | x <- rnorm(25) # random sample of size 25 from N(0,1)
33 | plot.ecdf(x, xlim = c(-4, 4)) # adjust x range
34 | curve(pnorm(x), col = "blue", add = TRUE) # superimpose normal cdf
35 |
36 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv")
37 |
38 | beerM <- subset(Beerwings, select = Beer, subset = Gender == "M",
39 | drop = T)
40 | beerF <- subset(Beerwings, select = Beer, subset = Gender == "F",
41 | drop = T)
42 |
43 | plot.ecdf(beerM, xlab = "ounces")
44 | plot.ecdf(beerF, col = "blue", pch = 2, add = TRUE)
45 | abline(v = 25, lty = 2)
46 | legend(5, .8, legend = c("Males", "Females"),
47 | col = c("black", "blue"), pch = c(19, 2))
48 | ```
49 |
50 | ###Section 2.6
51 | ```{r}
52 | plot(Beer ~ Hotwings, data = Beerwings, xlab = "Hot wings eaten",
53 | ylab = "Beer consumed")
54 |
55 | plot(Beerwings$Hotwings, Beerwings$Beer, xlab = "Hot wings eaten",
56 | ylab = "Beer consumed")
57 |
58 | plot(Beer ~ Hotwings, data = Beerwings, col = Gender, xlab = "Hot wings eaten",
59 | ylab = "Beer consumed")
60 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap02EDA_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 2 Exploratory Data Analysis"
3 | author: "Chihara-Hesterberg"
4 | date: "November 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ###Section 2.4
15 |
16 | ```{r}
17 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36)
18 | df <- data.frame(x)
19 | ggplot(df, aes(sample = x)) + stat_qq() + stat_qq_line()
20 |
21 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv")
22 |
23 | ggplot(NCBirths, aes(sample = Weight)) + stat_qq() + stat_qq_line()
24 |
25 | ```
26 |
27 | ###Section 2.5
28 | ####R Note
29 | ```{r}
30 | x <- c(3, 6, 15, 15, 17, 19, 24)
31 | df <- data.frame(x)
32 | ggplot(df, aes(x)) + stat_ecdf(geom = "step")
33 |
34 | # random sample of size 25 from N(0,1)
35 | df <- data.frame(x = rnorm(25))
36 |
37 | ggplot(df, aes(x)) + stat_ecdf() + stat_function(fun = pnorm, color = "red")
38 |
39 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv")
40 |
41 | ggplot(Beerwings, aes(Beer, color = Gender)) + stat_ecdf()
42 |
43 | ```
44 |
45 | ###Section 2.6
46 | ```{r}
47 |
48 | ggplot(Beerwings, aes(x=Hotwings, y = Beer)) + geom_point()
49 |
50 | ggplot(Beerwings, aes(x = Hotwings, y = Beer, color = Gender)) + geom_point()
51 |
52 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap03Testing.R:
--------------------------------------------------------------------------------
1 | #Chapter 3 Introduction to Hypothesis Testing: Permutation Tests
2 | ##-------------------------------------
3 | ##Section 3.3
4 | #Beerwings <- read.csv("https://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv")
5 |
6 | tapply(Beerwings$Hotwings, Beerwings$Gender, mean)
7 |
8 | observed <- 14.5333 - 9.3333 #store observed difference in means
9 |
10 | #Get hotwings variable
11 | hotwings <- Beerwings$Hotwings
12 |
13 | #Alternative way:
14 | hotwings <- subset(Beerwings, select = Hotwings, drop = TRUE)
15 | #drop = TRUE to convert hotwings to a vector (without this, hotwings will be a
16 | #30x1 data frame)
17 |
18 | #set.seed(0)
19 | N <- 10^5 - 1 #set number of times to repeat this process
20 | result <- numeric(N) # space to save the random differences
21 | for(i in 1:N)
22 | {
23 | index <- sample(30, size=15, replace = FALSE) # sample of numbers from 1:30
24 | result[i] <- mean(hotwings[index]) - mean(hotwings[-index])
25 | }
26 |
27 | ##Plot
28 |
29 | hist(result, xlab = "xbarM - xbarF", main = "Permutation distribution for hot wings")
30 | abline(v = observed, col = "blue", lty=5)
31 |
32 | #-------------------------
33 | #Another visualization of distribution
34 | plot.ecdf(result)
35 | abline(v = observed, col = "blue", lty = 5)
36 |
37 |
38 | #Compute P-value
39 | (sum(result >= observed) + 1)/(N + 1) #P-value
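#Adding 1 to the numerator and denominator counts the observed statistic as one of
#the possible rearrangements, so the P-value can never be exactly 0.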
40 |
41 |
42 | #----------------------------------------
43 | #Example 3.4 Verizon
44 | #Permutation test
45 |
46 | Verizon <- read.csv("https://sites.google.com/site/chiharahesterberg/data2/Verizon.csv")
47 |
48 | tapply(Verizon$Time, Verizon$Group, mean)
49 |
50 |
51 | Time <- subset(Verizon, select = Time, drop = T)
52 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE)
53 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE)
54 |
55 | observed <- mean(Time.ILEC) - mean(Time.CLEC)
56 | observed
57 |
58 |
59 | N <- 10^4-1 #set number of times to repeat this process
60 | #set.seed(99)
61 | result <- numeric(N) # space to save the random differences
62 | for(i in 1:N) {
63 | index <- sample(1687, size = 1664, replace = FALSE) #sample of numbers from 1:1687
64 | result[i] <- mean(Time[index]) - mean(Time[-index])
65 | }
66 |
67 | hist(result, xlab = "xbar1 - xbar2",
68 | main = "Permutation Distribution for Verizon repair times")
69 | abline(v = observed, col = "blue", lty = 5)
70 |
71 | (sum(result <= observed) + 1)/(N + 1) #P-value
72 |
73 |
74 | #-------------------------------------------------------
75 | #Example 3.6, Verizon cont.
76 | #median, trimmed means
77 |
78 | tapply(Verizon$Time, Verizon$Group, median)
79 |
80 | #Difference in medians
81 | observed <- median(Time.ILEC) - median(Time.CLEC)
82 | observed
83 |
84 | #Difference in trimmed means
85 | observed2 <- mean(Time.ILEC, trim = .25) - mean(Time.CLEC, trim = .25)
86 | observed2
87 |
88 | N <- 10^4-1 #set number of times to repeat this process
89 | #set.seed(99)
90 | result <- numeric(N) # space to save the random differences
91 | result2 <- numeric(N)
92 | for(i in 1:N) {
93 | index <- sample(1687, size=1664, replace = FALSE) #sample of numbers from 1:1687
94 | result[i] <- median(Time[index]) - median(Time[-index])
95 | result2[i] <- mean(Time[index], trim = .25) - mean(Time[-index], trim = .25)
96 | }
97 |
98 | hist(result, xlab = "median1 - median2",
99 | main = "Permutation Distribution for medians")
100 | abline(v = observed, col = "blue", lty = 5)
101 |
102 | #P-value difference in medians
103 | (sum(result <= observed) + 1)/(N + 1)
104 |
105 |
106 | hist(result2, xlab = "trimMean1 - trimMean2",
107 | main = "Permutation Distribution for trimmed means")
108 | abline(v = observed2, col = "blue", lty = 5)
109 |
110 | #P-value difference in trimmed means
111 | (sum(result2 <= observed2) + 1)/(N + 1)
112 |
113 | #------------------------------------------------
114 | #Example 3.6, Verizon continued
115 | #
116 | #difference in proportion of time > 10
117 | #and ratio of variances
118 | observed3 <- mean(Time.ILEC > 10) - mean(Time.CLEC > 10)
119 | observed3
120 |
121 | #ratio of variances
122 | observed4 <- var(Time.ILEC)/var(Time.CLEC)
123 | observed4
124 |
125 | N <- 10^4-1 #set number of times to repeat this process
126 | #set.seed(99)
127 | result3 <- numeric(N)
128 | result4 <- numeric(N)
129 |
130 | for(i in 1:N) {
131 | index <- sample(1687, size = 1664, replace = FALSE)
132 | result3[i] <- mean(Time[index] > 10) - mean(Time[-index] > 10)
133 | result4[i] <- var(Time[index])/var(Time[-index])
134 | }
135 |
136 |
137 |
138 | hist(result3, xlab = "Difference in proportions", main = "Repair times > 10 hours")
139 | abline(v = observed3, lty = 5, col = "blue")
140 | #P-value difference in proportion
141 | (sum(result3 <= observed3) + 1)/(N + 1) #P-value
142 |
143 |
144 | hist(result4, xlab = "variance1/variance2", main = "Ratio of variances")
145 | abline(v = observed4, lty = 5, col = "blue")
146 |
147 |
148 | #P-value ratio of variances
149 | (sum(result4 <= observed4) + 1)/(N + 1) #P-value
150 |
151 | #--------------------------------------
152 | #Example 3.8
153 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv")
154 |
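#complete.cases() returns TRUE for rows where Age25 is not missing (NA)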
155 | k <- complete.cases(Recidivism$Age25)
156 | Recid2 <- ifelse(Recidivism$Recid[k] == "Yes", 1, 0)
157 | Age25.2 <- Recidivism$Age25[k]
158 |
159 | table(Age25.2)
160 | tapply(Recid2, Age25.2, mean)
161 | observed <- .365 - .306
162 |
163 | N <- 10^4 - 1
164 | result <- numeric(N)
165 |
166 | for (i in 1:N)
167 | {
168 | index <- sample(17019, size = 3077, replace = FALSE)
169 | result[i] <- mean(Recid2[index]) - mean(Recid2[-index])
170 | }
171 |
172 | 2 * (sum(result >= observed) + 1)/(N + 1)
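#The factor of 2 converts the one-sided P-value into a two-sided P-value.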
173 |
174 | #---------------------
175 | #Section 3.4 Matched Pairs
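#Under the null hypothesis, each diver's Semifinal and Final scores are exchangeable,
#so each difference is equally likely to be positive or negative; we permute by
#randomly flipping the signs of the observed differences.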
176 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv")
177 |
178 | Diff <- Diving2017$Final - Diving2017$Semifinal
179 | observed <- mean(Diff)
180 |
181 | N <- 10^5 - 1
182 | result <- numeric(N)
183 |
184 | for (i in 1:N)
185 | {
186 | Sign <- sample(c(-1, 1), 12, replace = TRUE)
187 | Diff2 <- Sign*Diff
188 | result[i] <- mean(Diff2)
189 | }
190 |
191 | hist(result)
192 | abline(v = observed, col = "blue")
193 |
194 | 2 * (sum(result >= observed) + 1)/(N + 1)
195 |
--------------------------------------------------------------------------------
/Edition2/R/Chap03Testing_Exer.R:
--------------------------------------------------------------------------------
1 | #Chapter 3 Introduction to Hypothesis Testing: Permutation Tests
2 | #R Code for exercise
3 |
4 | #-----------------
5 | #Exercise 7
6 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv")
7 |
8 | N <- 10^4 - 1
9 |
10 | UA.Delay <- subset(FlightDelays, select = Delay, Carrier == "UA", drop = TRUE)
11 | AA.Delay <- subset(FlightDelays, select = Delay, Carrier == "AA", drop = TRUE)
12 |
13 | observedSumUA <- sum(UA.Delay)
14 | observedmeanUA <- mean(UA.Delay)
15 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay)
16 | m <- length(UA.Delay) #number of UA observations
17 |
18 | sumUA <- numeric(N)
19 | meanUA <- numeric(N)
20 | meanDiff <- numeric(N)
21 |
22 | set.seed(0)
23 | for (i in 1:N)
24 | {
25 | index <- sample(4029, m, replace = FALSE)
26 | sumUA[i] <- sum(FlightDelays$Delay[index])
27 | meanUA[i] <- mean(FlightDelays$Delay[index])
28 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index])
29 |
30 | }
31 |
32 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value
33 |
34 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value
35 |
36 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value
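#Note: with the group sizes fixed, sum(UA), mean(UA), and the difference in means are
#increasing functions of one another across permutations, so the three P-values agree.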
37 |
38 | #-------------------------------
39 |
--------------------------------------------------------------------------------
/Edition2/R/Chap03Testing_Exer.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap03Testing_Exer"
3 | author: "Chihara-Hesterberg"
4 | date: "July 20, 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 | ##Chapter 3 Introduction to Hypothesis Testing: Permutation Tests
13 | ##Exercises
14 |
15 | ###Exercise 7
16 | ```{r}
17 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv")
18 |
19 | N<-10^4-1
20 |
21 | UA.Delay <- subset(FlightDelays, select = Delay, Carrier == "UA", drop = TRUE)
22 | AA.Delay <- subset(FlightDelays, select = Delay, Carrier == "AA", drop = TRUE)
23 |
24 | observedSumUA <- sum(UA.Delay)
25 | observedmeanUA <- mean(UA.Delay)
26 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay)
27 | m <-length(UA.Delay) #number of UA observations
28 |
29 | sumUA<-numeric(N)
30 | meanUA<-numeric(N)
31 | meanDiff<-numeric(N)
32 |
33 | set.seed(0)
34 | for (i in 1:N)
35 | {
36 | index <- sample(4029, m, replace = FALSE)
37 | sumUA[i] <- sum(FlightDelays$Delay[index])
38 | meanUA[i] <- mean(FlightDelays$Delay[index])
39 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index])
40 |
41 | }
42 |
43 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value
44 |
45 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value
46 |
47 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value
48 |
49 | ```
50 |
--------------------------------------------------------------------------------
/Edition2/R/Chap03Testing_Exer_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap03Testing_Exer"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(dplyr)
11 | library(ggplot2)
12 | ```
13 |
14 | ##Chapter 3 Introduction to Hypothesis Testing: Permutation Tests
15 | ##Exercises
16 |
17 | ###Exercise 7
18 | ```{r}
19 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv")
20 |
21 | N<-10^4-1
22 | UA.Delay <- FlightDelays %>% filter(Carrier == "UA") %>% pull(Delay)
23 | AA.Delay <- FlightDelays %>% filter(Carrier == "AA") %>% pull(Delay)
24 |
25 | observedSumUA <- sum(UA.Delay)
26 | observedmeanUA <- mean(UA.Delay)
27 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay)
28 | m <-length(UA.Delay) #number of UA observations
29 |
30 | sumUA<-numeric(N)
31 | meanUA<-numeric(N)
32 | meanDiff<-numeric(N)
33 |
34 | set.seed(0)
35 | for (i in 1:N)
36 | {
37 | index <- sample(4029, m, replace = FALSE)
38 | sumUA[i] <- sum(FlightDelays$Delay[index])
39 | meanUA[i] <- mean(FlightDelays$Delay[index])
40 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index])
41 |
42 | }
43 |
44 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value
45 |
46 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value
47 |
48 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value
49 |
50 | ```
51 |
--------------------------------------------------------------------------------
/Edition2/R/Chap04SamplingDist.R:
--------------------------------------------------------------------------------
1 | ###Chapter 4: Sampling Distributions
2 |
3 | #---------------------------------------------
4 | #Example 4.2: Sampling distribution from Exp(1/15)
5 | Xbar <- numeric(1000)
6 | #set.seed(300)
7 | for (i in 1:1000)
8 | {
9 | x <- rexp(100, rate = 1/15)
10 | Xbar[i] <- mean(x)
11 | }
12 |
13 | hist(Xbar, main="Simulated sampling distribution", xlab="means")
14 |
15 | qqnorm(Xbar)
16 | qqline(Xbar)
17 |
18 | mean(Xbar)
19 | sd(Xbar)
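#Compare with theory: for Exp(1/15), E(Xbar) = 15 and SE(Xbar) = 15/sqrt(100) = 1.5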
20 |
21 | #----------------------------------------------------
22 | ##Example 4.3: Sampling Dist from Unif[0,1]
23 |
24 | maxY <- numeric(1000)
25 | #set.seed(100)
26 | for (i in 1:1000)
27 | {
28 | y <- runif(12) #draw random sample of size 12
29 | maxY[i] <- max(y) #find max, save in position i
30 | }
31 |
32 | hist(maxY, main = "", xlab = "maximums")
33 |
34 | #To create a histogram with a density curve superimposed,
35 | #scale bars to have area one with the prob=TRUE option
36 | hist(maxY, main = "", xlab = "maximums", prob = TRUE)
37 |
38 | #add pdf to histogram
39 | curve(12*x^{11}, col = "blue", add = TRUE)
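#The max of 12 independent Unif(0,1) random variables has cdf x^12 on [0,1],
#hence density 12*x^11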
40 |
41 | #---------------------------------------------
42 | #Example 4.6 Sum of Poisson random variables
43 |
44 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5)
45 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12)
46 | W <- X + Y
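#The sum of independent Poissons is Poisson: W ~ Pois(5 + 12) = Pois(17)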
47 |
48 | hist(W, prob = TRUE) #prob = TRUE, scales hist to 1
49 | lines(2:35, dpois(2:35, 17), type = "b") #overlay pmf for Pois(17)
50 |
51 | mean(W)
52 | var(W)
53 |
54 | #------------------------------------------------
55 | #Example 4.7
56 | #Sampling distribution simulation
57 | #Sample of size 30 from gamma r=5, lambda=2
58 |
59 | #set.seed(10)
60 | Xbar <- numeric(1000)
61 | for (i in 1:1000)
62 | {
63 | x <- rgamma(30, shape = 5, rate = 2)
64 | Xbar[i] <- mean(x)
65 | }
66 |
67 | hist(Xbar, main = "Distribution of means")
68 |
69 | qqnorm(Xbar)
70 | qqline(Xbar)
71 |
72 | mean(Xbar)
73 | sd(Xbar)
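#Compare with theory: E(Xbar) = 5/2 = 2.5 and SE(Xbar) = sqrt(5/4)/sqrt(30), about 0.204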
74 | sum(Xbar > 3)/1000
75 | #alternatively
76 | mean(Xbar > 3)
77 |
78 | #----------------------------------------------
79 | #Example 4.11 R Note
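#dbinom gives the pmf: P(X = 25) for X ~ Binom(120, 0.3)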
80 | dbinom(25, 120, .3)
81 |
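#pbinom gives the cdf: P(X <= 25)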
82 | pbinom(25, 120, .3)
83 |
--------------------------------------------------------------------------------
/Edition2/R/Chap04SamplingDist.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 4 Sampling Distributions"
3 | author: "Chihara-Hesterberg"
4 | date: "July 20, 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 | ###Example 4.2: Sampling distribution from Exp(1/15)
13 | ```{r}
14 | Xbar <- numeric(1000)
15 | #set.seed(300)
16 | for (i in 1:1000)
17 | {
18 | x <- rexp(100, rate = 1/15)
19 | Xbar[i] <- mean(x)
20 | }
21 |
22 | hist(Xbar, main="Simulated sampling distribution", xlab="means")
23 |
24 | qqnorm(Xbar)
25 | qqline(Xbar)
26 |
27 | mean(Xbar)
28 | sd(Xbar)
29 | ```
30 |
31 | ###Example 4.3: Sampling Dist from Unif[0,1]
32 | ```{r}
33 | maxY <- numeric(1000)
34 | #set.seed(100)
35 | for (i in 1:1000)
36 | {
37 | y <- runif(12) #draw random sample of size 12
38 | maxY[i] <- max(y) #find max, save in position i
39 | }
40 |
41 | hist(maxY, main = "", xlab = "maximums")
42 | ```
43 |
44 | To create a histogram with a density curve superimposed,
45 | scale the bars to have total area one with the `prob=TRUE` argument.
46 | The `curve()` command can then be used to add the density curve.
47 |
48 | ```{r}
49 | hist(maxY, main = "", xlab = "maximums", prob = TRUE)
50 | curve(12*x^{11}, col = "blue", add = TRUE)
51 | ```
52 |
53 | ###Example 4.6 Sum of Poisson random variables
54 | ```{r}
55 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5)
56 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12)
57 | W <- X + Y
58 |
59 | hist(W, prob = TRUE) #prob = TRUE, scales hist to 1
60 | lines(2:35, dpois(2:35, 17), type = "b") #overlay pmf for Pois(17)
61 |
62 | mean(W)
63 | var(W)
64 | ```
65 |
66 | ###Example 4.7
67 | Sampling distribution simulation:
68 | sample of size 30 from the gamma distribution with shape r = 5 and rate lambda = 2
69 |
70 | ```{r}
71 | #set.seed(10)
72 | Xbar <- numeric(1000)
73 | for (i in 1:1000)
74 | {
75 | x <- rgamma(30, shape = 5, rate = 2)
76 | Xbar[i] <- mean(x)
77 | }
78 |
79 | hist(Xbar, main = "Distribution of means")
80 |
81 | qqnorm(Xbar)
82 | qqline(Xbar)
83 |
84 | mean(Xbar)
85 | sd(Xbar)
86 | sum(Xbar > 3)/1000
87 | #alternatively
88 | mean(Xbar > 3)
89 | ```
90 |
91 | ###Example 4.11 R Note
92 |
93 | ```{r}
94 | dbinom(25, 120, .3)
95 |
96 | pbinom(25, 120, .3)
97 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap04SamplingDist_Exer.R:
--------------------------------------------------------------------------------
1 | ##Chapter 4 Sampling Distributions
2 | ##Exercises
3 | ##R Scripts
4 | ##
5 | ##-----------------------------
6 | #Exercise 4
7 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20)
8 | N <- 10^4
9 | Xbar <- numeric(N)
10 |
11 | for (i in 1:N)
12 | {
13 | samp <- sample(pop, 4, replace = TRUE)
14 | Xbar[i] <- mean(samp)
15 | }
16 |
17 | hist(Xbar)
18 | mean(Xbar < 11)
19 |
20 | #----------------------------------------------
21 | #Exercise 6
22 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv")
23 | N <- 10^4
24 | phat <- numeric(N)
25 | n <- 25
26 |
27 | for (i in 1:N)
28 | {
29 | samp <- sample(Recidivism$Recid, n)
30 | phat[i] <- mean(samp == "Yes")
31 | }
32 |
33 | #c) change n <- 250
34 |
35 | #----------------------------------------------------------------------------
36 | #Exercise 19
37 | ## X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2)
38 | ## W = mean(X)+mean(Y)
39 | W <- numeric(1000)
40 | set.seed(0)
41 | for (i in 1:1000)
42 | {
43 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2)
44 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2)
45 | W[i] <- mean(x) + mean(y) #save sum of means
46 | }
47 |
48 | hist(W)
49 |
50 | mean(W < 40)
51 |
52 |
53 | #--------------------
54 | #Exercise 22
55 |
56 | X <- runif(1000, 40, 60)
57 | Y <- runif(1000, 45, 80)
58 |
59 | total <- X + Y
60 |
61 | hist(total)
62 |
63 | #----------------
64 | #Exercise 33 Finite population simulation
65 |
66 | N <- 400 # population size
67 | n <- 5 # sample size
68 |
69 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from
70 | # Exp(1/10)
71 | hist(finpop) # distribution of your finite pop.
72 | mean(finpop) # mean (mu) of your pop.
73 | sd(finpop) # stdev (sigma) of your pop.
74 | sd(finpop)/sqrt(n) # theoretical standard error of sampling
75 | # dist. of mean(x), with replacement
76 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement
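# sqrt((N-n)/(N-1)) is the finite population correction factor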
77 |
78 | Xbar <- numeric(1000)
79 | for (i in 1:1000)
80 | {
81 | x <- sample(finpop, n) # Random sample of size n (w/o replacement)
82 | Xbar[i] <- mean(x) # Find mean of sample, store in Xbar
83 | }
84 | hist(Xbar)
85 |
86 | qqnorm(Xbar)
87 | qqline(Xbar)
88 |
89 | mean(Xbar)
90 | sd(Xbar) # estimated standard error of sampling
91 | # distribution
92 |
93 | #----------------------------
94 | #Exercise 34
95 | W <- numeric(1000)
96 | for (i in 1:1000)
97 | {
98 | x <- rnorm(20, 25, 7)
99 | W[i] <- var(x)
100 | }
101 | mean(W)
102 | var(W)
103 | hist(W)
104 |
105 | qqnorm(W)
106 | qqline(W)
107 |
--------------------------------------------------------------------------------
/Edition2/R/Chap04SamplingDist_Exer.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 4 Sampling Distribution-Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "July 20, 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 | ###Exercise 4
13 | ```{r}
14 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20)
15 | N <- 10^4
16 | Xbar <- numeric(N)
17 |
18 | for (i in 1:N)
19 | {
20 | samp <- sample(pop, 4, replace = TRUE)
21 | Xbar[i] <- mean(samp)
22 | }
23 |
24 | hist(Xbar)
25 | mean(Xbar < 11)
26 | ```
27 |
28 | ###Exercise 6
29 | ```{r}
30 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv")
31 | N <- 10^4
32 | phat <- numeric(N)
33 | n <- 25
34 |
35 | for (i in 1:N)
36 | {
37 | samp <- sample(Recidivism$Recid, n)
38 | phat[i] <- mean(samp == "Yes")
39 | }
40 |
41 | #c) change n <- 250
42 | ```
43 |
44 | ###Exercise 19
45 | X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2)
46 | W = mean(X) + mean(Y)
47 | ```{r}
48 |
49 | W <- numeric(1000)
50 | set.seed(0)
51 | for (i in 1:1000)
52 | {
53 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2)
54 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2)
55 | W[i] <- mean(x) + mean(y) #save sum of means
56 | }
57 |
58 | hist(W)
59 |
60 | mean(W < 40)
61 | ```
62 |
63 |
64 | ###Exercise 22
65 | ```{r}
66 | X <- runif(1000, 40, 60)
67 | Y <- runif(1000, 45, 80)
68 |
69 | total <- X + Y
70 |
71 | hist(total)
72 | ```
73 |
74 | ###Exercise 33
75 | Finite population simulation
76 |
77 | ```{r}
78 | N <- 400 # population size
79 | n <- 5 # sample size
80 |
81 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from
82 | # Exp(1/10)
83 | hist(finpop) # distribution of your finite pop.
84 | mean(finpop) # mean (mu) of your pop.
85 | sd(finpop) # stdev (sigma) of your pop.
86 | sd(finpop)/sqrt(n) # theoretical standard error of sampling
87 | # dist. of mean(x), with replacement
88 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement
89 |
90 | Xbar <- numeric(1000)
91 | for (i in 1:1000)
92 | {
93 | x <- sample(finpop, n) # Random sample of size n (w/o replacement)
94 |   Xbar[i] <- mean(x)      # Find mean of sample, store in Xbar
95 | }
96 | hist(Xbar)
97 |
98 | qqnorm(Xbar)
99 | qqline(Xbar)
100 |
101 | mean(Xbar)
102 | sd(Xbar) # estimated standard error of sampling
103 | # distribution
104 | ```
105 |
106 | ###Exercise 34
107 | ```{r}
108 | W <- numeric(1000)
109 | for (i in 1:1000)
110 | {
111 | x <- rnorm(20, 25, 7)
112 | W[i] <- var(x)
113 | }
114 | mean(W)
115 | var(W)
116 | hist(W)
117 |
118 | qqnorm(W)
119 | qqline(W)
120 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap04SamplingDist_Exer_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 4 Sampling Distribution-Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ###Exercise 4
15 | ```{r}
16 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20)
17 | N <- 10^4
18 | Xbar <- numeric(N)
19 |
20 | for (i in 1:N)
21 | {
22 | samp <- sample(pop, 4, replace = TRUE)
23 | Xbar[i] <- mean(samp)
24 | }
25 |
26 | ggplot() + geom_histogram(aes(Xbar), bins = 10)
27 |
28 | mean(Xbar < 11)
29 | ```
30 |
31 | ###Exercise 6
32 | ```{r}
33 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv")
34 | N <- 10^4
35 | phat <- numeric(N)
36 | n <- 25
37 |
38 | for (i in 1:N)
39 | {
40 | samp <- sample(Recidivism$Recid, n)
41 | phat[i] <- mean(samp == "Yes")
42 | }
43 |
44 | #c) change n <- 250
45 | ```
46 |
47 | ###Exercise 19
48 | X1,...,X10 ~ N(20, 8^2); Y1,...,Y15 ~ N(16, 7^2)
49 | W = mean(X) + mean(Y)
50 | ```{r}
51 |
52 | W <- numeric(1000)
53 | set.seed(0)
54 | for (i in 1:1000)
55 | {
56 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2)
57 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2)
58 | W[i] <- mean(x) + mean(y) #save sum of means
59 | }
60 |
61 | ggplot() + geom_histogram(aes(W), bins = 12)
62 |
63 | mean(W < 40)
64 | ```
65 |
66 |
67 | ###Exercise 22
68 | ```{r}
69 | X <- runif(1000, 40, 60)
70 | Y <- runif(1000, 45, 80)
71 |
72 | total <- X + Y
73 |
74 | ggplot() + geom_histogram(aes(total), bins = 12)
75 | ```
76 |
77 | ###Exercise 33
78 | Finite population simulation
79 |
80 | ```{r}
81 | N <- 400 # population size
82 | n <- 5 # sample size
83 |
84 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from
85 | # Exp(1/10)
86 | ggplot() + geom_histogram(aes(finpop), bins = 12) # distribution of your finite pop.
87 |
88 | mean(finpop) # mean (mu) of your pop.
89 | sd(finpop) # stdev (sigma) of your pop.
90 | sd(finpop)/sqrt(n) # theoretical standard error of sampling
91 | # dist. of mean(x), with replacement
92 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement
93 |
94 | Xbar <- numeric(1000)
95 | for (i in 1:1000)
96 | {
97 | x <- sample(finpop, n) # Random sample of size n (w/o replacement)
98 |   Xbar[i] <- mean(x)      # Find mean of sample, store in Xbar
99 | }
100 |
101 | ggplot() + geom_histogram(aes(Xbar), bins = 12)
102 |
103 | df <- data.frame(Xbar)
104 | ggplot(df, aes(sample=Xbar)) + stat_qq() + stat_qq_line()
105 |
106 | mean(Xbar)
107 | sd(Xbar) # estimated standard error of sampling
108 | # distribution
109 | ```
110 |
111 | ###Exercise 34
112 | ```{r}
113 | W <- numeric(1000)
114 | for (i in 1:1000)
115 | {
116 | x <- rnorm(20, 25, 7)
117 | W[i] <- var(x)
118 | }
119 | mean(W)
120 | var(W)
121 |
122 | ggplot() + geom_histogram(aes(W), bins = 10)
123 |
124 | df <- data.frame(W)
125 | ggplot(df, aes(sample = W)) + stat_qq() + stat_qq_line()
126 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap04SamplingDist_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 4 Sampling Distributions"
3 | author: "Chihara-Hesterberg"
4 | date: "November 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(dplyr)
11 | library(ggplot2)
12 | ```
13 |
14 | ###Example 4.2: Sampling distribution from Exp(1/15)
15 | ```{r}
16 | Xbar <- numeric(1000)
17 | #set.seed(300)
18 | for (i in 1:1000)
19 | {
20 | x <- rexp(100, rate = 1/15)
21 | Xbar[i] <- mean(x)
22 | }
23 |
24 | ggplot() + geom_histogram(aes(Xbar), bins = 15) + xlab("means")
25 |
26 | df <- data.frame(Xbar)
27 | ggplot(df, aes(sample=Xbar)) + stat_qq() + stat_qq_line()
28 |
29 | mean(Xbar)
30 | sd(Xbar)
31 | ```
32 |
33 | ###Example 4.3: Sampling Dist from Unif[0,1]
34 | ```{r}
35 | maxY <- numeric(1000)
36 | #set.seed(100)
37 | for (i in 1:1000)
38 | {
39 | y <- runif(12) #draw random sample of size 12
40 | maxY[i] <- max(y) #find max, save in position i
41 | }
42 |
43 | ggplot() + geom_histogram(aes(maxY), binwidth=.05, center=.975) + xlab("maximums")
44 |
45 | ```
46 |
47 | To create a histogram with a density curve imposed, we will need to create a data frame that holds the 'maxY' variable. We also create a function for the density curve $f(x)=12x^{11}$.
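(The curve $f(x)=12x^{11}$ arises because the maximum of 12 independent Unif(0,1) random variables has CDF $P(\max \le x)=x^{12}$; differentiating gives the density $12x^{11}$.)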
48 |
49 |
50 | ```{r}
51 | df <- data.frame(maxY)
52 | myfun <- function(x){12*x^{11}}
53 |
54 | ggplot(df) + geom_histogram(aes(maxY, y = stat(density)), binwidth=.05, center=.975) +xlab("maximums") + stat_function(fun = myfun)
55 | ```
56 |
57 | ###Example 4.6 Sum of Poisson random variables
58 | ```{r}
59 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5)
60 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12)
61 | W <- X + Y
62 |
63 | df1 <- data.frame(W)
64 | df2 <- data.frame(x=2:35, y = dpois(2:35,17))
65 | ggplot(df1, aes(W)) + geom_histogram(aes(y=stat(density)), bins=12) + geom_line(data=df2, aes(x=x, y=y), colour = "red")
66 |
67 | mean(W)
68 | var(W)
69 | ```
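Since the sum of independent Poisson random variables is again Poisson, $W = X + Y \sim \text{Pois}(5+12) = \text{Pois}(17)$, which is why the overlaid curve uses `dpois(2:35, 17)`.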
70 |
71 | ###Example 4.7
72 | Sampling distribution simulation
73 | Sample of size 30 from gamma r=5, lambda=2
74 |
75 | ```{r}
76 | #set.seed(10)
77 | Xbar <- numeric(1000)
78 | for (i in 1:1000)
79 | {
80 | x <- rgamma(30, shape = 5, rate = 2)
81 | Xbar[i] <- mean(x)
82 | }
83 |
84 | ggplot() + geom_histogram(aes(Xbar), bins=15) + labs(title = "Distribution of means")
85 |
86 | ggplot() + stat_qq(aes(sample = Xbar))
87 |
88 | #If you want a line, then
89 | df <- data.frame(Xbar)
90 | ggplot(df, aes(sample = Xbar)) + stat_qq() + stat_qq_line()
91 |
92 | mean(Xbar)
93 | sd(Xbar)
94 | sum(Xbar > 3)/1000
95 | #alternatively
96 | mean(Xbar > 3)
97 | ```
98 |
99 | ###Example 4.11 R Note
100 |
101 | ```{r}
102 | dbinom(25, 120, .3)
103 |
104 | pbinom(25, 120, .3)
105 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap05Bootstrap_Exer.R:
--------------------------------------------------------------------------------
1 | ##Chapter 5 The Bootstrap
2 |
3 | #Exercises
4 |
5 | #------------------------------------------------
6 | #Exercise 10 (medians)
7 | ##
8 | ne <- 10000 # n even
9 | no <- 10001 # n odd
10 |
11 | wwe <- rnorm(ne) # draw random sample of size ne
12 | wwo <- rnorm(no) # draw random sample of size no
13 |
14 | N <- 10^4
15 | even.boot <- numeric(N) #save space
16 | odd.boot <- numeric(N)
17 | set.seed(10)
18 | for (i in 1:N)
19 | {
20 | x.even <- sample(wwe, ne, replace = TRUE)
21 | x.odd <- sample(wwo, no, replace = TRUE)
22 | even.boot[i] <- median(x.even)
23 | odd.boot[i] <- median(x.odd)
24 | }
25 |
26 | par(mfrow = c(2, 1))
27 | hist(even.boot, xlim = c(-1, 1)) #set x range to be
28 | hist(odd.boot, xlim = c(-1, 1)) #same in both plots
29 | par(mfrow = c(1, 1)) #reset to original
30 |
31 | #-----------------------------------
32 | #Exercise 20
33 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv")
34 | N <- 10^5
35 | result <- numeric(N)
36 | for (i in 1:N)
37 | {
38 | index <- sample(12, replace = TRUE)
39 | Dive.boot <- Diving2017[index, ]
40 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal)
41 | }
42 |
43 | hist(result)
44 | quantile(result, c(0.025, 0.975))
45 |
--------------------------------------------------------------------------------
/Edition2/R/Chap05Bootstrap_Exer.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 5 Bootstrap - Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 |
12 | ###Exercise 10
13 |
14 | Simulate bootstrap for medians
15 | ```{r}
16 | ne <- 10000 # n even
17 | no <- 10001 # n odd
18 |
19 | wwe <- rnorm(ne) # draw random sample of size ne
20 | wwo <- rnorm(no) # draw random sample of size no
21 |
22 | N <- 10^4
23 | even.boot <- numeric(N) #save space
24 | odd.boot <- numeric(N)
25 | set.seed(10)
26 | for (i in 1:N)
27 | {
28 | x.even <- sample(wwe, ne, replace = TRUE)
29 | x.odd <- sample(wwo, no, replace = TRUE)
30 | even.boot[i] <- median(x.even)
31 | odd.boot[i] <- median(x.odd)
32 | }
33 |
34 | par(mfrow = c(2, 1))
35 | hist(even.boot, xlim = c(-1, 1)) #set x range to be
36 | hist(odd.boot, xlim = c(-1, 1)) #same in both plots
37 | par(mfrow = c(1, 1)) #reset to original
38 | ```
39 |
40 | ###Exercise 20
41 | ```{r}
42 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv")
43 | N <- 10^5
44 | result <- numeric(N)
45 | for (i in 1:N)
46 | {
47 | index <- sample(12, replace = TRUE)
48 | Dive.boot <- Diving2017[index, ]
49 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal)
50 | }
51 |
52 | hist(result)
53 | quantile(result, c(0.025, 0.975))
54 | ```
55 |
--------------------------------------------------------------------------------
/Edition2/R/Chap05Bootstrap_Exer_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chapter 5 Bootstrap - Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ###Exercise 10
15 |
16 | Simulate bootstrap for medians
17 | ```{r}
18 | ne <- 10000 # n even
19 | no <- 10001 # n odd
20 |
21 | wwe <- rnorm(ne) # draw random sample of size ne
22 | wwo <- rnorm(no) # draw random sample of size no
23 |
24 | N <- 10^4
25 | even.boot <- numeric(N) #save space
26 | odd.boot <- numeric(N)
27 | #set.seed(10)
28 | for (i in 1:N)
29 | {
30 | x.even <- sample(wwe, ne, replace = TRUE)
31 | x.odd <- sample(wwo, no, replace = TRUE)
32 | even.boot[i] <- median(x.even)
33 | odd.boot[i] <- median(x.odd)
34 | }
35 |
36 | range(even.boot)
37 | range(odd.boot)
38 | p1 <- ggplot() + geom_histogram(aes(even.boot), breaks = seq(-.06, .04, by = .005))
39 | p2 <- ggplot() + geom_histogram(aes(odd.boot), breaks = seq(-.06, .04, by = .005))
40 |
41 | library(gridExtra)
42 | grid.arrange(p1,p2)
43 |
44 | ```
45 |
46 | ###Exercise 20
47 | ```{r}
48 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv")
49 | N <- 10^5
50 | result <- numeric(N)
51 | for (i in 1:N)
52 | {
53 | index <- sample(12, replace = TRUE)
54 | Dive.boot <- Diving2017[index, ]
55 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal)
56 | }
57 |
58 | ggplot() + geom_histogram(aes(result), bins = 12)
59 |
60 | quantile(result, c(0.025, 0.975))
61 | ```
62 |
--------------------------------------------------------------------------------
/Edition2/R/Chap07MoreConfIntervals.R:
--------------------------------------------------------------------------------
1 | #Chapter 7 More Confidence Intervals
2 |
3 | #-----------------------------------------------------
4 | #Section 7.1.1 CI for normal with known sigma
5 |
6 | #set.seed(1)
7 | counter <- 0 # set counter to 0
8 | plot(x = c(22, 28), y = c(1, 100), type = "n",
9 | xlab = "", ylab = "")
10 | abline(v = 25, col="red") # vertical line at mu
11 | for (i in 1:1000)
12 | {
13 | x <- rnorm(30, 25, 4) # draw a random sample of size 30
14 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit
15 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit
16 | if (L < 25 && 25 < U) # check to see if 25 is in interval
17 | counter <- counter + 1 # increase counter by 1
18 | if (i <= 100) #plot first 100 intervals
19 | segments(L, i, U, i)
20 | }
21 |
22 | abline(v = 25, col = "red") #vertical line at mu
23 |
24 | counter/1000 # proportion of times interval contains mu.
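# (1.96 = qnorm(0.975), the standard normal 0.975 quantile; with known
# sigma = 4 and n = 30 each interval is xbar +/- 1.96*4/sqrt(30).)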
25 |
26 | #---------------------------------------------------------------
27 | # Section 7.1.2
28 | # Simulate distribution of t statistic
29 | N <- 10^4
30 | w <- numeric(N)
31 | n <- 15 #sample size
32 | for (i in 1:N)
33 | {
34 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2)
35 | xbar <- mean(x)
36 | s <- sd(x)
37 | w[i] <- (xbar-25) / (s/sqrt(n))
38 | }
39 |
40 | hist(w)
41 |
42 | qqnorm(w, pch = ".")
43 | abline(0, 1, col = 2) # y = x line
44 |
45 | #pch = "." is point character. This option says to use . for the points.
46 |
47 | #----------------------------------------------------------
48 | # Example 7.7 Simulation of a 95% confidence interval from a
49 | #             skewed gamma distribution
50 | # set.seed(0)
51 |
52 | tooLow <- 0 #set counter to 0
53 | tooHigh <- 0  #set counter to 0
54 | n <- 20 # sample size
55 | N <- 10^5
56 | for (i in 1:N)
57 | {
58 | x <- rgamma(n, shape=5, rate=2)
59 | xbar <- mean(x)
60 | s <- sd(x)
61 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n)
62 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n)
63 | if (upper < 5/2) tooLow <- tooLow + 1
64 | if (lower > 5/2) tooHigh <- tooHigh + 1
65 | }
66 | tooLow/N
67 | tooHigh/N
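# (Gamma(shape = 5, rate = 2) has mean shape/rate = 5/2, so tooLow and
# tooHigh count the intervals that miss the true mean on each side.)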
68 |
69 |
70 |
71 | #----------------------------------------
72 | # Example 7.21 One sample bootstrap t confidence interval
73 |
74 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv")
75 | Arsenic <- subset(Bangladesh, select = Arsenic, drop = TRUE)
76 |
77 | xbar <- mean(Arsenic)
78 | N <- 10^4
79 | n <- length(Arsenic)
80 | Tstar <- numeric(N)
81 | #set.seed(100)
82 | for (i in 1:N)
83 | {
84 |   x <- sample(Arsenic, size = n, replace = TRUE)
85 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n))
86 | }
87 |
88 | quantile(Tstar, c(0.025, 0.975))
89 |
90 | hist(Tstar, xlab = "T*", main = "Bootstrap distribution of T*")
91 |
92 | dev.new()
93 | qqnorm(Tstar)
94 | qqline(Tstar)
95 |
96 | #-------------------------------------------------------
97 | # Example 7.22 Verizon
98 | # 2-Sample bootstrap t confidence interval
99 |
100 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv")
101 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE)
102 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE)
103 |
104 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC)
105 | nx <- length(Time.ILEC) #nx=1664
106 | ny <- length(Time.CLEC) #ny=23
107 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny)
108 |
109 | N <- 10000
110 | Tstar <- numeric(N)
111 | set.seed(0)
112 | for(i in 1:N)
113 | {
114 | bootx <- sample(Time.ILEC, nx, replace = TRUE)
115 | booty <- sample(Time.CLEC, ny, replace = TRUE)
116 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) /
117 | sqrt(var(bootx)/nx + var(booty)/ny)
118 | }
119 |
120 | thetahat - quantile(Tstar, c(.975, .025)) * SE
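# Note the reversed quantiles: inverting T* = (thetahat* - thetahat)/SE*
# gives the bootstrap t interval
# (thetahat - q_.975*SE, thetahat - q_.025*SE).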
121 |
122 | t.test(Time.ILEC, Time.CLEC)$conf
123 |
124 | #----------------------------------------------------------------
125 |
--------------------------------------------------------------------------------
/Edition2/R/Chap07MoreConfIntervals.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 7 More Confidence Intervals"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 | ###Section 7.1.1
13 | CI for normal with known sigma
14 | ```{r, out.width="100%"}
15 | #set.seed(1)
16 | counter <- 0 # set counter to 0
17 | plot(x = c(22, 28), y = c(1, 100), type = "n",
18 | xlab = "", ylab = "")
19 | abline(v = 25, col="red") # vertical line at mu
20 | for (i in 1:1000)
21 | {
22 | x <- rnorm(30, 25, 4) # draw a random sample of size 30
23 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit
24 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit
25 | if (L < 25 && 25 < U) # check to see if 25 is in interval
26 | counter <- counter + 1 # increase counter by 1
27 | if (i <= 100) #plot first 100 intervals
28 | segments(L, i, U, i)
29 | }
30 |
31 | abline(v = 25, col = "red") #vertical line at mu
32 |
33 | counter/1000 # proportion of times interval contains mu.
34 | ```
35 |
36 | ### Section 7.1.2
37 | Simulate distribution of t statistic
38 | ```{r}
39 | N <- 10^4
40 | w <- numeric(N)
41 | n <- 15 #sample size
42 | for (i in 1:N)
43 | {
44 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2)
45 | xbar <- mean(x)
46 | s <- sd(x)
47 | w[i] <- (xbar-25) / (s/sqrt(n))
48 | }
49 |
50 | hist(w)
51 |
52 | qqnorm(w, pch = ".")
53 | abline(0, 1, col = 2) # y = x line
54 | ```
55 | The `pch = "."` argument in the `qqnorm` command sets the point character; here, each point is drawn as a single dot.
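For example, `qqnorm(w, pch = 20)` would draw small solid circles instead.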
56 |
57 |
58 | ### Example 7.7
59 | Simulation of a 95% confidence interval from a
60 | skewed gamma distribution
61 | ```{r}
62 | # set.seed(0)
63 |
64 | tooLow <- 0 #set counter to 0
65 | tooHigh <- 0  #set counter to 0
66 | n <- 20 # sample size
67 | N <- 10^5
68 | for (i in 1:N)
69 | {
70 | x <- rgamma(n, shape=5, rate=2)
71 | xbar <- mean(x)
72 | s <- sd(x)
73 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n)
74 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n)
75 | if (upper < 5/2) tooLow <- tooLow + 1
76 | if (lower > 5/2) tooHigh <- tooHigh + 1
77 | }
78 | tooLow/N
79 | tooHigh/N
80 | ```
81 |
82 |
83 | ### Example 7.21
84 | One sample bootstrap t confidence interval
85 |
86 | ```{r}
87 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv")
88 | Arsenic <- Bangladesh$Arsenic
89 | #Alternatively
90 | #Arsenic <- subset(Bangladesh, select = Arsenic, drop = TRUE)
91 |
92 | xbar <- mean(Arsenic)
93 | N <- 10^4
94 | n <- length(Arsenic)
95 | Tstar <- numeric(N)
96 | #set.seed(100)
97 | for (i in 1:N)
98 | {
99 |   x <- sample(Arsenic, size = n, replace = TRUE)
100 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n))
101 | }
102 |
103 | quantile(Tstar, c(0.025, 0.975))
104 |
105 | hist(Tstar, xlab = "T*", main = "Bootstrap distribution of T*")
106 |
107 | qqnorm(Tstar)
108 | qqline(Tstar)
109 | ```
110 |
111 | ### Example 7.22 Verizon
112 | 2-Sample bootstrap t confidence interval
113 |
114 | ```{r}
115 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv")
116 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE)
117 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE)
118 |
119 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC)
120 | nx <- length(Time.ILEC) #nx=1664
121 | ny <- length(Time.CLEC) #ny=23
122 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny)
123 |
124 | N <- 10000
125 | Tstar <- numeric(N)
126 | set.seed(0)
127 | for(i in 1:N)
128 | {
129 | bootx <- sample(Time.ILEC, nx, replace = TRUE)
130 | booty <- sample(Time.CLEC, ny, replace = TRUE)
131 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) /
132 | sqrt(var(bootx)/nx + var(booty)/ny)
133 | }
134 |
135 | thetahat - quantile(Tstar, c(.975, .025)) * SE
136 |
137 | t.test(Time.ILEC, Time.CLEC)$conf
138 | ```
139 |
140 |
--------------------------------------------------------------------------------
/Edition2/R/Chap07MoreConfIntervals_Exer.R:
--------------------------------------------------------------------------------
1 | #Chapter 7: More confidence intervals
2 | #Exercises
3 |
4 | #Exercise 9
5 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv")
6 | mu <- mean(FlightDelays$Delay)
7 |
8 | counter <- 0
9 | plot(c(-20, 100), c(1, 100), type = "n")
10 |
11 | for (i in 1:1000)
12 | {
13 | x <- sample(FlightDelays$Delay, 30, replace = FALSE)
14 | L <- t.test(x)$conf.int[1]
15 | U <- t.test(x)$conf.int[2]
16 |
17 | if (L < mu && mu < U)
18 | counter <- counter + 1
19 | if (i <= 100)
20 | segments(L, i, U, i)
21 | }
22 |
23 | abline(v = mu, col = "red")
24 | counter/1000
25 |
26 | #-------------------------------------------------------
27 | #Exercise 22
28 | #Simulation to compare pooled/unpooled t-confidence intervals
29 |
30 | pooled.count <- 0
31 | unpooled.count <- 0
32 |
33 | m <- 20
34 | n <- 10
35 |
36 | B <- 10000
37 | for (i in 1:B)
38 | {
39 | x <- rnorm(m, 8,10)
40 | y <- rnorm(n, 3, 15)
41 |
42 | CI.pooled <- t.test(x,y,var.equal=T)$conf
43 | CI.unpooled <- t.test(x,y)$conf
44 |
45 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2])
46 | pooled.count <- pooled.count + 1
47 |
48 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2])
49 | unpooled.count <- unpooled.count + 1
50 | }
51 |
52 | pooled.count/B
53 |
54 | unpooled.count/B
55 |
56 | #-----------------
57 |
--------------------------------------------------------------------------------
/Edition2/R/Chap07MoreConfIntervals_Exer.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 7 More Confidence Intervals - Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 | ###Exercise 9
13 | ```{r}
14 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv")
15 | mu <- mean(FlightDelays$Delay)
16 |
17 | counter <- 0
18 | plot(c(-20, 100), c(1, 100), type = "n")
19 |
20 | for (i in 1:1000)
21 | {
22 | x <- sample(FlightDelays$Delay, 30, replace = FALSE)
23 | L <- t.test(x)$conf.int[1]
24 | U <- t.test(x)$conf.int[2]
25 |
26 | if (L < mu && mu < U)
27 | counter <- counter + 1
28 | if (i <= 100)
29 | segments(L, i, U, i)
30 | }
31 |
32 | abline(v = mu, col = "red")
33 | counter/1000
34 | ```
35 |
36 | ###Exercise 22
37 | Simulation to compare pooled/unpooled t-confidence intervals
38 | ```{r}
39 | pooled.count <- 0
40 | unpooled.count <- 0
41 |
42 | m <- 20
43 | n <- 10
44 |
45 | N <- 10000
46 | for (i in 1:N)
47 | {
48 | x <- rnorm(m, 8,10)
49 | y <- rnorm(n, 3, 15)
50 |
51 | CI.pooled <- t.test(x,y,var.equal=T)$conf
52 | CI.unpooled <- t.test(x,y)$conf
53 |
54 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2])
55 | pooled.count <- pooled.count + 1
56 |
57 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2])
58 | unpooled.count <- unpooled.count + 1
59 | }
60 |
61 | pooled.count/N
62 |
63 | unpooled.count/N
64 |
65 | ```
66 |
--------------------------------------------------------------------------------
/Edition2/R/Chap07MoreConfIntervals_Exer_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 7 More Confidence Intervals - Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ###Exercise 9
15 | ```{r}
16 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv")
17 | mu <- mean(FlightDelays$Delay)
18 |
19 | counter <- 0
20 |
21 | df <- data.frame(x=c(-20,100), y = c(1,100))
22 | p <- ggplot(df, aes(x = x, y = y)) + geom_vline(xintercept=mu, colour = "red")
23 |
24 | for (i in 1:1000)
25 | {
26 | x <- sample(FlightDelays$Delay, 30, replace = FALSE)
27 | L <- t.test(x)$conf.int[1]
28 | U <- t.test(x)$conf.int[2]
29 |
30 | if (L < mu && mu < U)
31 | counter <- counter + 1
32 | if (i <= 100)
33 | p <- p + annotate("segment", x = L, xend=U, y = i, yend=i )
34 |
35 | }
36 |
37 |
38 | print(p)
39 |
40 | counter/1000
41 | ```
42 |
43 | ###Exercise 22
44 | Simulation to compare pooled/unpooled t-confidence intervals
45 | ```{r}
46 | pooled.count <- 0
47 | unpooled.count <- 0
48 |
49 | m <- 20
50 | n <- 10
51 |
52 | N <- 10000
53 | for (i in 1:N)
54 | {
55 | x <- rnorm(m, 8,10)
56 | y <- rnorm(n, 3, 15)
57 |
58 | CI.pooled <- t.test(x,y,var.equal=T)$conf
59 | CI.unpooled <- t.test(x,y)$conf
60 |
61 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2])
62 | pooled.count <- pooled.count + 1
63 |
64 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2])
65 | unpooled.count <- unpooled.count + 1
66 | }
67 |
68 | pooled.count/N
69 |
70 | unpooled.count/N
71 |
72 | ```
73 |
--------------------------------------------------------------------------------
/Edition2/R/Chap07MoreConfIntervals_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 7 More Confidence Intervals"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ###Section 7.1.1
15 | CI for normal with known sigma
16 | ```{r, out.width="100%"}
17 | #set.seed(1)
18 | counter <- 0 # set counter to 0
19 | df <- data.frame(x=c(22,28), y = c(1,100))
20 | p <- ggplot(df, aes(x=x, y = y)) + geom_vline(xintercept=25, colour = "red")
21 |
22 | for (i in 1:1000)
23 | {
24 | x <- rnorm(30, 25, 4) # draw a random sample of size 30
25 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit
26 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit
27 | if (L < 25 && 25 < U) # check to see if 25 is in interval
28 | counter <- counter + 1 # increase counter by 1
29 | if (i <= 100) #plot first 100 intervals
30 | p <- p + annotate("segment", x = L, xend=U, y = i, yend=i )
31 | }
32 |
33 | print(p)
34 |
35 | counter/1000 # proportion of times interval contains mu.
36 | ```
37 |
38 | ### Section 7.1.2
39 | Simulate distribution of t statistic
40 | ```{r}
41 | N <- 10^4
42 | w <- numeric(N)
43 | n <- 15 #sample size
44 | for (i in 1:N)
45 | {
46 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2)
47 | xbar <- mean(x)
48 | s <- sd(x)
49 | w[i] <- (xbar-25) / (s/sqrt(n))
50 | }
51 |
52 | ggplot() + geom_histogram(aes(w), bins = 12)
53 |
54 | ggplot() + stat_qq(aes(sample = w)) + geom_abline(intercept = 0, slope = 1, colour = "red")
55 |
56 | ```
57 |
58 |
59 | ### Example 7.7
59 | Simulation of a 95% confidence interval from a
60 | skewed gamma distribution
62 | ```{r}
63 | # set.seed(0)
64 |
65 | tooLow <- 0 #set counter to 0
66 | tooHigh <- 0  #set counter to 0
67 | n <- 20 # sample size
68 | N <- 10^5
69 | for (i in 1:N)
70 | {
71 | x <- rgamma(n, shape=5, rate=2)
72 | xbar <- mean(x)
73 | s <- sd(x)
74 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n)
75 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n)
76 | if (upper < 5/2) tooLow <- tooLow + 1
77 | if (lower > 5/2) tooHigh <- tooHigh + 1
78 | }
79 | tooLow/N
80 | tooHigh/N
81 | ```
82 |
83 |
84 | ### Example 7.21
85 | One sample bootstrap t confidence interval
86 |
87 | ```{r}
88 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv")
89 | Arsenic <- pull(Bangladesh, Arsenic)
90 | #Alternatively: Arsenic <- Bangladesh$Arsenic
91 |
92 | xbar <- mean(Arsenic)
93 | N <- 10^4
94 | n <- length(Arsenic)
95 | Tstar <- numeric(N)
96 | #set.seed(100)
97 | for (i in 1:N)
98 | {
99 |   x <- sample(Arsenic, size = n, replace = TRUE)
100 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n))
101 | }
102 |
103 | quantile(Tstar, c(0.025, 0.975))
104 |
105 | ggplot() + geom_histogram(aes(Tstar), bins = 12) + labs(x= "T*", title = "Bootstrap distribution of T*")
106 |
107 | df <- data.frame(Tstar)
108 | ggplot(df, aes(sample = Tstar)) + stat_qq() + stat_qq_line()
109 | ```
110 |
111 | ### Example 7.22 Verizon
112 | 2-Sample bootstrap t confidence interval
113 |
114 | ```{r}
115 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv")
116 |
117 | Time.ILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time)
118 | Time.CLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time)
119 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC)
120 | nx <- length(Time.ILEC) #nx=1664
121 | ny <- length(Time.CLEC) #ny=23
122 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny)
123 |
124 | N <- 10000
125 | Tstar <- numeric(N)
126 | set.seed(0)
127 | for(i in 1:N)
128 | {
129 | bootx <- sample(Time.ILEC, nx, replace = TRUE)
130 | booty <- sample(Time.CLEC, ny, replace = TRUE)
131 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) /
132 | sqrt(var(bootx)/nx + var(booty)/ny)
133 | }
134 |
135 | thetahat - quantile(Tstar, c(.975, .025)) * SE
136 |
137 | t.test(Time.ILEC, Time.CLEC)$conf
138 | ```
139 |
140 |
--------------------------------------------------------------------------------
/Edition2/R/Chap08MoreHypTests.R:
--------------------------------------------------------------------------------
1 | #Chapter 8 More Hypothesis Tests
2 |
3 | #Section 8.2
4 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv")
5 |
6 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater")
7 |
8 | Arsenic <- Bangladesh$Arsenic
9 | N <- 10^5
10 |
11 | observedT <- t.test(Arsenic, mu = 100)$statistic
12 | xbar <- mean(Arsenic)
13 | n <- length(Arsenic)
14 | Tstar <- numeric(N)
15 | for (i in 1:N)
16 | {
17 | bootx <- sample(Arsenic, n , replace = TRUE)
18 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n))
19 | }
20 |
21 | hist(Tstar)
22 | abline(v = observedT)
23 |
24 | (sum(Tstar >= observedT) + 1)/(N + 1)
25 |
--------------------------------------------------------------------------------
/Edition2/R/Chap08MoreHypTests.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 8 More Hypothesis Tests"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 | ###Section 8.2
13 | ```{r}
14 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv")
15 |
16 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater")
17 |
18 | Arsenic <- Bangladesh$Arsenic
19 | N <- 10^5
20 |
21 | observedT <- t.test(Arsenic, mu = 100)$statistic
22 | xbar <- mean(Arsenic)
23 | n <- length(Arsenic)
24 | Tstar <- numeric(N)
25 | for (i in 1:N)
26 | {
27 | bootx <- sample(Arsenic, n , replace = TRUE)
28 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n))
29 | }
30 |
31 | hist(Tstar)
32 | abline(v = observedT)
33 |
34 | (sum(Tstar >= observedT) + 1)/(N + 1)
35 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap08MoreHypTests_Exer.R:
--------------------------------------------------------------------------------
1 | #Chapter 8 More Hypothesis Tests
2 | #Exercises
3 |
4 | #Exercise 13
5 | m <- 30
6 | n <- 30
7 | sigma1 <- 5
8 | sigma2 <- 5
9 |
10 | pooled.count <- 0
11 | unpooled.count <- 0
12 |
13 | for (i in 1:10^5)
14 | {
15 | x <- rnorm(m, 30, 5)
16 | y <- rnorm(n, 30, 5)
17 |
18 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value
19 | p.unpooled <- t.test(x, y)$p.value
20 |
21 | pooled.count <- pooled.count + (p.pooled < 0.05)
22 | unpooled.count <- unpooled.count + (p.unpooled < 0.05)
23 | }
24 |
25 | pooled.count/10^5
26 | unpooled.count/10^5
27 |
28 | #-------------------------------------------
29 | #Exercise 21
30 |
31 | n1 <- 100
32 | n2 <- 100
33 | N <- 10^4
34 | p <- 0.1
35 |
36 | x1 <- rbinom(N, size = n1, p)
37 | x2 <- rbinom(N, size = n2, p)
38 |
39 | phat <- (x1 + x2)/(n1 + n2)
40 | propDiff <- x1/n1 - x2/n2
41 |
42 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2))
43 |
44 | qqnorm(propDiff/SE)
45 | abline(0, 1, col = "lightgray")
46 |
47 | #Exercise 40
48 |
49 | N <- 10^4
50 | tstat <- numeric(N)
51 | for (i in 1:N)
52 | {
53 | w <- rnorm(30, 7, 1)
54 | tstat[i] <- (mean(w) - 5)* sqrt(30)
55 | }
56 |
57 |
58 | hist(tstat, prob = TRUE)
59 | curve(dt(x, df = 29), from = 0, to = 20, add = TRUE)
60 |
61 | curve(dt(x , df = 29, ncp = 10.95), from = 0, to = 20,
62 | col = "blue", add = TRUE)
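# Note: ncp = 10.95 is the noncentrality parameter implied by the
# simulation above, (7 - 5)*sqrt(30)/1 = 2*sqrt(30).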
63 |
--------------------------------------------------------------------------------
/Edition2/R/Chap08MoreHypTests_Exer.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 8 More Hypothesis Tests - Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | ```
11 |
12 | ###Exercise 13
13 | ```{r}
14 | m <- 30
15 | n <- 30
16 | sigma1 <- 5
17 | sigma2 <- 5
18 |
19 | pooled.count <- 0
20 | unpooled.count <- 0
21 |
22 | for (i in 1:10^5)
23 | {
24 | x <- rnorm(m, 30, 5)
25 | y <- rnorm(n, 30, 5)
26 |
27 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value
28 | p.unpooled <- t.test(x, y)$p.value
29 |
30 | pooled.count <- pooled.count + (p.pooled < 0.05)
31 | unpooled.count <- unpooled.count + (p.unpooled < 0.05)
32 | }
33 |
34 | pooled.count/10^5
35 | unpooled.count/10^5
36 |
37 | ```
38 | ###Exercise 21
39 |
40 | ```{r}
41 | n1 <- 100
42 | n2 <- 100
43 | N <- 10^4
44 | p <- 0.1
45 |
46 | x1 <- rbinom(N, size = n1, p)
47 | x2 <- rbinom(N, size = n2, p)
48 |
49 | phat <- (x1 + x2)/(n1 + n2)
50 | propDiff <- x1/n1 - x2/n2
51 |
52 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2))
53 |
54 | qqnorm(propDiff/SE)
55 | abline(0, 1, col = "lightgray")
56 | ```
57 |
58 | ###Exercise 40
59 | ```{r}
60 | N <- 10^4
61 | tstat <- numeric(N)
62 | for (i in 1:N)
63 | {
64 | w <- rnorm(30, 7, 1)
65 | tstat[i] <- (mean(w) - 5)* sqrt(30)
66 | }
67 |
68 |
69 | hist(tstat, prob = TRUE)
70 | curve(dt(x, df = 29), from = 0, to = 20, add = TRUE)
71 |
72 | curve(dt(x , df = 29, ncp = 10.95), from = 0, to = 20,
73 | col = "blue", add = TRUE)
74 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap08MoreHypTests_Exer_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 8 More Hypothesis Tests - Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ###Exercise 13
15 | ```{r}
16 | m <- 30
17 | n <- 30
18 | sigma1 <- 5
19 | sigma2 <- 5
20 |
21 | pooled.count <- 0
22 | unpooled.count <- 0
23 |
24 | for (i in 1:10^5)
25 | {
26 | x <- rnorm(m, 30, 5)
27 | y <- rnorm(n, 30, 5)
28 |
29 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value
30 | p.unpooled <- t.test(x, y)$p.value
31 |
32 | pooled.count <- pooled.count + (p.pooled < 0.05)
33 | unpooled.count <- unpooled.count + (p.unpooled < 0.05)
34 | }
35 |
36 | pooled.count/10^5
37 | unpooled.count/10^5
38 |
39 | ```
40 | ###Exercise 21
41 |
42 | ```{r}
43 | n1 <- 100
44 | n2 <- 100
45 | N <- 10^4
46 | p <- 0.1
47 |
48 | x1 <- rbinom(N, size = n1, p)
49 | x2 <- rbinom(N, size = n2, p)
50 |
51 | phat <- (x1 + x2)/(n1 + n2)
52 | propDiff <- x1/n1 - x2/n2
53 |
54 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2))
55 |
56 | df <- data.frame(x=propDiff/SE)
57 |
58 | ggplot(df, aes(sample = x)) + stat_qq() +
59 | geom_abline(intercept = 0, slope = 1, colour = "lightgray")
60 | ```
61 |
62 | ###Exercise 40
63 | ```{r}
64 | N <- 10^4
65 | tstat <- numeric(N)
66 | for (i in 1:N)
67 | {
68 | w <- rnorm(30, 7, 1)
69 | tstat[i] <- (mean(w) - 5)* sqrt(30)
70 | }
71 |
72 | df <- data.frame(x=tstat)
73 | ggplot(df, aes(x)) + geom_histogram(aes(y = stat(density)), bins = 12) +
74 | stat_function(fun=dt, args=list(df=29), colour = "red") +
75 | stat_function(fun=dt, args=list(df=29, ncp = 10.95), colour = "blue")
76 |
77 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap08MoreHypTests_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 8 More Hypothesis Tests"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ###Section 8.2
15 | ```{r}
16 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv")
17 |
18 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater")
19 |
20 | Arsenic <- Bangladesh %>% pull(Arsenic)
21 | #Arsenic <- Bangladesh$Arsenic
22 | N <- 10^5
23 |
24 | observedT <- t.test(Arsenic, mu = 100)$statistic
25 | xbar <- mean(Arsenic)
26 | n <- length(Arsenic)
27 | Tstar <- numeric(N)
28 | for (i in 1:N)
29 | {
30 | bootx <- sample(Arsenic, n , replace = TRUE)
31 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n))
32 | }
33 |
34 | ggplot() + geom_histogram(aes(Tstar), bins = 12) + geom_vline(xintercept = observedT)
35 |
36 | (sum(Tstar >= observedT) + 1)/(N + 1)
37 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap09Regression.R:
--------------------------------------------------------------------------------
1 | #Chapter 9 Regression
2 | #
3 |
4 | #Section 9.2
5 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv")
6 |
7 | plot(Spruce$Di.change, Spruce$Ht.change)
8 | cor(Spruce$Di.change, Spruce$Ht.change)
9 |
10 | plot(Ht.change ~ Di.change, data = Spruce)
11 |
12 | #Example 9.3
13 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce)
14 | spruce.lm
15 |
16 | plot(Spruce$Ht.change, resid(spruce.lm), ylab = "residuals")
17 | abline(h = 0)
18 | lines(smooth.spline(Spruce$Ht.change, resid(spruce.lm), df = 3), col = "blue")
19 |
20 | #Example 9.8
21 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv")
22 | skate.lm <- lm(Free ~ Short, data = Skating2010)
23 | summary(skate.lm)
24 |
25 | #Section 9.5
26 | N <- 10^4
27 | cor.boot <- numeric(N)
28 | beta.boot <- numeric(N)
29 | alpha.boot <- numeric(N)
30 | yPred.boot <- numeric(N)
31 | n <- 24 #number of skaters
32 | for (i in 1:N)
33 | {
34 |   index <- sample(n, replace = TRUE)  #sample from 1, 2, ..., n
35 | Skate.boot <- Skating2010[index, ]
36 |
37 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free)
38 |
39 | #recalculate linear model estimates
40 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot)
41 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept
42 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope
43 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^
44 | }
45 |
46 | mean(cor.boot)
47 | sd(cor.boot)
48 | quantile(cor.boot, c(0.025, 0.975))
49 |
50 | hist(cor.boot, main = "Bootstrap distribution of correlation",
51 | xlab = "Correlation")
52 | observed <- cor(Skating2010$Short, Skating2010$Free)
53 | abline(v = observed, col = "blue") #add line at observed cor.
54 |
55 | #-------------------------------------------------------
56 | # Section 9.5.1 Permutation test
57 |
58 | N <- 10^5 - 1
59 | n <- nrow(Skating2010) #number of observations
60 | result <- numeric(N)
61 | observed <- cor(Skating2010$Short, Skating2010$Free)
62 | for (i in 1:N)
63 | {
64 | index <- sample(n , replace = FALSE)
65 | Short.permuted <- Skating2010$Short[index]
66 | result[i] <- cor(Short.permuted, Skating2010$Free)
67 | }
68 |
69 | (sum(observed <= result) + 1)/(N+1) #P-value
70 |
71 | #----------------------------------------------
72 | #Section 9.6.1 Inference for logistic regression
73 | Fatalities <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv")
74 |
75 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial)
76 | data.class(fit) # is a "glm" object, so for help use:
77 | help(glm)
78 |
79 | fit # prints the coefficients and other basic info
80 | coef(fit) # the coefficients as a vector
81 | summary(fit) # gives standard errors for coefficients, etc.
82 |
83 | x <- seq(17, 91, length = 500) # vector spanning the age range
84 | # compute predicted probabilities
85 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x))
86 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x)
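# (y1 applies the inverse logit by hand with rounded coefficients; y2
# computes the same curve via plogis(), the logistic CDF, using the
# fitted coefficients.)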
87 |
88 | plot(Fatalities$Age, Fatalities$Alcohol,
89 | ylab = "Probability of alcohol")
90 | lines(x, y2)
91 |
92 | # Full bootstrap - slope coefficient, and prediction at age 20
93 | N <- 10^3
94 | n <- nrow(Fatalities) # number of observations
95 | alpha.boot <- numeric(N)
96 | beta.boot <- numeric(N)
97 | pPred.boot <- numeric(N)
98 |
99 | for (i in 1:N)
100 | {
101 | index <- sample(n, replace = TRUE)
102 | Fatal.boot <- Fatalities[index, ] # resampled data
103 |
104 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot,
105 | family = binomial)
106 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept
107 | beta.boot[i] <- coef(fit.boot)[2] # new slope
108 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i])
109 | }
110 |
111 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals
112 | quantile(pPred.boot, c(.025, .975))
113 |
114 | par(mfrow=c(2,2)) # set layout
115 | hist(beta.boot, xlab = "beta", main = "")
116 | qqnorm(beta.boot, main = "")
117 |
118 | hist(pPred.boot, xlab = "p^", main = "")
119 | qqnorm(pPred.boot, main = "")
120 |
121 | #--------------------
122 | help(predict.glm)   # for more help on predict
123 |
124 | n <- nrow(Fatalities) # number of observations
125 | x <- seq(17, 91, length = 500) # vector spanning the age range
126 | df.Age <- data.frame(Age = x) # data frame to hold
127 | # explanatory variables, will use this for making predictions
128 |
129 | plot(Fatalities$Age, Fatalities$Alcohol,
130 | ylab = "Probability of alcohol")
131 | for (i in 1:25)
132 | {
133 | index <- sample(n, replace = TRUE)
134 | Fatal.boot <- Fatalities[index, ] # resampled data
135 |
136 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot,
137 | family = binomial)
138 | pPred <- predict(fit.boot, newdata = df.Age, type = "response")
139 | lines(x, pPred)
140 | }
141 |
142 | #end fatalities
143 | #---------------------
144 |
--------------------------------------------------------------------------------
/Edition2/R/Chap09Regression.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 9 Regression"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 |
12 | ###Section 9.2
13 | ```{r}
14 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv")
15 |
16 | plot(Spruce$Di.change, Spruce$Ht.change)
17 | cor(Spruce$Di.change, Spruce$Ht.change)
18 |
19 | plot(Ht.change ~ Di.change, data = Spruce)
20 | ```
21 |
22 | ###Example 9.3
23 | ```{r}
24 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce)
25 | spruce.lm
26 |
27 | plot(Spruce$Ht.change, resid(spruce.lm), ylab = "residuals")
28 | abline(h = 0)
29 | lines(smooth.spline(Spruce$Ht.change, resid(spruce.lm), df = 3), col = "blue")
30 | ```
31 |
32 | ###Example 9.8
33 | ```{r}
34 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv")
35 | skate.lm <- lm(Free ~ Short, data = Skating2010)
36 | summary(skate.lm)
37 | ```
38 |
39 | ###Section 9.5
40 |
41 | ```{r}
42 | N <- 10^4
43 | cor.boot <- numeric(N)
44 | beta.boot <- numeric(N)
45 | alpha.boot <- numeric(N)
46 | yPred.boot <- numeric(N)
47 | n <- 24 #number of skaters
48 | for (i in 1:N)
49 | {
50 |   index <- sample(n, replace = TRUE)  #sample from 1, 2, ..., n
51 | Skate.boot <- Skating2010[index, ]
52 |
53 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free)
54 |
55 | #recalculate linear model estimates
56 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot)
57 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept
58 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope
59 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^
60 | }
61 |
62 | mean(cor.boot)
63 | sd(cor.boot)
64 | quantile(cor.boot, c(0.025, 0.975))
65 |
66 | hist(cor.boot, main = "Bootstrap distribution of correlation",
67 | xlab = "Correlation")
68 | observed <- cor(Skating2010$Short, Skating2010$Free)
69 | abline(v = observed, col = "blue") #add line at observed cor.
70 | ```
71 |
72 | ### Section 9.5.1 Permutation test
73 |
74 | ```{r}
75 | N <- 10^5 - 1
76 | n <- nrow(Skating2010) #number of observations
77 | result <- numeric(N)
78 | observed <- cor(Skating2010$Short, Skating2010$Free)
79 | for (i in 1:N)
80 | {
81 | index <- sample(n , replace = FALSE)
82 | Short.permuted <- Skating2010$Short[index]
83 | result[i] <- cor(Short.permuted, Skating2010$Free)
84 | }
85 |
86 | (sum(observed <= result) + 1)/(N+1) #P-value
87 | ```
88 |
89 |
90 | ###Section 9.6.1 Inference for logistic regression
91 |
92 | ```{r}
93 | Fatalities <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv")
94 |
95 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial)
96 | data.class(fit) # is a "glm" object, so for help use:
97 | help(glm)
98 |
99 | fit # prints the coefficients and other basic info
100 | coef(fit) # the coefficients as a vector
101 | summary(fit) # gives standard errors for coefficients, etc.
102 |
103 | x <- seq(17, 91, length = 500) # vector spanning the age range
104 | # compute predicted probabilities
105 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x))
106 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x)
107 |
108 | plot(Fatalities$Age, Fatalities$Alcohol,
109 | ylab = "Probability of alcohol")
110 | lines(x, y2)
111 | ```
112 |
113 |
114 | #### Full bootstrap - slope coefficient, and prediction at age 20
115 | ```{r}
116 | N <- 10^3
117 | n <- nrow(Fatalities) # number of observations
118 | alpha.boot <- numeric(N)
119 | beta.boot <- numeric(N)
120 | pPred.boot <- numeric(N)
121 |
122 | for (i in 1:N)
123 | {
124 | index <- sample(n, replace = TRUE)
125 | Fatal.boot <- Fatalities[index, ] # resampled data
126 |
127 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot,
128 | family = binomial)
129 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept
130 | beta.boot[i] <- coef(fit.boot)[2] # new slope
131 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i])
132 | }
133 |
134 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals
135 | quantile(pPred.boot, c(.025, .975))
136 |
137 | par(mfrow=c(2,2)) # set layout
138 | hist(beta.boot, xlab = "beta", main = "")
139 | qqnorm(beta.boot, main = "")
140 |
141 | hist(pPred.boot, xlab = "p^", main = "")
142 | qqnorm(pPred.boot, main = "")
143 | ```
144 |
145 |
146 | ```{r}
147 | n <- nrow(Fatalities) # number of observations
148 | x <- seq(17, 91, length = 500) # vector spanning the age range
149 | df.Age <- data.frame(Age = x) # data frame to hold
150 | # explanatory variables, will use this for making predictions
151 |
152 | plot(Fatalities$Age, Fatalities$Alcohol,
153 | ylab = "Probability of alcohol")
154 | for (i in 1:25)
155 | {
156 | index <- sample(n, replace = TRUE)
157 | Fatal.boot <- Fatalities[index, ] # resampled data
158 |
159 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot,
160 | family = binomial)
161 | pPred <- predict(fit.boot, newdata = df.Age, type = "response")
162 | lines(x, pPred)
163 | }
164 | ```
165 |
166 |
--------------------------------------------------------------------------------
/Edition2/R/Chap09Regression_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 9 Regression"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(dplyr)
11 | library(ggplot2)
12 | ```
13 |
14 | ###Section 9.2
15 | ```{r}
16 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv")
17 |
18 | ggplot(Spruce, aes(x = Di.change, y = Ht.change)) + geom_point()
19 |
20 | cor(Spruce$Di.change, Spruce$Ht.change)
21 | ```
22 |
23 | ###Example 9.3
24 | ```{r}
25 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce)
26 | spruce.lm
27 |
28 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point() +
29 | stat_smooth(method="lm", se = FALSE)
30 | ```
31 |
32 | We introduce a new package `broom` that performs some __tidying__ of the output of base R's `lm` command:
33 |
34 | ```{r}
35 | library(broom)
36 |
37 | fit <- augment(spruce.lm)
38 | head(fit, 3)
39 | ```
40 | In particular, note that we now have a data set that, in addition to the original variables, also contains a column of fitted (predicted) values (`.fitted`) and a column of residuals (`.resid`).
41 |
42 | To create a residual plot:
43 |
44 | ```{r}
45 | ggplot(fit, aes(x=Ht.change, y = .resid)) + geom_point() +
46 | geom_hline(yintercept = 0) + labs(y = "residuals")
47 | ```
48 |
49 | To add a __smoother__ line to the residual plot, use the `stat_smooth()` command:
50 |
51 | ```{r}
52 | ggplot(fit, aes(x = Ht.change, y = .resid)) + geom_point() + stat_smooth(method = loess, se = FALSE) + geom_hline(yintercept = 0)
53 | ```
54 |
55 | ###Example 9.8
56 | ```{r}
57 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv")
58 | skate.lm <- lm(Free ~ Short, data = Skating2010)
59 | summary(skate.lm)
60 | ```
61 |
62 | ###Section 9.5
63 |
64 | ```{r}
65 | N <- 10^4
66 | cor.boot <- numeric(N)
67 | beta.boot <- numeric(N)
68 | alpha.boot <- numeric(N)
69 | yPred.boot <- numeric(N)
70 | n <- 24 #number of skaters
71 | for (i in 1:N)
72 | {
73 |   index <- sample(n, replace = TRUE)  #sample from 1, 2, ..., n
74 | Skate.boot <- Skating2010[index, ]
75 |
76 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free)
77 |
78 | #recalculate linear model estimates
79 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot)
80 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept
81 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope
82 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^
83 | }
84 |
85 | mean(cor.boot)
86 | sd(cor.boot)
87 | quantile(cor.boot, c(0.025, 0.975))
88 |
89 |
90 | observed <- cor(Skating2010$Short, Skating2010$Free)
91 |
92 | ggplot() + geom_histogram(aes(cor.boot), bins = 12) +
93 | labs(title = "Bootstrap distribution of correlation", x = "Correlation") +
94 | geom_vline(xintercept = observed, colour = "blue")
95 | ```
96 |
97 | ### Section 9.5.1 Permutation test
98 |
99 | ```{r}
100 | N <- 10^5 - 1
101 | n <- nrow(Skating2010) #number of observations
102 | result <- numeric(N)
103 | observed <- cor(Skating2010$Short, Skating2010$Free)
104 | for (i in 1:N)
105 | {
106 | index <- sample(n , replace = FALSE)
107 | Short.permuted <- Skating2010$Short[index]
108 | result[i] <- cor(Short.permuted, Skating2010$Free)
109 | }
110 |
111 | (sum(observed <= result) + 1)/(N+1) #P-value
112 | ```
113 |
114 |
115 | ###Section 9.6.1 Inference for logistic regression
116 |
117 | ```{r}
118 | Fatalities <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv")
119 |
120 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial)
121 | data.class(fit) # is a "glm" object, so for help use:
122 | help(glm)
123 |
124 | fit # prints the coefficients and other basic info
125 | coef(fit) # the coefficients as a vector
126 | summary(fit) # gives standard errors for coefficients, etc.
127 |
128 | x <- seq(17, 91, length = 500) # vector spanning the age range
129 | # compute predicted probabilities
130 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x))
131 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x)
132 |
133 | my.fun <- function(x, lm.object){
134 | plogis(coef(lm.object)[1] + coef(lm.object)[2]*x)
135 | }
136 |
137 | ggplot(Fatalities, aes(x=Age, y = Alcohol)) + geom_point() +
138 | stat_function(fun = my.fun, args=list(lm.object = fit))
139 |
140 | ```
141 |
142 |
143 | #### Full bootstrap - slope coefficient, and prediction at age 20
144 | ```{r}
145 | N <- 10^3
146 | n <- nrow(Fatalities) # number of observations
147 | alpha.boot <- numeric(N)
148 | beta.boot <- numeric(N)
149 | pPred.boot <- numeric(N)
150 |
151 | for (i in 1:N)
152 | {
153 | index <- sample(n, replace = TRUE)
154 | Fatal.boot <- Fatalities[index, ] # resampled data
155 |
156 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot,
157 | family = binomial)
158 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept
159 | beta.boot[i] <- coef(fit.boot)[2] # new slope
160 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i])
161 | }
162 |
163 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals
164 | quantile(pPred.boot, c(.025, .975))
165 |
166 | library(gridExtra)
167 |
168 | p1 <- ggplot() + geom_histogram(aes(beta.boot), bins = 12) + labs(x = "beta")
169 | p2 <- ggplot() + stat_qq(aes(sample = beta.boot))
170 | p3 <- ggplot() + geom_histogram(aes(pPred.boot), bins = 12) + labs(x = "p^")
171 | p4 <- ggplot() + stat_qq(aes(sample = pPred.boot))
172 | grid.arrange(p1, p2, p3, p4)
173 | ```
174 |
175 |
176 | ```{r}
177 | n <- nrow(Fatalities) # number of observations
178 | x <- seq(17, 91, length = 500) # vector spanning the age range
179 | df.Age <- data.frame(Age = x) # data frame to hold
180 | # explanatory variables, will use this for making predictions
181 |
182 | p <- ggplot(Fatalities, aes(x= Age, y = Alcohol)) + geom_point() +
183 | labs(y = "Probability of alcohol")
184 |
185 | for (i in 1:25)
186 | {
187 | index <- sample(n, replace = TRUE)
188 | Fatal.boot <- Fatalities[index, ] # resampled data
189 |
190 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot,
191 | family = binomial)
192 | df.Age$pPred <- predict(fit.boot, newdata = df.Age, type = "response")
193 | p <- p + geom_line(data = df.Age, aes(x = Age, y = pPred))
194 | }
195 |
196 | print(p)
197 | ```
198 |
199 |
--------------------------------------------------------------------------------
/Edition2/R/Chap10categorical.R:
--------------------------------------------------------------------------------
1 | #------------------------------------------------
2 | #Chapter 10 Categorical data
3 | #Here is a function that computes the chi-square
4 | #test statistic
5 |
6 | #This function is a bit more enhanced than the code in the textbook
7 | chisq <- function(observed, print = TRUE) {
8 | # Chi-square statistic for independence in a contingency table,
9 | # with related data exploration.
10 | # observed is the observed contingency table
11 |
12 | observedWithTotals <- cbind(observed, total = rowSums(observed))
13 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
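  # expected counts under independence: E[i,j] = (row i total)*(col j total)/(grand total)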
14 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
15 | statistic <- sum((observed-expected)^2/expected)
16 | if (print)
17 | {
18 | cat("Observed, with totals:\n")
19 | print(observedWithTotals)
20 | cat("\nRow Fractions:\n")
21 | print(round(observed / rowSums(observed), 3))
22 | cat("\nColumn Fractions:\n")
23 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
24 |
25 | cat("\nExpected:\n")
26 | print(round(expected, 1))
27 | cat("\nDifference:\n")
28 | print(round(observed - expected, 1))
29 |
30 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
31 | }
32 | return(invisible(statistic))
33 | }
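# Minimal usage sketch (hypothetical 2x2 counts, purely illustrative):
#   chisq(rbind(c(10, 20), c(30, 40)))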
34 |
35 |
36 | #-------------------------------------------
37 | #Uncomment below if you haven't imported GSS2002 yet.
38 | #GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv")
39 |
40 | Education <- GSS2002$Education
41 | DeathPenalty <- GSS2002$DeathPenalty
42 | #Alternatively
43 | #Education <- subset(GSS2002, select=Education, drop = TRUE)
44 | #DeathPenalty <- subset(GSS2002, select=DeathPenalty, drop = TRUE)
45 |
46 | table(Education, DeathPenalty) #note education ordered alphabetically
47 |
48 | Education <- ordered(GSS2002$Education,
49 | levels = c("Left HS", "HS", "Jr Col", "Bachelors",
50 | "Graduate"))
51 |
52 | table(Education, DeathPenalty)
53 |
54 | #Use function created above to calculate chi-square test statistic
55 | observedChi2 <- chisq(table(Education, DeathPenalty))
56 | observedChi2
57 |
58 | #Find those rows where there is at least one NA
59 | index <- which(is.na(Education) | is.na(DeathPenalty))
60 |
61 | #Remove those rows from the two variables and define Educ2 and
62 | #DeathPenalty2 to be the new vectors with those rows removed
63 | Educ2 <- Education[-index]
64 | DeathPenalty2 <- DeathPenalty[-index]
65 |
66 | N <- 10^4 - 1
67 | result <- numeric(N)
68 |
69 | for (i in 1:N)
70 | {
71 | DP.permutation <- sample(DeathPenalty2)
72 | GSS.table <- table(Educ2, DP.permutation)
73 | result[i] <- chisq(GSS.table, print = FALSE)
74 | }
75 |
76 | #Create a histogram
77 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic")
78 | abline(v = observedChi2, col = "blue", lty = 5)
79 |
80 |
81 | #optional: Create a histogram with the density curve
82 | #superimposed on the histogram
83 | #The prob = TRUE option below scales the histogram to have area 1
84 | hist(result, prob = TRUE, xlab = "chi-square statistic",
85 |      main = "Distribution of chi-square statistic", ylim = c(0, .2))
86 | curve(dchisq(x, df = 4), from = 0, to = 25, col = "green", add = TRUE)
87 |
88 | #Compute P-value
89 | (sum(result >= observedChi2) + 1)/(N + 1)
90 |
91 |
92 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1)
93 | mat <- table(Education, DeathPenalty)
94 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4-1)
95 |
96 | #----------------------------------------------------------------
97 | #Example 10.2
98 | mat <- rbind(c(42, 50), c(30, 87))
99 | chisq.test(mat)
100 |
101 | #Section 10.3.3 Fisher's Exact Test
102 | fisher.test(mat)
103 |
104 |
105 |
106 | #Section 10.4 Test of Homogeneity
107 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
108 | candy.mat
109 |
110 | chisq.test(candy.mat)
111 |
112 | #Section 10.6
113 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv")
114 | Homeruns <- Phillies2009$Homeruns
115 | #Homeruns <- subset(Phillies2009, select = Homeruns, drop = TRUE)
116 |
117 |
118 |
119 | lambda <- mean(Homeruns)
120 | dpois(0:5, lambda)
121 | table(Homeruns)
122 |
123 | table(Homeruns)/162
124 |
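125 | #Optional sketch (not in the original script): carry the Poisson fit
126 | #through to a goodness-of-fit statistic. The 0, 1, 2, 3, 4+ binning is an
127 | #assumption, chosen to keep the expected counts from being too small.
128 | obs <- table(factor(pmin(Homeruns, 4), levels = 0:4)) # observed counts
129 | p <- dpois(0:3, lambda)
130 | p <- c(p, 1 - sum(p)) # lump 4 or more home runs into one cell
131 | expected <- 162 * p
132 | chi2 <- sum((obs - expected)^2 / expected)
133 | chi2
134 | 1 - pchisq(chi2, df = 3) # df = 5 cells - 1 - 1 estimated parameter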
--------------------------------------------------------------------------------
/Edition2/R/Chap10categorical.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 10 Categorical Data"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 |
12 | ### Section 10.2
13 |
14 | Here is a function that computes the chi-square test statistic.
15 |
16 | The code below gives an enhanced version of the function in the textbook:
17 | ```{r}
18 | chisq <- function(observed, print = TRUE) {
19 | # Chi-square statistic for independence in a contingency table,
20 | # with related data exploration.
21 | # observed is the observed contingency table
22 |
23 | observedWithTotals <- cbind(observed, total = rowSums(observed))
24 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
25 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
26 | statistic <- sum((observed-expected)^2/expected)
27 | if (print){
28 | cat("Observed, with totals:\n")
29 | print(observedWithTotals)
30 | cat("\nRow Fractions:\n")
31 | print(round(observed / rowSums(observed), 3))
32 | cat("\nColumn Fractions:\n")
33 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
34 |
35 | cat("\nExpected:\n")
36 | print(round(expected, 1))
37 | cat("\nDifference:\n")
38 | print(round(observed - expected, 1))
39 |
40 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
41 | }
42 | return(invisible(statistic))
43 | }
44 | ```
45 | Import the General Social Survey data and extract the two variables, `Education` and
46 | `DeathPenalty`.
47 |
48 | `Education` is a factor variable. We use the `ordered` command to *order* the levels.
49 |
50 | ```{r}
51 | GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv")
52 |
53 | Education <- GSS2002$Education
54 | DeathPenalty <- GSS2002$DeathPenalty
55 |
56 | #Alternatively
57 | #Education <- subset(GSS2002, select = Education, drop = TRUE)
58 | #DeathPenalty <- subset(GSS2002, select = DeathPenalty, drop = TRUE)
59 |
60 | table(Education, DeathPenalty) #Education ordered alphabetically
61 |
62 | Education <- ordered(GSS2002$Education, levels = c("Left HS", "HS", "Jr Col", "Bachelors", "Graduate"))
63 | table(Education, DeathPenalty)
64 | ```
65 |
66 | Use the function created above to calculate the chi-square test statistic:
67 |
68 | ```{r}
69 | observedChi2 <- chisq(table(Education, DeathPenalty))
70 | observedChi2
71 | ```
72 |
73 | There are missing values in both variables, so we find the row numbers where at least one of the two is NA. We then remove those rows and create two new vectors, `Educ2` and `DeathPenalty2`, that hold the remaining non-NA values:
74 |
75 | ```{r}
76 | str(GSS2002)
77 |
78 | index <- which(is.na(Education) | is.na(DeathPenalty))
79 |
80 | Educ2 <- Education[-index]
81 | DeathPenalty2 <- DeathPenalty[-index]
82 | ```
83 | Now run the permutation test:
84 | ```{r}
85 | N <- 10^4 - 1
86 | result <- numeric(N)
87 |
88 | for (i in 1:N)
89 | {
90 | DP.permutation <- sample(DeathPenalty2)
91 | GSS.table <- table(Educ2, DP.permutation)
92 | result[i] <- chisq(GSS.table, print = FALSE)
93 | }
94 |
95 | #Create a histogram
96 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic")
97 | abline(v = observedChi2, col = "blue", lty = 5)
98 |
99 | #Compute P-value
100 | (sum(result >= observedChi2) + 1)/(N + 1)
101 | ```
102 |
103 | Optional: Create a histogram with the density curve
104 | superimposed on the histogram.
105 | The `prob = TRUE` argument scales the histogram to have area 1:
106 | ```{r}
107 | hist(result, prob = TRUE, xlab = "chi-square statistic", main = "Distribution of chi-square statistic")
108 | curve(dchisq(x, df = 4), from = 0, to = 25, col = "green", add = TRUE)
109 | ```
110 |
111 | The `chisq.test` command also has an option that will perform this permutation test:
112 |
113 | ```{r}
114 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1)
115 | mat <- table(Education, DeathPenalty)
116 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4-1)
117 | ```
118 |
119 |
120 |
121 |
122 | ### Example 10.2
123 | ```{r}
124 | mat <- rbind(c(42, 50), c(30, 87))
125 | chisq.test(mat)
126 | ```
127 |
128 | ### Section 10.3.3 Fisher's Exact Test
129 |
130 | ```{r}
131 | fisher.test(mat)
132 | ```
133 |
134 | ### Section 10.4 Test of Homogeneity
135 | ```{r}
136 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
137 | candy.mat
138 |
139 | chisq.test(candy.mat)
140 | ```
141 |
142 | ### Section 10.6
143 | ```{r}
144 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv")
145 | Homeruns <- Phillies2009$Homeruns
146 | #Homeruns <- subset(Phillies2009, select = Homeruns, drop = TRUE)
147 |
148 | lambda <- mean(Homeruns)
149 | dpois(0:5, lambda)
150 | table(Homeruns)
151 |
152 | table(Homeruns)/162
153 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap10categorical_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 10 Categorical Data"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ### Section 10.2
15 |
16 | Here is a function that computes the chi-square test statistic.
17 |
18 | The code below gives an enhanced version of the function in the textbook:
19 | ```{r}
20 | chisq <- function(observed, print = TRUE) {
21 | # Chi-square statistic for independence in a contingency table,
22 | # with related data exploration.
23 | # observed is the observed contingency table
24 |
25 | observedWithTotals <- cbind(observed, total = rowSums(observed))
26 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
27 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
28 | statistic <- sum((observed-expected)^2/expected)
29 | if (print){
30 | cat("Observed, with totals:\n")
31 | print(observedWithTotals)
32 | cat("\nRow Fractions:\n")
33 | print(round(observed / rowSums(observed), 3))
34 | cat("\nColumn Fractions:\n")
35 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
36 |
37 | cat("\nExpected:\n")
38 | print(round(expected, 1))
39 | cat("\nDifference:\n")
40 | print(round(observed - expected, 1))
41 |
42 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
43 | }
44 | return(invisible(statistic))
45 | }
46 | ```
47 | Import the General Social Survey data. We are interested in the two variables, `Education` and `DeathPenalty`.
48 |
49 | Using the `str()` command, we note that these two variables have missing values. We will create a new data frame that contains just the two variables of interest and only the rows without NAs.
50 |
51 | ```{r}
52 | GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv")
53 |
54 | str(GSS2002)
55 |
56 | df <- GSS2002 %>%
57 | select(Education, DeathPenalty) %>%
58 | filter(!is.na(Education) & !is.na(DeathPenalty))
59 |
60 | Education <- pull(df, Education)
61 | DeathPenalty <- pull(df, DeathPenalty)
62 |
63 | table(Education, DeathPenalty)
64 | ```
65 |
66 | `Education` is a factor variable and the default ordering of the levels is alphabetical. We use the `ordered` command to *order* the levels.
67 |
68 | ```{r}
69 | Education <- ordered(Education, levels = c("Left HS", "HS", "Jr Col", "Bachelors", "Graduate"))
70 | table(Education, DeathPenalty)
71 | ```
72 |
73 |
74 | Use the function created above to calculate the chi-square test statistic:
75 |
76 | ```{r}
77 | observedChi2 <- chisq(table(Education, DeathPenalty))
78 | observedChi2
79 | ```
80 |
81 | Now run the permutation test:
82 |
83 | ```{r}
84 | N <- 10^4 - 1
85 | result <- numeric(N)
86 |
87 | for (i in 1:N)
88 | {
89 | DP.permutation <- sample(DeathPenalty)
90 | GSS.table <- table(Education, DP.permutation)
91 | result[i] <- chisq(GSS.table, print = FALSE)
92 | }
93 |
94 | ggplot() + geom_histogram(aes(result)) +
95 | labs(title = "Distribution of chi-square statistics", x = "chi-square statistic") +
96 | geom_vline(xintercept = observedChi2, colour = "blue")
97 |
98 | (sum(result >= observedChi2) + 1)/(N + 1)
99 | ```
100 |
101 |
102 | Optional: Create a histogram with the density curve
103 | superimposed on the histogram. The `ggplot()` command requires a data frame containing the variable of interest.
104 |
105 | ```{r}
106 | df <- data.frame(result)
107 | ggplot(df) + geom_histogram(aes(result, y = stat(density))) +
108 | labs(title = "Distribution of chi-square statistics", x = "chi-square statistic") +
109 | geom_vline(xintercept = observedChi2, colour = "blue") +
110 | stat_function(fun = dchisq, args = list(df = 4), colour = "green")
111 | ```
112 |
113 | The `chisq.test` command also has an option that will perform this permutation test:
114 |
115 | ```{r}
116 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1)
117 | mat <- table(Education, DeathPenalty)
118 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4-1)
119 | ```
120 |
121 | ### Example 10.2
122 | ```{r}
123 | mat <- rbind(c(42, 50), c(30, 87))
124 | chisq.test(mat)
125 | ```
126 |
127 | ### Section 10.3.3 Fisher's Exact Test
128 |
129 | ```{r}
130 | fisher.test(mat)
131 | ```
132 |
133 | ### Section 10.4 Test of Homogeneity
134 | ```{r}
135 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
136 | candy.mat
137 |
138 | chisq.test(candy.mat)
139 | ```
140 |
141 | ### Section 10.6
142 | ```{r}
143 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv")
144 | Homeruns <- pull(Phillies2009, Homeruns)
145 |
146 | lambda <- mean(Homeruns)
147 | dpois(0:5, lambda)
148 | table(Homeruns)
149 |
150 | table(Homeruns)/162
151 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap11Bayesian.R:
--------------------------------------------------------------------------------
1 | #Chapter 11 Bayesian Methods
2 | # R scripts
3 |
4 | #-----------------------------------
5 | # Example 11.1
6 | theta <- seq(0, 1, by = .1)
7 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
8 | likelihood <- theta * (1 - theta)^2
9 | constant <- sum(prior * likelihood)
10 | posterior <- prior * likelihood / constant
11 | posterior
12 | sum(theta * prior) # prior mean
13 | sum(theta * posterior) # posterior mean
14 |
15 | #-----------------------
16 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 successes, 5 failures
17 | constant2 <- sum(prior * likelihood2)
18 | posterior2 <- prior * likelihood2 / constant2
19 | posterior2
20 | likelihood3 <- theta^2 * (1 - theta)^3
21 | constant3 <- sum(posterior * likelihood3)
22 | posterior3 <- posterior * likelihood3 / constant3
23 | posterior3 # not shown, matches posterior2
24 | sum(theta*posterior2) # posterior mean
25 |
26 | plot(theta, prior, type = "b", ylim = c(0, max(posterior3)),
27 | ylab = "probability")
28 | lines(theta, posterior, type = "b", lty = 2)
29 | lines(theta, posterior2, type = "b", lty = 3)
30 | legend("topleft", legend = c("prior", "posterior1", "posterior2"),
31 | lty = 1:3)
32 |
33 | #-------------------------
34 | # Section 11.5 Sequential data
35 |
36 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
37 | X <- c(52, 41, 55, 49, 39, 39)
38 | alpha <- X # vector of posterior parameters
39 | beta <- n - X # vector of posterior parameters
40 | N <- 10^5 # replications
41 | theta <- matrix(0.0, nrow = N, ncol = 6)
42 | for (j in 1:6)
43 | {
44 | theta[, j] <- rbeta(N, alpha[j], beta[j])
45 | }
46 | probBest <- numeric(6) # vector for results
47 | best <- apply(theta, 1, max) # maximum of each row
48 | for (j in 1:6)
49 | {
50 | probBest[j] <- mean(theta[, j] == best)
51 | }
52 |
53 | probBest
54 |
55 | plot(theta[1:10^4, 1], theta[1:10^4, 3], pch = ".")
56 | abline(0, 1)
57 | text(.037, .042, substitute(theta[3] > theta[1]))
58 | text(.042, .037, substitute(theta[1] > theta[3]))
59 |
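60 | #Optional (not in the original script): 95% credible intervals for each
61 | #theta_j, read directly off the simulated posterior draws above
62 | round(t(apply(theta, 2, quantile, probs = c(0.025, 0.975))), 4)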
--------------------------------------------------------------------------------
/Edition2/R/Chap11Bayesian.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 11 Bayesian Statistics"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 |
12 | ### Example 11.1
13 | ```{r}
14 | theta <- seq(0, 1, by = .1)
15 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
16 | likelihood <- theta * (1 - theta)^2
17 | constant <- sum(prior * likelihood)
18 | posterior <- prior * likelihood / constant
19 | posterior
20 | sum(theta * prior) # prior mean
21 | sum(theta * posterior) # posterior mean
22 |
23 |
24 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
25 | constant2 <- sum(prior * likelihood2)
26 | posterior2 <- prior * likelihood2 / constant2
27 | posterior2
28 |
29 | likelihood3 <- theta^2 * (1 - theta)^3
30 | constant3 <- sum(posterior * likelihood3)
31 | posterior3 <- posterior * likelihood3 / constant3
32 | posterior3 # not shown, matches posterior2
33 | sum(theta*posterior2) # posterior mean
34 |
35 | plot(theta, prior, type = "b", ylim = c(0, max(posterior3)),
36 | ylab = "probability")
37 | lines(theta, posterior, type = "b", lty = 2)
38 | lines(theta, posterior2, type = "b", lty = 3)
39 | legend("topleft", legend = c("prior", "posterior1", "posterior2"),
40 | lty = 1:3)
41 | ```
42 |
43 | ### Section 11.5 Sequential data
44 | ```{r}
45 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
46 | X <- c(52, 41, 55, 49, 39, 39)
47 | alpha <- X # vector of posterior parameters
48 | beta <- n - X # vector of posterior parameters
49 | N <- 10^5 # replications
50 | theta <- matrix(0.0, nrow = N, ncol = 6)
51 | for (j in 1:6)
52 | {
53 | theta[, j] <- rbeta(N, alpha[j], beta[j])
54 | }
55 | probBest <- numeric(6) # vector for results
56 | best <- apply(theta, 1, max) # maximum of each row
57 | for (j in 1:6)
58 | {
59 | probBest[j] <- mean(theta[, j] == best)
60 | }
61 |
62 | probBest
63 |
64 | plot(theta[1:10^4, 1], theta[1:10^4, 3], pch = ".")
65 | abline(0, 1)
66 | text(.037, .042, substitute(theta[3] > theta[1]))
67 | text(.042, .037, substitute(theta[1] > theta[3]))
68 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap11Bayesian_d.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 11 Bayesian Statistics"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 |
14 | ### Example 11.1
15 | ```{r}
16 | theta <- seq(0, 1, by = .1)
17 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
18 | likelihood <- theta * (1 - theta)^2
19 | constant <- sum(prior * likelihood)
20 | posterior <- prior * likelihood / constant
21 | posterior
22 | sum(theta * prior) # prior mean
23 | sum(theta * posterior) # posterior mean
24 |
25 |
26 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
27 | constant2 <- sum(prior * likelihood2)
28 | posterior2 <- prior * likelihood2 / constant2
29 | posterior2
30 |
31 | likelihood3 <- theta^2 * (1 - theta)^3
32 | constant3 <- sum(posterior * likelihood3)
33 | posterior3 <- posterior * likelihood3 / constant3
34 | posterior3 # not shown, matches posterior2
35 | sum(theta*posterior2) # posterior mean
36 |
37 | df <- data.frame(theta, prior, posterior, posterior2)
38 |
39 | ggplot(df) +
40 | geom_line(aes(x = theta, y = prior, colour = "prior")) +
41 | geom_line(aes(x = theta, y = posterior, colour = "posterior")) +
42 | geom_line(aes(x = theta, y = posterior2, colour = "posterior2")) +
43 | scale_colour_manual(name=NULL,
44 | values= c("prior" = "black", "posterior" = "blue", "posterior2" = "red" ))
45 | ```
46 |
47 | ### Section 11.5 Sequential data
48 | ```{r}
49 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
50 | X <- c(52, 41, 55, 49, 39, 39)
51 | alpha <- X # vector of posterior parameters
52 | beta <- n - X # vector of posterior parameters
53 | N <- 10^5 # replications
54 | theta <- matrix(0.0, nrow = N, ncol = 6)
55 | for (j in 1:6)
56 | {
57 | theta[, j] <- rbeta(N, alpha[j], beta[j])
58 | }
59 | probBest <- numeric(6) # vector for results
60 | best <- apply(theta, 1, max) # maximum of each row
61 | for (j in 1:6)
62 | {
63 | probBest[j] <- mean(theta[, j] == best)
64 | }
65 |
66 | probBest
67 |
68 | df <- as.data.frame(theta[1:10^4, ])
69 | names(df) <- paste("x", as.character(1:6), sep = "")
70 |
71 | ggplot(df) + geom_point(aes(x = x1, y = x3), pch = ".") +
72 | geom_abline(slope = 1, intercept = 0) +
73 | annotate("text", x = 0.037, y = 0.042, parse = TRUE, label ="theta[3] > theta[1]") +
74 | annotate("text", x = 0.042, y = 0.037, parse = TRUE, label ="theta[1] > theta[3]")
75 |
76 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap12ANOVA.R:
--------------------------------------------------------------------------------
1 | #Chapter 12 ANOVA
2 | ILBoys <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/ILBoys.csv")
3 | anova(lm(Weight ~ MothersAge, data = ILBoys))
4 |
5 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1]
6 |
7 | summary(aov(Weight ~ MothersAge, data = ILBoys))
8 |
9 | #--------------------------------
10 | #Section 12.1.2 Permutation test approach
11 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1]
12 |
13 | n <- length(ILBoys$Weight)
14 | N <- 10^4 - 1
15 | results <- numeric(N)
16 | for (i in 1:N)
17 | {
18 | index <- sample(n)
19 | Weight.perm <- ILBoys$Weight[index]
20 | results[i] <- anova(lm(Weight.perm ~ MothersAge, data = ILBoys))$F[1]
21 | }
22 |
23 | (sum(results >= observed) + 1) / (N + 1)
24 |
--------------------------------------------------------------------------------
/Edition2/R/Chap12ANOVA.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 12 ANOVA"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 |
12 | ### Example 12.1
13 | Illinois baby boys
14 |
15 | ```{r}
16 |
17 | ILBoys <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/ILBoys.csv")
18 | anova(lm(Weight ~ MothersAge, data = ILBoys))
19 |
20 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1]
21 |
22 | summary(aov(Weight ~ MothersAge, data = ILBoys))
23 | ```
24 |
25 | ### Section 12.1.2 Permutation test approach
26 | ```{r}
27 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1]
28 |
29 | n <- length(ILBoys$Weight)
30 | N <- 10^4 - 1
31 | results <- numeric(N)
32 | for (i in 1:N)
33 | {
34 | index <- sample(n)
35 | Weight.perm <- ILBoys$Weight[index]
36 | results[i] <- anova(lm(Weight.perm ~ MothersAge, data = ILBoys))$F[1]
37 | }
38 |
39 | (sum(results >= observed) + 1) / (N + 1)
40 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap12ANOVA_Exer.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 12 ANOVA - Exercises"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 |
12 | ### Exercise 6
13 | Simulation
14 |
15 | ```{r}
16 | nA <- 50 # set sample sizes
17 | nB <- 50
18 | nC <- 50
19 | # create groups
20 | Group <- rep(c("A","B","C"), c(nA, nB, nC))
21 |
22 | counter <- 0
23 | N <- 10^4
24 |
25 | for (i in 1:N)
26 | {
27 | a <- rnorm(nA, 20, 3) # Draw samples
28 | b <- rnorm(nB, 20, 3)
29 | c <- rnorm(nC, 20, 3)
30 | X <- c(a, b, c) # Combine into one vector
31 |
32 | Pvalue <- anova(lm(X ~ Group))$P[1] # Extract P-value
33 | if (Pvalue < 0.05) # Reject H0?
34 | counter <- counter + 1 # If yes, increase counter
35 |
36 | }
37 |
38 | counter/N # proportion of times H0 rejected; should be close to alpha = 0.05 since H0 is true
39 | ```
--------------------------------------------------------------------------------
/Edition2/R/Chap12Anova_Exer.R:
--------------------------------------------------------------------------------
1 | #Chapter 12 ANOVA
2 |
3 | #Exercise 6 Simulation
4 |
5 | nA <- 50 # set sample sizes
6 | nB <- 50
7 | nC <- 50
8 | # create groups
9 | Group <- rep(c("A","B","C"), c(nA, nB, nC))
10 |
11 | counter <- 0
12 | N <- 10^4
13 |
14 | for (i in 1:N)
15 | {
16 | a <- rnorm(nA, 20, 3) # Draw samples
17 | b <- rnorm(nB, 20, 3)
18 | c <- rnorm(nC, 20, 3)
19 | X <- c(a, b, c) # Combine into one vector
20 |
21 | Pvalue <- anova(lm(X ~ Group))$P[1] # Extract P-value
22 | if (Pvalue < 0.05) # Reject H0?
23 | counter <- counter + 1 # If yes, increase counter
24 |
25 | }
26 |
27 | counter/N # proportion of times H0 rejected; should be close to alpha = 0.05 since H0 is true
28 |
--------------------------------------------------------------------------------
/Edition2/README.md:
--------------------------------------------------------------------------------
1 | # Mathematical Statistics with Resampling and R, 2nd edition (2018)
2 |
3 | This is an older edition. For the current edition, see
4 | [https://github.com/lchihara/MathStatsResamplingR](https://github.com/lchihara/MathStatsResamplingR)
5 |
6 |
7 | ## Second Edition
8 |
9 | [Author's website](https://sites.google.com/site/chiharahesterberg)
10 |
11 | [Publisher's website](https://www.wiley.com/en-us/Mathematical+Statistics+with+Resampling+and+R%2C+2nd+Edition-p-9781119416531)
12 |
13 | Available on:
14 |
15 | * [Google Books](https://books.google.com/books?id=t2hvDwAAQBAJ)
16 | * [Google Play Books](https://play.google.com/store/books/details/Laura_M_Chihara_Mathematical_Statistics_with_Resam?id=t2hvDwAAQBAJ)
17 | * [Amazon](https://www.google.com/url?q=https%3A%2F%2Fwww.amazon.com%2FMathematical-Statistics-Resampling-Laura-Chihara-ebook%2Fdp%2FB07HH3KXRH%2Fref%3Dsr_1_1%3Fs%3Dbooks%26ie%3DUTF8%26qid%3D1539059394%26sr%3D1-1%26keywords%3DChihara%2BHesterberg&sa=D&sntz=1&usg=AOvVaw25Q7F0vZTyz2h7LR3_xTe0)
18 |
--------------------------------------------------------------------------------
/Edition3/Chapters/c01_GSS2018Questions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Chapters/c01_GSS2018Questions.pdf
--------------------------------------------------------------------------------
/Edition3/Chapters/c06_Supplement.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Chapters/c06_Supplement.pdf
--------------------------------------------------------------------------------
/Edition3/Data/Data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Data/Data.zip
--------------------------------------------------------------------------------
/Edition3/Data/Readme.md:
--------------------------------------------------------------------------------
1 | Zip file contains data sets in csv format.
2 |
3 | Data are also available as an R package (resampledata3) from CRAN.
4 |
--------------------------------------------------------------------------------
/Edition3/Errata_Edition3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Errata_Edition3.pdf
--------------------------------------------------------------------------------
/Edition3/README.md:
--------------------------------------------------------------------------------
1 | # Mathematical Statistics with Resampling and R, Third Edition (2022)
2 |
3 |
4 | Data sets, R code, supplementary materials for the textbook Mathematical Statistics with Resampling and R
5 |
6 | ## Contents here
7 |
8 | [Chapters](Chapters)
9 | Supplemental material for chapters, including
10 | additional notes about data and advanced topics.
11 |
12 | [Data](Data) data as .csv files (they are also available as an R package,
13 | see below).
14 |
15 | [RScripts](RScripts) R scripts to supplement chapters.
16 |
17 |
18 | ## Data in an R package
19 |
20 | The data are available as an R package
21 | [resampledata3](https://CRAN.R-project.org/package=resampledata3)
22 | on
23 | [CRAN](https://cran.r-project.org/mirrors.html).
24 |
25 |
26 |
27 | ## Other websites
28 |
29 |
30 | The publisher's website is
31 | [Mathematical Statistics with Resampling and R, 3rd Edition](https://www.wiley.com/en-us/Mathematical+Statistics+with+Resampling+and+R%2C+3rd+Edition-p-9781119874034)
32 |
33 | Available on:
34 |
35 | * [Google Books](https://books.google.com/books?id=d7CAEAAAQBAJ)
36 | * [Google Play Books](https://play.google.com/store/books/details/Laura_M_Chihara_Mathematical_Statistics_with_Resam?id=d7CAEAAAQBAJ)
37 | * [Amazon](https://www.amazon.com/Mathematical-Statistics-Resampling-Laura-Chihara-ebook/dp/B0B99GCGQQ/ref=sr_1_fkmr2_2)
38 |
--------------------------------------------------------------------------------
/Edition3/RScripts/c02_RIntroEDA1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/RScripts/c02_RIntroEDA1.pdf
--------------------------------------------------------------------------------
/Edition3/RScripts/c02_RIntroEDA2.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introduction to R, part 2"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2022"
5 | output: html_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(resampledata3)
11 | library(ggplot2)
12 | library(dplyr)
13 | ```
14 |
15 |
16 | ### Vectors in R
17 |
18 | The basic data object in R is the vector.
19 | Even scalars are vectors of length 1.
20 |
21 | There are several ways to create vectors.
22 |
23 | The : operator creates sequences incrementing/decrementing
24 | by 1.
25 |
26 | ```{r}
27 | 1:10
28 | 5:-3
29 | ```
30 |
31 | The seq function also creates sequences.
32 | ```{r}
33 | seq(0, 3, by = .2)
34 | seq(0, 3, length = 15)
35 | ```
36 |
37 | To create vectors with no particular pattern, use the
38 | c() function (c for **c**ombine).
39 |
40 | ```{r}
41 | c(1, 4, 8, 2, 9)
42 | x <- c(2, 0, -4)
43 | x
44 | c(x, 0:5, x)
45 | ```
46 |
47 | For vectors of characters,
48 |
49 | ```{r}
50 | c("a", "b", "c", "d")
51 | ```
52 |
53 | or logical values (note that there are no double quotes):
54 |
55 | ```{r}
56 | c(TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)
57 | ```
58 |
59 | The rep command repeats values:
60 |
61 | ```{r}
62 | rep("a", 5)
63 | rep(c("a", "b"), 5)
64 | rep(c("a", "b"), c(5, 2))
65 | ```
66 |
67 | ### The class attribute
68 |
69 | Use data.class to determine the class attribute of an object.
70 |
71 | ```{r}
72 | state.name
73 | data.class(state.name)
74 | state.name == "Idaho"
75 | data.class(state.name == "Idaho")
76 |
77 | head(FlightDelays$Carrier)
78 | data.class(FlightDelays$Carrier)
79 | ```
80 |
81 |
82 | ### Basic Arithmetic
83 |
84 | ```{r}
85 | x <- 1:5
86 | x - 3
87 | x*10
88 | x/10
89 | x^2
90 | 2^x
91 | log(x)
92 |
93 | w <- 6:10
94 | w
95 | x*w #coordinate-wise multiplication
96 | ```
97 |
98 | #### Logical expressions
99 |
100 | ```{r}
101 | x < 3
102 | ```
103 |
104 | ### Subsetting a vector
105 |
106 | In many cases, we will want only a portion of a data set. For
107 | subsetting a vector, the basic syntax is vector[*index*].
108 | In particular, note the use of *brackets* to indicate that we are
109 | subsetting.
110 |
111 | ```{r}
112 | state.name # 50 states (alphabetical order)
113 | state.name[c(1, 25, 50)] # the 1st, 25th, and 50th
114 | state.name[-(1:10)] # remove the first 10.
115 |
116 | z <- c(8, 3, 0, 9, 9, 2, 1, 3)
117 | z
118 | z[4] # The fourth element of z
119 | z[c(1, 3, 4)] # The first, third and fourth elements
120 | z[-c(1, 3, 4)] # All elements except the first, third and fourth
121 | ```
122 |
123 | To return the values of z less than 4, we first introduce the
124 | which command:
125 |
126 | ```{r}
127 | which(z < 4) # which positions are z values < 4?
128 | index <- which(z < 4) # store in index
129 | index
130 | z[index] # return z[c(2, 3, 6, 7, 8)]
131 | ```
132 |
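133 |
134 | Alternatively, a logical vector can be used directly as an index; R keeps the positions where the condition is `TRUE`:
135 |
136 | ```{r}
137 | z[z < 4] # same result as z[which(z < 4)]
138 | ```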
133 | Suppose you want to find those observations when the delay length
134 | was greater than the mean delay length. We'll store this in a vector
135 | called index.
136 |
137 | ```{r}
138 | delay <- FlightDelays$Delay
139 | index <- which(delay > mean(delay))
140 | head(index)
141 | ```
142 |
143 | Thus, rows 2, 10, 12, 14, 15, and 16 contain the first six
144 | observations corresponding to flights whose delays were larger
145 | than the average delay length.
146 |
147 | ### Extracting parts of a data frame
148 |
149 | To subset particular rows of a data frame, use the filter command in the **dplyr** package.
150 |
151 | For example, to create a data frame with just the United Airlines flights:
152 | ```{r}
153 | United <- FlightDelays %>% filter(Carrier == "UA")
154 | ```
155 | The select command in the **dplyr** package allows you to extract just certain variables (columns). For example, to create a data frame containing just the Carrier and Delay variables:
156 |
157 | ```{r}
158 | FlightDelays2 <- FlightDelays %>% select(Carrier, Delay)
159 | ```
160 | Finally, we can combine these two actions to extract just certain rows and certain columns:
161 |
162 | ```{r}
163 | United2 <- FlightDelays %>% filter(Carrier == "UA") %>% select(Carrier, Delay)
164 | ```
165 |
166 | Now, suppose you want to work with a single variable in a data frame.
167 |
168 | ```{r}
169 | delay <- FlightDelays %>% select(Delay)
170 | head(delay)
171 | mean(delay)
172 | data.class(delay)
173 | ```
174 | The problem is that the select command returns a data frame, while the mean command operates on vectors.
175 |
176 | If we just want to extract one variable from a data frame and we want that variable to be a vector, use the pull command.
177 |
178 | ```{r}
179 | delay <- FlightDelays %>% pull(Delay)
180 | mean(delay)
181 |
182 | #Alternatively, we have seen that the $ operator can be used
183 | delay <- FlightDelays$Delay
184 | ```
185 |
186 |
187 |
188 |
--------------------------------------------------------------------------------
/Edition3/RScripts/c03_PermutationTests.R:
--------------------------------------------------------------------------------
1 | #Chapter 3 Permutation Tests
2 | library(resampledata3)
3 | library(dplyr)
4 | library(ggplot2)
5 |
6 | #Section 3.3
7 |
8 | #Beerwings data set
9 | Beerwings %>% group_by(Gender) %>% summarize(mean(Hotwings))
10 | observed <- 14.5333 - 9.3333 # store observed mean difference
11 | observed
12 |
13 | hotwings <- Beerwings$Hotwings
14 | # Alternative syntax using the dplyr package:
15 | # hotwings <- Beerwings %>% pull(Hotwings)
16 |
17 | N <- 10^5 - 1 # number of times to repeat this process
18 | result <- numeric(N) # space to save the random differences
19 | for (i in 1:N)
20 | { # sample of size 15, from 1 to 30, without replacement
21 | index <- sample(30, size = 15, replace = FALSE)
22 | result[i] <- mean(hotwings[index]) - mean(hotwings[-index])
23 | }
24 |
25 | ggplot() + geom_histogram(aes(result), bins = 8) +
26 | geom_vline(xintercept = observed, linetype="dashed")
27 |
28 | (sum(result >= observed) + 1)/(N + 1) # P-value
29 |
30 | #-----
31 | #Verizon data set
32 |
33 | Verizon %>% group_by(Group) %>% summarize(mean(Time))
34 | Time <- Verizon$Time
35 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time)
36 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time)
37 |
38 | observed <- mean(TimeILEC) - mean(TimeCLEC)
39 | observed
40 |
41 | N <- 10^4-1
42 | result <- numeric(N)
43 | for (i in 1:N)
44 | {
45 | index <- sample(1687, size = 1664, replace = FALSE)
46 | result[i] <- mean(Time[index]) - mean(Time[-index])
47 | }
48 |
49 | ggplot() + geom_histogram(aes(result), bins = 8) +
50 | geom_vline(xintercept = observed, linetype = "dashed")
51 |
52 | (sum(result <= observed) + 1)/(N + 1)
53 |
54 | #---------
55 | #Other statistics
56 | #Example 3.6
57 | #median
58 | observed <- median(TimeILEC) - median(TimeCLEC)
59 | N <- 10^4-1
60 | result <- numeric(N)
61 | for (i in 1:N)
62 | {
63 | index <- sample(1687, size = 1664, replace = FALSE)
64 | result[i] <- median(Time[index]) - median(Time[-index])
65 | }
66 | (sum(result <= observed) + 1)/(N + 1) # P-value
67 |
68 | #trimmed mean
69 | #modifications to above
70 | observed <- (mean(TimeILEC, trim = .25) -
71 | mean(TimeCLEC, trim = .25))
72 | #within for loop above, change to:
73 | result[i] <- (mean(Time[index], trim = .25) -
74 | mean(Time[-index], trim = .25))
75 |
76 |
77 | #for proportion of time ILEC times > 10
78 | observed <- mean(TimeILEC > 10) - mean(TimeCLEC > 10)
79 | #and in the for loop, modify to
80 | result[i] <- mean(Time[index] > 10) - mean(Time[-index] > 10)
81 |
82 | #for ratio of variances
83 | observed <- var(TimeILEC) / var(TimeCLEC)
84 | result[i] <- var(Time[index]) / var(Time[-index])
85 |
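86 | #For reference, a sketch (not in the original script) assembling one of the
87 | #modifications above -- the trimmed mean -- into a complete loop:
88 | observed <- (mean(TimeILEC, trim = .25) -
89 |     mean(TimeCLEC, trim = .25))
90 | N <- 10^4-1
91 | result <- numeric(N)
92 | for (i in 1:N)
93 | {
94 | index <- sample(1687, size = 1664, replace = FALSE)
95 | result[i] <- (mean(Time[index], trim = .25) -
96 |     mean(Time[-index], trim = .25))
97 | }
98 | (sum(result <= observed) + 1)/(N + 1) # P-value
99 |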
86 | #Recidivism case study
87 | #Example 3.8
88 |
89 | library(tidyr)
90 | data <- Recidivism %>% drop_na(Age25) %>%
91 | select(Age25, Recid)
92 | table(data$Age25)
93 | proportions(table(data$Age25, data$Recid), 1)
94 |
95 | Recid <- data$Recid # create vector
96 | observed <- .365 - .306
97 | N <- 10^4 - 1
98 | result <- numeric(N)
99 | for (i in 1:N)
100 | {
101 | index <- sample(17019, size = 3077, replace = FALSE)
102 | result[i] <- mean(Recid[index]=="Yes") -
103 | mean(Recid[-index]=="Yes")
104 | }
105 | 2*(sum(result >= observed)+1)/(N+1)
106 |
107 | #Example 3.9
108 | #Pew Research study on Faith among Black Americans
109 | pooled.data <- rep(c(1,0), c(1068, 1283)) # create vector
110 | observed <- (963/2094) - (105/257) # observed difference
111 | # (Mill-Gen Z)
112 | N <- 10^4-1
113 | result <- numeric(N)
114 |
115 | for (i in 1:N)
116 | {
117 | index <- sample(2351, 2094, replace = FALSE)
118 | result[i] <- mean(pooled.data[index]) -
119 | mean(pooled.data[-index])
120 | }
121 | 2 * (sum(result >= observed)+1) / (N+1)
122 |
123 | #-----------------------------------------
124 |
125 | #Section 3.4 Matched pairs
126 | #Diving
127 | Diff <- Diving2017$Final - Diving2017$Semifinal #difference in two scores
128 | observed <- mean(Diff) #mean of difference
129 |
130 | N <- 10^5-1
131 | result <- numeric(N)
132 |
133 | for (i in 1:N)
134 | {
135 | Sign <- sample(c(-1,1), 12, replace=TRUE) #random vector of 1's or -1's
136 | Diff2 <- Sign*Diff #random pairs (a-b) -> (b-a)
137 | result[i] <- mean(Diff2) #mean of difference
138 | }
139 |
140 | ggplot() + geom_histogram(aes(result), bins = 8) +
141 |   geom_vline(xintercept = observed, linetype = "dashed")
142 |
143 | 2 * (sum(result >= observed)+1) / (N+1) #P-value
144 |
--------------------------------------------------------------------------------
/Edition3/RScripts/c03_SolnExercise.R:
--------------------------------------------------------------------------------
1 | #Chapter 3: Permutation tests
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 |
6 | # Exercise 7: checking that different test statistics give the same P-value
7 | N <- 10^4 - 1
8 | table(FlightDelays$Carrier)
9 |
10 | FlightDelays %>% group_by(Carrier) %>% summarize(mean(Delay), sum(Delay))
11 |
12 | #Optionally, using base R
13 | tapply(FlightDelays$Delay, FlightDelays$Carrier, mean)
14 | tapply(FlightDelays$Delay, FlightDelays$Carrier, sum)
15 | Delay <- FlightDelays$Delay # extract Delay vector (used in the loop below)
16 | observedSumUA <- 17949
17 | observedmeanUA <- 15.98308
18 | observedmeanDiff <- 15.98308 - 10.09738
19 |
20 | sumUA <- numeric(N)
21 | meanUA <- numeric(N)
22 | meanDiff <- numeric(N)
23 | set.seed(2)
24 | for (i in 1:N) {
25 | index <- sample(4029, 1123, replace = FALSE)
26 | sumUA[i] <- sum(Delay[index])
27 | meanUA[i] <- mean(Delay[index])
28 | meanDiff[i] <- mean(Delay[index]) - mean(Delay[-index])
29 | }
30 |
31 | 2 * (sum(sumUA >= observedSumUA) + 1) / (N + 1) #P-value
32 |
33 | 2 * (sum(meanUA >= observedmeanUA) + 1) / (N + 1) #P-value
34 |
35 | 2 * (sum(meanDiff >= observedmeanDiff) + 1) / (N + 1) #P-value
36 |
--------------------------------------------------------------------------------
/Edition3/RScripts/c04_SamplingDistributions.R:
--------------------------------------------------------------------------------
1 | #Chapter 4
2 | #Sampling Distributions
3 | library(ggplot2) # needed for the ggplot calls below
4 | #Example 4.2
5 | #Draw 1000 random samples of size 100 from the exponential
6 | #distribution with lambda = 1/15
7 | Xbar <- numeric(1000) # space for results (vector of 0's)
8 | for (i in 1:1000)
9 | {
10 | x <- rexp(100, rate = 1/15) # draw random sample of size 100
11 | Xbar[i] <- mean(x) # compute mean, save in position i
12 | }
13 |
14 | df <- data.frame(Xbar)
15 | ggplot(df, aes(Xbar)) + geom_histogram(bins = 10)
16 | ggplot(df, aes(sample = Xbar)) + geom_qq() + geom_qq_line()
17 | mean(Xbar)
18 | sd(Xbar)
19 |
20 | #Example 4.3
21 | #Sampling distribution of max from Unif[0,1]
22 |
23 | maxY <- numeric(1000)
24 | for (i in 1:1000)
25 | {
26 | y <- runif(12) # draw random sample of size 12
27 | maxY[i] <- max(y) # find max, save in position i
28 | }
29 | df <- data.frame(maxY)
30 | ggplot(df, aes(maxY)) + geom_histogram(bins = 10)
31 |
32 | #----------------------------------------
33 | #Example 4.6
34 | #Sum of two values drawn from two different Poisson distributions
35 | X <- rpois(10^4, 5) # Draw 10^4 values from Pois(5)
36 | Y <- rpois(10^4, 12) # Draw 10^4 values from Pois(12)
37 | W <- X + Y
38 |
39 | df1 <- data.frame(W)
40 | df2 <- data.frame(x = 2:35, y = dpois(2:35,17))
41 | ggplot(df1, aes(W)) +
42 | geom_histogram(aes(y=stat(density)), color = "white",
43 | breaks=seq(2, 36, by = 2)) +
44 | geom_line(data = df2, aes(x = x, y = y)) +
45 | geom_point(data = df2, aes(x = x, y = y), pch = 1) + xlab("")
46 |
47 | mean(W) #compare to theoretical: Pois(5) + Pois(12) = Pois(17), so mean = 17
48 | var(W)  #theoretical variance is also 17
49 |
50 | #Example 4.7
51 | #Sampling distribution of mean of sample of size 30 from Gamma(5, 2)
52 | Xbar <- numeric(1000)
53 | for (i in 1:1000)
54 | {
55 | x <- rgamma(30, shape = 5, rate = 2)
56 | Xbar[i] <- mean(x)
57 | }
58 |
59 | df <- data.frame(Xbar)
60 | ggplot(df, aes(x = Xbar)) +
61 | geom_histogram(aes(y = stat(density)), color = "white", bins = 10) +
62 |   stat_function(fun = dnorm, args = list(mean = 5/2, sd = 0.204)) +
63 | labs(x = "Means", y = "Density")
64 | ggplot(df, aes(sample = Xbar)) + geom_qq() + geom_qq_line()
65 | mean(Xbar)
66 | sd(Xbar)
67 |
68 | #----------------------------------------------
69 | #Example 4.10
70 | #
71 | dbinom(25, 120, .3) # P(X = 25) for X ~ Binom(120, 0.3)
72 | pbinom(25, 120, .3) # P(X <= 25)
73 |
74 |
75 |
--------------------------------------------------------------------------------
/Edition3/RScripts/c05_Bootstrap.R:
--------------------------------------------------------------------------------
1 | ##Chapter 5 Bootstrap
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 |
6 | #Example 5.2
7 | gamSample <- rgamma(16, shape = 1, rate = 1/2) # random sample of size 16 from Gamma(1, 1/2); assumed here, since the original script uses gamSample without defining it
8 | N <- 10^5
9 | mean.boot <- numeric(N)
10 | for (i in 1:N)
11 | {
12 | x <- sample(gamSample, 16, replace = TRUE) # draw resample
13 | mean.boot[i] <- mean(x) # compute mean, store in mean.boot
14 | }
15 |
16 | mean(mean.boot)
17 | sd(mean.boot)
18 |
19 | df <- data.frame(mean.boot)
20 | ggplot(df, aes(mean.boot)) +
21 | geom_histogram(bins = 20, color = "white")
22 |
23 | #-----------------
24 | #Example 5.3
25 | ggplot(Bangladesh, aes(Arsenic)) +
26 | geom_histogram(bins = 10, color = "white")
27 | ggplot(Bangladesh, aes(sample = Arsenic)) +
28 | geom_qq() + geom_qq_line()
29 |
30 | Arsenic <- Bangladesh$Arsenic
31 |
32 | n <- length(Arsenic)
33 | N <- 10^4
34 | mean.boot <- numeric(N)
35 | for (i in 1:N)
36 | {
37 | x <- sample(Arsenic, n, replace = TRUE)
38 | mean.boot[i] <- mean(x)
39 | }
40 |
41 | df <- data.frame(mean.boot)
42 | ggplot(df, aes(mean.boot)) +
43 | geom_histogram(bins = 15, color = "white") +
44 | geom_vline(xintercept = mean(mean.boot), color = "red", lty = 2)
45 | ggplot(df, aes(sample = mean.boot)) + geom_qq() + geom_qq_line()
46 |
47 | mean(mean.boot)
48 | mean(mean.boot)-mean(Arsenic)
49 | sd(mean.boot)
50 |
51 | quantile(mean.boot, c(0.025, 0.975))
52 |
53 | #----------------------------------
54 | #Example 5.4 Skateboarders
55 | testF <- Skateboard %>% filter(Experimenter == "Female") %>%
56 | pull(Testosterone)
57 | testM <- Skateboard %>% filter(Experimenter == "Male") %>%
58 | pull(Testosterone)
59 |
60 | observed <- mean(testF) - mean(testM) #observed difference
61 | observed
62 |
63 | nf <- length(testF) #sample size
64 | nm <- length(testM) #sample size
65 |
66 | N <- 10^4
67 | mean.boot <- numeric(N)
68 |
69 | for (i in 1:N)
70 | {
71 | resampleF <- sample(testF, nf, replace = TRUE)
72 | resampleM <- sample(testM, nm, replace = TRUE)
73 | mean.boot[i] <- mean(resampleF)-mean(resampleM)
74 | }
75 |
76 | df <- data.frame(mean.boot)
77 | ggplot(df, aes(mean.boot)) +
78 | geom_histogram(bins = 15, color = "white") +
79 | geom_vline(xintercept = observed, color = "green", lty = 2)
80 | ggplot(df, aes(sample = mean.boot)) + geom_qq() + geom_qq_line()
81 |
82 | mean(testF) - mean(testM)
83 | mean(mean.boot)
84 | sd(mean.boot)
85 | quantile(mean.boot, c(0.025, 0.975))
86 | mean(mean.boot) - (mean(testF) - mean(testM)) # bias
87 |
88 | #-------------
89 | #Example 5.6
90 | #Verizon data
91 |
92 | TimeILEC <- Verizon %>% filter(Group=="ILEC") %>% pull(Time)
93 | TimeCLEC <- Verizon %>% filter(Group=="CLEC") %>% pull(Time)
94 |
95 | observed <- mean(TimeILEC)/mean(TimeCLEC)
96 | observed
97 |
98 | nILEC <- length(TimeILEC)
99 | nCLEC <- length(TimeCLEC)
100 |
101 | N <- 10^4
102 | ratio.boot <- numeric(N)
103 |
104 | for (i in 1:N)
105 | {
106 | resampleILEC <- sample(TimeILEC, nILEC, replace = TRUE)
107 | resampleCLEC <- sample(TimeCLEC, nCLEC, replace = TRUE)
108 | ratio.boot[i] <- mean(resampleILEC)/mean(resampleCLEC)
109 | }
110 |
111 | df <- data.frame(ratio.boot)
112 | ggplot(df, aes(ratio.boot)) +
113 | geom_histogram(bins = 15, color="white") +
114 | xlab("Ratio of means") +
115 | geom_vline(xintercept = observed, lty = 2, color = "red") +
116 | geom_vline(xintercept = mean(ratio.boot), lty = 3, color = "blue")
117 |
118 | ggplot(df, aes(sample = ratio.boot)) +
119 | geom_qq() + geom_qq_line()
120 |
121 | mean(ratio.boot)
122 | sd(ratio.boot)
123 | quantile(ratio.boot, c(0.025, 0.975))
124 | mean(ratio.boot) - mean(TimeILEC)/mean(TimeCLEC)
125 |
126 | #Example 5.7 Verizon continued
127 | #modifications to above for proportion of times the ILEC
128 | #delay time was greater than 24 hours
129 | N <- 10^4
130 |
131 | prop.boot <- numeric(N)
132 | for (i in 1:N)
133 | {
134 | resampleILEC <- sample(TimeILEC, nILEC, replace = TRUE)
135 | prop.boot[i] <- mean(resampleILEC > 24)
136 | }
137 |
138 | quantile(prop.boot, c(0.025, 0.975))
139 | #--------------------
140 | #Example 5.8
141 | #Faith among Black Americans
142 | genZ <- rep(c(1, 0), c(118, 139))
143 | genX <- rep(c(1, 0), c(965, 1510))
144 |
145 | observed <- mean(genZ) - mean(genX) # observed diff.
146 | observed
147 |
148 | N <- 10^4
149 | prop.boot <- numeric(N)
150 | for (i in 1:N)
151 | {
152 | resampleZ <- sample(genZ, 257, replace = TRUE)
153 | resampleX <- sample(genX, 2475, replace = TRUE)
154 | prop.boot[i] <- mean(resampleZ) - mean(resampleX)
155 | }
156 |
157 | quantile(prop.boot, c(0.025, 0.975))
158 |
159 | #----------------------------------------
160 | #Example 5.9
161 | #Relative risk
162 |
163 | highbp <- rep(c(1,0), c(55,3283)) #high bp sample
164 | lowbp <- rep(c(1,0), c(21,2655)) #low bp sample
165 |
166 | N <- 10^4
167 | rr.boot <- numeric(N)
168 |
169 | for (i in 1:N)
170 | {
171 | resampleHigh <- sample(highbp, 3338, replace = TRUE)
172 | resampleLow <- sample(lowbp, 2676, replace = TRUE)
173 |
174 |     rr.boot[i] <- mean(resampleHigh)/mean(resampleLow) #relative risk
175 |
176 | }
177 |
178 | quantile(rr.boot, c(0.025, 0.975))
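179 |
180 | #Not in the original script: the observed relative risk, for comparison
181 | #with the bootstrap percentile interval above
182 | mean(highbp)/mean(lowbp) # (55/3338)/(21/2676), approximately 2.1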
--------------------------------------------------------------------------------
/Edition3/RScripts/c07_MoreConfidenceIntervals.R:
--------------------------------------------------------------------------------
1 | #Chapter 7 Classical confidence intervals
2 | library(resampledata3)
3 | library(dplyr)
4 | library(ggplot2)
5 |
6 | #Example 7.1
7 | #Confidence intervals for the mean of samples of size 30 drawn from N(25, 4^2)
8 | counter <- 0 # set counter to 0
9 | df <- data.frame(x = c(22, 28), y = c(1,100))
10 | p <- ggplot(df, aes(x = x, y = y)) + geom_vline(xintercept = 25)
11 |
12 | for (i in 1:1000)
13 | {
14 | x <- rnorm(30, 25, 4) # draw a random sample of size 30
15 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit
16 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit
17 | if (L < 25 && 25 < U) # check if 25 is in interval
18 | counter <- counter + 1 # if yes, increase counter by 1
19 | if (i <= 100) # plot first 100 intervals
20 | p <- p + annotate("segment", x = L, xend = U, y = i, yend = i)
21 | }
22 |
23 | p
24 | counter/1000 # proportion of times interval contains mu.
25 |
26 | #------------------------------------
27 | #Section 7.1.2
28 | #Confidence intervals for mean of samples drawn from normal
29 | #distribution, mean and variance unknown
30 | N <- 10^4
31 | w <- numeric(N)
32 | n <- 15 # sample size
33 | for (i in 1:N)
34 | {
35 | x <- rnorm(n, 25, 7) # draw 15 from N(25, 7^2)
36 | xbar <- mean(x)
37 | s <- sd(x)
38 | w[i] <- (xbar-25) / (s/sqrt(n))
39 | }
40 |
41 | df <- data.frame(w)
42 | ggplot(df, aes(sample = w)) + geom_qq(size = .8) +
43 | geom_qq_line()
44 |
45 | #-----------------------------------
46 | #Example 7.5
47 | pt(2.8, 27)
48 | qt(0.95, 27)
49 |
50 | #------------------------------------
51 | #Example 7.6
52 | girls <- NCBirths2004 %>% filter(Gender == "Female") %>%
53 | pull(Weight)
54 | t.test(girls, conf.level = .99)$conf
55 |
56 | #----------------------------------------------
57 | #Example 7.7
58 | #Samples from right-skewed Gamma(5,2)
59 | tooLow <- 0 # set counter to 0
60 | tooHigh <- 0 # set counter to 0
61 | n <- 20 # sample size
62 | q <- qt(0.975, n-1) # quantile
63 | N <- 10^5
64 | for (i in 1:N)
65 | {
66 | x <- rgamma(n, shape = 5, rate = 2)
67 | xbar <- mean(x)
68 | s <- sd(x)
69 | L <- xbar - q*s/sqrt(n)
70 | U <- xbar + q*s/sqrt(n)
71 | if (U < 5/2) # Does right endpt miss 5/2?
72 | tooLow <- tooLow + 1 # If yes, increase counter
73 | if (5/2 < L) # Does left endpt miss 5/2?
74 | tooHigh <- tooHigh + 1 # If yes, increase counter
75 | }
76 | tooLow/N
77 | tooHigh/N
78 |
79 | #-------------------------------------------
80 | #Example 7.8
81 | t.test(Response ~ Treatment, data = Reading)$conf
82 |
83 | #------------------------------------------
84 | #Example 7.14
85 | t.test(NCBirths2004$Weight, alt = "greater")$conf
86 |
87 | #-----------------------------------------
88 | #Example 7.17
89 | prop.test(1385, 2193, conf.level = .9)$conf
90 |
91 | prop.test(1385, 2193, conf.level = .9, alt = "greater")$conf
92 |
93 | #----------------------------------------
94 | #Example 7.20
95 |
96 | prop.test(c(172, 223), c(674, 676))$conf
97 |
98 | #---------------------------------------
99 | #Example 7.21
100 | #Bootstrap t confidence interval
101 | Arsenic <- Bangladesh$Arsenic
102 | xbar <- mean(Arsenic)
103 | N <- 10^4
104 | n <- length(Arsenic)
105 | Tstar <- numeric(N)
106 | for (i in 1:N)
107 | {
108 | x <- sample(Arsenic, size = n, replace = T)
109 | Tstar[i] <- (mean(x)-xbar) / (sd(x)/sqrt(n))
110 | }
111 | quantile(Tstar, c(0.025, 0.975))
112 |
113 | xbar - quantile(Tstar, c(0.975, 0.025)) * sd(Arsenic)/sqrt(n)
114 |
115 | #--------------------------------------------
116 | #Example 7.22
117 | #Bootstrap t CI for difference in means
118 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time)
119 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time)
120 |
121 | thetahat <- mean(TimeILEC) - mean(TimeCLEC)
122 | nx <- length(TimeILEC) # nx=1664
123 | ny <- length(TimeCLEC) # ny=23
124 | SE <- sqrt(var(TimeILEC)/nx + var(TimeCLEC)/ny)
125 |
126 | N <- 10^4
127 | Tstar <- numeric(N)
128 | for (i in 1:N)
129 | {
130 | bootx <- sample(TimeILEC, nx, replace = TRUE)
131 | booty <- sample(TimeCLEC, ny, replace = TRUE)
132 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) /
133 | sqrt(var(bootx)/nx + var(booty)/ny)
134 | }
135 | thetahat - quantile(Tstar, c(.975, .025)) * SE
136 | t.test(TimeILEC, TimeCLEC)$conf # for comparison
137 |
138 | #---------------------------------------------
139 | #Example 7.23
140 | #Bootstrap t with standard errors estimated by an iterated bootstrap
141 | Arsenic <- Bangladesh$Arsenic
142 | estimate <- mean(Arsenic, trim = 0.25) # 35.95985
143 |
144 | N <- 10^4 # outer loop
145 | N2 <- 10^2 # inner loop
146 | n <- length(Arsenic)
147 | Tstar <- numeric(N)
148 | estimateStar <- numeric(N)
149 | seStar <- numeric(N)
150 |
151 | for (i in 1:N)
152 | {
153 | x <- sample(Arsenic, size = n, replace = T)
154 |
155 | # Inner loop to estimate standard error based on x
156 | estimate2 <- numeric(N2)
157 | for (j in 1:N2)
158 | {
159 | x2 <- sample(x, size = n, replace = T)
160 | estimate2[j] <- mean(x2, trim = 0.25)
161 | }
162 |
163 | estimateStar[i] <- mean(x, trim = 0.25)
164 | seStar[i] <- sd(estimate2)
165 | Tstar[i] <- (estimateStar[i] - estimate) / seStar[i]
166 | }
167 |
168 |
169 | sd(estimateStar) # Standard error
170 | quantile(Tstar, c(0.025, 0.975))
171 | # Bootstrap t interval
172 | estimate - quantile(Tstar, c(.975, .025)) * sd(estimateStar)
173 |
174 | #Ordinary t interval with bootstrap SE
175 | estimate + qt(c(0.025, 0.975), n-1) * sd(estimateStar)
176 |
--------------------------------------------------------------------------------
/Edition3/RScripts/c08_MoreHypothesisTests.R:
--------------------------------------------------------------------------------
1 | #Chapter 8 More Hypothesis Tests
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 |
6 | #Example 8.4
7 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater")
8 |
9 | #Bootstrap t test approach
10 | Arsenic <- Bangladesh$Arsenic
11 | observedT <- t.test(Arsenic, mu = 100)$statistic
12 | xbar <- mean(Arsenic)
13 | n <- length(Arsenic)
14 | N <- 10^5
15 | Tstar <- numeric(N)
16 |
17 | for (i in 1:N)
18 | {
19 | bootx <- sample(Arsenic, n, replace = TRUE)
20 | Tstar[i] <- (mean(bootx)- xbar)/(sd(bootx)/sqrt(n))
21 | }
22 |
23 | (sum(Tstar >= observedT)+1)/(N+1)
24 |
25 |
26 | #------------------------------------------
27 | #Example 8.5
28 | #Comparing two means
29 | t.test(Weight ~ Smoker, data = NCBirths2004, alt = "greater")
30 |
31 | #-------------------------------------------
32 | #Example 8.6
33 | prop.test(c(108, 51), c(143, 119))
34 |
35 | #-------------------------------------------
36 | #Example 8.15
37 | sum(dbinom(5:8, 8, 0.3185))
38 | 1 - pbinom(4, 8, 0.3185) #same
39 |
40 | #---------------------------------------------
41 | #Example 8.19
42 | binom.test(7, 21, 0.5)
43 |
44 | pbinom(7, 21, 0.5696755)
45 | 1 - pbinom(6, 21, 0.1458769)
46 |
47 | #-----------------------------
--------------------------------------------------------------------------------
/Edition3/RScripts/c09_Regression.R:
--------------------------------------------------------------------------------
1 | #Chapter 9
2 | #Regression
3 | library(resampledata3)
4 | library(ggplot2)
5 | library(dplyr)
6 |
7 | #Section 9.2
8 | #base R
9 | cor(Spruce$Ht.change, Spruce$Di.change)
10 | #dplyr package
11 | Spruce %>% summarize(corr = cor(Ht.change, Di.change))
12 |
13 | #ggplot2 package
14 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point()
15 | #base R
16 | plot(Di.change ~ Ht.change, data = Spruce)
17 |
18 | #-------------------------------------------------
19 | #Example 9.3
20 |
21 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce)
22 | spruce.lm
23 |
24 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point() +
25 | geom_smooth(method = lm, se = FALSE)
26 |
27 | fitted(spruce.lm)
28 | predict(spruce.lm) #same
29 |
30 | (nrow(Spruce) - 1) * var(Spruce$Ht.change) # equals sum((Ht.change - mean)^2), i.e., Sxx
31 |
32 | #-----------------------------------------------
33 | #Section 9.3
34 | Spruce$Residuals <- resid(spruce.lm)
35 | ggplot(Spruce, aes(x = Ht.change, y = Residuals)) +
36 | geom_point() + geom_hline(yintercept = 0) +
37 | geom_smooth(method = "loess", se = FALSE, span = 2)
38 |
39 | #----------------------------------------------
40 | #Example 9.8
41 | skate.lm <- lm(Free ~ Short, data = Skating2010)
42 | summary(skate.lm)
43 |
44 | #Section 9.5
45 | #Bootstrapping correlation, slope, intercept,
46 |
47 | N <- 10^4
48 | cor.boot <- numeric(N)
49 | beta.boot <- numeric(N)
50 | alpha.boot <- numeric(N)
51 | yPred.boot <- numeric(N)
52 | n <- nrow(Skating2010) # number of skaters = 24
53 | for (i in 1:N)
54 | {
55 | index <- sample(n, replace = TRUE) # sample from 1,2,...,n
56 | Skate.boot <- Skating2010[index, ] # resampled data
57 |
58 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free)
59 |
60 | #recalculate linear model estimates
61 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot)
62 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept
63 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope
64 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i]
65 | }
66 |
67 | mean(cor.boot)
68 | sd(cor.boot)
69 | quantile(cor.boot, c(.025,.975))
70 |
71 | observed <- cor(Skating2010$Short, Skating2010$Free)
72 |
73 | df <- data.frame(cor.boot, beta.boot, alpha.boot, yPred.boot)
74 |
75 | ggplot(df, aes(x = cor.boot)) +
76 | geom_histogram(bins = 20, color = "white") +
77 | geom_vline(xintercept = observed, color = "red", lty = 2)
78 |
79 | #--------------------------------------------
80 | #Section 9.5.1 Permutation Tests
81 |
82 | N <- 9999
83 | n <- nrow(Skating2010) # number of observations
84 | result <- numeric(N)
85 | observed <- cor(Skating2010$Short, Skating2010$Free)
86 | for (i in 1:N)
87 | {
88 | index <- sample(n, replace=FALSE)
89 | Short.permuted <- Skating2010$Short[index]
90 | result[i] <- cor(Short.permuted, Skating2010$Free)
91 | }
92 | (sum(observed <= result) + 1) / (N + 1) # P-value
93 |
94 | #----------------------------------------------
95 | #Example 9.12
96 | #Fatalities data
97 | glm(Alcohol ~ Age, data = Fatalities, family = binomial)
98 | f <- function(x){exp(-0.123-0.029*x)/(1+exp(-0.123-0.029*x))}
99 |
100 | ggplot(Fatalities, aes(x = Age, y = Alcohol)) + geom_point() +
101 | stat_function(fun = f)
102 |
103 | #alternative way to define f
104 | f <- function(x){plogis(-0.123 - 0.029*x)}
105 |
106 | #------------------------------------------
107 | #Section 9.6
108 | #Inference for logistic regression
109 | fit <- glm(Alcohol ~ Age, data = Fatalities,
110 | family = binomial)
111 | data.class(fit) # is a "glm" object, so for help use:
112 | help(glm)
113 |
114 | fit # prints the coefficients and other basic info
115 | coef(fit) # the coefficients as a vector
116 | summary(fit) # gives standard errors for coefficients, etc.
117 |
118 |
119 | # Full bootstrap - slope coeff. and prediction at age 20
120 | N <- 10^3
121 | n <- nrow(Fatalities) # number of observations
122 | alpha.boot <- numeric(N)
123 | beta.boot <- numeric(N)
124 | pPred.boot <- numeric(N)
125 |
126 | for (i in 1:N)
127 | {
128 | index <- sample(n, replace = TRUE)
129 | Fatal.boot <- Fatalities[index, ] # resampled data
130 |
131 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot,
132 | family = binomial)
133 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept
134 | beta.boot[i] <- coef(fit.boot)[2] # new slope
135 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i])
136 | }
137 |
138 | quantile(beta.boot, c(.025, .975)) # 95% percentile CI
139 | df <- data.frame(alpha.boot, beta.boot, pPred.boot)
140 | ggplot(df, aes(x = beta.boot)) +
141 | geom_histogram(bins = 20, color = "white")
142 | ggplot(df, aes(sample = beta.boot)) + geom_qq() + geom_qq_line()
143 |
144 |
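145 | #Not in the text: the same percentile approach applies to the
146 | #predicted probability at age 20 stored in pPred.boot
147 | quantile(pPred.boot, c(.025, .975))
148 |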
--------------------------------------------------------------------------------
/Edition3/RScripts/c10_CategoricalData.R:
--------------------------------------------------------------------------------
1 | #Chapter 10
2 | #Categorical data
3 | library(resampledata3)
4 | library(ggplot2)
5 | library(dplyr)
6 |
7 | #Section 10.2 Permutation Test of Independence
8 | chisq.test(GSS2018$Degree, GSS2018$DeathPenalty, simulate.p.value = TRUE, B = 10^5-1)
9 | mat <- table(GSS2018$Degree, GSS2018$DeathPenalty)
10 | chisq.test(mat, simulate.p.value = TRUE, B = 10^5-1)
11 |
12 | #Section 10.3
13 | 1 - pchisq(50.449, 4)
14 |
15 | chisq.test(GSS2018$Degree, GSS2018$DeathPenalty)
16 |
17 | mat <- rbind(c(42, 50), c(30, 87))
18 | chisq.test(mat)
19 | fisher.test(mat)
20 |
21 | #Section 10.4 Test of Homogeneity
22 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
23 | candy.mat
24 |
25 | chisq.test(candy.mat)
26 |
27 | #Section 10.5
28 | qchisq(c(.2, .4, .6, .8), 10)
29 |
30 |
31 | Homeruns <- Phillies2009$Homeruns
32 |
33 | lambda <- mean(Homeruns)
34 | dpois(0:4, lambda)
35 | table(Homeruns)
36 |
37 | table(Homeruns)/162
38 |
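39 | #Not in the text: a sketch of a chi-square goodness-of-fit
40 | #comparison of observed counts to Poisson expected counts, lumping
41 | #5 or more home runs into one cell so the probabilities sum to 1
42 | obs <- c(sum(Homeruns == 0), sum(Homeruns == 1), sum(Homeruns == 2),
43 |          sum(Homeruns == 3), sum(Homeruns == 4), sum(Homeruns >= 5))
44 | p <- c(dpois(0:4, lambda), 1 - ppois(4, lambda))
45 | expected <- 162 * p
46 | sum((obs - expected)^2 / expected)  #chi-square statistic
47 | 1 - pchisq(sum((obs - expected)^2 / expected), df = 4) #6 cells - 1 - 1 estimated parameter
48 |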
--------------------------------------------------------------------------------
/Edition3/RScripts/c10_PermTestIndependence.R:
--------------------------------------------------------------------------------
1 | #------------------------------------------------
2 | #Chapter 10 Categorical data
3 | #Implementation of the permutation test of independence
4 | #This function computes the chi-square
5 | #test statistic
6 |
7 | #
8 | chisq <- function(observed, print = TRUE) {
9 | # Chi-square statistic for independence in a contingency table,
10 | # with related data exploration.
11 | # observed is the observed contingency table
12 |
13 | observedWithTotals <- cbind(observed, total = rowSums(observed))
14 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
15 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
16 | statistic <- sum((observed-expected)^2/expected)
17 | if (print)
18 | {
19 | cat("Observed, with totals:\n")
20 | print(observedWithTotals)
21 | cat("\nRow Fractions:\n")
22 | print(round(observed / rowSums(observed), 3))
23 | cat("\nColumn Fractions:\n")
24 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
25 |
26 | cat("\nExpected:\n")
27 | print(round(expected, 1))
28 | cat("\nDifference:\n")
29 | print(round(observed - expected, 1))
30 |
31 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
32 | }
33 | return(invisible(statistic))
34 | }
35 |
36 | #-------------------------------------------
37 |
38 | #We use this function on the contingency table for Education and
39 | #DeathPenalty
40 | #set.seed(200)
41 | library(resampledata3)
42 | observed <- chisq(table(GSS2018$Degree, GSS2018$DeathPenalty))
43 | observed
44 |
45 | #Now, there were 155 people who declined to respond to the
46 | #death penalty question, so we will remove these observations from our
47 | #analysis.
48 |
49 | #We will use the drop_na() command from the tidyr package. The
50 | #command below will create a data frame from GSS2018, removing any
51 | #rows with an NA in either Degree or DeathPenalty (though in this
52 | #case, only the death penalty variable has missing values).
53 |
54 | library(tidyr)
55 | df <- drop_na(GSS2018, Degree, DeathPenalty)
56 | #The sample(df$DeathPenalty) command below permutes the
57 | #values in DeathPenalty
58 | N <- 10^5-1
59 | result <- numeric(N)
60 | for (i in 1:N)
61 | {
62 | DP.permuted <- sample(df$DeathPenalty)
63 | GSS.table <- table(df$Degree, DP.permuted)
64 | result[i] <- chisq(GSS.table, print = FALSE) #suppress output inside the loop
65 | }
66 |
67 | ggplot() + geom_histogram(aes(x = result)) +
68 | geom_vline(xintercept = observed, lty = 2)
69 |
70 | #Check the distribution of the test statistic to help determine the
71 | #direction of the inequality when computing the P-value; a sketch follows.
72 |
73 |
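74 | #The chi-square statistic grows as observed and expected counts
75 | #diverge, so large values are evidence against independence and the
76 | #P-value uses the upper tail (a sketch, following earlier chapters)
77 | (sum(result >= observed) + 1) / (N + 1) # P-value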
--------------------------------------------------------------------------------
/Edition3/RScripts/c11_Bayes.R:
--------------------------------------------------------------------------------
1 | #Chapter 11
2 | #Bayesian methods
3 | library(resampledata3)
4 | library(ggplot2)
5 | library(dplyr)
6 |
7 | #Example 11.1
8 | theta <- seq(0, 1, by = .1)
9 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
10 | likelihood <- theta * (1 - theta)^2
11 | constant <- sum(prior * likelihood)
12 | posterior <- prior * likelihood / constant
13 | posterior
14 | sum(theta * prior) # prior mean
15 | sum(theta * posterior) # posterior mean
16 |
17 | #continued
18 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 successes, 5 failures
19 | constant2 <- sum(prior * likelihood2)
20 | posterior2 <- prior * likelihood2 / constant2
21 | posterior2
22 |
23 | likelihood3 <- theta^2 * (1 - theta)^3
24 | constant3 <- sum(posterior * likelihood3)
25 | posterior3 <- posterior * likelihood3 / constant3
26 | posterior3 # not shown, same as posterior2
27 | sum(theta * posterior2) # posterior mean
28 | df <- data.frame(theta, prior, posterior, posterior2) #data frame used in the plot below
29 | ggplot(df, aes(x = theta, y = prior)) +
30 | geom_point() + geom_line(lty = 1) +
31 | geom_point(aes(y = posterior)) +
32 | geom_line(aes(y = posterior), lty = 2) +
33 | geom_point(aes(y = posterior2)) +
34 | geom_line(aes(y = posterior2), lty = 3)
35 |
36 | #----------------------------------------------------
37 | #Example 11.3
38 | qbeta(.025, 111, 91)
39 | qbeta(.975, 111, 91)
40 | 1 - pbeta(.5, 111, 91) #posterior probability that theta > 0.5
41 |
42 | ggplot(data.frame(x = c(0,1)), aes(x = x)) +
43 | stat_function(fun = dbeta, aes(lty = "2"),
44 | args = list(shape1 = 1, shape2 = 1)) +
45 | stat_function(fun = dbeta, aes(lty = "1"),
46 | args = list(shape1 = 111, shape2 = 91)) +
47 | scale_linetype_manual(values = c("2" = 2, "1" = 1),
48 | labels = c("Posterior", "Prior"),
49 | guide = guide_legend(reverse = TRUE)) +
50 | scale_x_continuous(breaks = seq(0, 1, by = .2)) +
51 | labs(x = "", y = "Density") +
52 | theme(legend.title = element_blank(),
53 | legend.position = c(.1, .85),
54 | legend.key = element_blank())
55 |
56 | #-------------------------------------------
57 | #Section 11.5 Sequential data
58 |
59 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
60 | X <- c(52, 41, 55, 49, 39, 39)
61 | alpha <- X # vector of posterior parameters
62 | beta <- n - X # vector of posterior parameters
63 | N <- 10^5 # replications
64 | theta <- matrix(0.0, nrow = N, ncol = 6)
65 | for (j in 1:6)
66 | {
67 | theta[, j] <- rbeta(N, alpha[j], beta[j])
68 | }
69 | probBest <- numeric(6) # vector for results
70 | best <- apply(theta, 1, max) # maximum of each row
71 | for (j in 1:6)
72 | {
73 | probBest[j] <- mean(theta[, j] == best)
74 | }
75 |
76 | #probBest contains probabilities of each of the six arms
77 | #being best
78 |
79 | df <- data.frame(theta[1:10^4, ])
80 | names(df)
81 | ggplot(df, aes(x = X1, y = X3)) + geom_point(size = .5) +
82 | geom_abline(slope = 1, intercept = 0) +
83 | annotate("text", x = 0.037, y = 0.042, parse = TRUE,
84 | label = "theta[3] > theta[1]") +
85 | annotate("text", x = 0.042, y = 0.037, parse = TRUE,
86 | label = "theta[1] > theta[3]") +
87 | labs(x = expression(theta[1]), y=expression(theta[3]))
88 |
89 | #----------------------------------------
90 | probBest
91 | #
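92 | #Not in the text: the posterior draws also give direct pairwise
93 | #comparisons, e.g. the posterior probability that arm 3 beats arm 1
94 | mean(theta[, 3] > theta[, 1])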
--------------------------------------------------------------------------------
/Edition3/RScripts/c12_ANOVA.R:
--------------------------------------------------------------------------------
1 | #Chapter 12 ANOVA
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 |
6 | #Example 12.1
7 | anova(lm(Weight ~ MothersAge, data = ILBoys))
8 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] #Extract F stat
9 |
10 | summary(aov(Weight ~ MothersAge, data = ILBoys)) #same
11 |
12 | #Section 12.1.2 Permutation Test Approach
13 | #Checking the normality condition
14 | ggplot(ILBoys, aes(sample = Weight)) + geom_qq() +
15 | geom_qq_line() + facet_wrap(. ~ MothersAge)
16 |
17 | #Permutation test
18 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1]
19 | n <- length(ILBoys$Weight)
20 | N <- 10^4 - 1
21 | results <- numeric(N)
22 | for (i in 1:N)
23 | {
24 | index <- sample(n)
25 | Wt.perm <- ILBoys$Weight[index]
26 | results[i] <- anova(lm(Wt.perm ~ MothersAge, data = ILBoys))$F[1]
27 | }
28 |
29 | (sum(results >= observed) + 1) / (N + 1) # P value
30 |
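31 | #Not in the text: plot the permutation distribution with the
32 | #observed F statistic marked, following the pattern of earlier chapters
33 | ggplot(data.frame(results), aes(x = results)) +
34 |   geom_histogram(bins = 30, color = "white") +
35 |   geom_vline(xintercept = observed, color = "red", lty = 2)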
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Mathematical Statistics with Resampling and R
2 |
3 | Data sets, R code, supplementary materials, and errata for the textbook
4 | *Mathematical Statistics with Resampling and R*
5 | by
6 | [Laura Chihara](https://lchihara.people.sites.carleton.edu)
7 | and
8 | [Tim Hesterberg](https://www.timhesterberg.net).
9 |
10 |
11 | Current: [Third Edition (2022)](Edition3)
12 |
13 |
14 | Older:
15 | [Second Edition (2018)](Edition2),
16 | [First Edition (2011)](Edition1).
17 |
--------------------------------------------------------------------------------
/readme-MathStatsResamplingR.txt:
--------------------------------------------------------------------------------
1 | PLEASE IGNORE THIS FILE.
2 | It contains working notes by Chihara and Hesterberg.
3 |
4 | Some .pdf and .R files listed here are compiled from .tex or .Rmd files in
5 | MathStatsTextbook/trunk/StudentWebMaterials/
6 |
7 | Some .R files are copied (and possibly edited) from one of
8 | MathStatsTextbook/trunk/R/
9 | MathStatsTextbook/trunk/StudentWebMaterials/
10 |
--------------------------------------------------------------------------------