├── Edition1
│   ├── CorrectedOdds_Ed1.pdf
│   ├── ErrataEdition1.pdf
│   └── README.md
├── Edition2
│   ├── Chapters
│   │   ├── Table3.1.pdf
│   │   └── Table5.1.pdf
│   ├── Data
│   │   ├── Alelager.csv
│   │   ├── Bangladesh.csv
│   │   ├── Beerwings.csv
│   │   ├── BookPrices.csv
│   │   ├── Bushmeat.csv
│   │   ├── Cereals.csv
│   │   ├── Challenger.csv
│   │   ├── ChiMarathonMen.csv
│   │   ├── Cuckoos.csv
│   │   ├── Diving2017.csv
│   │   ├── Fatalities.csv
│   │   ├── FishMercury.csv
│   │   ├── FlightDelays.csv
│   │   ├── GSS2002.csv
│   │   ├── Girls2004.csv
│   │   ├── Groceries.csv
│   │   ├── ILBoys.csv
│   │   ├── IceCream.csv
│   │   ├── Illiteracy.csv
│   │   ├── Lottery.csv
│   │   ├── MathAnxiety.csv
│   │   ├── MathStatsData_Ed2.zip
│   │   ├── Maunaloa.csv
│   │   ├── MnGroundwater.csv
│   │   ├── MobileAds.csv
│   │   ├── NBA1617.csv
│   │   ├── NCBirths2004.csv
│   │   ├── Nasdaq.csv
│   │   ├── Olympics2012.csv
│   │   ├── Phillies2009.csv
│   │   ├── Quakes.csv
│   │   ├── Quetzal.csv
│   │   ├── RangersTwins2016.csv
│   │   ├── Recidivism.csv
│   │   ├── Salaries.csv
│   │   ├── Service.csv
│   │   ├── Skateboard.csv
│   │   ├── Skating2010.csv
│   │   ├── Spruce.csv
│   │   ├── Starcraft.csv
│   │   ├── TV.csv
│   │   ├── TXBirths2004.csv
│   │   ├── Titanic.csv
│   │   ├── Turbine.csv
│   │   ├── Verizon.csv
│   │   ├── Volleyball2009.csv
│   │   ├── Walleye.csv
│   │   ├── Watertable.csv
│   │   └── wafers.csv
│   ├── Errata_Edition2.pdf
│   ├── R
│   │   ├── Chap02EDA.R
│   │   ├── Chap02EDA.Rmd
│   │   ├── Chap02EDA_d.Rmd
│   │   ├── Chap03Testing.R
│   │   ├── Chap03Testing.Rmd
│   │   ├── Chap03Testing_Exer.R
│   │   ├── Chap03Testing_Exer.Rmd
│   │   ├── Chap03Testing_Exer_d.Rmd
│   │   ├── Chap03Testing_d.Rmd
│   │   ├── Chap04SamplingDist.R
│   │   ├── Chap04SamplingDist.Rmd
│   │   ├── Chap04SamplingDist_Exer.R
│   │   ├── Chap04SamplingDist_Exer.Rmd
│   │   ├── Chap04SamplingDist_Exer_d.Rmd
│   │   ├── Chap04SamplingDist_d.Rmd
│   │   ├── Chap05Bootstrap.R
│   │   ├── Chap05Bootstrap.Rmd
│   │   ├── Chap05Bootstrap_Exer.R
│   │   ├── Chap05Bootstrap_Exer.Rmd
│   │   ├── Chap05Bootstrap_Exer_d.Rmd
│   │   ├── Chap05Bootstrap_d.Rmd
│   │   ├── Chap06Estimation.R
│   │   ├── Chap06Estimation.Rmd
│   │   ├── Chap06Estimation_d.Rmd
│   │   ├── Chap07MoreConfIntervals.R
│   │   ├── Chap07MoreConfIntervals.Rmd
│   │   ├── Chap07MoreConfIntervals_Exer.R
│   │   ├── Chap07MoreConfIntervals_Exer.Rmd
│   │   ├── Chap07MoreConfIntervals_Exer_d.Rmd
│   │   ├── Chap07MoreConfIntervals_d.Rmd
│   │   ├── Chap08MoreHypTests.R
│   │   ├── Chap08MoreHypTests.Rmd
│   │   ├── Chap08MoreHypTests_Exer.R
│   │   ├── Chap08MoreHypTests_Exer.Rmd
│   │   ├── Chap08MoreHypTests_Exer_d.Rmd
│   │   ├── Chap08MoreHypTests_d.Rmd
│   │   ├── Chap09Regression.R
│   │   ├── Chap09Regression.Rmd
│   │   ├── Chap09Regression_d.Rmd
│   │   ├── Chap10categorical.R
│   │   ├── Chap10categorical.Rmd
│   │   ├── Chap10categorical_d.Rmd
│   │   ├── Chap11Bayesian.R
│   │   ├── Chap11Bayesian.Rmd
│   │   ├── Chap11Bayesian_d.Rmd
│   │   ├── Chap12ANOVA.R
│   │   ├── Chap12ANOVA.Rmd
│   │   ├── Chap12ANOVA_Exer.Rmd
│   │   ├── Chap12Anova_Exer.R
│   │   ├── Chap13AddTopics.R
│   │   ├── Chap13AddTopics.Rmd
│   │   └── Chap13AddTopics_d.Rmd
│   └── README.md
├── Edition3
│   ├── Chapters
│   │   ├── c01_GSS2018Questions.pdf
│   │   └── c06_Supplement.pdf
│   ├── Data
│   │   ├── Data.zip
│   │   └── Readme.md
│   ├── Errata_Edition3.pdf
│   ├── README.md
│   └── RScripts
│       ├── MobileAds.R
│       ├── c02_RIntroEDA1.R
│       ├── c02_RIntroEDA1.Rmd
│       ├── c02_RIntroEDA1.pdf
│       ├── c02_RIntroEDA2.Rmd
│       ├── c03_PermutationTests.R
│       ├── c03_SolnExercise.R
│       ├── c04_SamplingDistributions.R
│       ├── c05_Bootstrap.R
│       ├── c06_Estimation.R
│       ├── c06_WindTurbine.R
│       ├── c07_MoreConfidenceIntervals.R
│       ├── c08_MoreHypothesisTests.R
│       ├── c09_Regression.R
│       ├── c10_CategoricalData.R
│       ├── c10_PermTestIndependence.R
│       ├── c11_Bayes.R
│       ├── c12_ANOVA.R
│       └── c13_AdditionalTopics.R
├── README.md
└── readme-MathStatsResamplingR.txt
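Note (not part of the repository listing): the datasets under Edition2/Data are plain CSVs, so they can be read straight from GitHub. A minimal sketch, assuming the CSVs are served by the same raw-URL scheme and commit hash that the PDF and zip links later in this dump use:

# Minimal sketch, not repository code: load one Edition 2 dataset from GitHub.
# The commit hash is copied from the raw links below; the CSV path is assumed
# to follow the same scheme.
base <- "https://raw.githubusercontent.com/lchihara/MathStatsResamplingR"
hash <- "605f40aca79f9dadc1465b6af8830ff1253c0dc1"
url  <- paste(base, hash, "Edition2/Data/Beerwings.csv", sep = "/")
Beerwings <- read.csv(url)  # base R, no extra packages needed
str(Beerwings)              # columns: ID, Hotwings, Beer, Gender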
/Edition1/CorrectedOdds_Ed1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition1/CorrectedOdds_Ed1.pdf
--------------------------------------------------------------------------------
/Edition1/ErrataEdition1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition1/ErrataEdition1.pdf
--------------------------------------------------------------------------------
/Edition1/README.md:
--------------------------------------------------------------------------------
# Mathematical Statistics with Resampling and R, 1st edition (2011)

This is an older edition. For the current edition, see
[https://github.com/lchihara/MathStatsResamplingR](https://github.com/lchihara/MathStatsResamplingR)

## First Edition

[Author's website](https://sites.google.com/site/chiharahesterberg/chapter-materials-Ed1)

Available on:

* [Google Books](https://books.google.com/books?id=9KRHFDKDV84C)
* [Amazon](https://www.amazon.com/Mathematical-Statistics-Resampling-Laura-Chihara/dp/1118029852/ref=sr_1_1?ie=UTF8)
--------------------------------------------------------------------------------
/Edition2/Chapters/Table3.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Chapters/Table3.1.pdf
--------------------------------------------------------------------------------
/Edition2/Chapters/Table5.1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Chapters/Table5.1.pdf
--------------------------------------------------------------------------------
/Edition2/Data/Alelager.csv:
--------------------------------------------------------------------------------
ID,Type,Alcohol,Calories
1,Ale,5.5,160
2,Ale,5.4,156
3,Ale,4.85,146
4,Ale,4.5,150
5,Ale,5.2,160
6,Ale,5.3,174
7,Ale,5.3,177
8,Ale,5.2,177
9,Ale,5.77,179
10,Ale,4.94,160
11,Ale,5.6,187
12,Ale,5.6,175
13,Ale,6.77,167
14,Lager,5,145
15,Lager,5,150
16,Lager,5,153
17,Lager,4.9,153
18,Lager,4.94,163
19,Lager,4.9,175
20,Lager,4.7,146
21,Lager,4.75,160
22,Lager,4.55,142
23,Lager,4.81,169
24,Lager,4.4,156
25,Lager,5,146
26,Lager,5,147
27,Lager,4.6,138
28,Lager,4.79,153
29,Lager,5,150
30,Lager,5.8,160
31,Lager,4.9,140
--------------------------------------------------------------------------------
/Edition2/Data/Bangladesh.csv:
--------------------------------------------------------------------------------
"Arsenic","Chlorine","Cobalt"
2400,6.2,0.42
6,116,0.45
904,14.8,0.63
321,35.9,0.68
1280,18.9,0.58
151,7.8,0.35
141,56.3,0.46
1050,16,0.59
511,40.4,0.48
688,29.3,0.87
81,31.3,0.6
8,36.9,0.34
37,20.3,0.32
6,1.3,0.41
22,22.3,0.32
43,22.1,0.39
39,25.8,0.38
92,16.2,0.44
253,6.6,0.45
200,1.2,0.46
255,2,0.33
1150,16.4,0.61
1180,16.6,0.97
9,55.5,0.51
107,7.2,0.33
6,12.9,0.31
149,8.7,0.31
6,4.4,0.24
46,70.6,0.38
13,148,0.39
6,44.1,0.1
150,43.2,0.88
6,37.1,0.08
189,5.7,0.33
364,360,0.77
42,17.6,0.28
390,67.7,0.8
6,6.7,0.11
270,390,1.28
248,9.6,0.35
139,59.2,0.63
6,3.5,1.44
82,239,0.34
82,63.8,1.18
256,5.4,0.52
165,5.4,0.37
6,47.3,0.13
180,1240,1.01
86,72,0.3
6,295,0.24
38,157,0.29
262,72.1,0.18
404,23.6,1.01
8,637,0.32
85,133,0.57
98,15.6,0.22
6,68,0.11
22,1090,0.66
6,1290,0.63
6,24.7,0.08
6,74.6,0.1
15,115,0.41
103,72.1,0.25
86,96,0.22
6,324,0.23
46,155,0.22
62,64.3,0.43
43,89,0.45
6,310,0.16
6,310,0.21
55,23.9,0.43
6,1550,0.66
107,61.3,0.26
65,69.5,0.48
276,82.5,1.11
114,11.1,1.5
6,3.7,0.07
6,2.7,0.08
6,238,3.18
65,254,0.41
142,27.1,2.27
194,6.2,0.64
6,129,0.22
54,14.9,0.4
702,414,1.41
6,74,0.23
986,6.3,0.34
153,34.8,0.55
84,7.1,0.3
16,27.6,0.39
1460,9.3,0.71
306,33.9,0.67
49,16.5,0.38
36,13.5,0.59
106,7.2,1.75
6,3.4,0.45
41,127,1.57
84,16.2,0.7
278,3.6,0.42
41,1.8,0.3
123,10.4,0.39
186,56.7,0.95
80,86,0.6
162,2,0.21
398,7.6,0.56
39,5.5,2.1
57,3.8,0.63
6,18.6,0.34
6,2.6,0.65
6,51.9,0.69
18,16,0.37
129,1.8,0.72
245,3,0.59
148,11.5,0.39
28,38.7,0.6
20,79.8,0.65
6,93,1.17
52,195,0.94
6,115,0.45
6,15.8,0.25
6,4.2,0.3
15,4,0.26
73,2.6,0.26
30,8.2,0.45
6,2.6,0.33
128,14.7,0.31
45,8.7,0.29
343,7.2,0.24
109,5.1,0.26
191,3.8,0.32
160,36.6,0.76
51,11.3,0.44
35,16.4,0.73
8,17.7,0.52
11,11,0.84
15,4.8,0.57
6,11.3,0.95
6,13.8,0.39
6,10.5,0.44
6,2.2,0.14
132,7.5,0.42
8,87,0.68
10,17.1,0.34
6,9.6,0.78
6,2,0.85
195,4,0.41
27,67.8,1.06
6,4.8,0.86
24,12.7,1.51
12,4.8,0.5
161,7.5,0.47
9,78.2,1.27
171,4.8,0.38
35,42.5,0.33
742,14.9,0.57
51,18.2,0.44
6,8.7,0.33
6,8.8,0.26
6,328,0.91
6,3.2,0.2
6,7,0.4
6,3.2,0.2
6,19.1,0.25
6,13.4,0.9
6,13.6,0.45
20,6.8,0.34
212,28.5,0.41
6,1.4,0.43
6,1.2,0.31
6,6.9,0.26
6,3.2,0.28
40,70.4,0.45
10,3.4,0.42
6,6.7,0.24
6,22.7,0.84
6,67.3,0.88
6,8,0.25
6,25.1,0.3
6,8.6,0.24
6,2.4,0.13
8,4.4,0.3
67,5.4,0.37
21,11.5,0.51
17,4.6,0.22
6,34.5,0.5
6,13.9,0.09
6,2.6,0.23
6,16.9,0.39
14,4,0.27
6,16.5,0.31
6,51.5,1.19
6,13.2,0.22
6,4.1,0.27
137,3.9,0.43
136,6.2,0.2
12,7.2,0.19
27,59.6,0.34
234,3.4,0.13
6,3,0.79
1520,4.3,0.4
13,105,0.62
9,12.9,0.47
6,4.7,0.21
6,7.2,0.39
6,13.7,0.29
6,10.6,0.76
6,2.8,0.22
6,30.7,0.31
6,11.9,0.45
14,51.1,0.45
6,,0.31
8,64.3,0.63
6,11.6,0.42
88,5,0.59
0.5,2.9,0.18
0.5,5.1,0.14
0.5,3.1,0.13
18.6,14.2,0.68
5.5,1.4,0.52
61.2,43.4,0.42
0.5,18.1,1.3
103,1,0.28
1.5,4.4,0.14
10.7,3.1,0.27
313,1.7,0.64
0.5,5.1,0.75
131,27.5,0.49
157,3.1,0.24
8.9,2.7,0.24
3.9,4.8,0.29
0.5,33.1,1.88
0.5,3.4,0.86
0.5,3.8,0.69
12.1,3.8,0.39
0.5,8.2,0.37
109,12.2,0.41
0.5,2.4,0.98
0.5,223,0.46
2.7,4.4,0.51
0.5,13.3,0.31
0.5,1.9,0.53
0.5,20,0.32
16.8,16.1,0.31
29,9.6,0.16
2340,8.9,0.32
3.8,95.1,0.38
108,2.4,0.3
6.8,3.4,0.14
0.5,5.1,0.15
61.4,20.2,0.13
0.5,1.7,0.47
670,17.5,
287,140,0.58
409,426,0.43
1.2,1.8,0.05
125,16.6,0.35
202,7.7,0.74
30.3,39.8,0.18
0.5,1050,0.32
52.3,342,0.6
109,1360,0.75
80.7,5,0.76
75.6,4.9,0.26
72.9,195,0.68
64.5,892,0.92
36.4,65.7,0.29
34.5,99.5,0.65
88.6,,0.58
9.4,18,0.35
2.1,17.6,0.09
94.8,736,0.23
--------------------------------------------------------------------------------
/Edition2/Data/Beerwings.csv:
--------------------------------------------------------------------------------
ID,Hotwings,Beer,Gender
1,4,24,F
2,5,0,F
3,5,12,F
4,6,12,F
5,7,12,F
6,7,12,F
7,7,24,M
8,8,24,F
9,8,0,M
10,8,12,M
11,9,24,F
12,11,24,F
13,11,24,M
14,12,30,F
15,12,30,F
16,13,24,F
17,13,36,F
18,13,30,M
19,13,30,M
20,14,30,F
21,14,36,F
22,14,48,M
23,16,36,M
24,16,36,M
25,17,36,M
26,17,42,M
27,18,30,M
28,18,30,M
29,21,36,M
30,21,42,M
--------------------------------------------------------------------------------
/Edition2/Data/BookPrices.csv:
--------------------------------------------------------------------------------
"Subject","Area","Price"
"Biology","Math & Science",190.7
"Biology","Math & Science",160
"Biology","Math & Science",117.3
"Biology","Math & Science",115.15
"Chemistry","Math & Science",222.67
"Chemistry","Math & Science",174.95
"Chemistry","Math & Science",197.15
"Chemistry","Math & Science",196.4
"Chemistry","Math & Science",197.5
"Computer Science","Math & Science",157.8
"Computer Science","Math & Science",178.7
"Computer Science","Math & Science",77.95
"Computer Science","Math & Science",128
"Computer Science","Math & Science",138
"Economics","Social Sciences",31.95
"Economics","Social Sciences",209
"Economics","Social Sciences",104
"Economics","Social Sciences",168
"Economics","Social Sciences",168
"Economics","Social Sciences",163.35
"Economics","Social Sciences",178.7
"Educational Studies","Social Sciences",15
"Geology","Math & Science",134.4
"Mathematics","Math & Science",137.35
"Mathematics","Math & Science",222
"Mathematics","Math & Science",138.7
"Mathematics","Math & Science",138.7
"Mathematics","Math & Science",106.65
"Mathematics","Math & Science",174
"Mathematics","Math & Science",172.35
"Physics","Math & Science",149.35
"Physics","Math & Science",200
"Physics","Math & Science",192.7
"Physics","Math & Science",85.35
"Physics","Math & Science",128
"Political Science","Social Sciences",11
"Political Science","Social Sciences",26.95
"Political Science","Social Sciences",17
"Psychology","Social Sciences",138.7
"Psychology","Social Sciences",136
"Psychology","Social Sciences",139.95
"Psychology","Social Sciences",15.95
"SOAN","Social Sciences",139.33
"SOAN","Social Sciences",19.95
--------------------------------------------------------------------------------
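Aside (not part of the dump): Beerwings.csv above is the book's two-sample comparison dataset; the corresponding script is presumably Edition2/R/Chap03Testing.R, which is not reproduced in this dump. As a hedged illustration only, a permutation test for the difference in mean hot wings consumed by gender might look like this, assuming the working directory is the repository root:

# Illustrative sketch only -- not the book's code. Permutation test for the
# difference in mean hot wings consumed (men minus women) in Beerwings.csv.
Beerwings <- read.csv("Edition2/Data/Beerwings.csv")
observed <- mean(Beerwings$Hotwings[Beerwings$Gender == "M"]) -
            mean(Beerwings$Hotwings[Beerwings$Gender == "F"])
N <- 10^4 - 1                      # number of resamples
result <- numeric(N)
m <- sum(Beerwings$Gender == "M")  # group size to permute
set.seed(10)
for (i in 1:N) {
  index <- sample(nrow(Beerwings), m)          # indices relabeled as "M"
  result[i] <- mean(Beerwings$Hotwings[index]) -
               mean(Beerwings$Hotwings[-index])
}
(sum(result >= observed) + 1) / (N + 1)        # one-sided p-value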
/Edition2/Data/Bushmeat.csv:
--------------------------------------------------------------------------------
"Fish","Biomass","Year"
28.6,942.54,1970
34.7,969.77,1971
39.3,999.45,1972
32.4,987.13,1973
31.8,976.31,1974
32.8,944.07,1975
38.4,979.37,1976
33.2,997.86,1977
29.7,994.85,1978
25,936.36,1979
21.8,862.85,1980
20.8,815.67,1981
19.7,756.58,1982
20.8,725.27,1983
21.1,662.65,1984
21.3,625.97,1985
24.3,621.69,1986
27.4,589.83,1987
24.5,548.05,1988
25.2,524.88,1989
25.9,529.41,1990
23,497.37,1991
27.1,476.86,1992
23.4,453.8,1993
18.9,402.7,1994
19.6,365.25,1995
25.3,326.02,1996
22,320.12,1997
21,296.49,1998
23,228.72,1999
--------------------------------------------------------------------------------
/Edition2/Data/Cereals.csv:
--------------------------------------------------------------------------------
"ID","Age","Shelf","Sodiumgram","Proteingram"
1,"adult","bottom",0.007,0.1
2,"children","bottom",0.006666667,0.066666667
3,"children","bottom",0.004666667,0.033333333
4,"children","bottom",0.006969697,0.03030303
5,"adult","bottom",0.007,0.1
6,"children","bottom",0.006,0.033333333
7,"children","bottom",0.006129032,0.032258065
8,"children","bottom",0.00483871,0.032258065
9,"children","bottom",0.001851852,0.074074074
10,"children","middle",0.005517241,0.034482759
11,"children","middle",0.006666667,0.066666667
12,"children","middle",0.0045,0.066666667
13,"children","middle",0.004375,0.03125
14,"children","middle",0.007096774,0.064516129
15,"children","middle",0.007,0.033333333
16,"children","middle",0.006785714,0.107142857
17,"adult","middle",0.004545455,0.090909091
18,"children","middle",0.005,0.09375
19,"children","middle",0.0046875,0.09375
20,"children","middle",0.003833333,0.066666667
21,"children","middle",0.0045,0.066666667
22,"children","middle",0.006666667,0.066666667
23,"children","middle",0.006296296,0.037037037
24,"children","middle",0.007407407,0.037037037
25,"children","middle",0.004375,0.03125
26,"children","middle",0.005333333,0.033333333
27,"children","middle",0.005666667,0.033333333
28,"children","middle",0.004848485,0.060606061
29,"adult","top",0.0022,0.2
30,"children","top",0.007,0.033333333
31,"adult","top",0.0035,0.266666667
32,"adult","top",0.001792453,0.169811321
33,"adult","top",0.0045,0.1
34,"adult","top",0.0028,0.04
35,"adult","top",0.000222222,0.177777778
36,"adult","top",0.001634615,0.25
37,"adult","top",0.0028,0.14
38,"adult","top",0.005818182,0.072727273
39,"adult","top",0.002727273,0.121212121
40,"adult","top",0.0056,0.06
41,"adult","top",0,0.074074074
42,"adult","top",0,0.092592593
43,"adult","top",0.00245283,0.094339623
--------------------------------------------------------------------------------
/Edition2/Data/Challenger.csv:
--------------------------------------------------------------------------------
"Date","Temperature","Incident"
"Apr12.81",66,0
"Nov12.81",70,1
"Mar22.82",69,0
"Nov11.82",68,0
"Apr04.83",67,0
"Jun18.83",72,0
"Aug30.83",73,0
"Nov28.83",70,0
"Feb03.84",57,1
"Apr06.84",63,1
"Aug30.84",70,1
"Oct05.84",78,0
"Nov08.84",67,0
"Jan24.85",53,1
"Apr12.85",67,0
"Apr29.85",75,0
"Jun17.85",70,0
"Jul29.85",81,0 20 | "Aug27.85",76,0 21 | "Oct03.85",79,0 22 | "Oct30.85",75,1 23 | "Nov26.85",76,0 24 | "Jan12.86",58,1 25 | -------------------------------------------------------------------------------- /Edition2/Data/ChiMarathonMen.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Data/ChiMarathonMen.csv -------------------------------------------------------------------------------- /Edition2/Data/Cuckoos.csv: -------------------------------------------------------------------------------- 1 | "Eggs","Bird" 2 | 19.65,"MeadowPipit" 3 | 20.05,"MeadowPipit" 4 | 20.65,"MeadowPipit" 5 | 20.85,"MeadowPipit" 6 | 21.65,"MeadowPipit" 7 | 21.65,"MeadowPipit" 8 | 21.65,"MeadowPipit" 9 | 21.85,"MeadowPipit" 10 | 21.85,"MeadowPipit" 11 | 21.85,"MeadowPipit" 12 | 22.05,"MeadowPipit" 13 | 22.05,"MeadowPipit" 14 | 22.05,"MeadowPipit" 15 | 22.05,"MeadowPipit" 16 | 22.05,"MeadowPipit" 17 | 22.05,"MeadowPipit" 18 | 22.05,"MeadowPipit" 19 | 22.05,"MeadowPipit" 20 | 22.05,"MeadowPipit" 21 | 22.05,"MeadowPipit" 22 | 22.25,"MeadowPipit" 23 | 22.25,"MeadowPipit" 24 | 22.25,"MeadowPipit" 25 | 22.25,"MeadowPipit" 26 | 22.25,"MeadowPipit" 27 | 22.25,"MeadowPipit" 28 | 22.25,"MeadowPipit" 29 | 22.25,"MeadowPipit" 30 | 22.45,"MeadowPipit" 31 | 22.45,"MeadowPipit" 32 | 22.45,"MeadowPipit" 33 | 22.65,"MeadowPipit" 34 | 22.65,"MeadowPipit" 35 | 22.85,"MeadowPipit" 36 | 22.85,"MeadowPipit" 37 | 22.85,"MeadowPipit" 38 | 22.85,"MeadowPipit" 39 | 23.05,"MeadowPipit" 40 | 23.25,"MeadowPipit" 41 | 23.25,"MeadowPipit" 42 | 23.45,"MeadowPipit" 43 | 23.65,"MeadowPipit" 44 | 23.85,"MeadowPipit" 45 | 24.25,"MeadowPipit" 46 | 24.45,"TreePipit" 47 | 21.05,"TreePipit" 48 | 21.85,"TreePipit" 49 | 22.05,"TreePipit" 50 | 22.45,"TreePipit" 51 | 22.65,"TreePipit" 52 | 23.25,"TreePipit" 53 | 23.25,"TreePipit" 54 | 23.25,"TreePipit" 55 | 23.45,"TreePipit" 56 | 23.45,"TreePipit" 57 | 23.65,"TreePipit" 58 | 23.85,"TreePipit" 59 | 24.05,"TreePipit" 60 | 24.05,"TreePipit" 61 | 24.05,"TreePipit" 62 | 20.85,"HedgeSparrow" 63 | 21.65,"HedgeSparrow" 64 | 22.05,"HedgeSparrow" 65 | 22.85,"HedgeSparrow" 66 | 23.05,"HedgeSparrow" 67 | 23.05,"HedgeSparrow" 68 | 23.05,"HedgeSparrow" 69 | 23.05,"HedgeSparrow" 70 | 23.45,"HedgeSparrow" 71 | 23.85,"HedgeSparrow" 72 | 23.85,"HedgeSparrow" 73 | 23.85,"HedgeSparrow" 74 | 24.05,"HedgeSparrow" 75 | 25.05,"HedgeSparrow" 76 | 21.05,"Robin" 77 | 21.85,"Robin" 78 | 22.05,"Robin" 79 | 22.05,"Robin" 80 | 22.05,"Robin" 81 | 22.25,"Robin" 82 | 22.45,"Robin" 83 | 22.45,"Robin" 84 | 22.65,"Robin" 85 | 23.05,"Robin" 86 | 23.05,"Robin" 87 | 23.05,"Robin" 88 | 23.05,"Robin" 89 | 23.05,"Robin" 90 | 23.25,"Robin" 91 | 23.85,"Robin" 92 | 21.05,"PiedWagtail" 93 | 21.85,"PiedWagtail" 94 | 21.85,"PiedWagtail" 95 | 21.85,"PiedWagtail" 96 | 22.05,"PiedWagtail" 97 | 22.45,"PiedWagtail" 98 | 22.65,"PiedWagtail" 99 | 23.05,"PiedWagtail" 100 | 23.05,"PiedWagtail" 101 | 23.25,"PiedWagtail" 102 | 23.45,"PiedWagtail" 103 | 24.05,"PiedWagtail" 104 | 24.05,"PiedWagtail" 105 | 24.05,"PiedWagtail" 106 | 24.85,"PiedWagtail" 107 | 19.85,"Wren" 108 | 20.05,"Wren" 109 | 20.25,"Wren" 110 | 20.85,"Wren" 111 | 20.85,"Wren" 112 | 20.85,"Wren" 113 | 21.05,"Wren" 114 | 21.05,"Wren" 115 | 21.05,"Wren" 116 | 21.25,"Wren" 117 | 21.45,"Wren" 118 | 22.05,"Wren" 119 | 22.05,"Wren" 120 | 22.05,"Wren" 121 | 22.25,"Wren" 122 | -------------------------------------------------------------------------------- 
/Edition2/Data/Diving2017.csv:
--------------------------------------------------------------------------------
Name,Country,Semifinal,Final
CHEONG Jun Hoong,Malaysia,325.50,397.50
SI Yajie,China,382.80,396.00
REN Qian,China,367.50,391.95
KIM Mi Rae,North Korea,346.00,385.55
WU Melissa,Australia,318.70,370.20
KIM Kuk Hyang,North Korea,360.85,360.00
ITAHASHI Minami,Japan,313.70,357.85
BENFEITO Meaghan,Canada,355.15,331.40
PAMG Pandelela,Malaysia,322.75,322.40
CHAMANDY Olivia,Canada,320.55,307.15
PARRATTO Jessica,USA,322.75,302.35
MURILLO URREA Carolina,Colombia,325.75,283.35
--------------------------------------------------------------------------------
/Edition2/Data/Fatalities.csv:
--------------------------------------------------------------------------------
"ID","Alcohol","Age"
1,0,86
2,0,38
3,0,40
4,1,20
5,1,27
6,0,19
7,1,43
8,0,71
9,0,63
10,0,37
11,0,24
12,0,60
13,0,52
14,0,53
15,0,71
16,1,21
17,1,17
18,0,58
19,1,39
20,1,21
21,0,23
22,0,19
23,0,52
24,0,46
25,0,50
26,1,59
27,0,43
28,1,25
29,0,80
30,0,32
31,1,40
32,0,75
33,0,20
34,0,21
35,1,57
36,0,44
37,0,17
38,0,18
39,0,20
40,0,21
41,0,84
42,0,19
43,0,18
44,0,42
45,0,73
46,0,27
47,0,62
48,1,47
49,0,45
50,0,49
51,0,54
52,0,79
53,0,53
54,0,82
55,0,71
56,0,37
57,0,45
58,0,19
59,0,73
60,0,78
61,0,45
62,0,25
63,0,33
64,0,28
65,0,59
66,0,48
67,0,49
68,0,57
69,0,18
70,0,61
71,1,28
72,0,21
73,0,83
74,0,76
75,0,57
76,0,21
77,0,79
78,0,29
79,0,20
80,0,22
81,1,27
82,0,36
83,0,19
84,1,60
85,1,23
86,0,91
87,1,28
88,1,49
89,0,29
90,0,56
91,0,20
92,0,69
93,0,17
94,1,28
95,1,28
96,0,65
97,1,37
98,1,30
99,0,20
100,0,21
--------------------------------------------------------------------------------
/Edition2/Data/FishMercury.csv:
--------------------------------------------------------------------------------
"Mercury"
1.87
0.16
0.088
0.16
0.145
0.099
0.101
0.18
0.187
0.097
0.18
0.132
0.065
0.126
0.107
0.152
0.11
0.076
0.168
0.151
0.048
0.15
0.162
0.118
0.163
0.178
0.076
0.078
0.039
0.09
--------------------------------------------------------------------------------
/Edition2/Data/Girls2004.csv:
--------------------------------------------------------------------------------
"ID","State","MothersAge","Smoker","Weight","Gestation"
1,"WY","15-19","No",3085,40
2,"WY","35-39","No",3515,39
3,"WY","25-29","No",3775,40
4,"WY","20-24","No",3265,39
5,"WY","25-29","No",2970,40
6,"WY","20-24","No",2850,38
7,"WY","20-24","No",2737,38
8,"WY","25-29","No",3515,37
9,"WY","25-29","No",3742,39
10,"WY","35-39","No",3570,40
11,"WY","20-24","No",3834,41
12,"WY","20-24","Yes",3090,39
13,"WY","25-29","Yes",3350,40
14,"WY","30-34","No",3292,37
15,"WY","15-19","No",3317,40
16,"WY","30-34","No",2485,37
17,"WY","20-24","No",3215,39
18,"WY","20-24","No",3230,40
19,"WY","30-34","No",3345,39 21 | 20,"WY","25-29","No",3050,41 22 | 21,"WY","30-34","No",2212,37 23 | 22,"WY","35-39","No",3605,39 24 | 23,"WY","30-34","No",2722,39 25 | 24,"WY","30-34","No",2880,39 26 | 25,"WY","20-24","No",3610,39 27 | 26,"WY","30-34","No",3355,39 28 | 27,"WY","20-24","No",3995,41 29 | 28,"WY","20-24","Yes",2948,39 30 | 29,"WY","35-39","No",3345,41 31 | 30,"WY","30-34","Yes",2892,39 32 | 31,"WY","20-24","No",2466,37 33 | 32,"WY","20-24","Yes",3290,39 34 | 33,"WY","25-29","No",3310,39 35 | 34,"WY","40-44","No",3175,37 36 | 35,"WY","25-29","No",2715,38 37 | 36,"WY","25-29","No",3540,38 38 | 37,"WY","25-29","No",3402,38 39 | 38,"WY","25-29","Yes",3923,39 40 | 39,"WY","20-24","No",3204,37 41 | 40,"WY","15-19","Yes",2495,37 42 | 41,"AK","20-24","No",4337,41 43 | 42,"AK","20-24","No",2948,40 44 | 43,"AK","30-34","No",3269,39 45 | 44,"AK","20-24","No",3608,38 46 | 45,"AK","30-34","No",4016,39 47 | 46,"AK","25-29","No",2919,40 48 | 47,"AK","20-24","No",2608,37 49 | 48,"AK","40-44","No",4309,39 50 | 49,"AK","20-24","No",3288,39 51 | 50,"AK","25-29","No",3742,38 52 | 51,"AK","15-19","No",4394,41 53 | 52,"AK","20-24","No",2182,37 54 | 53,"AK","25-29","No",4592,40 55 | 54,"AK","20-24","No",3090,39 56 | 55,"AK","30-34","No",3770,40 57 | 56,"AK","20-24","No",3977,39 58 | 57,"AK","25-29","No",3153,40 59 | 58,"AK","25-29","No",3458,41 60 | 59,"AK","15-19","No",3912,38 61 | 60,"AK","20-24","Yes",2863,40 62 | 61,"AK","35-39","No",3190,39 63 | 62,"AK","25-29","Yes",3515,38 64 | 63,"AK","25-29","No",3288,39 65 | 64,"AK","15-19","No",3114,40 66 | 65,"AK","30-34","Yes",3543,41 67 | 66,"AK","20-24","No",3825,39 68 | 67,"AK","25-29","No",3458,39 69 | 68,"AK","30-34","No",3698,41 70 | 69,"AK","20-24","No",3572,39 71 | 70,"AK","30-34","Yes",2352,40 72 | 71,"AK","20-24","No",3175,40 73 | 72,"AK","25-29","No",3742,41 74 | 73,"AK","20-24","No",3997,39 75 | 74,"AK","25-29","No",2576,38 76 | 75,"AK","30-34","No",3572,40 77 | 76,"AK","35-39","No",3968,39 78 | 77,"AK","20-24","No",4564,42 79 | 78,"AK","20-24","No",4210,40 80 | 79,"AK","25-29","No",3260,38 81 | 80,"AK","20-24","No",3600,40 82 | -------------------------------------------------------------------------------- /Edition2/Data/Groceries.csv: -------------------------------------------------------------------------------- 1 | Product,Size,Target,Walmart 2 | Kellogg NutriGrain Bars,8 bars,2.50,2.78 3 | Quaker Oats Life Cereal Original ,18oz,3.19,6.01 4 | General Mills Lucky Charms,11.50z,3.19,2.98 5 | Quaker Oats Old Fashioned,18oz,2.82,2.68 6 | Nabisco Oreo Cookies,14.3oz ,2.99,2.98 7 | Nabisco Chips Ahoy,13oz,2.64,1.98 8 | Doritos Nacho Cheese Chips,10oz,3.99,2.5 9 | Cheez-it Original Baked,21oz,4.79,4.79 10 | Swiss Miss Hot Chocolate,10 count,1.49,1.28 11 | Tazo Chai Classic Latte Black Tea,32 oz ,3.49,2.98 12 | Annie's Macaroni & Cheese,6oz,1.79,1.72 13 | Rice A Roni Chicken,6.9oz,1.00,1.00 14 | Zatarain's Jambalaya Rice Mix,8oz,1.62,1.54 15 | SPAM Original Lunch Meat,12oz,2.79,2.64 16 | Campbell's Chicken Noodle Soup,10.75oz,0.99,1.58 17 | Dinty Moore Hearty Meals Beef Stew,15oz,1.99,1.98 18 | Hormel Chili with Beans,15oz,1.94,1.88 19 | Dole Pineapple Chunks,20 oz,1.59,1.47 20 | Skippy Creamy Peanut Butter,16.3oz,2.59,2.58 21 | Smucker's Strawberry Preserve,18oz,2.99,2.84 22 | Heinz Tomato Ketchup,32oz,2.99,2.88 23 | Near East Couscous Toasted Pine Nuts mix,5.6oz,2.12,1.98 24 | Barilla Angel Hair Pasta,16oz,1.42,1.38 25 | Betty Crocker Super Moist Chocolate Fudge Cake Mix,15.25oz,1.22,1.17 26 | Kraft Jet-Puffed Marshmllows,16oz,1.99,1.96 
Dunkin' Donuts Original Blend Medium Roast Ground Coffee,12oz,7.19,6.98
Dove Promises Milk Chocolate,8.87oz,3.19,3.50
Skittles,41oz,7.99,6.98
Vlasic Kosher Dill Pickle Spears,24oz,2.39,2.18
Vlasic Old Fashioned Sauerkraut,32oz,1.99,1.97
--------------------------------------------------------------------------------
/Edition2/Data/ILBoys.csv:
--------------------------------------------------------------------------------
"MothersAge","Weight"
"25-29",3005
"25-29",3686
"25-29",3714
"20-24",2807
"25-29",4054
"15-19",3884
"25-29",3005
"20-24",2920
"25-29",3236
"20-24",3345
"20-24",2948
"25-29",3345
"25-29",3997
"20-24",3025
"20-24",4026
"20-24",4423
"20-24",3487
"20-24",3232
"15-19",3005
"15-19",3300
"20-24",3575
"25-29",3742
"15-19",3232
"20-24",3572
"25-29",4167
"20-24",2870
"20-24",3374
"15-19",3515
"20-24",3232
"20-24",3600
"20-24",3025
"25-29",4139
"20-24",3232
"25-29",3345
"20-24",3837
"15-19",3430
"25-29",3544
"15-19",4030
"20-24",3600
"20-24",3770
"20-24",4394
"25-29",4253
"25-29",3374
"25-29",3487
"15-19",3629
"25-29",3289
"25-29",3535
"15-19",3680
"20-24",3090
"25-29",3997
"25-29",2892
"15-19",2552
"25-29",3487
"20-24",2410
"20-24",2920
"20-24",3260
"25-29",3260
"25-29",3657
"25-29",3515
"20-24",2438
"25-29",3856
"15-19",3317
"20-24",3165
"20-24",3572
"15-19",3884
"20-24",3544
"20-24",3608
"15-19",3487
"25-29",4564
"25-29",4054
"20-24",2336
"20-24",3119
"15-19",2778
"20-24",3741
"20-24",3119
"15-19",3317
"20-24",3260
"20-24",3742
"25-29",3629
"20-24",3033
"25-29",3390
"15-19",3374
"25-29",4335
"20-24",3090
"25-29",3520
"20-24",3657
"20-24",3920
"25-29",3430
"20-24",3742
"15-19",2878
"20-24",3912
"25-29",4082
"25-29",3119
"15-19",3204
"20-24",3430
"20-24",2975
"15-19",2990
"25-29",4200
"20-24",3656
"20-24",2948
"20-24",2948
"20-24",3459
"25-29",3657
"25-29",3930
"25-29",3232
"25-29",2892
"15-19",2580
"25-29",3505
"20-24",3232
"25-29",3345
"20-24",3430
"20-24",3657
"15-19",3459
"15-19",3657
"25-29",3058
"25-29",3771
"25-29",3317
"25-29",3317
"20-24",3033
"20-24",3090
"15-19",3827
"25-29",3175
"25-29",3289
"20-24",3771
"25-29",3317
"15-19",2792
"20-24",3130
"25-29",3175
"20-24",3033
"25-29",3175
"25-29",3459
"25-29",3997
"20-24",3260
"15-19",2955
"25-29",3232
"25-29",3289
"20-24",3175
"25-29",3313
"20-24",3771
"15-19",2000
"20-24",3255
"25-29",2920
"20-24",3572
"25-29",3232
"15-19",3147
"25-29",4026
"20-24",2977
"25-29",3380
"25-29",3033
"20-24",3459
"25-29",3750
"20-24",3600
"15-19",3374
"25-29",3375
"25-29",2778
"25-29",3686
"15-19",3430
"25-29",3714
"25-29",4253
"15-19",2540
"25-29",3827
"20-24",3544
"25-29",3250
"25-29",3317
"20-24",3827
"20-24",3912
"25-29",3289
"25-29",4204 170 | "20-24",3289 171 | "15-19",3204 172 | "15-19",3033 173 | "20-24",3062 174 | "25-29",2807 175 | "20-24",3572 176 | "20-24",3515 177 | "20-24",3487 178 | "20-24",3289 179 | "25-29",4338 180 | "20-24",3250 181 | "20-24",3515 182 | "25-29",3289 183 | "20-24",3430 184 | "20-24",3747 185 | "25-29",3714 186 | "20-24",4005 187 | "25-29",4082 188 | "25-29",3686 189 | "25-29",3515 190 | "20-24",3260 191 | "25-29",3629 192 | "25-29",3296 193 | "20-24",3147 194 | "15-19",2863 195 | "25-29",3175 196 | "25-29",4139 197 | "25-29",3062 198 | "20-24",2523 199 | "25-29",3771 200 | "25-29",3714 201 | "15-19",2905 202 | "15-19",3997 203 | "25-29",3090 204 | "25-29",3575 205 | "20-24",3941 206 | "25-29",2975 207 | "25-29",2977 208 | "15-19",3119 209 | "25-29",3175 210 | "25-29",4423 211 | "25-29",4590 212 | "25-29",3430 213 | "25-29",3657 214 | "25-29",3657 215 | "25-29",2778 216 | "20-24",3515 217 | "20-24",2655 218 | "15-19",3119 219 | "25-29",3340 220 | "15-19",2863 221 | "15-19",3232 222 | "25-29",3317 223 | "25-29",3799 224 | "20-24",3941 225 | "15-19",3175 226 | "20-24",3005 227 | "15-19",2892 228 | "25-29",3374 229 | "25-29",3374 230 | "20-24",4139 231 | "25-29",3487 232 | "25-29",3260 233 | "20-24",3090 234 | "15-19",2920 235 | "25-29",3204 236 | "15-19",3515 237 | "25-29",3260 238 | "25-29",3970 239 | "20-24",3430 240 | "25-29",3969 241 | "15-19",3033 242 | "20-24",3107 243 | -------------------------------------------------------------------------------- /Edition2/Data/IceCream.csv: -------------------------------------------------------------------------------- 1 | "Brand","VanillaCalories","VanillaFat","VanillaSugar","ChocolateCalories","ChocolateFat","ChocolateSugar" 2 | "Baskin Robbins",260,16,26,260,14,31 3 | "Ben & Jerry's",240,16,19,260,16,22 4 | "Blue Bunny",140,7,12,130,7,14 5 | "Breyers",140,7,13,140,8,16 6 | "Brigham's",190,12,17,200,12,18 7 | "Bulla",234,13.5,21.8,266,15,22.6 8 | "Carvel",240,14,21,250,13,25 9 | "Cass-Clay",130,7,11,150,7,16 10 | "Chapman's",120,6,11,120,5,12 11 | "Cold Stone",270,15.5,23,264,16.2,23.6 12 | "Culver's",222,13,19,205,10,20 13 | "Dairy Queen",140,4.5,19,150,5,17 14 | "Dove",240,15,20,290,17,27 15 | "Dreamery",260,15,24,280,12,33 16 | "Edy's Grand",140,8,13,150,8,15 17 | "Emack & Bolio's",160,9,12,170,9,13 18 | "Good Humor",120,6,12,120,6,14 19 | "Graeter's",260,16,24,260,16,24 20 | "Green and Black",194,11.6,18,227,12.8,22.7 21 | "Green's",150,8,17,140,8,15 22 | "Haagen Dazs",270,18,21,270,18,21 23 | "Hershey's",140,9,14,140,8,13 24 | "Hill Station",226,15.6,16.8,235,14.3,21.2 25 | "Kemp's",130,7,13,140,6,17 26 | "Klein's",130,8,15,140,8,14 27 | "Oberweis Dairy",307,21,23,320,21,19 28 | "Our Family",130,7,11,130,6,15 29 | "Perry's",140,8,15,140,7,15 30 | "Ronnybrook Farm",240,16,20,260,19,21 31 | "Ruggles",150,8,12,150,8,16 32 | "Sara Lee",242,15.5,21.5,234,14.4,20.9 33 | "Schwan's",140,7,12,140,7,12 34 | "Sheer Bliss",300,19,27,320,19,29 35 | "Smith's",150,8,13,150,8,13 36 | "Stonyfield Farm",240,16,20,250,17,20 37 | "Tillamook",160,9,10,170,9,13 38 | "Turkey Hill",140,8,16,150,8,19 39 | "Value Choice",130,6,12,130,6,15 40 | "Whitey's",250,14,23,250,13,25 41 | -------------------------------------------------------------------------------- /Edition2/Data/Illiteracy.csv: -------------------------------------------------------------------------------- 1 | ID,Country,Illit,Births 2 | 1,Albania,20.5,1.78 3 | 2,Algeria,39.1,2.44 4 | 3,Bahrain,15,2.34 5 | 4,Belize,5.9,2.97 6 | 5,Benin,73.5,5.6 7 | 6,Bolivia,18.5,3.65 8 | 7,Botswana,17.6,3.03 9 
8,Brazil,11.9,2.29
9,Brunei,11.5,2.38
10,Burkina Faso,83.4,5.9
11,Burma,18.1,2.23
12,Burundi,54.8,6.8
13,Cambodia,39.8,3.89
14,Cape Verde,30.9,3.53
15,Central African Republic,60.1,4.73
16,Chad,60.7,6.3
17,China,19.6,1.81
18,Colombia,7.5,2.4
19,Comoros,50.7,3.76
20,"Congo, Democratic Republic of the",44.9,6.7
21,"Congo, Republic of the",21.6,5.6
22,Cote d'Ivoire,59.1,4.7
23,Djibouti,41.6,4.74
24,Dominican Republic,15.2,2.73
25,Ecuador,9.1,2.67
26,Egypt,53.1,3.1
27,El Salvador,22.3,2.76
28,Equatorial Guinea,21.6,5.89
29,Eritrea,52.4,5.24
30,Ethiopia,64.8,5.32
31,Fiji,8.1,2.79
32,"Gambia, The",66.9,4.4
33,Ghana,32.7,4.06
34,Guatemala,36.8,4.33
35,Guinea-Bissau,72.4,7.08
36,Haiti,48.8,3.75
37,Honduras,23,3.47
38,Hong Kong,9.5,0.97
39,India,51.7,2.84
40,Indonesia,15.9,2.27
41,Iran,27.4,2.07
42,Israel,6.2,2.82
43,Jamaica,8.3,2.38
44,Jordan,13.4,3.29
45,Kenya,20.3,4.98
46,Kuwait,18.3,2.39
47,Laos,43.4,4.5
48,Lebanon,17.8,2.25
49,Lesotho,5.5,3.4
50,Liberia,59.5,6.78
51,Libya,28.1,2.85
52,Macau,8,0.88
53,Madagascar,37.5,5.04
54,Malawi,50.3,5.84
55,Malaysia,14.7,2.74
56,Mali,82,6.72
57,Malta,6.4,1.37
58,Mauritania,68.1,5.59
59,Mauritius,17.4,1.98
60,Mexico,9.8,2.11
61,Mozambique,67.3,5.3
62,Namibia,16.3,3.66
63,Nicaragua,32.2,3.08
64,Niger,90.2,7.67
65,Nigeria,39,5.5
66,Oman,32.8,3.44
67,Pakistan,69.4,4.12
68,Panama,8.1,2.62
69,Papua New Guinea,40.6,3.8
70,Paraguay,7,3.67
71,Peru,13.3,2.74
72,Portugal,8.8,1.4
73,Puerto Rico,5.5,1.8
74,Qatar,15,2.89
75,Rwanda,35.2,5.8
76,Saudi Arabia,29.3,3.83
77,Senegal,69.2,4.9
78,Singapore,10.4,1.24
79,South Africa,14.3,2.78
80,Sri Lanka,10.1,1.91
81,Sudan,49.5,4.15
82,Swaziland,19.2,3.91
83,Syria,36.1,3.24
84,Tanzania,29.4,5.2
85,Thailand,5.4,1.89
86,Togo,53.1,5.03
87,Tunisia,35.6,2.04
88,Turkey,21.4,2.19
89,United Arab Emirates,18.5,2.43
90,Venezuela,6.9,2.65
91,Vietnam,8.5,1.78
92,Yemen,69.8,5.87
93,Zambia,25.2,5.4
94,Zimbabwe,12.9,3.34
--------------------------------------------------------------------------------
/Edition2/Data/Lottery.csv:
--------------------------------------------------------------------------------
"Win"
25
30
32
16
17
23
28
1
36
10
26
15
22
7
21
8
22
14
23
5
19
31
27
15
19
35
20
20
10
35
19
38
36
12
12
16
17
2
13
31
37
36
25
23
16
32
7
38
31
31
11
1
25
11
10
6
1
15
37
6
34
5
31
13
32
36
36
24
38
34
32
9
14
20
29
29
34
13
24
39
7
35
17
18
23
15
38
23
8
30
5
20
33
17
34
35
32
21
32
19
33
19
4
9
28
1
16
36
13
36
18
26
34
36
30
20
39
38
10
23
2
13
39
26
22
18
1
7
2
20
16
8
35
8
18
35
30
17
24
4
26
39
2
32
15
27
29
10
33
13
4
20
15
2
28
24
23
21
36
5
27
9
24
11
5
31
20
16
9
30
25
31
3
10
26
13
35
10
8
31
18
8
11
37
11
4
22
39
32
22
8
39
18
20
32
21
31
27
23
26
23
29
32
8
14
1
28
31
36
29
22
26
37
36
39
9
20
35
4
10
22
36
24
5
31
20
7
3
25
24
37
33
29
4
2
1
22
23
28
6
8
4
33
20
16
34
27
34
5
22
38
23
20
20
27
27
27
9
20
7
11
7
34
31
19
11
36
35
8
30
3
30
15
11
15
12
12
11
23
22
30
6
36
33
18
24
37
29
38
20
34
21
32
5
10
33
3
12
33
24
15
36
21
11
22
17
18
10
3
4
29
11
14
34
34
18
23
20
32
17
37
2
31
15
13
6
27
37
5
2
26
5
39
16
10
26
37
27
12
14
18
18
14
35
18
8
17
32
39
10
9
18
28
24
24
16
38
6
36
23
25
11
37
12
32
1
11
21
15
14
24
28
8
38
4
21
17
28
16
4
31
16
10
1
24
21
22
22
22
19
31
10
3
6
29
5
36
38
39
24
26
6
6
37
32
24
3
15
23
38
33
1
35
28
32
34
28
33
29
39
3
27
36
2
13
28
14
39
8
31
27
25
11
1
28
11
14
18
23
33
10
18
4
1
17
13
24
12
36
9
9
26
32
14
9
23
32
11
28
11
22
36
33
12
2
30
39
24
13
5
26
38
39
3
5
39
9
35
7
23
5
29
12
26
33
11
35
38
24
3
31
28
12
26
35
36
18
6
35
5
--------------------------------------------------------------------------------
/Edition2/Data/MathStatsData_Ed2.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Data/MathStatsData_Ed2.zip
--------------------------------------------------------------------------------
/Edition2/Data/Maunaloa.csv:
--------------------------------------------------------------------------------
ID,Year,Level
1,1990,357.08
2,1991,359
3,1992,359.45
4,1993,360.07
5,1994,361.48
6,1995,363.62
7,1996,364.93
8,1997,366.47
9,1998,369.33
10,1999,370.66
11,2000,371.32
12,2001,373.77
13,2002,375.58
14,2003,378.54
15,2004,380.62
16,2005,382.45
17,2006,384.94
18,2007,386.53
19,2008,388.5
20,2009,390.18
21,2010,392.94
--------------------------------------------------------------------------------
/Edition2/Data/NBA1617.csv:
--------------------------------------------------------------------------------
Name,Position,Team,Games,Minutes,PercFG,Perc3P,Perc2P,PercFT,OffReb,DefReb,Assists,Blocks
Quincy Acy,PF,Brooklyn,32,510,42.5,43.4,41.4,75.4,18,89,18,15
Anthony Bennett,PF,Brooklyn,23,264,41.3,27.1,56.8,72.2,25,53,12,3
Bojan Bogdanovic,SF,Brooklyn,55,1482,44,35.7,50.9,87.4,23,174,90,3
Trevor Booker,PF,Brooklyn,71,1754,51.6,32.1,54.6,67.3,143,428,138,28
Spencer Dinwiddie,PG,Brooklyn,59,1334,44.4,37.6,47.8,79.2,27,137,185,23
Yogi Ferrell,PG,Brooklyn,10,151,36.7,29.6,45.5,62.5,4,8,17,2
Randy Foye,SG,Brooklyn,69,1284,36.3,33,41.8,85.7,9,146,135,9
Archie Goodwin,SG,Brooklyn,12,184,55.7,30.8,62.5,71.9,7,21,23,4
Justin Hamilton,C,Brooklyn,64,1177,45.9,30.6,59.8,75,71,191,55,43
Joe Harris,SG,Brooklyn,52,1138,42.5,38.5,48.9,71.4,16,131,54,8
Rondae Hollis-Jefferson,SF,Brooklyn,78,1761,43.4,22.4,46.3,75.1,96,356,154,44
Sean Kilpatrick,SG,Brooklyn,70,1754,41.5,34.1,46.8,84.3,22,258,157,6
Caris LeVert,SF,Brooklyn,57,1237,45,32.1,57.1,72,23,165,110,8
Jeremy Lin,PG,Brooklyn,36,883,43.8,37.2,48,81.6,11,124,184,14
Brook Lopez,C,Brooklyn,75,2222,47.4,34.6,53.6,81,121,282,176,124
K.J. McDaniels,SF,Brooklyn,20,293,45.5,28.2,56.5,82.1,9,43,9,10
Andrew Nicholson,PF,Brooklyn,10,111,38.2,18.2,47.8,100,4,23,3,0
Luis Scola,PF,Brooklyn,36,461,47,34,53.5,67.6,51,88,37,4
Isaiah Whitehead,PG,Brooklyn,73,1643,40.2,29.5,44.6,80.5,32,152,192,36
Nicolas Batum,SG,Charlotte,77,2617,40.3,33.3,45.3,85.6,46,435,456,29
Marco Belinelli,SG,Charlotte,74,1778,42.9,36,48.8,89.3,14,164,147,9
Treveon Graham,SG,Charlotte,27,189,47.5,60,40,66.7,5,17,6,1
Spencer Hawes,PF,Charlotte,35,626,47.7,29.1,54,88.2,32,114,62,26
Roy Hibbert,C,Charlotte,42,671,54.2,NA,54.2,81.3,53,97,20,44
Frank Kaminsky,C,Charlotte,75,1954,39.9,32.8,45.5,75.6,57,279,162,34
Michael Kidd-Gilchrist,SF,Charlotte,81,2349,47.7,11.1,48.3,78.4,156,409,114,77
Jeremy Lamb,SG,Charlotte,62,1143,46,28.1,53.6,85.3,30,234,75,23
Miles Plumlee,C,Charlotte,13,174,58.3,NA,58.3,75,15,27,3,4
Brian Roberts,PG,Charlotte,41,416,37.7,38.6,37.2,84.6,5,34,52,1
Ramon Sessions,PG,Charlotte,50,811,38,33.9,39.3,77.1,11,62,129,3
Kemba Walker,PG,Charlotte,79,2739,44.4,39.9,47.6,84.7,45,263,434,22
Briante Weber,PG,Charlotte,13,159,43.5,14.3,48.7,69.2,8,14,16,0
Marvin Williams,PF,Charlotte,76,2295,42.2,35,49.4,87.3,89,411,106,53
Christian Wood,PF,Charlotte,13,107,52.2,0,66.7,73.3,14,15,2,6
Cody Zeller,PF,Charlotte,62,1725,57.1,0,57.2,67.9,135,270,99,58
Chris Andersen,C,Cleveland,12,114,40.9,0,47.4,71.4,9,22,5,7
Mike Dunleavy,SF,Cleveland,23,366,40,35.1,48.5,73.7,5,42,20,2
Kay Felder,PG,Cleveland,42,386,39.2,31.8,40.4,71.4,3,38,58,7
Channing Frye,C,Cleveland,74,1398,45.8,40.9,54.6,85.1,37,253,45,37
Kyrie Irving,PG,Cleveland,72,2525,47.3,40.1,50.5,90.5,52,178,418,24
LeBron James,SF,Cleveland,74,2794,54.8,36.3,61.1,67.4,97,542,646,44
Richard Jefferson,SF,Cleveland,79,1614,44.6,33.3,58,74.1,28,175,78,10
James Jones,SF,Cleveland,48,381,47.8,47,50,65,3,34,14,10
Kyle Korver,SG,Cleveland,35,859,48.7,48.5,49.3,93.3,7,90,35,8
DeAndre Liggins,SG,Cleveland,61,752,38.2,37.8,38.5,62.2,17,84,54,14
Kevin Love,PF,Cleveland,60,1885,42.7,37.3,47.1,87.1,148,518,116,21
Jordan McRae,SG,Cleveland,37,384,38.7,35.3,40.4,79.4,6,35,19,6
Iman Shumpert,SG,Cleveland,76,1937,41.1,36,46.9,78.9,39,179,109,27
J.R. Smith,SG,Cleveland,41,1187,34.6,35.1,32.9,66.7,17,96,62,11
Tristan Thompson,C,Cleveland,78,2336,60,0,60.4,49.8,286,429,77,84
Deron Williams,PG,Cleveland,24,486,46.3,41.5,48.9,84,1,44,86,6
Derrick Williams,PF,Cleveland,25,427,50.5,40.4,60,69.2,3,54,14,2
LaMarcus Aldridge,PF,San Antonio,72,2335,47.7,41.1,48,81.2,172,351,139,88
Kyle Anderson,SG,San Antonio,72,1020,44.5,37.5,46.2,78.9,33,175,91,26
Joel Anthony,C,San Antonio,19,122,62.5,NA,62.5,62.5,8,23,3,6
Davis Bertans,PF,San Antonio,67,808,44,39.9,55.7,82.4,22,76,46,28
Dewayne Dedmon,C,San Antonio,76,1330,62.2,NA,62.2,69.9,129,367,44,61
Bryn Forbes,SG,San Antonio,36,285,36.4,32.1,41.3,83.3,2,21,23,0
Pau Gasol,C,San Antonio,64,1627,50.2,53.8,49.4,70.7,107,394,150,70
Manu Ginobili,SG,San Antonio,69,1291,39,39.2,38.7,80.4,28,129,183,16
Danny Green,SG,San Antonio,68,1807,39.2,37.9,42,84.4,31,193,124,57
Nicolas Laprovittola,PG,San Antonio,18,174,42.6,37,50,100,1,9,28,1
David Lee,PF,San Antonio,79,1477,59,NA,59,70.8,149,292,124,40
Kawhi Leonard,SF,San Antonio,74,2474,48.5,38,52.9,88,80,350,260,55
Patty Mills,PG,San Antonio,80,1754,44,41.4,47.4,82.5,24,117,280,3
Dejounte Murray,PG,San Antonio,38,322,43.1,39.1,44.1,70,6,36,48,6
Tony Parker,PG,San Antonio,63,1587,46.6,33.3,48.4,72.6,9,104,285,2
Jonathon Simmons,SG,San Antonio,78,1392,42,29.4,46.1,75,20,140,126,25
--------------------------------------------------------------------------------
/Edition2/Data/Nasdaq.csv:
--------------------------------------------------------------------------------
"Symbol","Open","Close","Volume"
"LQDT",5.75,5.8,58900
"FEUZ",43.55,43.45,5400
"PRIM",28,27.89,206600
"OMED",4.84,4.9,202100
"AGND",43.34,43.29,100400
"WEBK",27.8,27.9,2200
"MNDO",2.68,2.63,10800
"RTRX",22.39,21.8,222400
"VRNS",51,52.35,426700
"NFEC",1.03,1.02,18400
"HLG",41.36,41.75,44300
"CCD",20.51,20.44,28200
"PGLC",2.8,2.81,54100
"FHB",29.27,29.62,249500
"ZIONW",17.75,17.71,121500
"USAU",1.4,1.44,211100
"PTH",71.29,71.02,15700
"FAT",9.25,9.29,26700
"ROCK",32.9,33.05,218900
"WEB",23,22.7,242600
"GRVY",75.88,74.26,26100
"ENFC",29,29.15,8900
"CHRS",9,8.8,318100
"LUNA",2.31,2.31,453800
"RUTH",21.4,21.4,218900
"UPL",9.63,9.45,2725400
"HAYN",32.09,31.67,76900
"LIVE",14.71,14.98,7600
"CCXI",6.48,6.85,79200
"QTNT",4.87,4.78,75000
"HPT",29.98,29.91,459300
"ZUMZ",20.9,19.95,2652900
"FTGC",20.65,20.65,42900
"FLEX",18.06,18.01,4869200
"FLKS",4.23,4.18,16800
"IPCI",0.845,0.85,786600
"MRBK",18.365,18.35,3000
"VDSI",13.35,12.95,154000
"MLVF",26.3,26.7,4300
"OCLR",7.07,7.01,5764000
"FLL",3.68,3.68,115000
"CDXC",6.84,6.59,660100
"WNEB",10.6,10.65,35300
"NEON",0.83,0.81,95700
"EBAYL",26.95,27.01,26900
"DRAD",2.3,2.35,94000
"CXSE",83.28,82.72,39100
"DORM",68.11,67.1,392800
"GRFS",22.41,22.2,527200
"ACGL",94.63,94.86,633400
--------------------------------------------------------------------------------
/Edition2/Data/Olympics2012.csv:
--------------------------------------------------------------------------------
Name,Country,Age,Sex,Height,Weight,Sport
Chiara Cainero,Italy,34,F,67,165,Shooting
Ciara Michel,Great Britain,27,F,76,154,Volleyball
Claudette Mukasakindi,Rwanda,29,F,63,110,Archery
Claudia Wurzel,Italy,25,F,71,146,Rowing
Daria Korczynska,Poland,31,F,66,132,Track/Field
Elizabeth Beisel,United States of America,19,F,66,146,Swimming
Evelyn Yesenia Garcia Marroquin,El Salvador,29,F,64,119,Cycling - Road
Giulia Rambaldi,Italy,25,F,70,170,Water Polo
Holley Mangold,United States of America,22,F,68,340,Weightlifting
Joanne Morgan,Great Britain,28,F,67,132,Volleyball
Joyce Sombroek,Netherlands,21,F,70,141,Hockey
Kim Conley,United States of America,26,F,63,108,Track/Field
Liu Ying Goh,Malaysia,23,F,65,110,Badminton
Maria Alexandra Escobar Guerrero,Ecuador,32,F,62,126,Weightlifting
Maria Vasco,Spain,36,F,62,104,Track/Field
Michelle Vittese,United States of America,22,F,63,128,Hockey
Paola Croce,Italy,34,F,66,115,Volleyball
Sara Hendershot,United States of America,24,F,71,165,Rowing
Sara Winther,New Zealand,30,F,65,148,Sailing
Sheilla Castro,Brazil,29,F,74,148,Volleyball
Shereefa Lloyd,Jamaica,29,F,66,134,Track/Field
Urszula Sadkowska,Poland,28,F,76,172,Judo
Valerie Vermeersch,Belgium,26,F,69,148,Hockey
Willy Kanis,Netherlands,28,F,68,176,Cycling-Track
Xiang Wei Jasmine Ser,Singapore,21,F,61,106,Shooting
Xuerui Li,People's Republic of China,21,F,67,132,Badminton
Christopher Duenas,Guam,20,M,73,185,Swimming
Emmanuel Dyen,France,33,M,71,154,Sailing
Gregory Wathelet,Belgium,31,M,75,187,Equestrian
Hiroshi Hoketsu,Japan,71,M,66,134,Equestrian
Kazuya Kaneda,Japan,24,M,67,146,Swimming
Marco Fortes,Portugal,29,M,74,298,Track/FIeld
Mickael Gelabale,France,29,M,79,198,Basketball
Minwoo Kim,Republic of Korea,22,M,68,152,Football
Nahom Mesfin,Ethiopia,23,M,71,137,Track/Field
Ramunas Navardauskas,Lithuania,24,M,75,172,Cycling - Road
Ruslan Ismailov,Kyrgyzstan,25,M,68,132,Shooting
Timothy Kitum,Kenya,17,M,67,132,Track/Field
Tonci Stipanovic,Croatia,26,M,70,183,Sailing
Victor Minibaev,Russian Federation,21,M,68,139,Diving
Youcef Abdi,Australia,34,M,70,146,Track/Field
Yu-Cheng Chen,Taipei (Chinese Taipei),19,M,71,159,Archery
--------------------------------------------------------------------------------
/Edition2/Data/Phillies2009.csv:
--------------------------------------------------------------------------------
Date,Location,Outcome,Hits,Doubles,Homeruns,StrikeOuts
5-Apr,Home,Lose,4,2,0,6
7-Apr,Home,Lose,6,1,0,3
8-Apr,Home,Win,11,3,1,6
10-Apr,Away,Lose,7,2,1,3
11-Apr,Away,Win,15,3,1,6
12-Apr,Away,Win,13,3,2,4
13-Apr,Away,Win,10,3,3,7
16-Apr,Away,Lose,5,1,0,3
17-Apr,Home,Lose,14,3,1,5
18-Apr,Home,Lose,8,2,3,7
19-Apr,Home,Win,9,1,3,5
21-Apr,Home,Win,13,4,1,8
22-Apr,Home,Lose,8,0,1,4
23-Apr,Home,Lose,2,0,1,4
24-Apr,Away,Win,8,2,2,12
25-Apr,Away,Win,9,0,2,8
26-Apr,Away,Win,12,2,0,7
27-Apr,Home,Win,14,5,2,7
28-Apr,Home,Win,11,0,4,5
29-Apr,Home,Lose,7,0,1,8
1-May,Home,Lose,8,0,1,4
2-May,Home,Win,9,3,1,6
4-May,Away,Win,8,1,2,11
5-May,Away,Win,15,4,2,7
6-May,Away,Lose,3,2,0,11
7-May,Away,Lose,10,2,2,0
8-May,Home,Win,8,3,3,7
9-May,Home,Lose,5,2,2,8
10-May,Home,Lose,6,3,0,8
12-May,Home,Win,6,1,0,6
13-May,Home,Lose,5,2,2,9
14-May,Home,Lose,5,2,0,13
15-May,Away,Win,16,1,1,9
May 16 (1),Away,Win,14,2,3,5
May 16 (2),Away,Win,9,3,2,3
17-May,Away,Win,10,3,0,6
19-May,Away,Win,7,2,1,5
20-May,Away,Lose,4,3,1,10
21-May,Away,Win,14,5,4,11
22-May,Away,Win,14,0,4,9
23-May,Away,Lose,6,1,2,5
24-May,Away,Win,11,3,0,7
25-May,Home,Lose,6,2,2,8
26-May,Home,Win,10,5,0,4
27-May,Home,Lose,8,2,2,7
29-May,Home,Win,16,6,0,5
30-May,Home,Win,10,4,2,5
31-May,Home,Win,7,1,1,10
1-Jun,Away,Win,12,4,2,9
2-Jun,Away,Win,11,2,3,6
3-Jun,Away,Win,10,2,1,3
4-Jun,Away,Win,7,3,0,7
5-Jun,Away,Lose,11,2,0,8
6-Jun,Away,Lose,6,1,0,8
7-Jun,Away,Win,9,1,3,6
9-Jun,Away,Lose,10,1,4,4
10-Jun,Away,Win,9,0,2,3
11-Jun,Away,Win,9,2,1,9
12-Jun,Home,Lose,5,2,1,20
13-Jun,Home,Lose,13,2,3,8
14-Jun,Home,Win,14,3,1,9
16-Jun,Home,Lose,8,1,1,10
17-Jun,Home,Lose,6,0,1,12
18-Jun,Home,Lose,12,1,4,9
19-Jun,Home,Lose,6,3,0,7
20-Jun,Home,Lose,8,2,1,4
21-Jun,Home,Lose,4,1,1,4
23-Jun,Away,Win,10,2,2,7
24-Jun,Away,Lose,4,0,1,7
25-Jun,Away,Lose,8,2,0,10
26-Jun,Away,Lose,3,0,0,8
27-Jun,Away,Win,14,3,3,9
28-Jun,Away,Win,10,1,0,9
30-Jun,Away,Lose,10,2,3,6
1-Jul,Away,Lose,2,0,0,7
2-Jul,Away,Lose,8,3,0,7
3-Jul,Home,Win,11,3,1,6
4-Jul,Home,Win,9,1,0,5
5-Jul,Home,Win,3,0,2,4
6-Jul,Home,Win,21,6,4,9
7-Jul,Home,Lose,10,2,2,8
8-Jul,Home,Win,9,0,1,11
9-Jul,Home,Win,12,2,2,5
10-Jul,Home,Win,5,3,0,6
11-Jul,Home,Win,13,3,2,8
12-Jul,Home,Win,6,2,1,7
16-Jul,Away,Win,10,2,3,10
17-Jul,Away,Win,12,2,1,10
19-Jul,Away,Win,8,2,0,7
20-Jul,Home,Win,9,2,3,10
21-Jul,Home,Win,6,0,2,12
22-Jul,Home,Lose,11,0,0,8
23-Jul,Home,Win,14,4,1,8
24-Jul,Home,Lose,8,2,0,6
25-Jul,Home,Win,14,3,2,5
26-Jul,Home,Win,13,1,4,6
27-Jul,Away,Win,6,1,1,6
28-Jul,Away,Win,7,2,1,9
29-Jul,Away,Lose,7,2,0,12
30-Jul,Away,Lose,5,3,1,9
31-Jul,Away,Win,8,2,1,6
1-Aug,Away,Lose,7,0,0,8
2-Aug,Away,Lose,9,1,1,6
4-Aug,Home,Lose,9,2,1,9
5-Aug,Home,Win,11,5,3,5
6-Aug,Home,Win,7,1,1,3
7-Aug,Home,Lose,4,1,1,9
8-Aug,Home,Lose,11,3,0,6
9-Aug,Home,Lose,6,0,1,9
11-Aug,Away,Win,3,0,2,9
12-Aug,Away,Win,14,3,3,9
13-Aug,Away,Win,10,2,2,9
14-Aug,Away,Win,7,2,2,9
15-Aug,Away,Lose,7,3,1,4
16-Aug,Away,Win,7,1,2,8
18-Aug,Home,Win,12,1,2,6
19-Aug,Home,Win,13,2,4,10
20-Aug,Home,Win,12,1,3,7
21-Aug,Away,Lose,10,1,0,10
22-Aug,Away,Win,7,4,1,6
23-Aug,Away,Win,10,1,2,6
24-Aug,Away,Win,7,2,2,9
25-Aug,Away,Lose,9,5,2,8
26-Aug,Away,Win,9,4,2,6
27-Aug,Away,Lose,6,1,1,9
28-Aug,Home,Win,9,2,2,8
29-Aug,Home,Lose,11,1,1,7
30-Aug,Home,Win,8,3,1,4
1-Sep,Home,Win,5,2,0,10
2-Sep,Home,Lose,5,0,0,4
3-Sep,Home,Win,4,3,1,12
4-Sep,Away,Lose,8,1,0,9
5-Sep,Away,Lose,10,1,0,9
6-Sep,Away,Lose,9,1,3,8
7-Sep,Away,Lose,7,2,2,7
8-Sep,Away,Win,7,0,5,2
9-Sep,Away,Win,11,3,2,8
10-Sep,Away,Lose,11,1,1,2
11-Sep,Home,Win,12,6,0,6
12-Sep,Home,Lose,12,3,3,7
Sep 13 (1),Home,Win,10,2,2,5
Sep 13 (2),Home,Win,3,1,0,4
15-Sep,Home,Win,10,4,0,3
16-Sep,Home,Win,8,0,1,5
17-Sep,Home,Win,6,1,0,7
18-Sep,Away,Win,10,1,4,4
19-Sep,Away,Lose,8,1,1,8
20-Sep,Away,Win,10,5,0,9
Sep 22 (1),Away,Win,11,1,1,16
Sep 22 (2),Away,Lose,2,1,0,7
23-Sep,Away,Lose,9,3,2,6
24-Sep,Away,Win,14,2,1,8
25-Sep,Away,Lose,9,0,0,8
26-Sep,Away,Lose,8,0,2,5
27-Sep,Away,Win,14,5,1,9
28-Sep,Home,Lose,4,1,0,5
29-Sep,Home,Win,8,1,2,6
30-Sep,Home,Win,9,3,1,2 160 | 1-Oct,Home,Lose,13,3,0,8 161 | 2-Oct,Home,Lose,7,1,1,4 162 | 3-Oct,Home,Lose,6,0,2,4 163 | 4-Oct,Home,Win,12,0,1,13 164 | -------------------------------------------------------------------------------- /Edition2/Data/Quetzal.csv: -------------------------------------------------------------------------------- 1 | "Country","Nest","Snag" 2 | "Guatemala",4.62,6.15 3 | "Guatemala",18.46,24.62 4 | "Guatemala",5.23,6.15 5 | "Guatemala",9.85,12.31 6 | "Guatemala",7.69,9.23 7 | "Guatemala",9.85,15.38 8 | "Guatemala",4,5.85 9 | "Guatemala",10.77,12.31 10 | "Guatemala",13.85,16.92 11 | "Guatemala",10.77,16.92 12 | "Guatemala",24.62,29.23 13 | "Costa Rica",6.9,8.7 14 | "Costa Rica",5.6,7.7 15 | "Costa Rica",4.3,5.2 16 | "Costa Rica",8.3,9.7 17 | "Costa Rica",1.5,1.8 18 | "Costa Rica",6.2,9.6 19 | "Costa Rica",7.4,10.7 20 | "Costa Rica",4.1,7.3 21 | "Costa Rica",10.1,10.8 22 | "Costa Rica",8.4,9.7 23 | -------------------------------------------------------------------------------- /Edition2/Data/RangersTwins2016.csv: -------------------------------------------------------------------------------- 1 | Name,Team,Pos,Age,Games,AtBats,Runs,Hits,Doubles,Triples,HR,RBI,SB,CS,BB,SO,BA 2 | Robinson Chirinos,Rangers,C,32,57,147,21,33,11,0,9,20,0,1,15,44,0.224 3 | Mitch Moreland,Rangers,1B,30,147,460,49,107,21,0,22,60,1,0,35,118,0.233 4 | Rougned Odor,Rangers,2B,22,150,605,89,164,33,4,33,88,14,7,19,135,0.271 5 | Elvis Andrus,Rangers,SS,27,147,506,75,153,31,7,8,69,24,8,47,70,0.302 6 | Adrian Beltre,Rangers,3B,37,153,583,89,175,31,1,32,104,1,1,48,66,0.3 7 | Ryan Rua,Rangers,LF,26,99,240,40,62,8,1,8,22,9,0,21,76,0.258 8 | Ian Desmond,Rangers,CF,30,156,625,107,178,29,3,22,86,21,6,44,160,0.285 9 | Nomar Mazara,Rangers,RF,21,145,516,59,137,13,3,20,64,0,2,39,112,0.266 10 | Prince Fielder,Rangers,DH,32,89,326,29,69,16,0,8,44,0,0,32,63,0.212 11 | Jurickson Profar,Rangers,UT,23,90,272,35,65,6,3,5,20,2,1,30,61,0.239 12 | Carlos Beltran,Rangers,DH,39,52,193,23,54,12,0,7,29,1,0,13,31,0.28 13 | Delino DeShields,Rangers,OF,23,74,182,36,38,7,0,4,13,8,3,15,54,0.209 14 | Kurt Suzuki,Twins,C,32,106,345,34,89,24,1,8,49,0,0,18,48,0.258 15 | Joe Mauer,Twins,1B,33,134,494,68,129,22,4,11,49,2,0,79,93,0.261 16 | Brian Dozier,Twins,2B,29,155,615,104,165,35,5,42,99,18,2,61,138,0.268 17 | Eduardo Escobar,Twins,SS,27,105,352,32,83,14,2,6,37,1,3,21,72,0.236 18 | Trevor Plouffe,Twins,3B,30,84,319,35,83,13,1,12,47,1,0,19,60,0.26 19 | Robbie Grossman,Twins,LF,26,99,332,49,93,19,1,11,37,2,3,55,96,0.28 20 | Byron Buxton,Twins,CF,22,92,298,44,67,19,6,10,38,10,2,23,118,0.225 21 | Max Kepler,Twins,RF,23,113,396,52,93,20,2,17,63,6,2,42,93,0.235 22 | ByungHo Park,Twins,DH,29,62,215,28,41,9,1,12,24,1,0,21,80,0.191 23 | Miguel Sano,Twins,UT,23,116,437,57,103,22,1,25,66,1,0,54,178,0.236 24 | Eduardo Nunez,Twins,IF,29,91,371,49,110,15,1,12,47,27,6,15,58,0.296 25 | Eddie Rosario,Twins,OF,24,92,335,52,90,17,2,10,32,5,2,12,91,0.269 26 | Jorge Polanco,Twins,SS,22,69,245,24,69,15,4,4,27,4,3,17,46,0.282 27 | Danny Santana,Twins,CF,25,75,233,29,56,10,2,2,14,12,9,12,55,0.24 28 | Juan Centeno,Twins,C,26,55,176,16,46,12,1,3,25,0,0,12,38,0.261 29 | -------------------------------------------------------------------------------- /Edition2/Data/Salaries.csv: -------------------------------------------------------------------------------- 1 | League,Salary,Year 2 | National,0.55775,1985 3 | National,0.5187075,1985 4 | National,2.1,2015 5 | American,8,2015 6 | National,0.568905,1985 7 | American,0.5081,2015 8 | American,1.45015,1985 9 | 
American,1.5,2015 10 | National,0.8,2015 11 | American,0.2231,1985 12 | American,0.51,2015 13 | National,1.7848,1985 14 | National,1,2015 15 | National,1.88,2015 16 | National,1.333333,2015 17 | American,0.24541,1985 18 | American,0.501975,1985 19 | National,0.5075525,1985 20 | National,2.0079,1985 21 | American,0.66,2015 22 | American,1.675,2015 23 | National,0.523925,2015 24 | National,0.5125,2015 25 | American,0.517,2015 26 | American,1.710434077,1985 27 | National,1.776730473,1985 28 | American,0.55775,1985 29 | American,0.5109,2015 30 | American,2.95,2015 31 | American,0.5201,2015 32 | National,0.5165,2015 33 | National,2.5,2015 34 | National,0.78085,1985 35 | American,1.29398,1985 36 | American,8.5,2015 37 | American,0.3290725,1985 38 | American,1.153746033,1985 39 | National,0.5057,2015 40 | National,0.5085,2015 41 | American,1.225,2015 42 | American,1.47246,1985 43 | American,2.475,2015 44 | National,0.836625,1985 45 | American,0.26772,1985 46 | National,2.6,2015 47 | National,0.531,2015 48 | American,1.394375,1985 49 | American,2.416915923,1985 50 | National,0.4462,1985 51 | National,0.518,2015 52 | American,7.7,2015 53 | American,0.525415,2015 54 | National,0.5229,2015 55 | National,0.29003,1985 56 | American,0.301185,1985 57 | American,0.5095,2015 58 | American,0.747385,1985 59 | American,1.24936,1985 60 | American,0.603,2015 61 | American,0.5129,2015 62 | American,1.6,2015 63 | National,0.825,2015 64 | National,0.525,2015 65 | National,1.75,2015 66 | National,0.858935,1985 67 | National,6.857143,2015 68 | National,2.536647,1985 69 | American,1.015105,1985 70 | National,0.55775,1985 71 | American,0.5085,2015 72 | -------------------------------------------------------------------------------- /Edition2/Data/Service.csv: -------------------------------------------------------------------------------- 1 | ID,Times 2 | 1,1.1 3 | 2,1.4 4 | 3,0.683333333 5 | 4,0.716666666 6 | 5,0.316666666 7 | 6,0.533333334 8 | 7,0.35 9 | 8,0.7 10 | 9,1.633333333 11 | 10,0.933333334 12 | 11,0.533333334 13 | 12,0.283333333 14 | 13,0.516666666 15 | 14,0.383333334 16 | 15,1.783333333 17 | 16,1.216666667 18 | 17,0.883333333 19 | 18,0.383333334 20 | 19,1.566666667 21 | 20,0.216666667 22 | 21,0.483333333 23 | 22,1.333333334 24 | 23,0.133333333 25 | 24,0.833333334 26 | 25,0.766666667 27 | 26,0.783333333 28 | 27,0.3 29 | 28,0.683333333 30 | 29,0.566666667 31 | 30,0.683333333 32 | 31,0.866666667 33 | 32,0.7 34 | 33,0.333333333 35 | 34,0.983333334 36 | 35,0.616666667 37 | 36,0.383333333 38 | 37,0.35 39 | 38,0.283333333 40 | 39,0.566666667 41 | 40,1.766666667 42 | 41,1.6 43 | 42,0.783333334 44 | 43,0.316666666 45 | 44,0.616666667 46 | 45,0.316666666 47 | 46,0.15 48 | 47,0.516666667 49 | 48,0.7 50 | 49,0.233333333 51 | 50,0.833333333 52 | 51,0.65 53 | 52,0.916666667 54 | 53,1.333333334 55 | 54,0.25 56 | 55,0.133333333 57 | 56,0.383333333 58 | 57,2.2 59 | 58,0.35 60 | 59,0.55 61 | 60,0.933333334 62 | 61,0.4 63 | 62,0.566666666 64 | 63,0.433333334 65 | 64,1.633333334 66 | 65,1.966666666 67 | 66,0.65 68 | 67,1.133333333 69 | 68,0.5 70 | 69,0.15 71 | 70,1.05 72 | 71,0.75 73 | 72,1 74 | 73,1.133333334 75 | 74,1.15 76 | 75,0.816666667 77 | 76,0.866666667 78 | 77,0.45 79 | 78,0.15 80 | 79,0.383333333 81 | 80,0.55 82 | 81,0.683333333 83 | 82,0.966666667 84 | 83,0.533333333 85 | 84,0.35 86 | 85,0.666666667 87 | 86,0.633333334 88 | 87,0.466666667 89 | 88,0.466666667 90 | 89,0.783333334 91 | 90,0.6 92 | 91,1.483333333 93 | 92,0.733333334 94 | 93,1.4 95 | 94,1.033333333 96 | 95,0.683333333 97 | 96,0.1 98 | 97,0.45 99 | 
98,0.416666667 100 | 99,0.516666666 101 | 100,1.15 102 | 101,0.466666667 103 | 102,0.183333334 104 | 103,0.433333333 105 | 104,0.3 106 | 105,0.666666666 107 | 106,1.15 108 | 107,0.55 109 | 108,0.733333334 110 | 109,0.9 111 | 110,0.95 112 | 111,0.583333333 113 | 112,1.3 114 | 113,0.316666666 115 | 114,0.733333334 116 | 115,0.433333334 117 | 116,0.283333333 118 | 117,0.316666667 119 | 118,0.416666666 120 | 119,0.933333334 121 | 120,0.8 122 | 121,0.45 123 | 122,0.6 124 | 123,0.25 125 | 124,1.783333334 126 | 125,0.5 127 | 126,0.5 128 | 127,0.233333333 129 | 128,0.216666667 130 | 129,1.033333333 131 | 130,0.516666667 132 | 131,0.7 133 | 132,0.216666667 134 | 133,0.833333333 135 | 134,1.183333333 136 | 135,1.116666667 137 | 136,0.433333333 138 | 137,0.283333334 139 | 138,0.35 140 | 139,0.716666667 141 | 140,0.4 142 | 141,0.333333334 143 | 142,0.216666667 144 | 143,0.433333333 145 | 144,0.3 146 | 145,0.35 147 | 146,1.666666666 148 | 147,0.983333334 149 | 148,0.316666667 150 | 149,1.416666667 151 | 150,1.033333333 152 | 151,1.116666667 153 | 152,0.55 154 | 153,0.466666667 155 | 154,0.566666666 156 | 155,0.55 157 | 156,0.983333333 158 | 157,0.8 159 | 158,0.533333333 160 | 159,0.3 161 | 160,0.3 162 | 161,1.183333333 163 | 162,0.166666666 164 | 163,0.366666667 165 | 164,0.366666667 166 | 165,0.15 167 | 166,0.183333333 168 | 167,0.283333334 169 | 168,0.633333334 170 | 169,0.566666667 171 | 170,0.45 172 | 171,0.983333333 173 | 172,1.433333333 174 | 173,1.816666667 175 | 174,1.183333333 176 | -------------------------------------------------------------------------------- /Edition2/Data/Skateboard.csv: -------------------------------------------------------------------------------- 1 | "Age","Experimenter","Testosterone" 2 | 18,"Female",206 3 | 18,"Female",197 4 | 18,"Female",135.8 5 | 18,"Female",170.2 6 | 19,"Female",107.3 7 | 19,"Female",351.6 8 | 18,"Female",282.6 9 | 18,"Female",257 10 | 18,"Female",117.8 11 | 19,"Female",342.4 12 | 34,"Female",129.6 13 | 26,"Female",208.6 14 | 19,"Female",253.8 15 | 22,"Female",213.6 16 | 25,"Female",344 17 | 25,"Female",127.7 18 | 29,"Female",351.6 19 | 19,"Female",179.6 20 | 21,"Female",469.6 21 | 19,"Female",411.4 22 | 25,"Female",267.2 23 | 20,"Female",267.4 24 | 19,"Female",308.8 25 | 33,"Female",568.2 26 | 23,"Female",293.8 27 | 27,"Female",495 28 | 20,"Female",408 29 | 21,"Female",644.8 30 | 18,"Female",206.6 31 | 18,"Female",472 32 | 18,"Female",369.8 33 | 23,"Female",286.4 34 | 22,"Female",246.4 35 | 18,"Female",232 36 | 18,"Female",126.4 37 | 32,"Female",106.2 38 | 22,"Female",160.6 39 | 19,"Female",146.8 40 | 21,"Female",361.2 41 | 29,"Female",196.4 42 | 23,"Female",307.4 43 | 24,"Female",625 44 | 23,"Female",209 45 | 19,"Female",502 46 | 19,"Female",236.6 47 | 19,"Female",183.8 48 | 20,"Female",320 49 | 27,"Female",544 50 | 21,"Female",552.6 51 | 19,"Male",127.2 52 | 21,"Male",143.2 53 | 18,"Male",273 54 | 25,"Male",131.6 55 | 20,"Male",190 56 | 29,"Male",193.2 57 | 19,"Male",112 58 | 24,"Male",457 59 | 26,"Male",155.4 60 | 20,"Male",277.8 61 | 22,"Male",139.6 62 | 20,"Male",420.6 63 | 20,"Male",206 64 | 18,"Male",411 65 | 22,"Male",151.2 66 | 18,"Male",271.6 67 | 22,"Male",202 68 | 35,"Male",204 69 | 19,"Male",227 70 | 21,"Male",141.2 71 | 20,"Male",124.4 72 | 19,"Male",124.4 73 | -------------------------------------------------------------------------------- /Edition2/Data/Skating2010.csv: -------------------------------------------------------------------------------- 1 | "Name","Country","Short","Free","Total" 2 | "LYSACEK Evan","United 
States",90.3,167.37,257.67 3 | "PLUSHENKO Evgeni","Russian Federation",90.85,165.51,256.36 4 | "TAKAHASHI Daisuke","Japan",90.25,156.98,247.23 5 | "LAMBIEL Stephane","Switzerland",84.63,162.09,246.72 6 | "CHAN Patrick","Canada",81.12,160.3,241.42 7 | "WEIR Johnny","United States",82.1,156.77,238.87 8 | "ODA Nobunari","Japan",84.85,153.69,238.54 9 | "KOZUKA Takahiko","Japan",79.59,151.6,231.19 10 | "ABBOTT Jeremy","United States",69.4,149.56,218.96 11 | "BREZINA Michal","Czech Republic",78.8,137.93,216.73 12 | "TEN Denis","Kazakhstan",76.24,135.01,211.25 13 | "AMODIO Florent","France",75.35,134.95,210.3 14 | "BORODULIN Artem","Russian Federation",72.24,137.92,210.16 15 | "FERNANDEZ Javier","Spain",68.69,137.99,206.68 16 | "SCHULTHEISS Adrian","Sweden",63.13,137.31,200.44 17 | "JOUBERT Brian","France",68,132.22,200.22 18 | "van der PERREN Kevin","Belgium",72.9,116.94,189.84 19 | "CONTESTI Samuel","Italy",70.6,116.9,187.5 20 | "VERNER Tomas","Czech Republic",65.32,119.42,184.74 21 | "BACCHINI Paolo","Italy",64.42,112.79,177.21 22 | "PFEIFER Viktor","Austria",60.88,115.05,175.93 23 | "LINDEMANN Stefan","Germany",68.5,103.48,171.98 24 | "CHIPEUR Vaughn","Canada",57.22,113.7,170.92 25 | "KOVALEVSKI Anton","Ukraine",63.81,102.09,165.9 26 | -------------------------------------------------------------------------------- /Edition2/Data/Spruce.csv: -------------------------------------------------------------------------------- 1 | "Tree","Competition","Fertilizer","Height0","Height5","Diameter0","Diameter5","Ht.change","Di.change" 2 | 1,"NC","F",15,60,1.984375,7.4,45,5.415625 3 | 2,"NC","F",9,45.2,1.190625,5.2,36.2,4.009375 4 | 3,"NC","F",12,42,1.7859375,5.7,30,3.9140625 5 | 4,"NC","F",13.7,49.5,1.5875,6.4,35.8,4.8125 6 | 5,"NC","F",12,47.3,1.5875,6.2,35.3,4.612500000000001 7 | 6,"NC","F",12,56.4,1.5875,7.4,44.4,5.8125 8 | 7,"NC","NF",16.8,43.5,1.984375,4.9,26.7,2.9156250000000004 9 | 8,"NC","NF",14.6,49.2,1.984375,5.4,34.6,3.4156250000000004 10 | 9,"NC","NF",16,54,1.984375,7.1,38,5.115625 11 | 10,"NC","NF",15.4,45,1.984375,5.1,29.6,3.1156249999999996 12 | 11,"NC","NF",11.7,38,1.3890625,4.1,26.3,2.7109374999999996 13 | 12,"NC","NF",15,60.5,1.5875,7.3,45.5,5.7125 14 | 13,"C","F",13.1,45.4,1.984375,6.3,32.3,4.315625 15 | 14,"C","F",11,50,1.5875,6.2,39,4.612500000000001 16 | 15,"C","F",16,53,2.1828125,5.6,37,3.4171875 17 | 16,"C","F",13.5,54,1.5875,6.3,40.5,4.7125 18 | 17,"C","F",11.6,39,1.5875,4.5,27.4,2.9125 19 | 18,"C","F",13.5,54.2,1.984375,6.5,40.7,4.515625 20 | 19,"C","NF",13.2,28.3,1.5875,3.4,15.100000000000001,1.8125 21 | 20,"C","NF",15.8,29.5,2.38125,3.4,13.7,1.0187499999999998 22 | 21,"C","NF",13.5,42,1.7859375,4.6,28.5,2.8140624999999995 23 | 22,"C","NF",13.4,29,1.984375,4.2,15.6,2.215625 24 | 23,"C","NF",12.5,31,1.984375,3.5,18.5,1.515625 25 | 24,"C","NF",14.7,38,1.984375,4.7,23.3,2.715625 26 | 25,"NC","F",11.5,63,1.984375,8.7,51.5,6.715624999999999 27 | 26,"NC","F",13.7,64.5,1.7859375,8.4,50.8,6.6140625 28 | 27,"NC","F",18.7,58.3,1.984375,7.1,39.599999999999994,5.115625 29 | 28,"NC","F",15.8,66.2,1.5875,9.1,50.400000000000006,7.512499999999999 30 | 29,"NC","F",17.5,62.2,1.984375,8,44.7,6.015625 31 | 30,"NC","F",15,63,1.7859375,8.9,48,7.1140625 32 | 31,"NC","NF",17,53,1.984375,6.9,36,4.915625 33 | 32,"NC","NF",14.2,46.2,1.5875,4.9,32,3.3125000000000004 34 | 33,"NC","NF",11.2,41.4,1.5875,4.3,30.2,2.7125 35 | 34,"NC","NF",16.7,36.2,1.984375,4.8,19.500000000000004,2.815625 36 | 35,"NC","NF",12.5,46.9,1.5875,5.2,34.4,3.6125000000000003 37 | 36,"NC","NF",15.2,43.5,1.984375,5.8,28.3,3.815625 38 
| 37,"C","F",15.5,43,1.984375,7.1,27.5,5.115625 39 | 38,"C","F",13.7,43.2,1.984375,5.9,29.500000000000004,3.9156250000000004 40 | 39,"C","F",17.8,48,1.984375,6.4,30.2,4.415625 41 | 40,"C","F",12.8,41,1.5875,6.3,28.2,4.7125 42 | 41,"C","F",15,46,2.38125,5.7,31,3.31875 43 | 42,"C","F",15,45.5,1.984375,6,30.5,4.015625 44 | 43,"C","NF",14,40,1.7859375,4.3,26,2.5140624999999996 45 | 44,"C","NF",15.7,24,1.7859375,3.7,8.3,1.9140625000000002 46 | 45,"C","NF",15.1,37.1,1.5875,4.3,22,2.7125 47 | 46,"C","NF",14,30,1.984375,4.3,16,2.315625 48 | 47,"C","NF",14.6,35,1.984375,3.9,20.4,1.915625 49 | 48,"C","NF",16,37,1.984375,4.1,21,2.1156249999999996 50 | 49,"NC","F",17,68,2.38125,11.3,51,8.918750000000001 51 | 50,"NC","F",17.3,56,1.7859375,9.4,38.7,7.6140625 52 | 51,"NC","F",18.2,68,2.38125,8.9,49.8,6.518750000000001 53 | 52,"NC","F",15,55.4,1.984375,8.7,40.4,6.715624999999999 54 | 53,"NC","F",15.3,62,2.1828125,8.7,46.7,6.5171874999999995 55 | 54,"NC","F",17,48.6,2.38125,8.1,31.6,5.71875 56 | 55,"NC","NF",16,45,1.984375,6.5,29,4.515625 57 | 56,"NC","NF",16.4,43.5,1.984375,5.1,27.1,3.1156249999999996 58 | 57,"NC","NF",14.8,37.7,2.38125,4.3,22.900000000000002,1.9187499999999997 59 | 58,"NC","NF",12,40,1.7859375,4.7,28,2.9140625 60 | 59,"NC","NF",14.5,40.5,1.7859375,5.2,26,3.4140625 61 | 60,"NC","NF",17.1,35,2.38125,4.9,17.9,2.5187500000000003 62 | 61,"C","F",14.3,52,2.38125,6.7,37.7,4.31875 63 | 62,"C","F",12.5,64,1.984375,9,51.5,7.015625 64 | 63,"C","F",14.7,50,2.38125,7,35.3,4.61875 65 | 64,"C","F",16.3,46.2,2.1828125,6.8,29.900000000000002,4.6171875 66 | 65,"C","F",16.2,47,2.38125,7.8,30.8,5.418749999999999 67 | 66,"C","F",17.5,47,2.38125,6.6,29.5,4.21875 68 | 67,"C","NF",16.2,24.7,1.984375,3.3,8.5,1.3156249999999998 69 | 68,"C","NF",11.3,26.4,1.5875,2.7,15.099999999999998,1.1125000000000003 70 | 69,"C","NF",17.5,36,1.984375,3.8,18.5,1.8156249999999998 71 | 70,"C","NF",13.3,24.4,1.7859375,3.5,11.099999999999998,1.7140625 72 | 71,"C","NF",11,27.2,2.38125,4,16.2,1.61875 73 | 72,"C","NF",14.6,33.6,2.38125,4.5,19,2.11875 74 | -------------------------------------------------------------------------------- /Edition2/Data/Starcraft.csv: -------------------------------------------------------------------------------- 1 | ID,Race,Age,Wins 2 | 1,Protoss,20,29 3 | 2,Protoss,19,27 4 | 3,Protoss,19,26 5 | 4,Protoss,18,19 6 | 5,Protoss,22,23 7 | 6,Protoss,18,25 8 | 7,Protoss,24,19 9 | 8,Protoss,20,20 10 | 9,Protoss,21,19 11 | 10,Protoss,18,21 12 | 11,Protoss,22,16 13 | 12,Protoss,23,21 14 | 13,Protoss,21,18 15 | 14,Protoss,19,18 16 | 15,Protoss,24,17 17 | 16,Terran,18,26 18 | 17,Terran,16,21 19 | 18,Terran,20,25 20 | 19,Terran,18,24 21 | 20,Terran,20,23 22 | 21,Terran,18,26 23 | 22,Terran,17,22 24 | 23,Terran,21,21 25 | 24,Terran,21,21 26 | 25,Terran,21,20 27 | 26,Terran,18,22 28 | 27,Terran,23,20 29 | 28,Terran,18,22 30 | 29,Terran,19,21 31 | 30,Terran,17,24 32 | 31,Zerg,18,28 33 | 32,Zerg,20,24 34 | 33,Zerg,20,20 35 | 34,Zerg,21,23 36 | 35,Zerg,22,17 37 | 36,Zerg,18,20 38 | 37,Zerg,20,15 39 | 38,Zerg,24,17 40 | 39,Zerg,23,14 41 | 40,Zerg,21,18 42 | 41,Zerg,23,12 43 | 42,Zerg,21,15 44 | 43,Zerg,24,16 45 | 44,Zerg,18,14 46 | 45,Zerg,23,11 47 | -------------------------------------------------------------------------------- /Edition2/Data/TV.csv: -------------------------------------------------------------------------------- 1 | ID,Times,Cable 2 | 1,7,Basic 3 | 2,10,Basic 4 | 3,10.6,Basic 5 | 4,10.2,Basic 6 | 5,8.6,Basic 7 | 6,7.6,Basic 8 | 7,8.2,Basic 9 | 8,10.4,Basic 10 | 9,11,Basic 11 | 10,8.5,Basic 12 | 
11,3.4,Extended 13 | 12,7.8,Extended 14 | 13,9.4,Extended 15 | 14,4.7,Extended 16 | 15,5.4,Extended 17 | 16,7.6,Extended 18 | 17,5,Extended 19 | 18,8,Extended 20 | 19,7.8,Extended 21 | 20,9.6,Extended 22 | -------------------------------------------------------------------------------- /Edition2/Data/Turbine.csv: -------------------------------------------------------------------------------- 1 | "Date2010","AveKW","AveSpeed","Production" 2 | "Feb 14",547.9,7.8,13146 3 | "Feb 15",776,8.9,18626 4 | "Feb 16",944.4,9.7,22667 5 | "Feb 17",506.2,7.7,12148 6 | "Feb 18",322.9,6.4,7742 7 | "Feb 19",67.9,3.1,1585 8 | "Feb 20",79.9,3.9,1876 9 | "Feb 21",123.6,4.5,2936 10 | "Feb 22",273.3,6.5,6559 11 | "Feb 23",626.8,7.8,15041 12 | "Feb 24",242.2,5.8,5800 13 | "Feb 25",2.2,2.5,6 14 | "Feb 26",124.6,3.8,2940 15 | "Feb 27",494.6,7.7,11871 16 | "Feb 28",187.2,5.8,4481 17 | "Mar 01",303.5,5.9,7258 18 | "Mar 02",74.6,3.5,1743 19 | "Mar 03",148.6,5.1,3543 20 | "Mar 04",120.2,4.2,2848 21 | "Mar 05",581.9,8,13965 22 | "Mar 06",503.6,7.6,12087 23 | "Mar 07",89.4,3.5,2099 24 | "Mar 08",210.1,5.9,5037 25 | "Mar 09",347.9,7.1,8348 26 | "Mar 10",594.9,8.4,14279 27 | "Mar 11",611.5,8.2,14674 28 | "Mar 12",35.3,3.1,793 29 | "Mar 13",675,8.7,16202 30 | "Mar 14",317,6.7,7607 31 | "Mar 15",334.3,6.7,8019 32 | "Mar 16",201.5,5.8,4833 33 | "Mar 17",255.7,6,6125 34 | "Mar 18",454.2,6.7,10870 35 | "Mar 19",564.1,8.6,13768 36 | "Mar 20",278.4,6.4,6678 37 | "Mar 21",72.4,3.7,1692 38 | "Mar 22",405.6,7.6,9708 39 | "Mar 23",304.1,6.7,7226 40 | "Mar 24",176.2,6.3,4190 41 | "Mar 25",736.1,8.5,17666 42 | "Mar 26",1072,10.2,25729 43 | "Mar 27",601.5,7.1,14420 44 | "Mar 28",448.3,7,10752 45 | "Mar 29",849.6,9.2,20386 46 | "Mar 30",841.2,13.2,20146 47 | "Mar 31",271.4,8.4,6485 48 | "Apr 01",867.2,9.9,20811 49 | "Apr 02",945.6,11.3,22663 50 | "Apr 03",824.2,9.1,19781 51 | "Apr 04",716.9,8.7,17201 52 | "Apr 05",318.5,5.9,7618 53 | "Apr 06",971.4,10,23315 54 | "Apr 07",884.4,9.4,21225 55 | "Apr 08",309.3,6.5,7418 56 | "Apr 09",438.9,6.2,10502 57 | "Apr 10",276.8,5.4,6613 58 | "Apr 11",160.1,4.9,3814 59 | "Apr 12",755.8,9,18139 60 | "Apr 13",1149,10.9,27572 61 | "Apr 14",315,9.6,7514 62 | "Apr 15",898.3,9.3,21554 63 | "Apr 16",1142.6,10.6,27422 64 | "Apr 17",434.2,7.1,10411 65 | "Apr 18",44.7,4.3,1047 66 | "Apr 19",148.4,4.8,3530 67 | "Apr 20",175.9,4.3,4169 68 | "Apr 21",356.4,7,8552 69 | "Apr 22",80.6,3.7,1889 70 | "Apr 23",1041,10.5,24985 71 | "Apr 24",623.1,8.2,14952 72 | "Apr 25",981,10,23546 73 | "Apr 26",218.1,5.2,5208 74 | "Apr 27",233.7,5.7,5589 75 | "Apr 28",614.5,8,14744 76 | "Apr 29",1285.6,12.2,30854 77 | "Apr 30",556.3,7.6,13338 78 | "May 01",1111.7,11.1,26680 79 | "May 02",781.7,9.2,18762 80 | "May 03",339.2,6.4,8127 81 | "May 04",727.3,8.5,17443 82 | "May 05",1254.4,12.9,30096 83 | "May 06",190.2,4.5,4522 84 | "May 07",164.6,5.1,3925 85 | "May 08",732.7,8.7,17584 86 | "May 09",88.8,3.8,2091 87 | "May 10",1210.8,11,29059 88 | "May 11",660,8.3,15841 89 | "May 12",432.7,7.2,10386 90 | "May 13",490,6.9,11744 91 | "May 14",881.8,9.4,21164 92 | "May 15",77.5,4.6,1830 93 | "May 16",286.5,6.6,6870 94 | "May 17",149.8,5.1,3568 95 | "May 18",100.2,4.8,2370 96 | "May 19",166.2,5,3959 97 | "May 20",426.4,6.9,10224 98 | "May 21",170.2,5.1,4060 99 | "May 22",1005.6,11.2,24132 100 | "May 23",1139.4,11,27343 101 | "May 24",690,10.6,16531 102 | "May 25",360,6,8606 103 | "May 26",195.3,5.6,4669 104 | "May 27",257.5,6.5,6179 105 | "May 28",347.5,6.9,8334 106 | "May 29",765.5,9.3,18370 107 | "May 30",643.5,8.2,15432 108 | "May 
31",144.3,4.2,3410 109 | "Jun 01",571.9,7.8,13718 110 | "Jun 02",101.6,4.4,2405 111 | "Jun 03",136.2,4.4,3222 112 | "Jun 04",400.2,6.8,995 113 | "Jun 05",142.6,5.2,3399 114 | "Jun 06",338.1,6.3,8096 115 | "Jun 07",55.1,3.4,1261 116 | "Jun 08",633.3,8.3,15200 117 | "Jun 09",869.4,9.6,20866 118 | "Jun 10",383.9,7.1,9212 119 | "Jun 11",624.9,8.3,14984 120 | "Jun 12",170.3,5.4,4073 121 | "Jun 13",14.9,3.3,309 122 | "Jun 14",259.5,6.1,6218 123 | "Jun 15",282,6.4,6762 124 | "Jun 16",184.4,4.8,4384 125 | "Jun 17",1079.3,12.6,25896 126 | "Jun 18",687.2,8.8,16490 127 | "Jun 19",602.9,7.7,14453 128 | "Jun 20",30.1,3.7,674 129 | "Jun 21",127,4.9,3017 130 | "Jun 22",408.2,7,9785 131 | "Jun 23",531,7.5,12731 132 | "Jun 24",156.6,4.3,3713 133 | "Jun 25",743.4,9.2,17841 134 | "Jun 26",114,3.7,2681 135 | "Jun 27",356.1,5.6,8511 136 | "Jun 28",676.7,8.5,16241 137 | "Jun 29",76.2,4.2,1787 138 | "Jun 30",171.8,5,4094 139 | "Jul 01",973.7,10.2,23367 140 | "Jul 03",1238.8,11.5,29731 141 | "Jul 04",533.1,7.6,12793 142 | "Jul 05",54.3,3.7,1258 143 | "Jul 06",18.5,2.6,379 144 | "Jul 07",113,4,2671 145 | "Jul 08",144.9,5.5,3470 146 | "Jul 09",162.9,5.5,3899 147 | "Jul 10",564.9,7.9,13549 148 | "Jul 11",462.7,6.9,11090 149 | "Jul 12",102.7,4.3,2417 150 | "Jul 13",442.7,7.5,10617 151 | "Jul 14",953.6,10.2,22882 152 | "Jul 15",439.2,7.6,10542 153 | "Jul 16",382.5,7,9177 154 | "Jul 17",385.1,6.2,9214 155 | "Jul 18",459.7,7.6,11034 156 | "Jul 19",59.1,3.1,1349 157 | "Jul 20",129.6,5.3,3096 158 | "Jul 21",123,4.4,2910 159 | "Jul 22",444,6.5,10630 160 | "Jul 23",120.2,4.6,2848 161 | "Jul 24",311.7,7,7470 162 | "Jul 25",32.6,3.3,720 163 | "Jul 26",259.9,6.5,6235 164 | "Jul 27",696.8,9,16722 165 | "Jul 28",248.4,6.4,5959 166 | "Jul 29",2.2,2.2,0 167 | "Jul 30",140.3,5.2,3340 168 | "Jul 31",27.1,3.5,595 169 | "Aug 01",231.7,6,5548 170 | -------------------------------------------------------------------------------- /Edition2/Data/Volleyball2009.csv: -------------------------------------------------------------------------------- 1 | "Team","HitPercent","Assts","Kills" 2 | "Penn St.",38.1,13.64,14.62 3 | "Texas",33.800000000000004,13.37,14.59 4 | "Hawaii",30.5,13.56,14.58 5 | "Florida St.",30,12.6,13.75 6 | "Florida",29.099999999999998,13.21,14.47 7 | "Washington",28.799999999999997,13.35,14.25 8 | "Md.-East. Shore",28.799999999999997,11.87,12.76 9 | "Middle Tenn.",28.1,12.78,13.65 10 | "St. Mary's (CA)",27.900000000000002,13.31,14.24 11 | "Kentucky",27.900000000000002,13.63,14.74 12 | "Ohio",27.3,12.77,14.06 13 | "California",27.1,12.89,13.95 14 | "LSU",26.900000000000002,13.22,13.99 15 | "Stanford",26.8,13.03,13.85 16 | "Ohio St.",26.8,12.26,13.32 17 | "UNI",26.700000000000003,13.65,14.83 18 | "Oregon",26.6,13.83,14.69 19 | "Lipscomb",26.3,13.79,14.75 20 | "Tulsa",26.200000000000003,13.48,14.43 21 | "Western Ky.",26.1,11.94,13.06 22 | "St. 
Louis",26.1,12.73,14.04 23 | "Clemson",25.8,12.14,13.36 24 | "Nebraska",25.6,13.63,14.65 25 | "Yale",25.5,12.8,14.02 26 | "Duke",25.5,12.9,13.83 27 | "Minnesota",25.4,12.88,13.86 28 | "FIU",25.4,13.42,14.52 29 | "Louisville",25.4,11.92,12.99 30 | "Notre Dame",25.3,12.55,13.68 31 | "Pepperdine",25,12.9,13.93 32 | -------------------------------------------------------------------------------- /Edition2/Data/Walleye.csv: -------------------------------------------------------------------------------- 1 | "Length","Weight" 2 | 11.1,0.4 3 | 16.1,1.39 4 | 20.7,2.8 5 | 14.3,1.03 6 | 11.5,0.5 7 | 15.7,0.9 8 | 12.5,0.6 9 | 15.3,1.3 10 | 26.6,7.5 11 | 17.8,2.2 12 | 15.2,1.2 13 | 15,1 14 | 20.8,3.5 15 | 11,0.4 16 | 21,3.1 17 | 14.1,1.2 18 | 12.5,0.6 19 | 13.5,0.8 20 | 16.4,1.2 21 | 29.3,10.3 22 | 12.7,0.8 23 | 12.6,0.6 24 | 11,0.4 25 | 20,2 26 | 14.6,1.1 27 | 20.5,3 28 | 23.3,5.4 29 | 16.2,1.5 30 | 9.2,0.4 31 | 25.5,5.5 32 | 15.9,1.4 33 | 17.6,1.6 34 | 20.8,3 35 | 11,0.4 36 | 17.2,1.9 37 | 14.7,1.08 38 | 20.5,3.1 39 | 9.1,0.3 40 | 10.3,0.3 41 | 19.4,2.66 42 | 13.6,0.81 43 | 20.3,2.8 44 | 16,1.4 45 | 22,4 46 | 12.5,0.8 47 | 14.2,1 48 | 17.5,2.2 49 | 16.1,1.47 50 | 13.5,0.7 51 | 24.2,4.5 52 | 12.8,0.68 53 | 20.6,3.1 54 | 10.5,0.4 55 | 27,6.4 56 | 14,1.1 57 | 10.9,0.4 58 | 17.6,1.8 59 | 16.1,1.7 60 | 23.6,4.5 61 | 15.8,1.2 62 | -------------------------------------------------------------------------------- /Edition2/Data/Watertable.csv: -------------------------------------------------------------------------------- 1 | "Depth","Alive" 2 | 50,1 3 | 43,1 4 | 50,1 5 | 46,1 6 | 25,1 7 | 19,1 8 | 30,1 9 | 35,1 10 | 8,1 11 | 8,0 12 | 11,1 13 | 9,1 14 | 30,1 15 | 46,1 16 | 38,1 17 | 34,1 18 | 24,1 19 | 30,1 20 | 34,1 21 | 36,1 22 | 12,1 23 | 18,1 24 | 20,1 25 | 22,1 26 | 25,1 27 | 18,1 28 | 30,1 29 | 23,1 30 | 6,1 31 | 4,1 32 | 6,1 33 | 6,1 34 | 4,1 35 | 3,0 36 | 10,1 37 | 4,0 38 | 36,1 39 | 42,1 40 | 27,1 41 | 42,1 42 | 47,1 43 | 56,1 44 | 50,1 45 | 51,1 46 | 48,1 47 | 48,1 48 | 53,1 49 | 55,1 50 | 30,1 51 | 29,1 52 | 28,1 53 | 25,1 54 | 28,1 55 | 25,1 56 | 27,1 57 | 27,1 58 | 24,1 59 | 20,1 60 | 26,1 61 | 22,1 62 | 2,0 63 | 6,0 64 | 8,1 65 | 9,0 66 | 9,1 67 | 1,0 68 | 8,0 69 | 8,0 70 | 1,0 71 | 4,0 72 | 3,0 73 | 8,0 74 | 26,1 75 | 31,1 76 | 31,1 77 | 32,1 78 | 30,1 79 | 24,1 80 | 30,1 81 | 28,1 82 | 15,1 83 | 19,1 84 | 20,1 85 | 24,1 86 | 19,1 87 | 15,1 88 | 17,1 89 | 20,1 90 | 5,0 91 | 9,1 92 | 7,1 93 | 11,0 94 | 7,1 95 | 9,1 96 | 8,1 97 | 9,1 98 | 33,1 99 | 37,1 100 | 36,1 101 | 36,1 102 | 46,1 103 | 41,1 104 | 44,1 105 | 47,1 106 | 25,1 107 | 24,1 108 | 24,1 109 | 21,1 110 | 20,1 111 | 24,1 112 | 23,1 113 | 22,1 114 | 2,0 115 | 1,0 116 | 5,0 117 | 3,0 118 | 4,0 119 | 4,0 120 | 7,0 121 | 7,0 122 | 43,1 123 | 43,1 124 | 47,1 125 | 36,1 126 | 32,1 127 | 30,1 128 | 29,1 129 | 33,1 130 | 22,1 131 | 24,1 132 | 23,1 133 | 22,1 134 | 18,1 135 | 18,1 136 | 21,1 137 | 17,1 138 | 14,1 139 | 13,0 140 | 13,1 141 | 14,1 142 | 5,0 143 | 10,0 144 | 13,1 145 | 4,0 146 | 43,1 147 | 40,1 148 | 47,1 149 | 50,1 150 | 39,1 151 | 39,1 152 | 48,1 153 | 50,1 154 | 41,1 155 | 40,1 156 | 39,1 157 | 38,1 158 | 20,1 159 | 18,1 160 | 26,1 161 | 18,1 162 | 19,1 163 | 20,1 164 | 18,1 165 | 25,1 166 | 23,1 167 | 19,1 168 | 27,1 169 | 26,1 170 | 9,0 171 | 9,0 172 | 11,0 173 | 10,0 174 | 8,0 175 | 6,0 176 | 14,0 177 | 12,0 178 | 10,1 179 | 13,1 180 | 15,1 181 | 12,1 182 | 41,1 183 | 42,1 184 | 48,1 185 | 45,1 186 | 38,1 187 | 39,1 188 | 38,1 189 | 39,1 190 | 40,1 191 | 36,1 192 | 38,1 193 | 39,1 194 | 17,1 195 | 18,1 196 | 20,1 197 | 
22,1 198 | 18,1 199 | 18,1 200 | 20,1 201 | 19,1 202 | 13,1 203 | 18,1 204 | 22,1 205 | 24,1 206 | 6,0 207 | 10,0 208 | 9,0 209 | 13,0 210 | 7,0 211 | 4,0 212 | 5,0 213 | 7,0 214 | 5,0 215 | 7,0 216 | 4,0 217 | 11,0 218 | 30,1 219 | 31,1 220 | 42,1 221 | 35,1 222 | 30,1 223 | 30,1 224 | 32,1 225 | 50,1 226 | 18,1 227 | 17,1 228 | 30,1 229 | 27,1 230 | 17,1 231 | 18,1 232 | 22,1 233 | 20,1 234 | 13,1 235 | 11,1 236 | 12,1 237 | 11,0 238 | 7,0 239 | 4,0 240 | 7,0 241 | 8,0 242 | 45,1 243 | 37,1 244 | 50,1 245 | 44,1 246 | 18,1 247 | 13,1 248 | 17,1 249 | 19,1 250 | 10,1 251 | 10,1 252 | 11,0 253 | 7,0 254 | 37,1 255 | 36,1 256 | 36,1 257 | 30,1 258 | 30,1 259 | 24,1 260 | 37,1 261 | 32,1 262 | 12,1 263 | 14,1 264 | 23,1 265 | 22,1 266 | 15,1 267 | 24,1 268 | 24,1 269 | 17,1 270 | 12,1 271 | 12,1 272 | 16,1 273 | 16,0 274 | 10,1 275 | 8,0 276 | 9,1 277 | 11,1 278 | 67,1 279 | 69,1 280 | 64,1 281 | 65,1 282 | 35,1 283 | 35,1 284 | 40,1 285 | 40,1 286 | 50,1 287 | 55,1 288 | 50,1 289 | 53,1 290 | 14,1 291 | 16,1 292 | 18,1 293 | 21,1 294 | 28,1 295 | 27,1 296 | 22,1 297 | 30,1 298 | 21,1 299 | 20,1 300 | 28,1 301 | 25,1 302 | 14,1 303 | 12,1 304 | 14,1 305 | 14,1 306 | 10,0 307 | 6,0 308 | 4,0 309 | 8,0 310 | 8,1 311 | 10,0 312 | 11,0 313 | 11,1 314 | 28,1 315 | 27,1 316 | 32,1 317 | 43,1 318 | 27,1 319 | 35,1 320 | 37,1 321 | 33,1 322 | 16,1 323 | 16,1 324 | 20,1 325 | 18,1 326 | 17,1 327 | 15,1 328 | 17,1 329 | 18,1 330 | 14,1 331 | 9,0 332 | 18,1 333 | 15,1 334 | 11,0 335 | 11,0 336 | 12,0 337 | 12,0 338 | 33,1 339 | 30,1 340 | 30,1 341 | 33,1 342 | 14,1 343 | 18,1 344 | 17,1 345 | 16,1 346 | 10,1 347 | 7,1 348 | 10,0 349 | 12,0 350 | 28,1 351 | 21,1 352 | 34,1 353 | 34,1 354 | 20,1 355 | 19,1 356 | 20,1 357 | 27,1 358 | 11,1 359 | 11,1 360 | 11,1 361 | 9,0 362 | -------------------------------------------------------------------------------- /Edition2/Data/wafers.csv: -------------------------------------------------------------------------------- 1 | "Instrument","Resistance" 2 | 1,196.3052 3 | 1,196.124 4 | 1,196.189 5 | 1,196.2569 6 | 1,196.3403 7 | 2,196.3042 8 | 2,196.3825 9 | 2,196.1669 10 | 2,196.3257 11 | 2,196.0422 12 | 3,196.1303 13 | 3,196.2005 14 | 3,196.2889 15 | 3,196.0343 16 | 3,196.1811 17 | 4,196.2795 18 | 4,196.1748 19 | 4,196.1494 20 | 4,196.1485 21 | 4,195.9885 22 | 5,196.2119 23 | 5,196.1051 24 | 5,196.185 25 | 5,196.0052 26 | 5,196.209 27 | -------------------------------------------------------------------------------- /Edition2/Errata_Edition2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition2/Errata_Edition2.pdf -------------------------------------------------------------------------------- /Edition2/R/Chap02EDA.R: -------------------------------------------------------------------------------- 1 | #Chap 2: Exploratory Data Analysis 2 | 3 | 4 | #Section 2.4 5 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36) 6 | qqnorm(x) # plot points 7 | qqline(x) # add straight line 8 | 9 | 10 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv") 11 | 12 | qqnorm(NCBirths$Weight) 13 | qqline(NCBirths$Weight) 14 | 15 | #--------------------------------------------------------------------------- 16 | #Section 2.5 17 | #R Note 18 | x <- c(3, 6, 15, 15, 17, 19, 24) 19 | plot.ecdf(x) 20 | x <- rnorm(25) # random sample of size 25 from N(0,1) 21 | plot.ecdf(x, xlim = c(-4, 4)) # 
adjust x range 22 | curve(pnorm(x), col = "blue", add = TRUE) # impose normal cdf 23 | 24 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 25 | 26 | beerM <- subset(Beerwings, select = Beer, subset = Gender == "M", 27 | drop = T) 28 | beerF <- subset(Beerwings, select = Beer, subset = Gender == "F", 29 | drop = T) 30 | 31 | plot.ecdf(beerM, xlab = "ounces") 32 | plot.ecdf(beerF, col = "blue", pch = 2, add = TRUE) 33 | abline(v = 25, lty = 2) 34 | legend(5, .8, legend = c("Males", "Females"), 35 | col = c("black", "blue"), pch = c(19, 2)) 36 | 37 | #-------------------------- 38 | #Section 2.6 39 | plot(Beer ~ Hotwings, data = Beerwings, xlab = "Hot wings eaten", 40 | ylab = "Beer consumed") 41 | 42 | plot(Beerwings$Hotwings, Beerwings$Beer, xlab = "Hot wings eaten", 43 | ylab = "Beer consumed") 44 | 45 | plot(Beer ~ Hotwings, data = Beerwings, col = Gender, xlab = "Hot wings eaten", 46 | ylab = "Beer consumed") 47 | -------------------------------------------------------------------------------- /Edition2/R/Chap02EDA.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 2 Exploratory Data Analysis" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | 13 | ###Section 2.4 14 | 15 | ```{r} 16 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36) 17 | qqnorm(x) # plot points 18 | qqline(x) # add straight line 19 | 20 | 21 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv") 22 | 23 | qqnorm(NCBirths$Weight) 24 | qqline(NCBirths$Weight) 25 | ``` 26 | 27 | ###Section 2.5 28 | ####R Note 29 | ```{r} 30 | x <- c(3, 6, 15, 15, 17, 19, 24) 31 | plot.ecdf(x) 32 | x <- rnorm(25) # random sample of size 25 from N(0,1) 33 | plot.ecdf(x, xlim = c(-4, 4)) # adjust x range 34 | curve(pnorm(x), col = "blue", add = TRUE) # impose normal cdf 35 | 36 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 37 | 38 | beerM <- subset(Beerwings, select = Beer, subset = Gender == "M", 39 | drop = T) 40 | beerF <- subset(Beerwings, select = Beer, subset = Gender == "F", 41 | drop = T) 42 | 43 | plot.ecdf(beerM, xlab = "ounces") 44 | plot.ecdf(beerF, col = "blue", pch = 2, add = TRUE) 45 | abline(v = 25, lty = 2) 46 | legend(5, .8, legend = c("Males", "Females"), 47 | col = c("black", "blue"), pch = c(19, 2)) 48 | ``` 49 | 50 | ###Section 2.6 51 | ```{r} 52 | plot(Beer ~ Hotwings, data = Beerwings, xlab = "Hot wings eaten", 53 | ylab = "Beer consumed") 54 | 55 | plot(Beerwings$Hotwings, Beerwings$Beer, xlab = "Hot wings eaten", 56 | ylab = "Beer consumed") 57 | 58 | plot(Beer ~ Hotwings, data = Beerwings, col = Gender, xlab = "Hot wings eaten", 59 | ylab = "Beer consumed") 60 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap02EDA_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 2 Exploratory Data Analysis" 3 | author: "Chihara-Hesterberg" 4 | date: "November 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Section 2.4 15 | 16 | ```{r} 17 | x <- c(17.7, 22.6, 26.1, 28.3, 30, 31.2, 31.5, 33.5, 34.7, 36) 18 | df <- 
data.frame(x) 19 | ggplot(df, aes(sample = x)) + stat_qq() + stat_qq_line() 20 | 21 | NCBirths <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/NCBirths2004.csv") 22 | 23 | ggplot(NCBirths, aes(sample = Weight)) + stat_qq() + stat_qq_line() 24 | 25 | ``` 26 | 27 | ###Section 2.5 28 | ####R Note 29 | ```{r} 30 | x <- c(3, 6, 15, 15, 17, 19, 24) 31 | df <- data.frame(x) 32 | ggplot(df, aes(x)) + stat_ecdf(geom = "step") 33 | 34 | # random sample of size 25 from N(0,1) 35 | df <- data.frame(x = rnorm(25)) 36 | 37 | ggplot(df, aes(x)) + stat_ecdf() + stat_function(fun = pnorm, color = "red") 38 | 39 | Beerwings <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 40 | 41 | ggplot(Beerwings, aes(Beer, color = Gender)) + stat_ecdf() 42 | 43 | ``` 44 | 45 | ###Section 2.6 46 | ```{r} 47 | 48 | ggplot(Beerwings, aes(x=Hotwings, y = Beer)) + geom_point() 49 | 50 | ggplot(Beerwings, aes(x = Hotwings, y = Beer, color = Gender)) + geom_point() 51 | 52 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing.R: -------------------------------------------------------------------------------- 1 | #Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 2 | ##------------------------------------- 3 | ##Section 3.3 4 | #Beerwings <- read.csv("https://sites.google.com/site/chiharahesterberg/data2/Beerwings.csv") 5 | 6 | tapply(Beerwings$Hotwings, Beerwings$Gender, mean) 7 | 8 | observed <- 14.5333 - 9.3333 #store observed mean difference 9 | 10 | #Get hotwings variable 11 | hotwings <- Beerwings$Hotwings 12 | 13 | #Alternative way: 14 | hotwings <- subset(Beerwings, select = Hotwings, drop = TRUE) 15 | #drop = TRUE to convert hotwings to a vector (without this, hotwings will be a 16 | #30x1 data frame) 17 | 18 | #set.seed(0) 19 | N <- 10^5-1 #set number of times to repeat this process 20 | result <- numeric(N) # space to save the random differences 21 | for(i in 1:N) 22 | { 23 | index <- sample(30, size=15, replace = FALSE) # sample of numbers from 1:30 24 | result[i] <- mean(hotwings[index]) - mean(hotwings[-index]) 25 | } 26 | 27 | ##Plot 28 | 29 | hist(result, xlab = "xbarM - xbarF", main = "Permutation distribution for hot wings") 30 | abline(v = observed, col = "blue", lty=5) 31 | 32 | #------------------------- 33 | #Another visualization of distribution 34 | plot.ecdf(result) 35 | abline(v = observed, col = "blue", lty = 5) 36 | 37 | 38 | #Compute P-value 39 | (sum(result >= observed)+1)/(N+ 1) #P-value 40 | 41 | 42 | #---------------------------------------- 43 | #Example 3.4 Verizon 44 | #Permutation test 45 | 46 | Verizon <- read.csv("https://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 47 | 48 | tapply(Verizon$Time, Verizon$Group, mean) 49 | 50 | 51 | Time <- subset(Verizon, select = Time, drop = T) 52 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE) 53 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE) 54 | 55 | observed <- mean(Time.ILEC) - mean(Time.CLEC) 56 | observed 57 | 58 | 59 | N <- 10^4-1 #set number of times to repeat this process 60 | #set.seed(99) 61 | result <- numeric(N) # space to save the random differences 62 | for(i in 1:N) { 63 | index <- sample(1687, size = 1664, replace = FALSE) #sample of numbers from 1:1687 64 | result[i] <- mean(Time[index]) - mean(Time[-index]) 65 | } 66 | 67 | hist(result, xlab = "xbar1 - xbar2", 68 | main = "Permutation Distribution for Verizon repair times") 69 | abline(v = 
observed, col = "blue", lty = 5) 70 | 71 | (sum(result <= observed) + 1)/(N + 1) #P-value 72 | 73 | 74 | #------------------------------------------------------- 75 | #Example 3.6, Verizon cont. 76 | #median, trimmed means 77 | 78 | tapply(Verizon$Time, Verizon$Group, median) 79 | 80 | #Difference in medians 81 | observed <- median(Time.ILEC) - median(Time.CLEC) 82 | observed 83 | 84 | #Difference in trimmed means 85 | observed2 <- mean(Time.ILEC, trim = .25) - mean(Time.CLEC, trim = .25) 86 | observed2 87 | 88 | N <- 10^4-1 #set number of times to repeat this process 89 | #set.seed(99) 90 | result <- numeric(N) # space to save the random differences 91 | result2 <- numeric(N) 92 | for(i in 1:N) { 93 | index <- sample(1687, size=1664, replace = FALSE) #sample of numbers from 1:1687 94 | result[i] <- median(Time[index]) - median(Time[-index]) 95 | result2[i] <- mean(Time[index], trim = .25) - mean(Time[-index], trim = .25) 96 | } 97 | 98 | hist(result, xlab = "median1 - median2", 99 | main = "Permutation Distribution for medians") 100 | abline(v = observed, col = "blue", lty = 5) 101 | 102 | #P-value difference in medians 103 | (sum(result <= observed) + 1)/(N+ 1) 104 | 105 | 106 | hist(result2, xlab = "trimMean1 - trimMean2", 107 | main = "Permutation Distribution for trimmed means") 108 | abline(v = observed2, col = "blue", lty = 5) 109 | 110 | #P-value difference in trimmed means 111 | (sum(result2 <= observed2) + 1)/(N+ 1) 112 | 113 | #------------------------------------------------ 114 | #Example 3.6, Verizon continued 115 | # 116 | #difference in proportion of time > 10 117 | #and ratio of variances 118 | observed3 <- mean(Time.ILEC > 10) - mean(Time.CLEC > 10) 119 | observed3 120 | 121 | #ratio of variances 122 | observed4 <- var(Time.ILEC)/var(Time.CLEC) 123 | observed4 124 | 125 | N <- 10^4-1 #set number of times to repeat this process 126 | #set.seed(99) 127 | result3 <- numeric(N) 128 | result4 <- numeric(N) 129 | 130 | for(i in 1:N) { 131 | index <- sample(1687, size = 1664, replace = FALSE) 132 | result3[i] <- mean(Time[index] > 10) - mean(Time[-index] > 10) 133 | result4[i] <- var(Time[index])/var(Time[-index]) 134 | } 135 | 136 | 137 | 138 | hist(result3, xlab = "Difference in proportions", main = "Repair times > 10 hours") 139 | abline(v = observed3, lty = 5, col = "blue") 140 | #P-value difference in proportions 141 | (sum(result3 <= observed3) + 1)/(N+ 1) #P-value 142 | 143 | 144 | hist(result4, xlab = "variance1/variance2", main = "Ratio of variances") 145 | abline(v = observed4, lty = 5, col = "blue") 146 | 147 | 148 | #P-value ratio of variances 149 | (sum(result4 <= observed4) + 1)/(N+ 1) #P-value 150 | 151 | #-------------------------------------- 152 | #Example 3.8 153 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 154 | 155 | k <- complete.cases(Recidivism$Age25) 156 | Recid2 <- ifelse(Recidivism$Recid[k] == "Yes", 1, 0) 157 | Age25.2 <- Recidivism$Age25[k] 158 | 159 | table(Age25.2) 160 | tapply(Recid2, Age25.2, mean) 161 | observed <- .365 - .306 162 | 163 | N <- 10^4 - 1 164 | result <- numeric(N) 165 | 166 | for (i in 1:N) 167 | { 168 | index <- sample(17019, size = 3077, replace = FALSE) 169 | result[i] <- mean(Recid2[index]) - mean(Recid2[-index]) 170 | } 171 | 172 | 2* (sum(result >= observed) + 1)/(N + 1) 173 | 174 | #--------------------- 175 | #Section 3.4 Matched Pairs 176 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 177 | 178 | Diff <- Diving2017$Final - 
Diving2017$Semifinal 179 | observed <- mean(Diff) 180 | 181 | N <- 10^5 - 1 182 | result <- numeric(N) 183 | 184 | for (i in 1:N) 185 | { 186 | Sign <- sample(c(-1, 1), 12, replace = TRUE) 187 | Diff2 <- Sign*Diff 188 | result[i] <- mean(Diff2) 189 | } 190 | 191 | hist(result) 192 | abline(v = observed, col = "blue") 193 | 194 | 2* (sum(result >= observed) + 1)/(N + 1) 195 | -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 2 | #R Code for exercise 3 | 4 | #----------------- 5 | #Exercise 7 6 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 7 | 8 | N<-10^4-1 9 | 10 | UA.Delay <- subset(FlightDelays, select = Delay, Carrier == "UA", drop = TRUE) 11 | AA.Delay <- subset(FlightDelays, select = Delay, Carrier == "AA", drop = TRUE) 12 | 13 | observedSumUA <- sum(UA.Delay) 14 | observedmeanUA <- mean(UA.Delay) 15 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay) 16 | m <-length(UA.Delay) #number of UA observations 17 | 18 | sumUA<-numeric(N) 19 | meanUA<-numeric(N) 20 | meanDiff<-numeric(N) 21 | 22 | set.seed(0) 23 | for (i in 1:N) 24 | { 25 | index <- sample(4029, m, replace = FALSE) 26 | sumUA[i] <- sum(FlightDelays$Delay[index]) 27 | meanUA[i] <- mean(FlightDelays$Delay[index]) 28 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index]) 29 | 30 | } 31 | 32 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value 33 | 34 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value 35 | 36 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value 37 | 38 | #------------------------------- 39 | -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap03Testing_Exer" 3 | author: "Chihara-Hesterberg" 4 | date: "July 20, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ##Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 13 | ##Exercises 14 | 15 | ###Exercise 7 16 | ```{r} 17 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 18 | 19 | N<-10^4-1 20 | 21 | UA.Delay <- subset(FlightDelays, select = Delay, Carrier == "UA", drop = TRUE) 22 | AA.Delay <- subset(FlightDelays, select = Delay, Carrier == "AA", drop = TRUE) 23 | 24 | observedSumUA <- sum(UA.Delay) 25 | observedmeanUA <- mean(UA.Delay) 26 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay) 27 | m <-length(UA.Delay) #number of UA observations 28 | 29 | sumUA<-numeric(N) 30 | meanUA<-numeric(N) 31 | meanDiff<-numeric(N) 32 | 33 | set.seed(0) 34 | for (i in 1:N) 35 | { 36 | index <- sample(4029, m, replace = FALSE) 37 | sumUA[i] <- sum(FlightDelays$Delay[index]) 38 | meanUA[i] <- mean(FlightDelays$Delay[index]) 39 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index]) 40 | 41 | } 42 | 43 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value 44 | 45 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value 46 | 47 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value 48 | 49 | ``` 50 | -------------------------------------------------------------------------------- /Edition2/R/Chap03Testing_Exer_d.Rmd: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap03Testing_Exer" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(dplyr) 11 | library(ggplot2) 12 | ``` 13 | 14 | ##Chapter 3 Introduction to Hypothesis Testing: Permutation Tests 15 | ##Exercises 16 | 17 | ###Exercise 7 18 | ```{r} 19 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 20 | 21 | N<-10^4-1 22 | UA.Delay <- FlightDelays %>% filter(Carrier == "UA") %>% pull(Delay) 23 | AA.Delay <- FlightDelays %>% filter(Carrier == "AA") %>% pull(Delay) 24 | 25 | observedSumUA <- sum(UA.Delay) 26 | observedmeanUA <- mean(UA.Delay) 27 | observedmeanDiff <- mean(UA.Delay) - mean(AA.Delay) 28 | m <-length(UA.Delay) #number of UA observations 29 | 30 | sumUA<-numeric(N) 31 | meanUA<-numeric(N) 32 | meanDiff<-numeric(N) 33 | 34 | set.seed(0) 35 | for (i in 1:N) 36 | { 37 | index <- sample(4029, m, replace = FALSE) 38 | sumUA[i] <- sum(FlightDelays$Delay[index]) 39 | meanUA[i] <- mean(FlightDelays$Delay[index]) 40 | meanDiff[i] <- mean(FlightDelays$Delay[index]) - mean(FlightDelays$Delay[-index]) 41 | 42 | } 43 | 44 | (sum(sumUA >= observedSumUA) + 1)/(N + 1) #P-value 45 | 46 | (sum(meanUA >= observedmeanUA) + 1)/(N + 1) #P-value 47 | 48 | (sum(meanDiff >= observedmeanDiff) + 1)/(N + 1) #P-value 49 | 50 | ``` 51 | -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist.R: -------------------------------------------------------------------------------- 1 | ###Chapter 4: Sampling Distributions 2 | 3 | #--------------------------------------------- 4 | #Example 4.2: Sampling distribution from Exp(1/15) 5 | Xbar <- numeric(1000) 6 | #set.seed(300) 7 | for (i in 1:1000) 8 | { 9 | x <- rexp(100, rate = 1/15) 10 | Xbar[i] <- mean(x) 11 | } 12 | 13 | hist(Xbar, main="Simulated sampling distribution", xlab="means") 14 | 15 | qqnorm(Xbar) 16 | qqline(Xbar) 17 | 18 | mean(Xbar) 19 | sd(Xbar) 20 | 21 | #---------------------------------------------------- 22 | ##Example 4.3: Sampling Dist from Unif[0,1] 23 | 24 | maxY <- numeric(1000) 25 | #set.seed(100) 26 | for (i in 1:1000) 27 | { 28 | y <- runif(12) #draw random sample of size 12 29 | maxY[i] <- max(y) #find max, save in position i 30 | } 31 | 32 | hist(maxY, main = "", xlab = "maximums") 33 | 34 | #To create a histogram with a density curve imposed 35 | #scale bars to have area one with prob=TRUE option 36 | hist(maxY, main = "", xlab = "maximums", prob = TRUE) 37 | 38 | #add pdf to histogram 39 | curve(12*x^{11}, col = "blue", add = TRUE) 40 | 41 | #--------------------------------------------- 42 | #Example 4.6 Sum of Poisson random variables 43 | 44 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5) 45 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12) 46 | W <- X + Y 47 | 48 | hist(W, prob = TRUE) #prob = TRUE, scales hist to 1 49 | lines(2:35, dpois(2:35, 17), type = "b") #overlay pmf for Pois(17) 50 | 51 | mean(W) 52 | var(W) 53 | 54 | #------------------------------------------------ 55 | #Example 4.7 56 | #Sampling distribution simulation 57 | #Sample of size 30 from gamma r=5, lambda=2 58 | 59 | #set.seed(10) 60 | Xbar <- numeric(1000) 61 | for (i in 1:1000) 62 | { 63 | x <- rgamma(30, shape = 5, rate = 2) 64 | Xbar[i] <- mean(x) 65 | } 66 | 67 | hist(Xbar, main = "Distribution of means") 
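#A minimal CLT sanity check (a sketch added for comparison; mu and sigma below
#are derived from the gamma parameters used above, shape = 5 and rate = 2, and
#are not part of the original script): the sampling distribution of Xbar for
#n = 30 should be approximately N(mu, sigma^2/30). Compare these values with
#mean(Xbar), sd(Xbar), and mean(Xbar > 3) computed below.
mu <- 5/2                              # population mean = shape/rate
sigma <- sqrt(5)/2                     # population sd = sqrt(shape)/rate
se <- sigma/sqrt(30)                   # theoretical standard error of Xbar
c(mu, se)
pnorm(3, mu, se, lower.tail = FALSE)   # CLT approximation to P(Xbar > 3)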
68 | 69 | qqnorm(Xbar) 70 | qqline(Xbar) 71 | 72 | mean(Xbar) 73 | sd(Xbar) 74 | sum(Xbar > 3)/1000 75 | #alternatively 76 | mean(Xbar > 3) 77 | 78 | #---------------------------------------------- 79 | #Example 4.11 R Note 80 | dbinom(25, 120, .3) 81 | 82 | pbinom(25, 120, .3) 83 | -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distributions" 3 | author: "Chihara-Hesterberg" 4 | date: "July 20, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Example 4.2: Sampling distribution from Exp(1/15) 13 | ```{r} 14 | Xbar <- numeric(1000) 15 | #set.seed(300) 16 | for (i in 1:1000) 17 | { 18 | x <- rexp(100, rate = 1/15) 19 | Xbar[i] <- mean(x) 20 | } 21 | 22 | hist(Xbar, main="Simulated sampling distribution", xlab="means") 23 | 24 | qqnorm(Xbar) 25 | qqline(Xbar) 26 | 27 | mean(Xbar) 28 | sd(Xbar) 29 | ``` 30 | 31 | ###Example 4.3: Sampling Dist from Unif[0,1] 32 | ```{r} 33 | maxY <- numeric(1000) 34 | #set.seed(100) 35 | for (i in 1:1000) 36 | { 37 | y <- runif(12) #draw random sample of size 12 38 | maxY[i] <- max(y) #find max, save in position i 39 | } 40 | 41 | hist(maxY, main = "", xlab = "maximums") 42 | ``` 43 | 44 | To create a histogram with a density curve imposed, 45 | scale bars to have area one with the `prob=TRUE` argument. 46 | The `curve()` command can then be used to add the density curve. 47 | 48 | ```{r} 49 | hist(maxY, main = "", xlab = "maximums", prob = TRUE) 50 | curve(12*x^{11}, col = "blue", add = TRUE) 51 | ``` 52 | 53 | ###Example 4.6 Sum of Poisson random variables 54 | ```{r} 55 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5) 56 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12) 57 | W <- X + Y 58 | 59 | hist(W, prob = TRUE) #prob = TRUE, scales hist to 1 60 | lines(2:35, dpois(2:35, 17), type = "b") #overlay pmf for Pois(17) 61 | 62 | mean(W) 63 | var(W) 64 | ``` 65 | 66 | ###Example 4.7 67 | Sampling distribution simulation 68 | Sample of size 30 from gamma r=5, lambda=2 69 | 70 | ```{r} 71 | #set.seed(10) 72 | Xbar <- numeric(1000) 73 | for (i in 1:1000) 74 | { 75 | x <- rgamma(30, shape = 5, rate = 2) 76 | Xbar[i] <- mean(x) 77 | } 78 | 79 | hist(Xbar, main = "Distribution of means") 80 | 81 | qqnorm(Xbar) 82 | qqline(Xbar) 83 | 84 | mean(Xbar) 85 | sd(Xbar) 86 | sum(Xbar > 3)/1000 87 | #alternatively 88 | mean(Xbar > 3) 89 | ``` 90 | 91 | ###Example 4.11 R Note 92 | 93 | ```{r} 94 | dbinom(25, 120, .3) 95 | 96 | pbinom(25, 120, .3) 97 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_Exer.R: -------------------------------------------------------------------------------- 1 | ##Chapter Sampling Distributions 2 | ##Exercises 3 | ##R Scripts 4 | ## 5 | ##----------------------------- 6 | #Exercise 4 7 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20) 8 | N <- 10^4 9 | Xbar <- numeric(N) 10 | 11 | for (i in 1:N) 12 | { 13 | samp <- sample(pop, 4, replace = TRUE) 14 | Xbar[i] <- mean(samp) 15 | } 16 | 17 | hist(Xbar) 18 | mean(Xbar < 11) 19 | 20 | #---------------------------------------------- 21 | #Exercise 4.6 22 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 23 | N <- 10^4 24 | phat <- numeric(N) 25 | n <- 25 26 | 27 | for (i in 1:N) 28 | { 29 | samp <- 
sample(Recidivism$Recid, n) 30 | phat[i] <- mean(samp == "Yes") 31 | } 32 | 33 | #c) change n <- 250 34 | 35 | #---------------------------------------------------------------------------- 36 | #Exercise 19 37 | ## X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2) 38 | ## W = mean(X)+mean(Y) 39 | W <- numeric(1000) 40 | set.seed(0) 41 | for (i in 1:1000) 42 | { 43 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2) 44 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2) 45 | W[i] <- mean(x) + mean(y) #save sum of means 46 | } 47 | 48 | hist(W) 49 | 50 | mean(W < 40) 51 | 52 | 53 | #-------------------- 54 | #Exercise 22 55 | 56 | X <- runif(1000, 40, 60) 57 | Y <- runif(1000, 45, 80) 58 | 59 | total <- X + Y 60 | 61 | hist(total) 62 | 63 | #---------------- 64 | #33 Finite pop simulation 65 | 66 | N <- 400 # population size 67 | n <- 5 # sample size 68 | 69 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from 70 | # Exp(1/10) 71 | hist(finpop) # distribution of your finite pop. 72 | mean(finpop) # mean (mu) of your pop. 73 | sd(finpop) # stdev (sigma) of your pop. 74 | sd(finpop)/sqrt(n) # theoretical standard error of sampling 75 | # dist. of mean(x), with replacement 76 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement 77 | 78 | Xbar <- numeric(1000) 79 | for (i in 1:1000) 80 | { 81 | x <- sample(finpop, n) # Random sample of size n (w/o replacement) 82 | Xbar[i] <- mean(x) # Find mean of sample, store in Xbar 83 | } 84 | hist(Xbar) 85 | 86 | qqnorm(Xbar) 87 | qqline(Xbar) 88 | 89 | mean(Xbar) 90 | sd(Xbar) # estimated standard error of sampling 91 | # distribution 92 | 93 | #---------------------------- 94 | #34 95 | W <- numeric(1000) 96 | for (i in 1:1000) 97 | { 98 | x <- rnorm(20, 25, 7) 99 | W[i] <- var(x) 100 | } 101 | mean(W) 102 | var(W) 103 | hist(W) 104 | 105 | qqnorm(W) 106 | qqline(W) 107 | -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distribution-Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 20, 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Exercise 4 13 | ```{r} 14 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20) 15 | N <- 10^4 16 | Xbar <- numeric(N) 17 | 18 | for (i in 1:N) 19 | { 20 | samp <- sample(pop, 4, replace = TRUE) 21 | Xbar[i] <- mean(samp) 22 | } 23 | 24 | hist(Xbar) 25 | mean(Xbar < 11) 26 | ``` 27 | 28 | ###Exercise 6 29 | ```{r} 30 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 31 | N <- 10^4 32 | phat <- numeric(N) 33 | n <- 25 34 | 35 | for (i in 1:N) 36 | { 37 | samp <- sample(Recidivism$Recid, n) 38 | phat[i] <- mean(samp == "Yes") 39 | } 40 | 41 | #c) change n <- 250 42 | ``` 43 | 44 | ###Exercise 19 45 | X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2) 46 | W = mean(X) + mean(Y) 47 | ```{r} 48 | 49 | W <- numeric(1000) 50 | set.seed(0) 51 | for (i in 1:1000) 52 | { 53 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2) 54 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2) 55 | W[i] <- mean(x) + mean(y) #save sum of means 56 | } 57 | 58 | hist(W) 59 | 60 | mean(W < 40) 61 | ``` 62 | 63 | 64 | ###Exercise 22 65 | ```{r} 66 | X <- runif(1000, 40, 60) 67 | Y <- runif(1000, 45, 80) 68 | 69 | total <- X + Y 70 | 71 | hist(total) 72 | ``` 73 | 74 | ###Exercise 33 75 | Finite population 
simulation 76 | 77 | ```{r} 78 | N <- 400 # population size 79 | n <- 5 # sample size 80 | 81 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from 82 | # Exp(1/10) 83 | hist(finpop) # distribution of your finite pop. 84 | mean(finpop) # mean (mu) of your pop. 85 | sd(finpop) # stdev (sigma) of your pop. 86 | sd(finpop)/sqrt(n) # theoretical standard error of sampling 87 | # dist. of mean(x), with replacement 88 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement 89 | 90 | Xbar <- numeric(1000) 91 | for (i in 1:1000) 92 | { 93 | x <- sample(finpop, n) # Random sample of size n (w/o replacement) 94 | Xbar[i] <- mean(x) # Find mean of sample, store in Xbar 95 | } 96 | hist(Xbar) 97 | 98 | qqnorm(Xbar) 99 | qqline(Xbar) 100 | 101 | mean(Xbar) 102 | sd(Xbar) # estimated standard error of sampling 103 | # distribution 104 | ``` 105 | 106 | ###Exercise 34 107 | ```{r} 108 | W <- numeric(1000) 109 | for (i in 1:1000) 110 | { 111 | x <- rnorm(20, 25, 7) 112 | W[i] <- var(x) 113 | } 114 | mean(W) 115 | var(W) 116 | hist(W) 117 | 118 | qqnorm(W) 119 | qqline(W) 120 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distribution-Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 4 15 | ```{r} 16 | pop <- c(3, 5, 6, 6, 8, 11, 13, 15, 19, 20) 17 | N <- 10^4 18 | Xbar <- numeric(N) 19 | 20 | for (i in 1:N) 21 | { 22 | samp <- sample(pop, 4, replace = TRUE) 23 | Xbar[i] <- mean(samp) 24 | } 25 | 26 | ggplot() + geom_histogram(aes(Xbar), bins = 10) 27 | 28 | mean(Xbar < 11) 29 | ``` 30 | 31 | ###Exercise 6 32 | ```{r} 33 | Recidivism <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Recidivism.csv") 34 | N <- 10^4 35 | phat <- numeric(N) 36 | n <- 25 37 | 38 | for (i in 1:N) 39 | { 40 | samp <- sample(Recidivism$Recid, n) 41 | phat[i] <- mean(samp == "Yes") 42 | } 43 | 44 | #c) change n <- 250 45 | ``` 46 | 47 | ###Exercise 19 48 | X1,X2,..X10 ~ N(20, 8^2), Y1, Y2,..Y15 ~ N(16,7^2) 49 | W = mean(X) + mean(Y) 50 | ```{r} 51 | 52 | W <- numeric(1000) 53 | set.seed(0) 54 | for (i in 1:1000) 55 | { 56 | x <- rnorm(10, 20, 8) #draw 10 from N(20, 8^2) 57 | y <- rnorm(15, 16, 7) #draw 15 from N(16, 7^2) 58 | W[i] <- mean(x) + mean(y) #save sum of means 59 | } 60 | 61 | ggplot() + geom_histogram(aes(W), bins = 12) 62 | 63 | mean(W < 40) 64 | ``` 65 | 66 | 67 | ###Exercise 22 68 | ```{r} 69 | X <- runif(1000, 40, 60) 70 | Y <- runif(1000, 45, 80) 71 | 72 | total <- X + Y 73 | 74 | ggplot() + geom_histogram(aes(total), bins = 12) 75 | ``` 76 | 77 | ###Exercise 33 78 | Finite population simulation 79 | 80 | ```{r} 81 | N <- 400 # population size 82 | n <- 5 # sample size 83 | 84 | finpop <- rexp(N, 1/10) # Create a finite pop. of size N=400 from 85 | # Exp(1/10) 86 | ggplot() + geom_histogram(aes(finpop), bins = 12) # distribution of your finite pop. 87 | 88 | mean(finpop) # mean (mu) of your pop. 89 | sd(finpop) # stdev (sigma) of your pop. 90 | sd(finpop)/sqrt(n) # theoretical standard error of sampling 91 | # dist. 
of mean(x), with replacement 92 | sd(finpop)/sqrt(n) * sqrt((N-n)/(N-1)) # without replacement 93 | 94 | Xbar <- numeric(1000) 95 | for (i in 1:1000) 96 | { 97 | x <- sample(finpop, n) # Random sample of size n (w/o replacement) 98 | Xbar[i] <- mean(x) # Find mean of sample, store in my.means 99 | } 100 | 101 | ggplot() + geom_histogram(aes(Xbar), bins = 12) 102 | 103 | df <- data.frame(Xbar) 104 | ggplot(df, aes(sample=Xbar)) + stat_qq() + stat_qq_line() 105 | 106 | mean(Xbar) 107 | sd(Xbar) # estimated standard error of sampling 108 | # distribution 109 | ``` 110 | 111 | ###Exercise 34 112 | ```{r} 113 | W <- numeric(1000) 114 | for (i in 1:1000) 115 | { 116 | x <- rnorm(20, 25, 7) 117 | W[i] <- var(x) 118 | } 119 | mean(W) 120 | var(W) 121 | 122 | ggplot() + geom_histogram(aes(W), bins = 10) 123 | 124 | df <- data.frame(W) 125 | ggplot(df, aes(sample = W)) + stat_qq() + stat_qq_line() 126 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap04SamplingDist_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 4 Sampling Distributions" 3 | author: "Chihara-Hesterberg" 4 | date: "November 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(dplyr) 11 | library(ggplot2) 12 | ``` 13 | 14 | ###Example 4.2: Sampling distribution from Exp(1/15) 15 | ```{r} 16 | Xbar <- numeric(1000) 17 | #set.seed(300) 18 | for (i in 1:1000) 19 | { 20 | x <- rexp(100, rate = 1/15) 21 | Xbar[i] <- mean(x) 22 | } 23 | 24 | ggplot() + geom_histogram(aes(Xbar), bins = 15) + xlab("means") 25 | 26 | df <- data.frame(Xbar) 27 | ggplot(df, aes(sample=Xbar)) + stat_qq() + stat_qq_line() 28 | 29 | mean(Xbar) 30 | sd(Xbar) 31 | ``` 32 | 33 | ###Example 4.3: Sampling Dist from Unif[0,1] 34 | ```{r} 35 | maxY <- numeric(1000) 36 | #set.seed(100) 37 | for (i in 1:1000) 38 | { 39 | y <- runif(12) #draw random sample of size 12 40 | maxY[i] <- max(y) #find max, save in position i 41 | } 42 | 43 | ggplot() + geom_histogram(aes(maxY), binwidth=.05, center=.975) + xlab("maximums") 44 | 45 | ``` 46 | 47 | To create a histogram with a density curve imposed, we will need to create a data frame that holds the 'maxY' variable. We also create a function for the density curve $f(x)=12x^{11}$. 
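(This density comes from the CDF of the maximum: for a random sample $Y_1, \ldots, Y_{12}$ from Unif[0,1], $P(\max Y_i \le x) = x^{12}$ for $0 \le x \le 1$, and differentiating gives $f(x) = 12x^{11}$.)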
48 | 49 | 50 | ```{r} 51 | df <- data.frame(maxY) 52 | myfun <- function(x){12*x^{11}} 53 | 54 | ggplot(df) + geom_histogram(aes(maxY, y = stat(density)), binwidth=.05, center=.975) + xlab("maximums") + stat_function(fun = myfun) 55 | ``` 56 | 57 | ###Example 4.6 Sum of Poisson random variables 58 | ```{r} 59 | X <- rpois(10^4, 5) #Draw 10^4 values from Pois(5) 60 | Y <- rpois(10^4, 12) #Draw 10^4 values from Pois(12) 61 | W <- X + Y 62 | 63 | df1 <- data.frame(W) 64 | df2 <- data.frame(x=2:35, y = dpois(2:35,17)) 65 | ggplot(df1, aes(W)) + geom_histogram(aes(y=stat(density)), bins=12) + geom_line(data=df2, aes(x=x, y=y), colour = "red") 66 | 67 | mean(W) 68 | var(W) 69 | ``` 70 | 71 | ###Example 4.7 72 | Sampling distribution simulation 73 | Sample of size 30 from gamma r=5, lambda=2 74 | 75 | ```{r} 76 | #set.seed(10) 77 | Xbar <- numeric(1000) 78 | for (i in 1:1000) 79 | { 80 | x <- rgamma(30, shape = 5, rate = 2) 81 | Xbar[i] <- mean(x) 82 | } 83 | 84 | ggplot() + geom_histogram(aes(Xbar), bins=15) + labs(title = "Distribution of means") 85 | 86 | ggplot() + stat_qq(aes(sample = Xbar)) 87 | 88 | #If you want a line, then 89 | df <- data.frame(Xbar) 90 | ggplot(df, aes(sample = Xbar)) + stat_qq() + stat_qq_line() 91 | 92 | mean(Xbar) 93 | sd(Xbar) 94 | sum(Xbar > 3)/1000 95 | #alternatively 96 | mean(Xbar > 3) 97 | ``` 98 | 99 | ###Example 4.11 R Note 100 | 101 | ```{r} 102 | dbinom(25, 120, .3) 103 | 104 | pbinom(25, 120, .3) 105 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap05Bootstrap_Exer.R: -------------------------------------------------------------------------------- 1 | ##Chapter 5 The Bootstrap 2 | 3 | #Exercises 4 | 5 | #------------------------------------------------ 6 | #10 (medians) 7 | ## 8 | ne <- 10000 # n even 9 | no <- 10001 # n odd 10 | 11 | wwe <- rnorm(ne) # draw random sample of size ne 12 | wwo <- rnorm(no) # draw random sample of size no 13 | 14 | N <- 10^4 15 | even.boot <- numeric(N) #save space 16 | odd.boot <- numeric(N) 17 | set.seed(10) 18 | for (i in 1:N) 19 | { 20 | x.even <- sample(wwe, ne, replace = TRUE) 21 | x.odd <- sample(wwo, no, replace = TRUE) 22 | even.boot[i] <- median(x.even) 23 | odd.boot[i] <- median(x.odd) 24 | } 25 | 26 | par(mfrow = c(2, 1)) 27 | hist(even.boot, xlim = c(-1, 1)) #set x range to be 28 | hist(odd.boot, xlim = c(-1, 1)) #same in both plots 29 | par(mfrow = c(1, 1)) #reset to original 30 | 31 | #----------------------------------- 32 | #Exercise 20 33 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 34 | N <- 10^5 35 | result <- numeric(N) 36 | for (i in 1:N) 37 | { 38 | index <- sample(12, replace = TRUE) 39 | Dive.boot <- Diving2017[index, ] 40 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal) 41 | } 42 | 43 | hist(result) 44 | quantile(result, c(0.025, 0.975)) 45 | -------------------------------------------------------------------------------- /Edition2/R/Chap05Bootstrap_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 5 Bootstrap - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Exercise 10 13 | 14 | Simulate bootstrap for medians 15 | ```{r} 16 | ne <- 10000 # n even 17 | no <- 10001 # n odd 18 | 19 | wwe <- rnorm(ne) # draw random sample of size ne 20 | wwo <- 
rnorm(no) # draw random sample of size no 21 | 22 | N <- 10^4 23 | even.boot <- numeric(N) #save space 24 | odd.boot <- numeric(N) 25 | set.seed(10) 26 | for (i in 1:N) 27 | { 28 | x.even <- sample(wwe, ne, replace = TRUE) 29 | x.odd <- sample(wwo, no, replace = TRUE) 30 | even.boot[i] <- median(x.even) 31 | odd.boot[i] <- median(x.odd) 32 | } 33 | 34 | par(mfrow = c(2, 1)) 35 | hist(even.boot, xlim = c(-1, 1)) #set x range to be 36 | hist(odd.boot, xlim = c(-1, 1)) #same in both plots 37 | par(mfrow = c(1, 1)) #reset to original 38 | ``` 39 | 40 | ###Exercise 20 41 | ```{r} 42 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 43 | N <- 10^5 44 | result <- numeric(N) 45 | for (i in 1:N) 46 | { 47 | index <- sample(12, replace = TRUE) 48 | Dive.boot <- Diving2017[index, ] 49 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal) 50 | } 51 | 52 | hist(result) 53 | quantile(result, c(0.025, 0.975)) 54 | ``` 55 | -------------------------------------------------------------------------------- /Edition2/R/Chap05Bootstrap_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chapter 5 Bootstrap - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 10 15 | 16 | Simulate bootstrap for medians 17 | ```{r} 18 | ne <- 10000 # n even 19 | no <- 10001 # n odd 20 | 21 | wwe <- rnorm(ne) # draw random sample of size ne 22 | wwo <- rnorm(no) # draw random sample of size no 23 | 24 | N <- 10^4 25 | even.boot <- numeric(N) #save space 26 | odd.boot <- numeric(N) 27 | #set.seed(10) 28 | for (i in 1:N) 29 | { 30 | x.even <- sample(wwe, ne, replace = TRUE) 31 | x.odd <- sample(wwo, no, replace = TRUE) 32 | even.boot[i] <- median(x.even) 33 | odd.boot[i] <- median(x.odd) 34 | } 35 | 36 | range(even.boot) 37 | range(odd.boot) 38 | p1 <- ggplot() + geom_histogram(aes(even.boot), breaks = seq(-.06, .04, by = .005)) 39 | p2 <- ggplot() + geom_histogram(aes(odd.boot), breaks = seq(-.06, .04, by = .005)) 40 | 41 | library(gridExtra) 42 | grid.arrange(p1,p2) 43 | 44 | ``` 45 | 46 | ###Exercise 20 47 | ```{r} 48 | Diving2017 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Diving2017.csv") 49 | N <- 10^5 50 | result <- numeric(N) 51 | for (i in 1:N) 52 | { 53 | index <- sample(12, replace = TRUE) 54 | Dive.boot <- Diving2017[index, ] 55 | result[i] <- mean(Dive.boot$Final) - median(Dive.boot$Semifinal) 56 | } 57 | 58 | ggplot() + geom_histogram(aes(result), bins = 12) 59 | 60 | quantile(result, c(0.025, 0.975)) 61 | ``` 62 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals.R: -------------------------------------------------------------------------------- 1 | #Chapter 7 More Confidence Intervals 2 | 3 | #----------------------------------------------------- 4 | #Section 7.1.1 CI for normal with known sigma 5 | 6 | #set.seed(1) 7 | counter <- 0 # set counter to 0 8 | plot(x = c(22, 28), y = c(1, 100), type = "n", 9 | xlab = "", ylab = "") 10 | abline(v = 25, col="red") # vertical line at mu 11 | for (i in 1:1000) 12 | { 13 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 14 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 15 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 16 | if (L < 25 && 25 < U) # 
check to see if 25 is in interval 17 | counter <- counter + 1 # increase counter by 1 18 | if (i <= 100) #plot first 100 intervals 19 | segments(L, i, U, i) 20 | } 21 | 22 | abline(v = 25, col = "red") #vertical line at mu 23 | 24 | counter/1000 # proportion of times interval contains mu. 25 | 26 | #--------------------------------------------------------------- 27 | # Section 7.1.2 28 | # Simulate distribution of t statistic 29 | N <- 10^4 30 | w <- numeric(N) 31 | n <- 15 #sample size 32 | for (i in 1:N) 33 | { 34 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2) 35 | xbar <- mean(x) 36 | s <- sd(x) 37 | w[i] <- (xbar-25) / (s/sqrt(n)) 38 | } 39 | 40 | hist(w) 41 | 42 | qqnorm(w, pch = ".") 43 | abline(0, 1, col = 2) # y = x line 44 | 45 | #pch = "." is point character. This option says to use . for the points. 46 | 47 | #---------------------------------------------------------- 48 | # Example 7.7 Simulation 95% confidence interval from 49 | # skewed gamma distribution 50 | # set.seed(0) 51 | 52 | tooLow <- 0 #set counter to 0 53 | tooHigh <- 0 #set counter to 0 54 | n <- 20 # sample size 55 | N <- 10^5 56 | for (i in 1:N) 57 | { 58 | x <- rgamma(n, shape=5, rate=2) 59 | xbar <- mean(x) 60 | s <- sd(x) 61 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n) 62 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n) 63 | if (upper < 5/2) tooLow <- tooLow + 1 64 | if (lower > 5/2) tooHigh <- tooHigh + 1 65 | } 66 | tooLow/N 67 | tooHigh/N 68 | 69 | 70 | 71 | #---------------------------------------- 72 | # Example 7.21 One sample bootstrap t confidence interval 73 | 74 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 75 | Arsenic <- subset(Bangladesh, select = Arsenic, drop = TRUE) 76 | 77 | xbar <- mean(Arsenic) 78 | N <- 10^4 79 | n <- length(Arsenic) 80 | Tstar <- numeric(N) 81 | #set.seed(100) 82 | for (i in 1:N) 83 | { 84 | x <- sample(Arsenic, size = n, replace = T) 85 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n)) 86 | } 87 | 88 | quantile(Tstar, c(0.025, 0.975)) 89 | 90 | hist(Tstar, xlab = "T*", main = "Bootstrap distribution of T*") 91 | 92 | dev.new() 93 | qqnorm(Tstar) 94 | qqline(Tstar) 95 | 96 | #------------------------------------------------------- 97 | # Example 7.22 Verizon 98 | # 2-Sample bootstrap t confidence interval 99 | 100 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 101 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE) 102 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE) 103 | 104 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC) 105 | nx <- length(Time.ILEC) #nx=1664 106 | ny <- length(Time.CLEC) #ny=23 107 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny) 108 | 109 | N <- 10000 110 | Tstar <- numeric(N) 111 | set.seed(0) 112 | for(i in 1:N) 113 | { 114 | bootx <- sample(Time.ILEC, nx, replace = TRUE) 115 | booty <- sample(Time.CLEC, ny, replace = TRUE) 116 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) / 117 | sqrt(var(bootx)/nx + var(booty)/ny) 118 | } 119 | 120 | thetahat - quantile(Tstar, c(.975, .025)) * SE 121 | 122 | t.test(Time.ILEC, Time.CLEC)$conf 123 | 124 | #---------------------------------------------------------------- 125 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Section 7.1.1 13 | CI for normal with known sigma 14 | ```{r, out.width="100%"} 15 | #set.seed(1) 16 | counter <- 0 # set counter to 0 17 | plot(x = c(22, 28), y = c(1, 100), type = "n", 18 | xlab = "", ylab = "") 19 | abline(v = 25, col="red") # vertical line at mu 20 | for (i in 1:1000) 21 | { 22 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 23 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 24 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 25 | if (L < 25 && 25 < U) # check to see if 25 is in interval 26 | counter <- counter + 1 # increase counter by 1 27 | if (i <= 100) #plot first 100 intervals 28 | segments(L, i, U, i) 29 | } 30 | 31 | abline(v = 25, col = "red") #vertical line at mu 32 | 33 | counter/1000 # proportion of times interval contains mu. 34 | ``` 35 | 36 | ### Section 7.1.2 37 | Simulate distribution of t statistic 38 | ```{r} 39 | N <- 10^4 40 | w <- numeric(N) 41 | n <- 15 #sample size 42 | for (i in 1:N) 43 | { 44 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2) 45 | xbar <- mean(x) 46 | s <- sd(x) 47 | w[i] <- (xbar-25) / (s/sqrt(n)) 48 | } 49 | 50 | hist(w) 51 | 52 | qqnorm(w, pch = ".") 53 | abline(0, 1, col = 2) # y = x line 54 | ``` 55 | The `pch = "."` argument in the `qqnorm` command sets the point character. Here, the option says to use . for the points. 56 | 57 | 58 | ### Example 7.7 59 | Simulation 95% confidence interval from 60 | skewed gamma distribution 61 | ```{r} 62 | # set.seed(0) 63 | 64 | tooLow <- 0 #set counter to 0 65 | tooHigh <- 0 #set counter to 0 66 | n <- 20 # sample size 67 | N <- 10^5 68 | for (i in 1:N) 69 | { 70 | x <- rgamma(n, shape=5, rate=2) 71 | xbar <- mean(x) 72 | s <- sd(x) 73 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n) 74 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n) 75 | if (upper < 5/2) tooLow <- tooLow + 1 76 | if (lower > 5/2) tooHigh <- tooHigh + 1 77 | } 78 | tooLow/N 79 | tooHigh/N 80 | ``` 81 | 82 | 83 | ### Example 7.21 84 | One sample bootstrap t confidence interval 85 | 86 | ```{r} 87 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 88 | Arsenic <- Bangladesh$Arsenic 89 | #Alternatively 90 | #Arsenic <- subset(Bangladesh, select = Arsenic, drop = TRUE) 91 | 92 | xbar <- mean(Arsenic) 93 | N <- 10^4 94 | n <- length(Arsenic) 95 | Tstar <- numeric(N) 96 | #set.seed(100) 97 | for (i in 1:N) 98 | { 99 | x <- sample(Arsenic, size = n, replace = T) 100 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n)) 101 | } 102 | 103 | quantile(Tstar, c(0.025, 0.975)) 104 | 105 | hist(Tstar, xlab = "T*", main = "Bootstrap distribution of T*") 106 | 107 | qqnorm(Tstar) 108 | qqline(Tstar) 109 | ``` 110 | 111 | ### Example 7.22 Verizon 112 | 2-Sample bootstrap t confidence interval 113 | 114 | ```{r} 115 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 116 | Time.ILEC <- subset(Verizon, select = Time, Group == "ILEC", drop = TRUE) 117 | Time.CLEC <- subset(Verizon, select = Time, Group == "CLEC", drop = TRUE) 118 | 119 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC) 120 | nx <- length(Time.ILEC) #nx=1664 121 | ny <- length(Time.CLEC) #ny=23 122 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny) 123 | 124 | N <- 10000 125 | Tstar <- numeric(N) 126 | set.seed(0) 127 | for(i in 1:N) 128 | { 129 | bootx <- sample(Time.ILEC, nx, replace = TRUE) 130 | booty <- sample(Time.CLEC, ny, replace = TRUE) 131 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) / 132 | sqrt(var(bootx)/nx + var(booty)/ny) 133 | } 134 | 135 | thetahat - quantile(Tstar, c(.975, .025)) * SE 136 | 137 | t.test(Time.ILEC, Time.CLEC)$conf 138 | ``` 139 | 140 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 7: More confidence intervals 2 | #Exercises 3 | 4 | #Exercise 9 5 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 6 | mu <- mean(FlightDelays$Delay) 7 | 8 | counter <- 0 9 | plot(c(-20, 100), c(1, 100), type = "n") 10 | 11 | for (i in 1:1000) 12 | { 13 | x <- sample(FlightDelays$Delay, 30, replace = FALSE) 14 | L <- t.test(x)$conf.int[1] 15 | U <- t.test(x)$conf.int[2] 16 | 17 | if (L < mu && mu < U) 18 | counter <- counter + 1 19 | if (i <= 100) 20 | segments(L, i, U, i) 21 | } 22 | 23 | abline(v = mu, col = "red") 24 | counter/1000 25 | 26 | #------------------------------------------------------- 27 | #Exercise 22 28 | #Simulation to compare pooled/unpooled t-confidence intervals 29 | 30 | pooled.count <- 0 31 | unpooled.count <- 0 32 | 33 | m <- 20 34 | n <- 10 35 | 36 | B <- 10000 37 | for (i in 1:B) 38 | { 39 | x <- rnorm(m, 8,10) 40 | y <- rnorm(n, 3, 15) 41 | 42 | CI.pooled <- t.test(x,y,var.equal=T)$conf 43 | CI.unpooled <- t.test(x,y)$conf 44 | 45 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2]) 46 | pooled.count <- pooled.count + 1 47 | 48 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2]) 49 | unpooled.count <- unpooled.count + 1 50 | } 51 | 52 | pooled.count/B 53 | 54 | unpooled.count/B 55 | 56 | #----------------- 57 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Exercise 9 13 | ```{r} 14 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 15 | mu <- mean(FlightDelays$Delay) 16 | 17 | counter <- 0 18 | plot(c(-20, 100), c(1, 100), type = "n") 19 | 20 | for (i in 1:1000) 21 | { 22 | x <- sample(FlightDelays$Delay, 30, replace = FALSE) 23 | L <- t.test(x)$conf.int[1] 24 | U <- t.test(x)$conf.int[2] 25 | 26 | if (L < mu && mu < U) 27 | counter <- counter + 1 28 | if (i <= 100) 29 | segments(L, i, U, i) 30 | } 31 | 32 | abline(v = mu, col = "red") 33 | counter/1000 34 | ``` 35 | 36 | ###Exercise 22 37 | Simulation to compare pooled/unpooled t-confidence intervals 38 | ```{r} 39 | pooled.count <- 0 40 | unpooled.count <- 0 41 | 42 | m <- 20 43 | n <- 10 44 | 45 | N <- 10000 46 | for (i in 1:N) 47 | { 48 | x <- rnorm(m, 8,10) 49 | y <- rnorm(n, 3, 15) 50 | 51 | CI.pooled <- t.test(x,y,var.equal=T)$conf 52 | CI.unpooled <- t.test(x,y)$conf 53 | 54 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2]) 55 | pooled.count <- pooled.count + 1 56 | 57 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2]) 58 | unpooled.count <- unpooled.count + 1 59 | } 60 | 61 | pooled.count/N 62 | 63 | unpooled.count/N 64 | 65 | ``` 66 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 9 15 | ```{r} 16 | FlightDelays <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/FlightDelays.csv") 17 | mu <- mean(FlightDelays$Delay) 18 | 19 | counter <- 0 20 | 21 | df <- data.frame(x=c(-20,100), y = c(1,100)) 22 | p <- ggplot(df, aes(x = x, y = y)) + geom_vline(xintercept=mu, colour = "red") 23 | 24 | for (i in 1:1000) 25 | { 26 | x <- sample(FlightDelays$Delay, 30, replace = FALSE) 27 | L <- t.test(x)$conf.int[1] 28 | U <- t.test(x)$conf.int[2] 29 | 30 | if (L < mu && mu < U) 31 | counter <- counter + 1 32 | if (i <= 100) 33 | p <- p + annotate("segment", x = L, xend=U, y = i, yend=i ) 34 | 35 | } 36 | 37 | 38 | print(p) 39 | 40 | counter/1000 41 | ``` 42 | 43 | ###Exercise 22 44 | Simulation to compare pooled/unpooled t-confidence intervals 45 | ```{r} 46 | pooled.count <- 0 47 | unpooled.count <- 0 48 | 49 | m <- 20 50 | n <- 10 51 | 52 | N <- 10000 53 | for (i in 1:N) 54 | { 55 | x <- rnorm(m, 8,10) 56 | y <- rnorm(n, 3, 15) 57 | 58 | CI.pooled <- t.test(x,y,var.equal=T)$conf 59 | CI.unpooled <- t.test(x,y)$conf 60 | 61 | if (CI.pooled[1] < 5 & 5 < CI.pooled[2]) 62 | pooled.count <- pooled.count + 1 63 | 64 | if (CI.unpooled[1] < 5 & 5 < CI.unpooled[2]) 65 | unpooled.count <- unpooled.count + 1 66 | } 67 | 68 | pooled.count/N 69 | 70 | unpooled.count/N 71 | 72 | ``` 73 | -------------------------------------------------------------------------------- /Edition2/R/Chap07MoreConfIntervals_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 7 More Confidence Intervals" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Section 7.1.1 15 | CI for normal with known sigma 16 | ```{r, out.width="100%"} 17 | #set.seed(1) 18 | counter <- 0 # set counter to 0 19 | df <- data.frame(x=c(22,28), y = c(1,100)) 20 | p <- ggplot(df, aes(x=x, y = y)) + geom_vline(xintercept=25, colour = "red") 21 | 22 | for (i in 1:1000) 23 | { 24 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 25 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 26 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 27 | if (L < 25 && 25 < U) # check to see if 25 is in interval 28 | counter <- counter + 1 # increase counter by 1 29 | if (i <= 100) #plot first 100 intervals 30 | p <- p + annotate("segment", x = L, xend=U, y = i, yend=i ) 31 | } 32 | 33 | print(p) 34 | 35 | counter/1000 # proportion of times interval contains mu.
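# (Added note) counter/1000 is a Monte Carlo estimate of the true coverage
# probability; treating each of the 1000 intervals as an independent
# Bernoulli trial (an assumption), a rough standard error for it is:
sqrt((counter/1000) * (1 - counter/1000) / 1000)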
36 | ``` 37 | 38 | ### Section 7.1.2 39 | Simulate distribution of t statistic 40 | ```{r} 41 | N <- 10^4 42 | w <- numeric(N) 43 | n <- 15 #sample size 44 | for (i in 1:N) 45 | { 46 | x <- rnorm(n, 25, 7) #draw a size 15 sample from N(25, 7^2) 47 | xbar <- mean(x) 48 | s <- sd(x) 49 | w[i] <- (xbar-25) / (s/sqrt(n)) 50 | } 51 | 52 | ggplot() + geom_histogram(aes(w), bins = 12) 53 | 54 | ggplot() + stat_qq(aes(sample = w)) + geom_abline(intercept = 0, slope = 1, colour = "red") 55 | 56 | ``` 57 | 58 | 59 | ### Example 7.7 60 | Simulation 95% confidence interval from 61 | skewed gamma distribution 62 | ```{r} 63 | # set.seed(0) 64 | 65 | tooLow <- 0 #set counter to 0 66 | tooHigh <- 0 #set counter to 0 67 | n <- 20 # sample size 68 | N <- 10^5 69 | for (i in 1:N) 70 | { 71 | x <- rgamma(n, shape=5, rate=2) 72 | xbar <- mean(x) 73 | s <- sd(x) 74 | lower <- xbar - abs(qt(.025, n-1))*s/sqrt(n) 75 | upper <- xbar + abs(qt(.025, n-1))*s/sqrt(n) 76 | if (upper < 5/2) tooLow <- tooLow + 1 77 | if (lower > 5/2) tooHigh <- tooHigh + 1 78 | } 79 | tooLow/N 80 | tooHigh/N 81 | ``` 82 | 83 | 84 | ### Example 7.21 85 | One sample bootstrap t confidence interval 86 | 87 | ```{r} 88 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 89 | Arsenic <- pull(Bangladesh, Arsenic) 90 | #Alternatively: Arsenic <- Bangladesh$Arsenic 91 | 92 | xbar <- mean(Arsenic) 93 | N <- 10^4 94 | n <- length(Arsenic) 95 | Tstar <- numeric(N) 96 | #set.seed(100) 97 | for (i in 1:N) 98 | { 99 | x <- sample(Arsenic, size = n, replace = T) 100 | Tstar[i] <- (mean(x)-xbar)/(sd(x)/sqrt(n)) 101 | } 102 | 103 | quantile(Tstar, c(0.025, 0.975)) 104 | 105 | ggplot() + geom_histogram(aes(Tstar), bins = 12) + labs(x= "T*", title = "Bootstrap distribution of T*") 106 | 107 | df <- data.frame(Tstar) 108 | ggplot(df, aes(sample = Tstar)) + stat_qq() + stat_qq_line() 109 | ``` 110 | 111 | ### Example 7.22 Verizon 112 | 2-Sample bootstrap t confidence interval 113 | 114 | ```{r} 115 | Verizon <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Verizon.csv") 116 | 117 | Time.ILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time) 118 | Time.CLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time) 119 | thetahat <- mean(Time.ILEC)-mean(Time.CLEC) 120 | nx <- length(Time.ILEC) #nx=1664 121 | ny <- length(Time.CLEC) #ny=23 122 | SE <- sqrt(var(Time.ILEC)/nx + var(Time.CLEC)/ny) 123 | 124 | N <- 10000 125 | Tstar <- numeric(N) 126 | set.seed(0) 127 | for(i in 1:N) 128 | { 129 | bootx <- sample(Time.ILEC, nx, replace = TRUE) 130 | booty <- sample(Time.CLEC, ny, replace = TRUE) 131 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) / 132 | sqrt(var(bootx)/nx + var(booty)/ny) 133 | } 134 | 135 | thetahat - quantile(Tstar, c(.975, .025)) * SE 136 | 137 | t.test(Time.ILEC, Time.CLEC)$conf 138 | ``` 139 | 140 | -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests.R: -------------------------------------------------------------------------------- 1 | #Chap08MoreHypTests 2 | 3 | #Section 8.2 4 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 5 | 6 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater") 7 | 8 | Arsenic <- Bangladesh$Arsenic 9 | N <- 10^5 10 | 11 | observedT <- t.test(Arsenic, mu = 100)$statistic 12 | xbar <- mean(Arsenic) 13 | n <- length(Arsenic) 14 | Tstar <- numeric(N) 15 | for (i in 1:N) 16 | { 17 | bootx <- sample(Arsenic, n , replace = TRUE) 18 | Tstar[i] <- (mean(bootx) - 
xbar)/(sd(bootx)/sqrt(n)) 19 | } 20 | 21 | hist(Tstar) 22 | abline(v = observedT) 23 | 24 | (sum(Tstar >= observedT) + 1)/(N + 1) 25 | -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Section 8.2 13 | ```{r} 14 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 15 | 16 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater") 17 | 18 | Arsenic <- Bangladesh$Arsenic 19 | N <- 10^5 20 | 21 | observedT <- t.test(Arsenic, mu = 100)$statistic 22 | xbar <- mean(Arsenic) 23 | n <- length(Arsenic) 24 | Tstar <- numeric(N) 25 | for (i in 1:N) 26 | { 27 | bootx <- sample(Arsenic, n , replace = TRUE) 28 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n)) 29 | } 30 | 31 | hist(Tstar) 32 | abline(v = observedT) 33 | 34 | (sum(Tstar >= observedT) + 1)/(N + 1) 35 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 8 More Hypothesis Tests 2 | #Exercises 3 | 4 | #Exercise 13 5 | m <- 30 6 | n <- 30 7 | sigma1 <- 5 8 | sigma2 <- 5 9 | 10 | pooled.count <- 0 11 | unpooled.count <- 0 12 | 13 | for (i in 1:10^5) 14 | { 15 | x <- rnorm(m, 30, 5) 16 | y <- rnorm(n, 30, 5) 17 | 18 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value 19 | p.unpooled <- t.test(x, y)$p.value 20 | 21 | pooled.count <- pooled.count + (p.pooled < 0.05) 22 | unpooled.count <- unpooled.count + (p.unpooled < 0.05) 23 | } 24 | 25 | pooled.count/10^5 26 | unpooled.count/10^5 27 | 28 | #------------------------------------------- 29 | #Exercise 21 30 | 31 | n1 <- 100 32 | n2 <- 100 33 | N <- 10^4 34 | p <- 0.1 35 | 36 | x1 <- rbinom(N, size = n1, p) 37 | x2 <- rbinom(N, size = n2, p) 38 | 39 | phat <- (x1 + x2)/(n1 + n2) 40 | propDiff <- x1/n1 - x2/n2 41 | 42 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2)) 43 | 44 | qqnorm(propDiff/SE) 45 | abline(0, 1, col = "lightgray") 46 | 47 | #Exercise 40 48 | 49 | N <- 10^4 50 | tstat <- numeric(N) 51 | for (i in 1:N) 52 | { 53 | w <- rnorm(30, 7, 1) 54 | tstat[i] <- (mean(w) - 5)* sqrt(30) 55 | } 56 | 57 | 58 | hist(tstat, prob = TRUE) 59 | curve(dt(x, df = 29), from = 0, to = 20, add = TRUE) 60 | 61 | curve(dt(x , df = 29, ncp = 10.95), from = 0, to = 20, 62 | col = "blue", add = TRUE) 63 | -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | ``` 11 | 12 | ###Exercise 13 13 | ```{r} 14 | m <- 30 15 | n <- 30 16 | sigma1 <- 5 17 | sigma2 <- 5 18 | 19 | pooled.count <- 0 20 | unpooled.count <- 0 21 | 22 | for (i in 1:10^5) 23 | { 24 | x <- rnorm(m, 30, 5) 25 | y <- rnorm(n, 30, 5) 26 | 27 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value 28 | p.unpooled <- t.test(x, y)$p.value 29 | 30 | pooled.count <- 
pooled.count + (p.pooled < 0.05) 31 | unpooled.count <- unpooled.count + (p.unpooled < 0.05) 32 | } 33 | 34 | pooled.count/10^5 35 | unpooled.count/10^5 36 | 37 | ``` 38 | ###Exercise 21 39 | 40 | ```{r} 41 | n1 <- 100 42 | n2 <- 100 43 | N <- 10^4 44 | p <- 0.1 45 | 46 | x1 <- rbinom(N, size = n1, p) 47 | x2 <- rbinom(N, size = n2, p) 48 | 49 | phat <- (x1 + x2)/(n1 + n2) 50 | propDiff <- x1/n1 - x2/n2 51 | 52 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2)) 53 | 54 | qqnorm(propDiff/SE) 55 | abline(0, 1, col = "lightgray") 56 | ``` 57 | 58 | ###Exercise 40 59 | ```{r} 60 | N <- 10^4 61 | tstat <- numeric(N) 62 | for (i in 1:N) 63 | { 64 | w <- rnorm(30, 7, 1) 65 | tstat[i] <- (mean(w) - 5)* sqrt(30) 66 | } 67 | 68 | 69 | hist(tstat, prob = TRUE) 70 | curve(dt(x, df = 29), from = 0, to = 20, add = TRUE) 71 | 72 | curve(dt(x , df = 29, ncp = 10.95), from = 0, to = 20, 73 | col = "blue", add = TRUE) 74 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_Exer_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Exercise 13 15 | ```{r} 16 | m <- 30 17 | n <- 30 18 | sigma1 <- 5 19 | sigma2 <- 5 20 | 21 | pooled.count <- 0 22 | unpooled.count <- 0 23 | 24 | for (i in 1:10^5) 25 | { 26 | x <- rnorm(m, 30, 5) 27 | y <- rnorm(n, 30, 5) 28 | 29 | p.pooled <- t.test(x, y, var.equal = TRUE)$p.value 30 | p.unpooled <- t.test(x, y)$p.value 31 | 32 | pooled.count <- pooled.count + (p.pooled < 0.05) 33 | unpooled.count <- unpooled.count + (p.unpooled < 0.05) 34 | } 35 | 36 | pooled.count/10^5 37 | unpooled.count/10^5 38 | 39 | ``` 40 | ###Exercise 21 41 | 42 | ```{r} 43 | n1 <- 100 44 | n2 <- 100 45 | N <- 10^4 46 | p <- 0.1 47 | 48 | x1 <- rbinom(N, size = n1, p) 49 | x2 <- rbinom(N, size = n2, p) 50 | 51 | phat <- (x1 + x2)/(n1 + n2) 52 | propDiff <- x1/n1 - x2/n2 53 | 54 | SE <- sqrt(phat * (1 - phat)*(1/n1 + 1/n2)) 55 | 56 | df <- data.frame(x=propDiff/SE) 57 | 58 | ggplot(df, aes(sample = x)) + stat_qq() + 59 | geom_abline(intercept = 0, slope = 1, colour = "lightgray") 60 | ``` 61 | 62 | ###Exercise 40 63 | ```{r} 64 | N <- 10^4 65 | tstat <- numeric(N) 66 | for (i in 1:N) 67 | { 68 | w <- rnorm(30, 7, 1) 69 | tstat[i] <- (mean(w) - 5)* sqrt(30) 70 | } 71 | 72 | df <- data.frame(x=tstat) 73 | ggplot(df, aes(x)) + geom_histogram(aes(y = stat(density)), bins = 12) + 74 | stat_function(fun=dt, args=list(df=29), colour = "red") + 75 | stat_function(fun=dt, args=list(df=29, ncp = 10.95), colour = "blue") 76 | 77 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap08MoreHypTests_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 8 More Hypothesis Tests" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%") 10 | library(ggplot2) 11 | library(dplyr) 12 | ``` 13 | 14 | ###Section 8.2 15 | ```{r} 16 | Bangladesh <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Bangladesh.csv") 17 | 18 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater") 19 | 20 | 
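# Bootstrap counterpart of the t test above: resample from the data and
# center each t statistic at the observed mean xbar, so the Tstar values
# below approximate the sampling distribution of T under the null hypothesis.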
Arsenic <- Bangladesh %>% pull(Arsenic) 21 | #Arsenic <- Bangladesh$Arsenic 22 | N <- 10^5 23 | 24 | observedT <- t.test(Arsenic, mu = 100)$statistic 25 | xbar <- mean(Arsenic) 26 | n <- length(Arsenic) 27 | Tstar <- numeric(N) 28 | for (i in 1:N) 29 | { 30 | bootx <- sample(Arsenic, n, replace = TRUE) 31 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n)) 32 | } 33 | 34 | ggplot() + geom_histogram(aes(Tstar), bins = 12) + geom_vline(xintercept = observedT) 35 | 36 | (sum(Tstar >= observedT) + 1)/(N + 1) 37 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap09Regression.R: -------------------------------------------------------------------------------- 1 | #Chapter 9 Regression 2 | # 3 | 4 | #Section 9.2 5 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv") 6 | 7 | plot(Spruce$Di.change, Spruce$Ht.change) 8 | cor(Spruce$Di.change, Spruce$Ht.change) 9 | 10 | plot(Ht.change ~ Di.change, data = Spruce) 11 | 12 | #Example 9.3 13 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce) 14 | spruce.lm 15 | 16 | plot(Spruce$Ht.change, resid(spruce.lm), ylab = "residuals") 17 | abline(h = 0) 18 | lines(smooth.spline(Spruce$Ht.change, resid(spruce.lm), df = 3), col = "blue") 19 | 20 | #Example 9.8 21 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv") 22 | skate.lm <- lm(Free ~ Short, data = Skating2010) 23 | summary(skate.lm) 24 | 25 | #Section 9.5 26 | N <- 10^4 27 | cor.boot <- numeric(N) 28 | beta.boot <- numeric(N) 29 | alpha.boot <- numeric(N) 30 | yPred.boot <- numeric(N) 31 | n <- 24 #number of skaters 32 | for (i in 1:N) 33 | { 34 | index <- sample(n, replace = TRUE) #sample from 1, 2, ... n 35 | Skate.boot <- Skating2010[index, ] 36 | 37 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free) 38 | 39 | #recalculate linear model estimates 40 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot) 41 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept 42 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope 43 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^ 44 | } 45 | 46 | mean(cor.boot) 47 | sd(cor.boot) 48 | quantile(cor.boot, c(0.025, 0.975)) 49 | 50 | hist(cor.boot, main = "Bootstrap distribution of correlation", 51 | xlab = "Correlation") 52 | observed <- cor(Skating2010$Short, Skating2010$Free) 53 | abline(v = observed, col = "blue") #add line at observed cor. 54 | 55 | #------------------------------------------------------- 56 | # Section 9.5.1 Permutation test 57 | 58 | N <- 10^5 - 1 59 | n <- nrow(Skating2010) #number of observations 60 | result <- numeric(N) 61 | observed <- cor(Skating2010$Short, Skating2010$Free) 62 | for (i in 1:N) 63 | { 64 | index <- sample(n , replace = FALSE) 65 | Short.permuted <- Skating2010$Short[index] 66 | result[i] <- cor(Short.permuted, Skating2010$Free) 67 | } 68 | 69 | (sum(observed <= result) + 1)/(N+1) #P-value 70 | 71 | #---------------------------------------------- 72 | #Chapter 9.6.1 Inference for logistic regression 73 | Fatalities <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv") 74 | 75 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial) 76 | data.class(fit) # is a "glm" object, so for help use: 77 | help(glm) 78 | 79 | fit # prints the coefficients and other basic info 80 | coef(fit) # the coefficients as a vector 81 | summary(fit) # gives standard errors for coefficients, etc.
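# (Added sketch) Interpreting the fitted coefficients: they are on the
# log-odds scale, so exponentiating gives odds ratios. confint.default()
# gives Wald intervals (a normal-approximation assumption).
exp(coef(fit))        # for Age: multiplicative change in odds per year of age
confint.default(fit)  # Wald confidence intervals for the coefficients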
82 | 83 | x <- seq(17, 91, length = 500) # vector spanning the age range 84 | # compute predicted probabilities 85 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x)) 86 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x) 87 | 88 | plot(Fatalities$Age, Fatalities$Alcohol, 89 | ylab = "Probability of alcohol") 90 | lines(x, y2) 91 | 92 | # Full bootstrap - slope coefficient, and prediction at age 20 93 | N <- 10^3 94 | n <- nrow(Fatalities) # number of observations 95 | alpha.boot <- numeric(N) 96 | beta.boot <- numeric(N) 97 | pPred.boot <- numeric(N) 98 | 99 | for (i in 1:N) 100 | { 101 | index <- sample(n, replace = TRUE) 102 | Fatal.boot <- Fatalities[index, ] # resampled data 103 | 104 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 105 | family = binomial) 106 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 107 | beta.boot[i] <- coef(fit.boot)[2] # new slope 108 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 109 | } 110 | 111 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals 112 | quantile(pPred.boot, c(.025, .975)) 113 | 114 | par(mfrow=c(2,2)) # set layout 115 | hist(beta.boot, xlab = "beta", main = "") 116 | qqnorm(beta.boot, main = "") 117 | 118 | hist(pPred.boot, xlab = "p^", main = "") 119 | qqnorm(pPred.boot, main = "") 120 | 121 | #-------------------- 122 | help(predict.glm) # for more help on predict 123 | 124 | n <- nrow(Fatalities) # number of observations 125 | x <- seq(17, 91, length = 500) # vector spanning the age range 126 | df.Age <- data.frame(Age = x) # data frame to hold 127 | # explanatory variables, will use this for making predictions 128 | 129 | plot(Fatalities$Age, Fatalities$Alcohol, 130 | ylab = "Probability of alcohol") 131 | for (i in 1:25) 132 | { 133 | index <- sample(n, replace = TRUE) 134 | Fatal.boot <- Fatalities[index, ] # resampled data 135 | 136 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 137 | family = binomial) 138 | pPred <- predict(fit.boot, newdata = df.Age, type = "response") 139 | lines(x, pPred) 140 | } 141 | 142 | #end fatalities 143 | #--------------------- 144 | -------------------------------------------------------------------------------- /Edition2/R/Chap09Regression.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 9 Regression" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Section 9.2 13 | ```{r} 14 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv") 15 | 16 | plot(Spruce$Di.change, Spruce$Ht.change) 17 | cor(Spruce$Di.change, Spruce$Ht.change) 18 | 19 | plot(Ht.change ~ Di.change, data = Spruce) 20 | ``` 21 | 22 | ###Example 9.3 23 | ```{r} 24 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce) 25 | spruce.lm 26 | 27 | plot(Spruce$Ht.change, resid(spruce.lm), ylab = "residuals") 28 | abline(h = 0) 29 | lines(smooth.spline(Spruce$Ht.change, resid(spruce.lm), df = 3), col = "blue") 30 | ``` 31 | 32 | ###Example 9.8 33 | ```{r} 34 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv") 35 | skate.lm <- lm(Free ~ Short, data = Skating2010) 36 | summary(skate.lm) 37 | ``` 38 | 39 | ###Section 9.5 40 | 41 | ```{r} 42 | N <- 10^4 43 | cor.boot <- numeric(N) 44 | beta.boot <- numeric(N) 45 | alpha.boot <- numeric(N) 46 | yPred.boot <- numeric(N) 47 | n <- 24 #number of skaters 48 | for (i in 1:N) 49 | { 50 | index <- sample(n, replace = TRUE) #sample from 1, 2, ... n 51 | Skate.boot <- Skating2010[index, ] 52 | 53 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free) 54 | 55 | #recalculate linear model estimates 56 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot) 57 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept 58 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope 59 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^ 60 | } 61 | 62 | mean(cor.boot) 63 | sd(cor.boot) 64 | quantile(cor.boot, c(0.025, 0.975)) 65 | 66 | hist(cor.boot, main = "Bootstrap distribution of correlation", 67 | xlab = "Correlation") 68 | observed <- cor(Skating2010$Short, Skating2010$Free) 69 | abline(v = observed, col = "blue") #add line at observed cor. 70 | ``` 71 | 72 | ### Section 9.5.1 Permutation test 73 | 74 | ```{r} 75 | N <- 10^5 - 1 76 | n <- nrow(Skating2010) #number of observations 77 | result <- numeric(N) 78 | observed <- cor(Skating2010$Short, Skating2010$Free) 79 | for (i in 1:N) 80 | { 81 | index <- sample(n , replace = FALSE) 82 | Short.permuted <- Skating2010$Short[index] 83 | result[i] <- cor(Short.permuted, Skating2010$Free) 84 | } 85 | 86 | (sum(observed <= result) + 1)/(N+1) #P-value 87 | ``` 88 | 89 | 90 | ###Chapter 9.6.1 Inference for logistic regression 91 | 92 | ```{r} 93 | Fatalities <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv") 94 | 95 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial) 96 | data.class(fit) # is a "glm" object, so for help use: 97 | help(glm) 98 | 99 | fit # prints the coefficients and other basic info 100 | coef(fit) # the coefficients as a vector 101 | summary(fit) # gives standard errors for coefficients, etc. 102 | 103 | x <- seq(17, 91, length = 500) # vector spanning the age range 104 | # compute predicted probabilities 105 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x)) 106 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x) 107 | 108 | plot(Fatalities$Age, Fatalities$Alcohol, 109 | ylab = "Probability of alcohol") 110 | lines(x, y2) 111 | ``` 112 | 113 | 114 | #### Full bootstrap - slope coefficient, and prediction at age 20 115 | ```{r} 116 | N <- 10^3 117 | n <- nrow(Fatalities) # number of observations 118 | alpha.boot <- numeric(N) 119 | beta.boot <- numeric(N) 120 | pPred.boot <- numeric(N) 121 | 122 | for (i in 1:N) 123 | { 124 | index <- sample(n, replace = TRUE) 125 | Fatal.boot <- Fatalities[index, ] # resampled data 126 | 127 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 128 | family = binomial) 129 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 130 | beta.boot[i] <- coef(fit.boot)[2] # new slope 131 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 132 | } 133 | 134 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals 135 | quantile(pPred.boot, c(.025, .975)) 136 | 137 | par(mfrow=c(2,2)) # set layout 138 | hist(beta.boot, xlab = "beta", main = "") 139 | qqnorm(beta.boot, main = "") 140 | 141 | hist(pPred.boot, xlab = "p^", main = "") 142 | qqnorm(pPred.boot, main = "") 143 | ``` 144 | 145 | 146 | ```{r} 147 | n <- nrow(Fatalities) # number of observations 148 | x <- seq(17, 91, length = 500) # vector spanning the age range 149 | df.Age <- data.frame(Age = x) # data frame to hold 150 | # explanatory variables, will use this for making predictions 151 | 152 | plot(Fatalities$Age, Fatalities$Alcohol, 153 | ylab = "Probability of alcohol") 154 | for (i in 1:25) 155 | { 156 | index <- sample(n, replace = TRUE) 157 | Fatal.boot <- Fatalities[index, ] # resampled data 158 | 159 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 160 | family = binomial) 161 | pPred <- predict(fit.boot, newdata = df.Age, type = "response") 162 | lines(x, pPred) 163 | } 164 | ``` 165 | 166 | -------------------------------------------------------------------------------- /Edition2/R/Chap09Regression_d.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 9 Regression" 3 | author: "Chihara-Hesterberg" 4 | date: "December 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | library(dplyr) 11 | library(ggplot2) 12 | ``` 13 | 14 | ###Section 9.2 15 | ```{r} 16 | Spruce <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Spruce.csv") 17 | 18 | ggplot(Spruce, aes(x = Di.change, y = Ht.change)) + geom_point() 19 | 20 | cor(Spruce$Di.change, Spruce$Ht.change) 21 | ``` 22 | 23 | ###Example 9.3 24 | ```{r} 25 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce) 26 | spruce.lm 27 | 28 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point() + 29 | stat_smooth(method="lm", se = FALSE) 30 | ``` 31 | 32 | We introduce a new package `broom` that performs some __tidying__ of the output of base R's `lm` command: 33 | 34 | ```{r} 35 | library(broom) 36 | 37 | fit <- augment(spruce.lm) 38 | head(fit, 3) 39 | ``` 40 | In particular, note that we now have a data set that, in addition to the original variables, also contains a column of the fitted (predicted) values and the residuals. 41 | 42 | To create a residual plot: 43 | 44 | ```{r} 45 | ggplot(fit, aes(x=Ht.change, y = .resid)) + geom_point() + 46 | geom_hline(yintercept = 0) + labs(y = "residuals") 47 | ``` 48 | 49 | To add a __smoother__ line to the residual plot, use the `stat_smooth()` command: 50 | 51 | ```{r} 52 | ggplot(fit, aes(x = Ht.change, y = .resid)) + geom_point() + stat_smooth(method = loess, se = FALSE) + geom_hline(yintercept = 0) 53 | ``` 54 | 55 | ###Example 9.8 56 | ```{r} 57 | Skating2010 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Skating2010.csv") 58 | skate.lm <- lm(Free ~ Short, data = Skating2010) 59 | summary(skate.lm) 60 | ``` 61 | 62 | ###Section 9.5 63 | 64 | ```{r} 65 | N <- 10^4 66 | cor.boot <- numeric(N) 67 | beta.boot <- numeric(N) 68 | alpha.boot <- numeric(N) 69 | yPred.boot <- numeric(N) 70 | n <- 24 #number of skaters 71 | for (i in 1:N) 72 | { 73 | index <- sample(n, replace = TRUE) #sample from 1, 2, ... 
n 74 | Skate.boot <- Skating2010[index, ] 75 | 76 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free) 77 | 78 | #recalculate linear model estimates 79 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot) 80 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept 81 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope 82 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i] #recompute Y^ 83 | } 84 | 85 | mean(cor.boot) 86 | sd(cor.boot) 87 | quantile(cor.boot, c(0.025, 0.975)) 88 | 89 | 90 | observed <- cor(Skating2010$Short, Skating2010$Free) 91 | 92 | ggplot() + geom_histogram(aes(cor.boot), bins = 12) + 93 | labs(title = "Bootstrap distribution of correlation", x = "Correlation") + 94 | geom_vline(xintercept = observed, colour = "blue") 95 | ``` 96 | 97 | ### Section 9.5.1 Permutation test 98 | 99 | ```{r} 100 | N <- 10^5 - 1 101 | n <- nrow(Skating2010) #number of observations 102 | result <- numeric(N) 103 | observed <- cor(Skating2010$Short, Skating2010$Free) 104 | for (i in 1:N) 105 | { 106 | index <- sample(n , replace = FALSE) 107 | Short.permuted <- Skating2010$Short[index] 108 | result[i] <- cor(Short.permuted, Skating2010$Free) 109 | } 110 | 111 | (sum(observed <= result) + 1)/(N+1) #P-value 112 | ``` 113 | 114 | 115 | ###Chapter 9.6.1 Inference for logistic regression 116 | 117 | ```{r} 118 | Fatalities <-read.csv("http://sites.google.com/site/chiharahesterberg/data2/Fatalities.csv") 119 | 120 | fit <- glm(Alcohol ~ Age, data = Fatalities, family = binomial) 121 | data.class(fit) # is a "glm" object, so for help use: 122 | help(glm) 123 | 124 | fit # prints the coefficients and other basic info 125 | coef(fit) # the coefficients as a vector 126 | summary(fit) # gives standard errors for coefficients, etc. 127 | 128 | x <- seq(17, 91, length = 500) # vector spanning the age range 129 | # compute predicted probabilities 130 | y1 <- exp(-.123 - .029*x) / (1 + exp(-.123 - .029*x)) 131 | y2 <- plogis(coef(fit)[1] + coef(fit)[2] * x) 132 | 133 | my.fun <- function(x, lm.object){ 134 | plogis(coef(lm.object)[1] + coef(lm.object)[2]*x) 135 | } 136 | 137 | ggplot(Fatalities, aes(x=Age, y = Alcohol)) + geom_point() + 138 | stat_function(fun = my.fun, args=list(lm.object = fit)) 139 | 140 | ``` 141 | 142 | 143 | #### Full bootstrap - slope coefficient, and prediction at age 20 144 | ```{r} 145 | N <- 10^3 146 | n <- nrow(Fatalities) # number of observations 147 | alpha.boot <- numeric(N) 148 | beta.boot <- numeric(N) 149 | pPred.boot <- numeric(N) 150 | 151 | for (i in 1:N) 152 | { 153 | index <- sample(n, replace = TRUE) 154 | Fatal.boot <- Fatalities[index, ] # resampled data 155 | 156 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 157 | family = binomial) 158 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 159 | beta.boot[i] <- coef(fit.boot)[2] # new slope 160 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 161 | } 162 | 163 | quantile(beta.boot, c(.025, .975)) # 95% percentile intervals 164 | quantile(pPred.boot, c(.025, .975)) 165 | 166 | library(gridExtra) 167 | 168 | p1 <- ggplot() + geom_histogram(aes(beta.boot), bins = 12) + labs(x = "beta") 169 | p2 <- ggplot() + stat_qq(aes(sample = beta.boot)) 170 | p3 <- ggplot() + geom_histogram(aes(pPred.boot), bins = 12) + labs(x = "p^") 171 | p4 <- ggplot() + stat_qq(aes(sample = pPred.boot)) 172 | grid.arrange(p1, p2, p3, p4) 173 | ``` 174 | 175 | 176 | ```{r} 177 | n <- nrow(Fatalities) # number of observations 178 | x <- seq(17, 91, length = 500) # vector spanning the age range 179 | df.Age <- data.frame(Age 
= x) # data frame to hold 180 | # explanatory variables, will use this for making predictions 181 | 182 | p <- ggplot(Fatalities, aes(x= Age, y = Alcohol)) + geom_point() + 183 | labs(y = "Probability of alcohol") 184 | 185 | for (i in 1:25) 186 | { 187 | index <- sample(n, replace = TRUE) 188 | Fatal.boot <- Fatalities[index, ] # resampled data 189 | 190 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 191 | family = binomial) 192 | df.Age$pPred <- predict(fit.boot, newdata = df.Age, type = "response") 193 | p <- p + geom_line(data = df.Age, aes(x = Age, y = pPred)) 194 | } 195 | 196 | print(p) 197 | ``` 198 | 199 | -------------------------------------------------------------------------------- /Edition2/R/Chap10categorical.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------ 2 | #Chapter 10 Categorical data 3 | #Here is a function that computes the chi-square 4 | #test statistic 5 | 6 | #This function is a bit more enhanced than the code in the textbook 7 | chisq <- function(observed, print = TRUE) { 8 | # Chi-square statistic for independence in a contingency table, 9 | # with related data exploration. 10 | # observed is the observed contingency table 11 | 12 | observedWithTotals <- cbind(observed, total = rowSums(observed)) 13 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals)) 14 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed) 15 | statistic <- sum((observed-expected)^2/expected) 16 | if (print) 17 | { 18 | cat("Observed, with totals:\n") 19 | print(observedWithTotals) 20 | cat("\nRow Fractions:\n") 21 | print(round(observed / rowSums(observed), 3)) 22 | cat("\nColumn Fractions:\n") 23 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3)) 24 | 25 | cat("\nExpected:\n") 26 | print(round(expected, 1)) 27 | cat("\nDifference:\n") 28 | print(round(observed - expected, 1)) 29 | 30 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n") 31 | } 32 | return(invisible(statistic)) 33 | } 34 | 35 | 36 | #------------------------------------------- 37 | #Uncomment below if you haven't imported GSS2002 yet. 
38 | #GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv") 39 | 40 | Education <- GSS2002$Education 41 | DeathPenalty <- GSS2002$DeathPenalty 42 | #Alternatively 43 | #Education <- subset(GSS2002, select=Education, drop = TRUE) 44 | #DeathPenalty <- subset(GSS2002, select=DeathPenalty, drop = TRUE) 45 | 46 | table(Education, DeathPenalty) #note education ordered alphabetically 47 | 48 | Education <- ordered(GSS2002$Education, 49 | levels = c("Left HS", "HS", "Jr Col", "Bachelors", 50 | "Graduate")) 51 | 52 | table(Education, DeathPenalty) 53 | 54 | #Use function created above to calculate chi-square test statistic 55 | observedChi2 <- chisq(table(Education, DeathPenalty)) 56 | observedChi2 57 | 58 | #Find those rows where there is at least one NA 59 | index <- which(is.na(Education) | is.na(DeathPenalty)) 60 | 61 | #Remove those rows from the two variables and define Educ2 and 62 | #DeathPenalty2 to be the new vectors with those rows removed 63 | Educ2 <- Education[-index] 64 | DeathPenalty2 <- DeathPenalty[-index] 65 | 66 | N <- 10^4-1 67 | result<-numeric(N) 68 | 69 | for (i in 1:N) 70 | { 71 | DP.permutation <-sample(DeathPenalty2) 72 | GSS.table <- table(Educ2, DP.permutation) 73 | result[i]<-chisq(GSS.table, print = FALSE) 74 | } 75 | 76 | #Create a histogram 77 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic") 78 | abline(v = observedChi2, col = "blue", lty = 5) 79 | 80 | 81 | #optional: Create a histogram with the density curve 82 | #imposed onto the histogram 83 | #The prob=TRUE option below scales the histogram to have area 1 84 | hist(result, xlab = "chi-square statistic", main="Distribution of chi-square statistic", 85 | ylim = c(0,.2)) 86 | curve(dchisq(x, df = 4), from = 0, to = 25, col = "green", add = TRUE) 87 | 88 | #Compute P-value 89 | (sum(result >= observedChi2) + 1)/(N + 1) 90 | 91 | 92 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1) 93 | mat <- table(Education, DeathPenalty) 94 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4-1) 95 | 96 | #---------------------------------------------------------------- 97 | #Example 10.2 98 | mat <- rbind(c(42, 50), c(30, 87)) 99 | chisq.test(mat) 100 | 101 | #Section 10.3.3 Fisher's Exact Test 102 | fisher.test(mat) 103 | 104 | 105 | 106 | #Section 10.4 Test of Homogeneity 107 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50)) 108 | candy.mat 109 | 110 | chisq.test(candy.mat) 111 | 112 | #Section 10.6 113 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv") 114 | Homeruns <- Phillies2009$Homeruns 115 | #Homeruns <- subset(Phillies2009, select = Homeruns, drop = TRUE) 116 | 117 | 118 | 119 | lambda <- mean(Homeruns) 120 | dpois(0:5, lambda) 121 | table(Homeruns) 122 | 123 | table(Homeruns)/162 124 | -------------------------------------------------------------------------------- /Edition2/R/Chap10categorical.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 10 Categorical Data" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Section 10.2 13 | 14 | Here is a function that computes the chi-square test statistic 15 | 16 | The code below gives a function that is a bit more enhanced than the code in the textbook: 17 | ```{r} 18 | chisq <- function(observed, print = TRUE) 
{ 19 | # Chi-square statistic for independence in a contingency table, 20 | # with related data exploration. 21 | # observed is the observed contingency table 22 | 23 | observedWithTotals <- cbind(observed, total = rowSums(observed)) 24 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals)) 25 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed) 26 | statistic <- sum((observed-expected)^2/expected) 27 | if (print){ 28 | cat("Observed, with totals:\n") 29 | print(observedWithTotals) 30 | cat("\nRow Fractions:\n") 31 | print(round(observed / rowSums(observed), 3)) 32 | cat("\nColumn Fractions:\n") 33 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3)) 34 | 35 | cat("\nExpected:\n") 36 | print(round(expected, 1)) 37 | cat("\nDifference:\n") 38 | print(round(observed - expected, 1)) 39 | 40 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n") 41 | } 42 | return(invisible(statistic)) 43 | } 44 | ``` 45 | Import the General Social Survey data and extract the two variables, `Education` and 46 | `DeathPenalty`. 47 | 48 | `Education` is a factor variable. We use the `ordered` command to *order* the levels. 49 | 50 | ```{r} 51 | GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv") 52 | 53 | Education <- GSS2002$Education 54 | DeathPenalty <- GSS2002$DeathPenalty 55 | 56 | #Alternatively 57 | #Education <- subset(GSS2002, select = Education, drop = TRUE) 58 | #DeathPenalty <- subset(GSS2002, select = DeathPenalty, drop = TRUE) 59 | 60 | table(Education, DeathPenalty) #Education ordered alphabetically 61 | 62 | Education <- ordered(GSS2002$Education, levels = c("Left HS", "HS", "Jr Col", "Bachelors", "Graduate")) 63 | table(Education, DeathPenalty) 64 | ``` 65 | 66 | Use function created above to calculate chi-square test statistic 67 | 68 | ```{r} 69 | observedChi2 <- chisq(table(Education, DeathPenalty)) 70 | observedChi2 71 | ``` 72 | 73 | There are missing values in both variables so we get the row numbers where there is at least one NA. 
We remove those rows from the two variables and create two new vectors `Educ2` and `DeathPenalty2` that hold the non-NA values:
74 | 
75 | ```{r}
76 | str(GSS2002)
77 | 
78 | index <- which(is.na(Education) | is.na(DeathPenalty))
79 | 
80 | Educ2 <- Education[-index]
81 | DeathPenalty2 <- DeathPenalty[-index]
82 | ```
83 | Now run the permutation test:
84 | ```{r}
85 | N <- 10^4 - 1
86 | result <- numeric(N)
87 | 
88 | for (i in 1:N)
89 | {
90 | DP.permutation <- sample(DeathPenalty2)
91 | GSS.table <- table(Educ2, DP.permutation)
92 | result[i] <- chisq(GSS.table, print = FALSE)
93 | }
94 | 
95 | #Create a histogram
96 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic")
97 | abline(v = observedChi2, col = "blue", lty = 5)
98 | 
99 | #Compute P-value
100 | (sum(result >= observedChi2) + 1)/(N + 1)
101 | ```
102 | 
103 | Optional: Create a histogram with the density curve
104 | imposed onto the histogram.
105 | The `prob = TRUE` argument scales the histogram to have area 1:
106 | ```{r}
107 | hist(result, xlab = "chi-square statistic", main = "Distribution of chi-square statistic", prob = TRUE)
108 | curve(dchisq(x, df = 4), from = 0, to = 25, col = "green", add = TRUE)
109 | ```
110 | 
111 | The `chisq.test` command also has an option that will perform this permutation test:
112 | 
113 | ```{r}
114 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1)
115 | mat <- table(Education, DeathPenalty)
116 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4 - 1)
117 | ```
118 | 
119 | 
120 | 
121 | 
122 | ###Example 10.2
123 | ```{r}
124 | mat <- rbind(c(42, 50), c(30, 87))
125 | chisq.test(mat)
126 | ```
127 | 
128 | ###Section 10.3.3 Fisher's Exact Test
129 | 
130 | ```{r}
131 | fisher.test(mat)
132 | ```
133 | 
134 | ###Section 10.4 Test of Homogeneity
135 | ```{r}
136 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
137 | candy.mat
138 | 
139 | chisq.test(candy.mat)
140 | ```
141 | 
142 | ###Section 10.6
143 | ```{r}
144 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv")
145 | Homeruns <- Phillies2009$Homeruns
146 | #Homeruns <- subset(Phillies2009, select = Homeruns, drop = TRUE)
147 | 
148 | lambda <- mean(Homeruns)
149 | dpois(0:5, lambda)
150 | table(Homeruns)
151 | 
152 | table(Homeruns)/162
153 | ```
-------------------------------------------------------------------------------- /Edition2/R/Chap10categorical_d.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 10 Categorical Data"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 | 
14 | ###Section 10.2
15 | 
16 | Here is a function that computes the chi-square test statistic.
17 | 
18 | The code below gives a function that is a bit more enhanced than the code in the textbook:
19 | ```{r}
20 | chisq <- function(observed, print = TRUE) {
21 | # Chi-square statistic for independence in a contingency table,
22 | # with related data exploration.
23 | # observed is the observed contingency table
24 | 
25 | observedWithTotals <- cbind(observed, total = rowSums(observed))
26 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
27 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
28 | statistic <- sum((observed - expected)^2/expected)
29 | if (print){
30 | cat("Observed, with totals:\n")
31 | print(observedWithTotals)
32 | cat("\nRow Fractions:\n")
33 | print(round(observed / rowSums(observed), 3))
34 | cat("\nColumn Fractions:\n")
35 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
36 | 
37 | cat("\nExpected:\n")
38 | print(round(expected, 1))
39 | cat("\nDifference:\n")
40 | print(round(observed - expected, 1))
41 | 
42 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
43 | }
44 | return(invisible(statistic))
45 | }
46 | ```
47 | Import the General Social Survey data. We are interested in the two variables, `Education` and `DeathPenalty`.
48 | 
49 | Using the `str()` command, we note that these two variables have missing values. We will create a new data frame that contains just the two variables of interest and only the rows without NAs.
50 | 
51 | ```{r}
52 | GSS2002 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/GSS2002.csv")
53 | 
54 | str(GSS2002)
55 | 
56 | df <- GSS2002 %>%
57 |   select(Education, DeathPenalty) %>%
58 |   filter(!is.na(Education) & !is.na(DeathPenalty))
59 | 
60 | Education <- pull(df, Education)
61 | DeathPenalty <- pull(df, DeathPenalty)
62 | 
63 | table(Education, DeathPenalty)
64 | ```
65 | 
66 | `Education` is a factor variable and the default ordering of the levels is alphabetical. We use the `ordered` command to *order* the levels.
67 | 
68 | ```{r}
69 | Education <- ordered(Education, levels = c("Left HS", "HS", "Jr Col", "Bachelors", "Graduate"))
70 | table(Education, DeathPenalty)
71 | ```
72 | 
73 | 
74 | Use the function created above to calculate the chi-square test statistic:
75 | 
76 | ```{r}
77 | observedChi2 <- chisq(table(Education, DeathPenalty))
78 | observedChi2
79 | ```
80 | 
81 | Now run the permutation test:
82 | 
83 | ```{r}
84 | N <- 10^4 - 1
85 | result <- numeric(N)
86 | 
87 | for (i in 1:N)
88 | {
89 | DP.permutation <- sample(DeathPenalty)
90 | GSS.table <- table(Education, DP.permutation)
91 | result[i] <- chisq(GSS.table, print = FALSE)
92 | }
93 | 
94 | ggplot() + geom_histogram(aes(result)) +
95 |   labs(title = "Distribution of chi-square statistics", x = "chi-square statistic") +
96 |   geom_vline(xintercept = observedChi2, colour = "blue")
97 | 
98 | (sum(result >= observedChi2) + 1)/(N + 1)
99 | ```
100 | 
101 | 
102 | Optional: Create a histogram with the density curve
103 | imposed onto the histogram. The ggplot() command will require a data frame which contains the variable of interest.
104 | 
105 | ```{r}
106 | df <- data.frame(result)
107 | ggplot(df) + geom_histogram(aes(result, y = stat(density))) +
108 |   labs(title = "Distribution of chi-square statistics", x = "chi-square statistic") +
109 |   geom_vline(xintercept = observedChi2, colour = "blue") +
110 |   stat_function(fun = dchisq, args = list(df = 4), colour = "green")
111 | ```
112 | 
113 | The `chisq.test` command also has an option that will perform this permutation test:
114 | 
115 | ```{r}
116 | chisq.test(Education, DeathPenalty, simulate.p.value = TRUE, B = 10^4 - 1)
117 | mat <- table(Education, DeathPenalty)
118 | chisq.test(mat, simulate.p.value = TRUE, B = 10^4 - 1)
119 | ```
120 | 
121 | ###Example 10.2
122 | ```{r}
123 | mat <- rbind(c(42, 50), c(30, 87))
124 | chisq.test(mat)
125 | ```
126 | 
127 | ###Section 10.3.3 Fisher's Exact Test
128 | 
129 | ```{r}
130 | fisher.test(mat)
131 | ```
132 | 
133 | ###Section 10.4 Test of Homogeneity
134 | ```{r}
135 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50))
136 | candy.mat
137 | 
138 | chisq.test(candy.mat)
139 | ```
140 | 
141 | ###Section 10.6
142 | ```{r}
143 | Phillies2009 <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/Phillies2009.csv")
144 | Homeruns <- pull(Phillies2009, Homeruns)
145 | 
146 | lambda <- mean(Homeruns)
147 | dpois(0:5, lambda)
148 | table(Homeruns)
149 | 
150 | table(Homeruns)/162
151 | ```
-------------------------------------------------------------------------------- /Edition2/R/Chap11Bayesian.R: --------------------------------------------------------------------------------
1 | #Chapter 11 Bayesian Methods
2 | # R scripts
3 | 
4 | #-----------------------------------
5 | # Example 11.1
6 | theta <- seq(0, 1, by = .1)
7 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
8 | likelihood <- theta * (1 - theta)^2
9 | constant <- sum(prior * likelihood)
10 | posterior <- prior * likelihood / constant
11 | posterior
12 | sum(theta * prior) # prior mean
13 | sum(theta * posterior) # posterior mean
14 | 
15 | #-----------------------
16 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
17 | constant2 <- sum(prior * likelihood2)
18 | posterior2 <- prior * likelihood2 / constant2
19 | posterior2
20 | likelihood3 <- theta^2 * (1 - theta)^3
21 | constant3 <- sum(posterior * likelihood3)
22 | posterior3 <- posterior * likelihood3 / constant3
23 | posterior3 # not shown, matches posterior2
24 | sum(theta*posterior2) # posterior mean
25 | 
26 | plot(theta, prior, type = "b", ylim = c(0, max(posterior3)),
27 | ylab = "probability")
28 | lines(theta, posterior, type = "b", lty = 2)
29 | lines(theta, posterior2, type = "b", lty = 3)
30 | legend("topleft", legend = c("prior", "posterior1", "posterior2"),
31 | lty = 1:3)
32 | 
33 | #-------------------------
34 | # Chapter 11.5 Sequential data
35 | 
36 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
37 | X <- c(52, 41, 55, 49, 39, 39)
38 | alpha <- X # vector of posterior parameters
39 | beta <- n - X # vector of posterior parameters
40 | N <- 10^5 # replications
41 | theta <- matrix(0.0, nrow = N, ncol = 6)
42 | for (j in 1:6)
43 | {
44 | theta[, j] <- rbeta(N, alpha[j], beta[j])
45 | }
46 | probBest <- numeric(6) # vector for results
47 | best <- apply(theta, 1, max) # maximum of each row
48 | for (j in 1:6)
49 | {
50 | probBest[j] <- mean(theta[, j] == best)
51 | }
52 | 
53 | probBest
54 | 
55 | plot(theta[1:10^4, 1], theta[1:10^4, 3], pch = ".")
56 | abline(0, 1)
57 | text(.037, .042, substitute(theta[3] > theta[1]))
58 | text(.042, .037, substitute(theta[1] > theta[3]))
59 | 
-------------------------------------------------------------------------------- /Edition2/R/Chap11Bayesian.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 11 Bayesian Statistics"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2018"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | ```
11 | 
12 | ### Example 11.1
13 | ```{r}
14 | theta <- seq(0, 1, by = .1)
15 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
16 | likelihood <- theta * (1 - theta)^2
17 | constant <- sum(prior * likelihood)
18 | posterior <- prior * likelihood / constant
19 | posterior
20 | sum(theta * prior) # prior mean
21 | sum(theta * posterior) # posterior mean
22 | 
23 | 
24 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
25 | constant2 <- sum(prior * likelihood2)
26 | posterior2 <- prior * likelihood2 / constant2
27 | posterior2
28 | 
29 | likelihood3 <- theta^2 * (1 - theta)^3
30 | constant3 <- sum(posterior * likelihood3)
31 | posterior3 <- posterior * likelihood3 / constant3
32 | posterior3 # not shown, matches posterior2
33 | sum(theta*posterior2) # posterior mean
34 | 
35 | plot(theta, prior, type = "b", ylim = c(0, max(posterior3)),
36 | ylab = "probability")
37 | lines(theta, posterior, type = "b", lty = 2)
38 | lines(theta, posterior2, type = "b", lty = 3)
39 | legend("topleft", legend = c("prior", "posterior1", "posterior2"),
40 | lty = 1:3)
41 | ```
42 | 
43 | ### Chapter 11.5 Sequential data
44 | ```{r}
45 | n <- c(1874, 1867, 1871, 1868, 1875, 1875)
46 | X <- c(52, 41, 55, 49, 39, 39)
47 | alpha <- X # vector of posterior parameters
48 | beta <- n - X # vector of posterior parameters
49 | N <- 10^5 # replications
50 | theta <- matrix(0.0, nrow = N, ncol = 6)
51 | for (j in 1:6)
52 | {
53 | theta[, j] <- rbeta(N, alpha[j], beta[j])
54 | }
55 | probBest <- numeric(6) # vector for results
56 | best <- apply(theta, 1, max) # maximum of each row
57 | for (j in 1:6)
58 | {
59 | probBest[j] <- mean(theta[, j] == best)
60 | }
61 | 
62 | probBest
63 | 
64 | plot(theta[1:10^4, 1], theta[1:10^4, 3], pch = ".")
65 | abline(0, 1)
66 | text(.037, .042, substitute(theta[3] > theta[1]))
67 | text(.042, .037, substitute(theta[1] > theta[3]))
68 | ```
-------------------------------------------------------------------------------- /Edition2/R/Chap11Bayesian_d.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Chap 11 Bayesian Statistics"
3 | author: "Chihara-Hesterberg"
4 | date: "December 2018"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%")
10 | library(ggplot2)
11 | library(dplyr)
12 | ```
13 | 
14 | ### Example 11.1
15 | ```{r}
16 | theta <- seq(0, 1, by = .1)
17 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0)
18 | likelihood <- theta * (1 - theta)^2
19 | constant <- sum(prior * likelihood)
20 | posterior <- prior * likelihood / constant
21 | posterior
22 | sum(theta * prior) # prior mean
23 | sum(theta * posterior) # posterior mean
24 | 
25 | 
26 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 success, 5 fail
27 | constant2 <- sum(prior * likelihood2)
28 | posterior2 <- prior * likelihood2 / constant2
29 | posterior2
30 | 
31 | likelihood3 <- theta^2 * (1 - theta)^3
32 | constant3 <- sum(posterior * likelihood3)
33 | posterior3 <- posterior * likelihood3 / constant3
34 | posterior3 # not shown, matches posterior2
35
| sum(theta*posterior2) # posterior mean 36 | 37 | df <- data.frame(theta, prior, posterior, posterior2) 38 | 39 | ggplot(df) + 40 | geom_line(aes(x = theta, y = prior, colour = "prior")) + 41 | geom_line(aes(x = theta, y = posterior, colour = "posterior")) + 42 | geom_line(aes(x = theta, y = posterior2, colour = "posterior2")) + 43 | scale_colour_manual(name=NULL, 44 | values= c("prior" = "black", "posterior" = "blue", "posterior2" = "red" )) 45 | ``` 46 | 47 | ### Chapter 11.5 Sequential data 48 | ```{r} 49 | n <- c(1874, 1867, 1871, 1868, 1875, 1875) 50 | X <- c(52, 41, 55, 49, 39, 39) 51 | alpha <- X # vector of posterior parameters 52 | beta <- n - X # vector of posterior parameters 53 | N <- 10^5 # replications 54 | theta <- matrix(0.0, nrow = N, ncol = 6) 55 | for (j in 1:6) 56 | { 57 | theta[, j] <- rbeta(N, alpha[j], beta[j]) 58 | } 59 | probBest <- numeric(6) # vector for results 60 | best <- apply(theta, 1, max) # maximum of each row 61 | for (j in 1:6) 62 | { 63 | probBest[j] <- mean(theta[, j] == best) 64 | } 65 | 66 | probBest 67 | 68 | df <- as.data.frame(theta[1:10^4, ]) 69 | names(df) <- paste("x", as.character(1:6), sep = "") 70 | 71 | ggplot(df) + geom_point(aes(x = x1, y = x3), pch = ".") + 72 | geom_abline(slope = 1, intercept = 0) + 73 | annotate("text", x = 0.037, y = 0.042, parse = TRUE, label ="theta[3] > theta[1]") + 74 | annotate("text", x = 0.042, y = 0.037, parse = TRUE, label ="theta[1] > theta[3]") 75 | 76 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap12ANOVA.R: -------------------------------------------------------------------------------- 1 | #Chapter 12 ANOVA 2 | ILBoys <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/ILBoys.csv") 3 | anova(lm(Weight ~ MothersAge, data = ILBoys)) 4 | 5 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 6 | 7 | summary(aov(Weight ~ MothersAge, data = ILBoys)) 8 | 9 | #-------------------------------- 10 | #Section 12.1.2 Permutation test approach 11 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 12 | 13 | n <- length(ILBoys$Weight) 14 | N <- 10^4 - 1 15 | results <- numeric(N) 16 | for (i in 1:N) 17 | { 18 | index <- sample(n) 19 | Weight.perm <- ILBoys$Weight[index] 20 | results[i] <- anova(lm(Weight.perm ~ MothersAge, data = ILBoys))$F[1] 21 | } 22 | 23 | (sum(results >= observed) + 1) / (N + 1) 24 | -------------------------------------------------------------------------------- /Edition2/R/Chap12ANOVA.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 12 ANOVA" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Example 12.1 13 | Illinois baby boys 14 | 15 | ```{r} 16 | 17 | ILBoys <- read.csv("http://sites.google.com/site/chiharahesterberg/data2/ILBoys.csv") 18 | anova(lm(Weight ~ MothersAge, data = ILBoys)) 19 | 20 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 21 | 22 | summary(aov(Weight ~ MothersAge, data = ILBoys)) 23 | ``` 24 | 25 | ###Section 12.1.2 Permutation test approach 26 | ```{r} 27 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 28 | 29 | n <- length(ILBoys$Weight) 30 | N <- 10^4 - 1 31 | results <- numeric(N) 32 | for (i in 1:N) 33 | { 34 | index <- sample(n) 35 | Weight.perm <- ILBoys$Weight[index] 36 | results[i] <- anova(lm(Weight.perm ~ MothersAge, data = ILBoys))$F[1] 37 | 
} 38 | 39 | (sum(results >= observed) + 1) / (N + 1) 40 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap12ANOVA_Exer.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chap 12 ANOVA - Exercises" 3 | author: "Chihara-Hesterberg" 4 | date: "July 2018" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, out.width = "50%") 10 | ``` 11 | 12 | ###Exercise 6 13 | Simulation 14 | 15 | ```{r} 16 | nA <- 50 # set sample sizes 17 | nB <- 50 18 | nC <- 50 19 | # create groups 20 | Group <- rep(c("A","B","C"), c(nA, nB, nC)) 21 | 22 | counter <- 0 23 | N <- 10^4 24 | 25 | for (i in 1:N) 26 | { 27 | a <- rnorm(nA, 20, 3) # Draw samples 28 | b <- rnorm(nB, 20, 3) 29 | c <- rnorm(nC, 20, 3) 30 | X <- c(a, b, c) # Combine into one vector 31 | 32 | Pvalue <- anova(lm(X ~ Group))$P[1] # Extract P-value 33 | if (Pvalue < 0.05) # Reject H0? 34 | counter <- counter + 1 # If yes, increase counter 35 | 36 | } 37 | 38 | counter/N # proportion of times H0 rejected 39 | ``` -------------------------------------------------------------------------------- /Edition2/R/Chap12Anova_Exer.R: -------------------------------------------------------------------------------- 1 | #Chapter 12 ANOVA 2 | 3 | #Exercise 6 Simulation 4 | 5 | nA <- 50 # set sample sizes 6 | nB <- 50 7 | nC <- 50 8 | # create groups 9 | Group <- rep(c("A","B","C"), c(nA, nB, nC)) 10 | 11 | counter <- 0 12 | N <- 10^4 13 | 14 | for (i in 1:N) 15 | { 16 | a <- rnorm(nA, 20, 3) # Draw samples 17 | b <- rnorm(nB, 20, 3) 18 | c <- rnorm(nC, 20, 3) 19 | X <- c(a, b, c) # Combine into one vector 20 | 21 | Pvalue <- anova(lm(X ~ Group))$P[1] # Extract P-value 22 | if (Pvalue < 0.05) # Reject H0? 23 | counter <- counter + 1 # If yes, increase counter 24 | 25 | } 26 | 27 | counter/N # proportion of times H0 rejected 28 | -------------------------------------------------------------------------------- /Edition2/README.md: -------------------------------------------------------------------------------- 1 | # Mathematical Statistics with Resampling and R, 2nd edition (2018) 2 | 3 | This is an older edition. 
For the current edition, see 4 | [https://github.com/lchihara/MathStatsResamplingR](https://github.com/lchihara/MathStatsResamplingR) 5 | 6 | 7 | ## Second Edition 8 | 9 | [Author's website](https://sites.google.com/site/chiharahesterberg) 10 | 11 | [Publisher's website](https://www.wiley.com/en-us/Mathematical+Statistics+with+Resampling+and+R%2C+2nd+Edition-p-9781119416531) 12 | 13 | Available on: 14 | 15 | * [Google Books](https://books.google.com/books?id=t2hvDwAAQBAJ) 16 | * [Google Play Books](https://play.google.com/store/books/details/Laura_M_Chihara_Mathematical_Statistics_with_Resam?id=t2hvDwAAQBAJ) 17 | * [Amazon](https://www.google.com/url?q=https%3A%2F%2Fwww.amazon.com%2FMathematical-Statistics-Resampling-Laura-Chihara-ebook%2Fdp%2FB07HH3KXRH%2Fref%3Dsr_1_1%3Fs%3Dbooks%26ie%3DUTF8%26qid%3D1539059394%26sr%3D1-1%26keywords%3DChihara%2BHesterberg&sa=D&sntz=1&usg=AOvVaw25Q7F0vZTyz2h7LR3_xTe0) 18 | -------------------------------------------------------------------------------- /Edition3/Chapters/c01_GSS2018Questions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Chapters/c01_GSS2018Questions.pdf -------------------------------------------------------------------------------- /Edition3/Chapters/c06_Supplement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Chapters/c06_Supplement.pdf -------------------------------------------------------------------------------- /Edition3/Data/Data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Data/Data.zip -------------------------------------------------------------------------------- /Edition3/Data/Readme.md: -------------------------------------------------------------------------------- 1 | Zip file contains data sets in csv format. 2 | 3 | Data are also available as an R package (resampledata3) from CRAN. 4 | -------------------------------------------------------------------------------- /Edition3/Errata_Edition3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/Errata_Edition3.pdf -------------------------------------------------------------------------------- /Edition3/README.md: -------------------------------------------------------------------------------- 1 | # Mathematical Statistics with Resampling and R, Third Edition (2022) 2 | 3 | 4 | Data sets, R code, supplementary materials for the textbook Mathematical Statistics with Resampling and R 5 | 6 | ## Contents here 7 | 8 | [Chapters](Chapters) 9 | Supplemental material for chapters, including 10 | additional notes about data and advanced topics. 11 | 12 | [Data](Data) data as .csv files (they are also available as an R package, 13 | see below). 14 | 15 | [RScripts](RScripts) R scripts to supplement chapters. 16 | 17 | 18 | ## Data in an R package 19 | 20 | The data are available as an R package 21 | [resampledata3](https://CRAN.R-project.org/package=resampledata3) 22 | on 23 | [CRAN](https://cran.r-project.org/mirrors.html). 
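24 | 
25 | For example, a minimal sketch of installing the package from CRAN and then loading it in R:
26 | 
27 | ```r
28 | install.packages("resampledata3")  # install once from CRAN
29 | library(resampledata3)             # load the package
30 | data(package = "resampledata3")    # list the data sets it provides
31 | ```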
32 | 
33 | 
34 | 
35 | ## Other websites
36 | 
37 | 
38 | The publisher's website is
39 | [Mathematical Statistics with Resampling and R, 3rd Edition](https://www.wiley.com/en-us/Mathematical+Statistics+with+Resampling+and+R%2C+3rd+Edition-p-9781119874034)
40 | 
41 | Available on:
42 | 
43 | * [Google Books](https://books.google.com/books?id=d7CAEAAAQBAJ)
44 | * [Google Play Books](https://play.google.com/store/books/details/Laura_M_Chihara_Mathematical_Statistics_with_Resam?id=d7CAEAAAQBAJ)
45 | * [Amazon](https://www.amazon.com/Mathematical-Statistics-Resampling-Laura-Chihara-ebook/dp/B0B99GCGQQ/ref=sr_1_fkmr2_2)
46 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c02_RIntroEDA1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lchihara/MathStatsResamplingR/605f40aca79f9dadc1465b6af8830ff1253c0dc1/Edition3/RScripts/c02_RIntroEDA1.pdf -------------------------------------------------------------------------------- /Edition3/RScripts/c02_RIntroEDA2.Rmd: --------------------------------------------------------------------------------
1 | ---
2 | title: "Introduction to R, part 2"
3 | author: "Chihara-Hesterberg"
4 | date: "July 2022"
5 | output: html_document
6 | ---
7 | 
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE, out.width="50%")
10 | library(resampledata3)
11 | library(ggplot2)
12 | library(dplyr)
13 | ```
14 | 
15 | 
16 | ### Vectors in R
17 | 
18 | The basic data object in R is the vector.
19 | Even scalars are vectors of length 1.
20 | 
21 | There are several ways to create vectors.
22 | 
23 | The : operator creates sequences incrementing/decrementing
24 | by 1.
25 | 
26 | ```{r}
27 | 1:10
28 | 5:-3
29 | ```
30 | 
31 | The seq function creates sequences also.
32 | ```{r}
33 | seq(0, 3, by = .2)
34 | seq(0, 3, length = 15)
35 | ```
36 | 
37 | To create vectors with no particular pattern, use the
38 | c() function (c for **c**ombine).
39 | 
40 | ```{r}
41 | c(1, 4, 8, 2, 9)
42 | x <- c(2, 0, -4)
43 | x
44 | c(x, 0:5, x)
45 | ```
46 | 
47 | For vectors of characters,
48 | 
49 | ```{r}
50 | c("a", "b", "c", "d")
51 | ```
52 | 
53 | or logical values (note that there are no double quotes):
54 | 
55 | ```{r}
56 | c(TRUE, FALSE, FALSE, TRUE, TRUE, FALSE)
57 | ```
58 | 
59 | The rep command for repeating values:
60 | 
61 | ```{r}
62 | rep("a", 5)
63 | rep(c("a", "b"), 5)
64 | rep(c("a", "b"), c(5, 2))
65 | ```
66 | 
67 | ### The class attribute
68 | 
69 | Use data.class to determine the class attribute of an object.
70 | 
71 | ```{r}
72 | state.name
73 | data.class(state.name)
74 | state.name == "Idaho"
75 | data.class(state.name == "Idaho")
76 | 
77 | head(FlightDelays$Carrier)
78 | data.class(FlightDelays$Carrier)
79 | ```
80 | 
81 | 
82 | ### Basic Arithmetic
83 | 
84 | ```{r}
85 | x <- 1:5
86 | x - 3
87 | x*10
88 | x/10
89 | x^2
90 | 2^x
91 | log(x)
92 | 
93 | w <- 6:10
94 | w
95 | x*w #coordinate-wise multiplication
96 | ```
97 | 
98 | #### Logical expressions
99 | 
100 | ```{r}
101 | x < 3
102 | ```
103 | 
104 | ### Subsetting a vector
105 | 
106 | In many cases, we will want only a portion of a data set. For
107 | subsetting a vector, the basic syntax is vector[*index*].
108 | In particular, note the use of *brackets* to indicate that we are
109 | subsetting.
110 | 
111 | ```{r}
112 | state.name # 50 states (alphabetical order)
113 | state.name[c(1, 25, 50)] # the 1st, 25th, and 50th
114 | state.name[-(1:10)] # remove the first 10.
115 | 
116 | z <- c(8, 3, 0, 9, 9, 2, 1, 3)
117 | z
118 | z[4] # The fourth element of z
119 | z[c(1, 3, 4)] # The first, third, and fourth elements
120 | z[-c(1, 3, 4)] # All elements except the first, third, and fourth
121 | ```
122 | 
123 | To return the values of z less than 4, we first introduce the
124 | which command:
125 | 
126 | ```{r}
127 | which(z < 4) # which positions are z values < 4?
128 | index <- which(z < 4) # store in index
129 | index
130 | z[index] # return z[c(2, 3, 6, 7)]
131 | ```
132 | 
133 | Suppose you want to find those observations where the delay length
134 | was greater than the mean delay length. We'll store this in a vector
135 | called index.
136 | 
137 | ```{r}
138 | delay <- FlightDelays$Delay
139 | index <- which(delay > mean(delay))
140 | head(index)
141 | ```
142 | 
143 | Thus, observations in rows 2, 10, 12, 14, 15, 16 are the first
144 | six that correspond to flights that had delays larger
145 | than the average delay length.
146 | 
147 | ### Extracting parts of a data frame
148 | 
149 | To subset particular rows of a data frame, use the filter command in the **dplyr** package.
150 | 
151 | For example, to create a data frame with just the United Airlines flights:
152 | ```{r}
153 | United <- FlightDelays %>% filter(Carrier == "UA")
154 | ```
155 | The select command in the **dplyr** package allows you to extract just certain variables (columns). For example, to create a data frame containing just the Carrier and Delay variables:
156 | 
157 | ```{r}
158 | FlightDelays2 <- FlightDelays %>% select(Carrier, Delay)
159 | ```
160 | Finally, we can combine these two actions to extract just certain rows and certain columns:
161 | 
162 | ```{r}
163 | United2 <- FlightDelays %>% filter(Carrier == "UA") %>% select(Carrier, Delay)
164 | ```
165 | 
166 | Now, suppose you want to work with a single variable in a data frame.
167 | 
168 | ```{r}
169 | delay <- FlightDelays %>% select(Delay)
170 | head(delay)
171 | mean(delay)
172 | data.class(delay)
173 | ```
174 | The problem is that in the above, the select command returns a data frame, and the mean command operates on vectors.
175 | 
176 | If we just want to extract one variable from a data frame and we want that variable to be a vector, use the pull command.
177 | 178 | ```{r} 179 | delay <- FlightDelays %>% pull(Delay) 180 | mean(delay) 181 | 182 | #Alternatively, we have seen that the $ operator can be used 183 | delay <- FlightDelays$Delay 184 | ``` 185 | 186 | 187 | 188 | -------------------------------------------------------------------------------- /Edition3/RScripts/c03_PermutationTests.R: -------------------------------------------------------------------------------- 1 | #Chapter 3 Permutation Tests 2 | library(resampledata3) 3 | library(dplyr) 4 | library(ggplot2) 5 | 6 | #Section 3.3 7 | 8 | #Beerwings data set 9 | Beerwings %>% group_by(Gender) %>% summarize(mean(Hotwings)) 10 | observed <- 14.5333 - 9.3333 # store observed mean difference 11 | observed 12 | 13 | hotwings <- Beerwings$Hotwings 14 | # Alternative syntax using the dplyr package: 15 | # hotwings <- Beerwings %>% pull(Hotwings) 16 | 17 | N <- 10^5 - 1 # number of times to repeat this process 18 | result <- numeric(N) # space to save the random differences 19 | for (i in 1:N) 20 | { # sample of size 15, from 1 to 30, without replacement 21 | index <- sample(30, size = 15, replace = FALSE) 22 | result[i] <- mean(hotwings[index]) - mean(hotwings[-index]) 23 | } 24 | 25 | ggplot() + geom_histogram(aes(result), bins = 8) + 26 | geom_vline(xintercept = observed, linetype="dashed") 27 | 28 | (sum(result >= observed) + 1)/(N + 1) # P-value 29 | 30 | #----- 31 | #Verizon data set 32 | 33 | Verizon %>% group_by(Group) %>% summarize(mean(Time)) 34 | Time <- Verizon$Time 35 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time) 36 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time) 37 | 38 | observed <- mean(TimeILEC) - mean(TimeCLEC) 39 | observed 40 | 41 | N <- 10^4-1 42 | result <- numeric(N) 43 | for (i in 1:N) 44 | { 45 | index <- sample(1687, size = 1664, replace = FALSE) 46 | result[i] <- mean(Time[index]) - mean(Time[-index]) 47 | } 48 | 49 | ggplot() + geom_histogram(aes(result), bins = 8) + 50 | geom_vline(xintercept = observed, linetype = "dashed") 51 | 52 | (sum(result <= observed) + 1)/(N + 1) 53 | 54 | #--------- 55 | #Other statistics 56 | #Example 3.6 57 | #median 58 | observed <- median(TimeILEC) - median(TimeCLEC) 59 | N <- 10^4-1 60 | result <- numeric(N) 61 | for (i in 1:N) 62 | { 63 | index <- sample(1687, size = 1664, replace = FALSE) 64 | result[i] <- median(Time[index]) - median(Time[-index]) 65 | } 66 | (sum(result <= observed) + 1)/(N + 1) # P-value 67 | 68 | #trimmed mean 69 | #modifications to above 70 | observed <- (mean(TimeILEC, trim = .25) - 71 | mean(TimeCLEC, trim = .25)) 72 | #within for loop above, change to: 73 | result[i] <- (mean(Time[index], trim = .25) - 74 | mean(Time[-index], trim = .25)) 75 | 76 | 77 | #for proportion of time ILEC times > 10 78 | observed <- mean(TimeILEC > 10) - mean(TimeCLEC > 10) 79 | #and in the for loop, modify to 80 | result[i] <- mean(Time[index] > 10) - mean(Time[-index] > 10) 81 | 82 | #for ratio of variances 83 | observed <- var(TimeILEC) / var(TimeCLEC) 84 | result[i] <- var(Time[index]) / var(Time[-index]) 85 | 86 | #Recidivism case study 87 | #Example 3.8 88 | 89 | library(tidyr) 90 | data <- Recidivism %>% drop_na(Age25) %>% 91 | select(Age25, Recid) 92 | table(data$Age25) 93 | proportions(table(data$Age25, data$Recid), 1) 94 | 95 | Recid <- data$Recid # create vector 96 | observed <- .365 - .306 97 | N <- 10^4 - 1 98 | result <- numeric(N) 99 | for (i in 1:N) 100 | { 101 | index <- sample(17019, size = 3077, replace = FALSE) 102 | result[i] <- mean(Recid[index]=="Yes") - 103 | 
mean(Recid[-index]=="Yes")
104 | }
105 | 2 * (sum(result >= observed) + 1)/(N + 1)
106 | 
107 | #Example 3.9
108 | #Pew Research study on Faith among Black Americans
109 | pooled.data <- rep(c(1,0), c(1068, 1283)) # create vector
110 | observed <- (963/2094) - (105/257) # observed difference
111 | # (Mill-Gen Z)
112 | N <- 10^4 - 1
113 | result <- numeric(N)
114 | 
115 | for (i in 1:N)
116 | {
117 | index <- sample(2351, 2094, replace = FALSE)
118 | result[i] <- mean(pooled.data[index]) -
119 |   mean(pooled.data[-index])
120 | }
121 | 2 * (sum(result >= observed) + 1) / (N + 1)
122 | 
123 | #-----------------------------------------
124 | 
125 | #Section 3.4 Matched pairs
126 | #Diving
127 | Diff <- Diving2017$Final - Diving2017$Semifinal #difference in two scores
128 | observed <- mean(Diff) #mean of difference
129 | 
130 | N <- 10^5 - 1
131 | result <- numeric(N)
132 | 
133 | for (i in 1:N)
134 | {
135 | Sign <- sample(c(-1, 1), 12, replace = TRUE) #random vector of 1's or -1's
136 | Diff2 <- Sign*Diff #random pairs (a-b) -> (b-a)
137 | result[i] <- mean(Diff2) #mean of difference
138 | }
139 | 
140 | ggplot() + geom_histogram(aes(result), bins = 8) +
141 |   geom_vline(xintercept = observed, linetype = "dashed")
142 | 
143 | 2 * (sum(result >= observed) + 1) / (N + 1) #P-value
144 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c03_SolnExercise.R: --------------------------------------------------------------------------------
1 | #Chapter 3: Permutation tests
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 | 
6 | # Exercise 7 Checking different test statistics that will give same P-value
7 | N <- 10^4 - 1
8 | table(FlightDelays$Carrier)
9 | 
10 | FlightDelays %>% group_by(Carrier) %>% summarize(mean(Delay), sum(Delay))
11 | 
12 | #Optionally, using base R
13 | tapply(FlightDelays$Delay, FlightDelays$Carrier, mean)
14 | tapply(FlightDelays$Delay, FlightDelays$Carrier, sum)
15 | 
16 | observedSumUA <- 17949
17 | observedmeanUA <- 15.98308
18 | observedmeanDiff <- 15.98308 - 10.09738
19 | Delay <- FlightDelays$Delay # create the vector used in the loop below
20 | sumUA <- numeric(N)
21 | meanUA <- numeric(N)
22 | meanDiff <- numeric(N)
23 | set.seed(2)
24 | for (i in 1:N) {
25 | index <- sample(4029, 1123, replace = FALSE)
26 | sumUA[i] <- sum(Delay[index])
27 | meanUA[i] <- mean(Delay[index])
28 | meanDiff[i] <- mean(Delay[index]) - mean(Delay[-index])
29 | }
30 | 
31 | 2 * (sum(sumUA >= observedSumUA) + 1) / (N + 1) #P-value
32 | 
33 | 2 * (sum(meanUA >= observedmeanUA) + 1) / (N + 1) #P-value
34 | 
35 | 2 * (sum(meanDiff >= observedmeanDiff) + 1) / (N + 1) #P-value
36 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c04_SamplingDistributions.R: --------------------------------------------------------------------------------
1 | #Chapter 4
2 | #Sampling Distributions
3 | library(ggplot2) #needed for the plots below
4 | #Example 4.2
5 | #Draw 1000 random samples of size 100 from the exponential
6 | #distribution with lambda = 1/15
7 | Xbar <- numeric(1000) # space for results (vector of 0's)
8 | for (i in 1:1000)
9 | {
10 | x <- rexp(100, rate = 1/15) # draw random sample of size 100
11 | Xbar[i] <- mean(x) # compute mean, save in position i
12 | }
13 | 
14 | df <- data.frame(Xbar)
15 | ggplot(df, aes(Xbar)) + geom_histogram(bins = 10)
16 | ggplot(df, aes(sample = Xbar)) + geom_qq() + geom_qq_line()
17 | mean(Xbar)
18 | sd(Xbar)
19 | 
20 | #Example 4.3
21 | #Sampling distribution of max from Unif[0,1]
22 | 
23 | maxY <- numeric(1000)
24 | for (i in 1:1000)
25 | {
26 | y <- runif(12) # draw random sample of size 12
27 | maxY[i] <- max(y) # find max, save in position i
28 | }
29 | df <- data.frame(maxY)
30 | ggplot(df, aes(maxY)) + geom_histogram(bins = 10)
31 | 
32 | #----------------------------------------
33 | #Example 4.6
34 | #Sum of two values drawn from two different Poisson distributions
35 | X <- rpois(10^4, 5) # Draw 10^4 values from Pois(5)
36 | Y <- rpois(10^4, 12) # Draw 10^4 values from Pois(12)
37 | W <- X + Y
38 | 
39 | df1 <- data.frame(W)
40 | df2 <- data.frame(x = 2:35, y = dpois(2:35, 17))
41 | ggplot(df1, aes(W)) +
42 |   geom_histogram(aes(y = stat(density)), color = "white",
43 |     breaks = seq(2, 36, by = 2)) +
44 |   geom_line(data = df2, aes(x = x, y = y)) +
45 |   geom_point(data = df2, aes(x = x, y = y), pch = 1) + xlab("")
46 | 
47 | mean(W) #compare to theoretical, lambda = 17
48 | var(W)
49 | 
50 | #Example 4.7
51 | #Sampling distribution of mean of sample of size 30 from Gamma(5, 2)
52 | Xbar <- numeric(1000)
53 | for (i in 1:1000)
54 | {
55 | x <- rgamma(30, shape = 5, rate = 2)
56 | Xbar[i] <- mean(x)
57 | }
58 | 
59 | df <- data.frame(Xbar)
60 | ggplot(df, aes(x = Xbar)) +
61 |   geom_histogram(aes(y = stat(density)), color = "white", bins = 10) +
62 |   stat_function(fun = dnorm, args = list(mean = 5/2, sd = 0.204)) +
63 |   labs(x = "Means", y = "Density")
64 | ggplot(df, aes(sample = Xbar)) + geom_qq() + geom_qq_line()
65 | mean(Xbar)
66 | sd(Xbar)
67 | 
68 | #----------------------------------------------
69 | #Example 4.10
70 | #
71 | dbinom(25, 120, .3)
72 | pbinom(25, 120, .3)
73 | 
74 | 
75 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c05_Bootstrap.R: --------------------------------------------------------------------------------
1 | ##Chapter 5 Bootstrap
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 | 
6 | #Example 5.2: bootstrap a random sample of size 16 from Gamma(1, 1/2)
7 | gamSample <- rgamma(16, shape = 1, rate = 1/2) #draw the original sample (assumed; the script itself does not define gamSample)
8 | N <- 10^5
9 | mean.boot <- numeric(N)
10 | for (i in 1:N)
11 | {
12 | x <- sample(gamSample, 16, replace = TRUE) # draw resample
13 | mean.boot[i] <- mean(x) # compute mean, store in mean.boot
14 | }
15 | 
16 | mean(mean.boot)
17 | sd(mean.boot)
18 | 
19 | df <- data.frame(mean.boot)
20 | ggplot(df, aes(mean.boot)) +
21 |   geom_histogram(bins = 20, color = "white")
22 | 
23 | #-----------------
24 | #Example 5.3
25 | ggplot(Bangladesh, aes(Arsenic)) +
26 |   geom_histogram(bins = 10, color = "white")
27 | ggplot(Bangladesh, aes(sample = Arsenic)) +
28 |   geom_qq() + geom_qq_line()
29 | 
30 | Arsenic <- Bangladesh$Arsenic
31 | 
32 | n <- length(Arsenic)
33 | N <- 10^4
34 | mean.boot <- numeric(N)
35 | for (i in 1:N)
36 | {
37 | x <- sample(Arsenic, n, replace = TRUE)
38 | mean.boot[i] <- mean(x)
39 | }
40 | 
41 | df <- data.frame(mean.boot)
42 | ggplot(df, aes(mean.boot)) +
43 |   geom_histogram(bins = 15, color = "white") +
44 |   geom_vline(xintercept = mean(mean.boot), color = "red", lty = 2)
45 | ggplot(df, aes(sample = mean.boot)) + geom_qq() + geom_qq_line()
46 | 
47 | mean(mean.boot)
48 | mean(mean.boot) - mean(Arsenic)
49 | sd(mean.boot)
50 | 
51 | quantile(mean.boot, c(0.025, 0.975))
52 | 
53 | #----------------------------------
54 | #Example 5.4 Skateboarders
55 | testF <- Skateboard %>% filter(Experimenter == "Female") %>%
56 |   pull(Testosterone)
57 | testM <- Skateboard %>% filter(Experimenter == "Male") %>%
58 |   pull(Testosterone)
59 | 
60 | observed <- mean(testF) - mean(testM) #observed difference
61 | observed
62 | 
63 | nf <- length(testF) #sample size
64 | nm <- length(testM) #sample size
65 | 
66 | N <- 10^4
67 | mean.boot <- numeric(N)
68 | 
69 | for (i in 1:N)
70 | {
71 | resampleF <- sample(testF, nf, replace = TRUE)
72 | resampleM <- sample(testM, nm, replace = TRUE)
73 | mean.boot[i] <- mean(resampleF) - mean(resampleM)
74 | }
75 | 
76 | df <- data.frame(mean.boot)
77 | ggplot(df, aes(mean.boot)) +
78 |   geom_histogram(bins = 15, color = "white") +
79 |   geom_vline(xintercept = observed, color = "green", lty = 2)
80 | ggplot(df, aes(sample = mean.boot)) + geom_qq() + geom_qq_line()
81 | 
82 | mean(testF) - mean(testM)
83 | mean(mean.boot)
84 | sd(mean.boot)
85 | quantile(mean.boot, c(0.025, 0.975))
86 | mean(mean.boot) - (mean(testF) - mean(testM)) # bias
87 | 
88 | #-------------
89 | #Example 5.6
90 | #Verizon data
91 | 
92 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time)
93 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time)
94 | 
95 | observed <- mean(TimeILEC)/mean(TimeCLEC)
96 | observed
97 | 
98 | nILEC <- length(TimeILEC)
99 | nCLEC <- length(TimeCLEC)
100 | 
101 | N <- 10^4
102 | ratio.boot <- numeric(N)
103 | 
104 | for (i in 1:N)
105 | {
106 | resampleILEC <- sample(TimeILEC, nILEC, replace = TRUE)
107 | resampleCLEC <- sample(TimeCLEC, nCLEC, replace = TRUE)
108 | ratio.boot[i] <- mean(resampleILEC)/mean(resampleCLEC)
109 | }
110 | 
111 | df <- data.frame(ratio.boot)
112 | ggplot(df, aes(ratio.boot)) +
113 |   geom_histogram(bins = 15, color = "white") +
114 |   xlab("Ratio of means") +
115 |   geom_vline(xintercept = observed, lty = 2, color = "red") +
116 |   geom_vline(xintercept = mean(ratio.boot), lty = 3, color = "blue")
117 | 
118 | ggplot(df, aes(sample = ratio.boot)) +
119 |   geom_qq() + geom_qq_line()
120 | 
121 | mean(ratio.boot)
122 | sd(ratio.boot)
123 | quantile(ratio.boot, c(0.025, 0.975))
124 | mean(ratio.boot) - mean(TimeILEC)/mean(TimeCLEC)
125 | 
126 | #Example 5.7 Verizon continued
127 | #modifications to above for proportion of times the ILEC
128 | #delay time was greater than 24 hours
129 | N <- 10^4
130 | 
131 | prop.boot <- numeric(N)
132 | for (i in 1:N)
133 | {
134 | resampleILEC <- sample(TimeILEC, nILEC, replace = TRUE)
135 | prop.boot[i] <- mean(resampleILEC > 24)
136 | }
137 | 
138 | quantile(prop.boot, c(0.025, 0.975))
139 | #--------------------
140 | #Example 5.8
141 | #Faith among Black Americans
142 | genZ <- rep(c(1, 0), c(118, 139))
143 | genX <- rep(c(1, 0), c(965, 1510))
144 | 
145 | observed <- mean(genZ) - mean(genX) # observed diff.
146 | observed
147 | 
148 | N <- 10^4
149 | prop.boot <- numeric(N)
150 | for (i in 1:N)
151 | {
152 | resampleZ <- sample(genZ, 257, replace = TRUE)
153 | resampleX <- sample(genX, 2475, replace = TRUE)
154 | prop.boot[i] <- mean(resampleZ) - mean(resampleX)
155 | }
156 | 
157 | quantile(prop.boot, c(0.025, 0.975))
158 | 
159 | #----------------------------------------
160 | #Example 5.9
161 | #Relative risk
162 | 
163 | highbp <- rep(c(1, 0), c(55, 3283)) #high bp sample
164 | lowbp <- rep(c(1, 0), c(21, 2655)) #low bp sample
165 | 
166 | N <- 10^4
167 | rr.boot <- numeric(N)
168 | 
169 | for (i in 1:N)
170 | {
171 | resampleHigh <- sample(highbp, 3338, replace = TRUE)
172 | resampleLow <- sample(lowbp, 2676, replace = TRUE)
173 | 
174 | rr.boot[i] <- mean(resampleHigh)/mean(resampleLow) #rel.
175 | #risk 176 | } 177 | 178 | quantile(rr.boot, c(0.025, 0.975)) -------------------------------------------------------------------------------- /Edition3/RScripts/c07_MoreConfidenceIntervals.R: -------------------------------------------------------------------------------- 1 | #Chapter 7 Classical confidence intervals 2 | library(resampledata3) 3 | library(dplyr) 4 | library(ggplot2) 5 | 6 | #Example 7.1 7 | #Confidence intervals of mean of samples of size 30 drawn from N(25, 4) 8 | counter <- 0 # set counter to 0 9 | df <- data.frame(x = c(22, 28), y = c(1,100)) 10 | p <- ggplot(df, aes(x = x, y = y)) + geom_vline(xintercept = 25) 11 | 12 | for (i in 1:1000) 13 | { 14 | x <- rnorm(30, 25, 4) # draw a random sample of size 30 15 | L <- mean(x) - 1.96*4/sqrt(30) # lower limit 16 | U <- mean(x) + 1.96*4/sqrt(30) # upper limit 17 | if (L < 25 && 25 < U) # check if 25 is in interval 18 | counter <- counter + 1 # if yes, increase counter by 1 19 | if (i <= 100) # plot first 100 intervals 20 | p <- p + annotate("segment", x = L, xend = U, y = i, yend = i) 21 | } 22 | 23 | p 24 | counter/1000 # proportion of times interval contains mu. 25 | 26 | #------------------------------------ 27 | #Section 7.1.2 28 | #Confidence intervals for mean of samples drawn from normal 29 | #distribution, mean and variance unknown 30 | N <- 10^4 31 | w <- numeric(N) 32 | n <- 15 # sample size 33 | for (i in 1:N) 34 | { 35 | x <- rnorm(n, 25, 7) # draw 15 from N(25, 7^2) 36 | xbar <- mean(x) 37 | s <- sd(x) 38 | w[i] <- (xbar-25) / (s/sqrt(n)) 39 | } 40 | 41 | df <- data.frame(w) 42 | ggplot(df, aes(sample = w)) + geom_qq(size = .8) + 43 | geom_qq_line() 44 | 45 | #----------------------------------- 46 | #Example 7.5 47 | pt(2.8, 27) 48 | qt(0.95, 27) 49 | 50 | #------------------------------------ 51 | #Example 7.6 52 | girls <- NCBirths2004 %>% filter(Gender == "Female") %>% 53 | pull(Weight) 54 | t.test(girls, conf.level = .99)$conf 55 | 56 | #---------------------------------------------- 57 | #Example 7.7 58 | #Samples from right-skewed Gamma(5,2) 59 | tooLow <- 0 # set counter to 0 60 | tooHigh <- 0 # set counter to 0 61 | n <- 20 # sample size 62 | q <- qt(0.975, n-1) # quantile 63 | N <- 10^5 64 | for (i in 1:N) 65 | { 66 | x <- rgamma(n, shape = 5, rate = 2) 67 | xbar <- mean(x) 68 | s <- sd(x) 69 | L <- xbar - q*s/sqrt(n) 70 | U <- xbar + q*s/sqrt(n) 71 | if (U < 5/2) # Does right endpt miss 5/2? 72 | tooLow <- tooLow + 1 # If yes, increase counter 73 | if (5/2 < L) # Does left endpt miss 5/2? 
74 | tooHigh <- tooHigh + 1 # If yes, increase counter
75 | }
76 | tooLow/N
77 | tooHigh/N
78 | 
79 | #-------------------------------------------
80 | #Example 7.8
81 | t.test(Response ~ Treatment, data = Reading)$conf
82 | 
83 | #------------------------------------------
84 | #Example 7.14
85 | t.test(NCBirths2004$Weight, alt = "greater")$conf
86 | 
87 | #-----------------------------------------
88 | #Example 7.17
89 | prop.test(1385, 2193, conf.level = .9)$conf
90 | 
91 | prop.test(1385, 2193, conf.level = .9, alt = "greater")$conf
92 | 
93 | #----------------------------------------
94 | #Example 7.20
95 | 
96 | prop.test(c(172, 223), c(674, 676))$conf
97 | 
98 | #---------------------------------------
99 | #Example 7.21
100 | #Bootstrap t confidence interval
101 | Arsenic <- Bangladesh$Arsenic
102 | xbar <- mean(Arsenic)
103 | N <- 10^4
104 | n <- length(Arsenic)
105 | Tstar <- numeric(N)
106 | for (i in 1:N)
107 | {
108 | x <- sample(Arsenic, size = n, replace = TRUE)
109 | Tstar[i] <- (mean(x) - xbar) / (sd(x)/sqrt(n))
110 | }
111 | quantile(Tstar, c(0.025, 0.975))
112 | 
113 | xbar - quantile(Tstar, c(0.975, 0.025)) * sd(Arsenic)/sqrt(n)
114 | 
115 | #--------------------------------------------
116 | #Example 7.22
117 | #Bootstrap t CI for difference in means
118 | TimeILEC <- Verizon %>% filter(Group == "ILEC") %>% pull(Time)
119 | TimeCLEC <- Verizon %>% filter(Group == "CLEC") %>% pull(Time)
120 | 
121 | thetahat <- mean(TimeILEC) - mean(TimeCLEC)
122 | nx <- length(TimeILEC) # nx=1664
123 | ny <- length(TimeCLEC) # ny=23
124 | SE <- sqrt(var(TimeILEC)/nx + var(TimeCLEC)/ny)
125 | 
126 | N <- 10^4
127 | Tstar <- numeric(N)
128 | for (i in 1:N)
129 | {
130 | bootx <- sample(TimeILEC, nx, replace = TRUE)
131 | booty <- sample(TimeCLEC, ny, replace = TRUE)
132 | Tstar[i] <- (mean(bootx) - mean(booty) - thetahat) /
133 |   sqrt(var(bootx)/nx + var(booty)/ny)
134 | }
135 | thetahat - quantile(Tstar, c(.975, .025)) * SE
136 | t.test(TimeILEC, TimeCLEC)$conf # for comparison
137 | 
138 | #---------------------------------------------
139 | #Example 7.23
140 | #Bootstrap t with estimated standard errors (iterated bootstrap)
141 | Arsenic <- Bangladesh$Arsenic
142 | estimate <- mean(Arsenic, trim = 0.25) # 35.95985
143 | 
144 | N <- 10^4 # outer loop
145 | N2 <- 10^2 # inner loop
146 | n <- length(Arsenic)
147 | Tstar <- numeric(N)
148 | estimateStar <- numeric(N)
149 | seStar <- numeric(N)
150 | 
151 | for (i in 1:N)
152 | {
153 | x <- sample(Arsenic, size = n, replace = TRUE)
154 | 
155 | # Inner loop to estimate standard error based on x
156 | estimate2 <- numeric(N2)
157 | for (j in 1:N2)
158 | {
159 | x2 <- sample(x, size = n, replace = TRUE)
160 | estimate2[j] <- mean(x2, trim = 0.25)
161 | }
162 | 
163 | estimateStar[i] <- mean(x, trim = 0.25)
164 | seStar[i] <- sd(estimate2)
165 | Tstar[i] <- (estimateStar[i] - estimate) / seStar[i]
166 | }
167 | 
168 | 
169 | sd(estimateStar) # Standard error
170 | quantile(Tstar, c(0.025, 0.975))
171 | # Bootstrap t interval
172 | estimate - quantile(Tstar, c(.975, .025)) * sd(estimateStar)
173 | 
174 | #Ordinary t interval with bootstrap SE
175 | estimate + qt(c(0.025, 0.975), n-1) * sd(estimateStar)
176 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c08_MoreHypothesisTests.R: --------------------------------------------------------------------------------
1 | #Chapter 8 More Hypothesis Tests
2 | library(resampledata3)
3 | library(ggplot2)
4 | library(dplyr)
5 | 
6 | #Example 8.4
7 | t.test(Bangladesh$Arsenic, mu = 100, alt = "greater")
8 | 
9 | #Bootstrap t test approach
10 | Arsenic <- Bangladesh$Arsenic
11 | observedT <- t.test(Arsenic, mu = 100)$statistic
12 | xbar <- mean(Arsenic)
13 | n <- length(Arsenic)
14 | N <- 10^5
15 | Tstar <- numeric(N)
16 | 
17 | for (i in 1:N)
18 | {
19 | bootx <- sample(Arsenic, n, replace = TRUE)
20 | Tstar[i] <- (mean(bootx) - xbar)/(sd(bootx)/sqrt(n))
21 | }
22 | 
23 | (sum(Tstar >= observedT) + 1)/(N + 1)
24 | 
25 | 
26 | #------------------------------------------
27 | #Example 8.5
28 | #Comparing two means
29 | t.test(Weight ~ Smoker, data = NCBirths2004, alt = "greater")
30 | 
31 | #-------------------------------------------
32 | #Example 8.6
33 | prop.test(c(108, 51), c(143, 119))
34 | 
35 | #-------------------------------------------
36 | #Example 8.15
37 | sum(dbinom(5:8, 8, 0.3185))
38 | 1 - pbinom(4, 8, 0.3185) #same
39 | 
40 | #---------------------------------------------
41 | #Example 8.19
42 | binom.test(7, 21, 0.5)
43 | 
44 | pbinom(7, 21, 0.5696755)
45 | 1 - pbinom(6, 21, 0.1458769)
46 | 
47 | #-----------------------------
-------------------------------------------------------------------------------- /Edition3/RScripts/c09_Regression.R: --------------------------------------------------------------------------------
1 | #Chapter 9
2 | #Regression
3 | library(resampledata3)
4 | library(ggplot2)
5 | library(dplyr)
6 | 
7 | #Section 9.2
8 | #base R
9 | cor(Spruce$Ht.change, Spruce$Di.change)
10 | #dplyr package
11 | Spruce %>% summarize(correlation = cor(Ht.change, Di.change))
12 | 
13 | #ggplot2 package
14 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point()
15 | #base R
16 | plot(Di.change ~ Ht.change, data = Spruce)
17 | 
18 | #-------------------------------------------------
19 | #Example 9.3
20 | 
21 | spruce.lm <- lm(Di.change ~ Ht.change, data = Spruce)
22 | spruce.lm
23 | 
24 | ggplot(Spruce, aes(x = Ht.change, y = Di.change)) + geom_point() +
25 |   geom_smooth(method = lm, se = FALSE)
26 | 
27 | fitted(spruce.lm)
28 | predict(spruce.lm) #same
29 | 
30 | (nrow(Spruce) - 1) * var(Spruce$Ht.change)
31 | 
32 | #-----------------------------------------------
33 | #Section 9.3
34 | Spruce$Residuals <- resid(spruce.lm)
35 | ggplot(Spruce, aes(x = Ht.change, y = Residuals)) +
36 |   geom_point() + geom_hline(yintercept = 0) +
37 |   geom_smooth(method = "loess", se = FALSE, span = 2)
38 | 
39 | #----------------------------------------------
40 | #Example 9.8
41 | skate.lm <- lm(Free ~ Short, data = Skating2010)
42 | summary(skate.lm)
43 | 
44 | #Section 9.5
45 | #Bootstrapping correlation, slope, intercept, and predicted value
46 | 
47 | N <- 10^4
48 | cor.boot <- numeric(N)
49 | beta.boot <- numeric(N)
50 | alpha.boot <- numeric(N)
51 | yPred.boot <- numeric(N)
52 | n <- nrow(Skating2010) # number of skaters = 24
53 | for (i in 1:N)
54 | {
55 | index <- sample(n, replace = TRUE) # sample from 1,2,...,n
56 | Skate.boot <- Skating2010[index, ] # resampled data
57 | 
58 | cor.boot[i] <- cor(Skate.boot$Short, Skate.boot$Free)
59 | 
60 | #recalculate linear model estimates
61 | skateBoot.lm <- lm(Free ~ Short, data = Skate.boot)
62 | alpha.boot[i] <- coef(skateBoot.lm)[1] # new intercept
63 | beta.boot[i] <- coef(skateBoot.lm)[2] # new slope
64 | yPred.boot[i] <- alpha.boot[i] + 60 * beta.boot[i]
65 | }
66 | 
67 | mean(cor.boot)
68 | sd(cor.boot)
69 | quantile(cor.boot, c(.025, .975))
70 | 
71 | observed <- cor(Skating2010$Short, Skating2010$Free)
72 | 
73 | df <- data.frame(cor.boot, beta.boot, alpha.boot, yPred.boot)
74 | 
75 | ggplot(df, aes(x = cor.boot)) +
76 |   geom_histogram(bins = 20, color
= "white") + 77 | geom_vline(xintercept = observed, color = "red", lty = 2) 78 | 79 | #-------------------------------------------- 80 | #Section 9.5.1 Permutation Tests 81 | 82 | N <- 9999 83 | n <- nrow(Skating2010) # number of observations 84 | result <- numeric(N) 85 | observed <- cor(Skating2010$Short, Skating2010$Free) 86 | for (i in 1:N) 87 | { 88 | index <- sample(n, replace=FALSE) 89 | Short.permuted <- Skating2010$Short[index] 90 | result[i] <- cor(Short.permuted, Skating2010$Free) 91 | } 92 | (sum(observed <= result) + 1) / (N + 1) # P-value 93 | 94 | #---------------------------------------------- 95 | #Example 9.12 96 | #Fatalities data 97 | glm(Alcohol ~ Age, data = Fatalities, family = binomial) 98 | f <- function(x){exp(-0.123-0.029*x)/(1+exp(-0.123-0.029*x))} 99 | 100 | ggplot(Fatalities, aes(x = Age, y = Alcohol)) + geom_point() + 101 | stat_function(fun = f) 102 | 103 | #alternative way to define f 104 | f <- function(x){plogis(-0.123 - 0.029*x)} 105 | 106 | #------------------------------------------ 107 | #Section 9.6 108 | #Inference for logistic regression 109 | fit <- glm(Alcohol ~ Age, data = Fatalities, 110 | family = binomial) 111 | data.class(fit) # is a "glm" object, so for help use: 112 | help(glm) 113 | 114 | fit # prints the coefficients and other basic info 115 | coef(fit) # the coefficients as a vector 116 | summary(fit) # gives standard errors for coefficients, etc. 117 | 118 | 119 | # Full bootstrap - slope coeff. and prediction at age 20 120 | N <- 10^3 121 | n <- nrow(Fatalities) # number of observations 122 | alpha.boot <- numeric(N) 123 | beta.boot <- numeric(N) 124 | pPred.boot <- numeric(N) 125 | 126 | for (i in 1:N) 127 | { 128 | index <- sample(n, replace = TRUE) 129 | Fatal.boot <- Fatalities[index, ] # resampled data 130 | 131 | fit.boot <- glm(Alcohol ~ Age, data = Fatal.boot, 132 | family = binomial) 133 | alpha.boot[i] <- coef(fit.boot)[1] # new intercept 134 | beta.boot[i] <- coef(fit.boot)[2] # new slope 135 | pPred.boot[i] <- plogis(alpha.boot[i] + 20 * beta.boot[i]) 136 | } 137 | 138 | quantile(beta.boot, c(.025, .975)) # 95% percentile CI 139 | df <- data.frame(alpha.boot, beta.boot, pPred.boot) 140 | ggplot(df, aes(x = beta.boot)) + 141 | geom_histogram(bins = 20, color = "white") 142 | ggplot(df, aes(sample = beta.boot)) + geom_qq() + geom_qq_line() 143 | 144 | -------------------------------------------------------------------------------- /Edition3/RScripts/c10_CategoricalData.R: -------------------------------------------------------------------------------- 1 | #Chapter 10 2 | #Categorical data 3 | library(resampledata3) 4 | library(ggplot2) 5 | library(dplyr) 6 | 7 | #Section 10.2 Permutation Test of Independence 8 | chisq.test(GSS2018$Degree, GSS2018$DeathPenalty, simulate.p.value = TRUE, B = 10^5-1) 9 | mat <- table(GSS2018$Degree, GSS2018$DeathPenalty) 10 | chisq.test(mat, simulate.p.value = TRUE, B = 10^5-1) 11 | 12 | #Section 10.3 13 | 1 - pchisq(50.449, 4) 14 | 15 | chisq.test(GSS2018$Degree, GSS2018$DeathPenalty) 16 | 17 | mat <- rbind(c(42, 50), c(30, 87)) 18 | chisq.test(mat) 19 | fisher.test(mat) 20 | 21 | #Section 10.4 Test of Homogeneity 22 | candy.mat <- rbind(c(42, 20, 38), c(33, 27, 50)) 23 | candy.mat 24 | 25 | chisq.test(candy.mat) 26 | 27 | #Section 10.5 28 | qchisq(c(.2, .4, .6, .8), 10) 29 | 30 | 31 | Homeruns <- Phillies2009$Homeruns 32 | 33 | lambda <- mean(Homeruns) 34 | dpois(0:4, lambda) 35 | table(Homeruns) 36 | 37 | table(Homeruns)/162 38 | 
-------------------------------------------------------------------------------- /Edition3/RScripts/c10_PermTestIndependence.R: --------------------------------------------------------------------------------
1 | #------------------------------------------------
2 | #Chapter 10 Categorical data
3 | #Implementation of the permutation test of independence
4 | #This function computes the chi-square
5 | #test statistic
6 | 
7 | library(ggplot2) #needed for the histogram below
8 | chisq <- function(observed, print = TRUE) {
9 | # Chi-square statistic for independence in a contingency table,
10 | # with related data exploration.
11 | # observed is the observed contingency table
12 | 
13 | observedWithTotals <- cbind(observed, total = rowSums(observed))
14 | observedWithTotals <- rbind(observedWithTotals, total = colSums(observedWithTotals))
15 | expected <- outer(rowSums(observed), colSums(observed)) / sum(observed)
16 | statistic <- sum((observed - expected)^2/expected)
17 | if (print)
18 | {
19 | cat("Observed, with totals:\n")
20 | print(observedWithTotals)
21 | cat("\nRow Fractions:\n")
22 | print(round(observed / rowSums(observed), 3))
23 | cat("\nColumn Fractions:\n")
24 | print(round(observed / rep(colSums(observed), each = nrow(observed)), 3))
25 | 
26 | cat("\nExpected:\n")
27 | print(round(expected, 1))
28 | cat("\nDifference:\n")
29 | print(round(observed - expected, 1))
30 | 
31 | cat("\nChi-squared statistic for independence:", round(statistic, 1), "\n")
32 | }
33 | return(invisible(statistic))
34 | }
35 | 
36 | #-------------------------------------------
37 | 
38 | #We use this function on the contingency table for Degree and
39 | #DeathPenalty
40 | #set.seed(200)
41 | library(resampledata3)
42 | observed <- chisq(table(GSS2018$Degree, GSS2018$DeathPenalty))
43 | observed
44 | 
45 | #Now, there were 155 people who declined to respond to the
46 | #death penalty question, so we will remove these observations from our
47 | #analysis.
48 | 
49 | #We will use the drop_na() command from the tidyr package. The
50 | #command below will create a data frame with variables Degree and DeathPenalty,
51 | #removing any rows with an NA in either variable (though in this case, only
52 | #the death penalty variable has missing values).
53 | 
54 | library(tidyr)
55 | df <- drop_na(GSS2018, Degree, DeathPenalty)
56 | #The sample(df$DeathPenalty) command below permutes the
57 | #values in DeathPenalty
58 | N <- 10^5 - 1
59 | result <- numeric(N)
60 | for (i in 1:N)
61 | {
62 | DP.permuted <- sample(df$DeathPenalty)
63 | GSS.table <- table(df$Degree, DP.permuted)
64 | result[i] <- chisq(GSS.table, print = FALSE) #suppress printing inside the loop
65 | }
66 | 
67 | ggplot() + geom_histogram(aes(x = result)) +
68 |   geom_vline(xintercept = observed, lty = 2)
69 | 
70 | #Check the distribution of the test statistics to help in determining
71 | #the direction of the inequality when computing the $P$-value.
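72 | 
73 | #Optional: a minimal sketch of the P-value computation; larger values
74 | #of the chi-square statistic are the more extreme ones here.
75 | (sum(result >= observed) + 1)/(N + 1)
76 | 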
72 |  73 | -------------------------------------------------------------------------------- /Edition3/RScripts/c11_Bayes.R: -------------------------------------------------------------------------------- 1 | #Chapter 11 2 | #Bayesian methods 3 | library(resampledata3) 4 | library(ggplot2) 5 | library(dplyr) 6 | 7 | #Example 11.1 8 | theta <- seq(0, 1, by = .1) 9 | prior <- c(0, .02, .03, .05, .1, .15, .2, .25, .15, .05, 0) 10 | likelihood <- theta * (1 - theta)^2 11 | constant <- sum(prior * likelihood) 12 | posterior <- prior * likelihood / constant 13 | posterior 14 | sum(theta * prior) # prior mean 15 | sum(theta * posterior) # posterior mean 16 | 17 | #continued 18 | likelihood2 <- theta^3 * (1 - theta)^5 # 3 successes, 5 failures 19 | constant2 <- sum(prior * likelihood2) 20 | posterior2 <- prior * likelihood2 / constant2 21 | posterior2 22 | 23 | likelihood3 <- theta^2 * (1 - theta)^3 24 | constant3 <- sum(posterior * likelihood3) 25 | posterior3 <- posterior * likelihood3 / constant3 26 | posterior3 # not shown, same as posterior2 27 | sum(theta * posterior2) # posterior mean 28 | df <- data.frame(theta, prior, posterior, posterior2) # gather the values for plotting 29 | ggplot(df, aes(x = theta, y = prior)) + 30 | geom_point() + geom_line(lty = 1) + 31 | geom_point(aes(y = posterior)) + 32 | geom_line(aes(y = posterior), lty = 2) + 33 | geom_point(aes(y = posterior2)) + 34 | geom_line(aes(y = posterior2), lty = 3) 35 | 36 | #---------------------------------------------------- 37 | #Example 11.3 38 | qbeta(.025, 111, 91) # lower limit of 95% posterior credible interval 39 | qbeta(.975, 111, 91) # upper limit of 95% posterior credible interval 40 | 1 - pbeta(.5, 111, 91) # posterior probability that theta > 0.5 41 | 42 | ggplot(data.frame(x = c(0,1)), aes(x = x)) + 43 | stat_function(fun = dbeta, aes(lty = "2"), 44 | args = list(shape1 = 1, shape2 = 1)) + 45 | stat_function(fun = dbeta, aes(lty = "1"), 46 | args = list(shape1 = 111, shape2 = 91)) + 47 | scale_linetype_manual(values = c("2" = 2, "1" = 1), 48 | labels = c("Posterior", "Prior"), 49 | guide = guide_legend(reverse = TRUE)) + 50 | scale_x_continuous(breaks = seq(0, 1, by = .2)) + 51 | labs(x = "", y = "Density") + 52 | theme(legend.title = element_blank(), 53 | legend.position = c(.1, .85), 54 | legend.key = element_blank()) 55 | 56 | #------------------------------------------- 57 | #Section 11.5 Sequential data 58 | 59 | n <- c(1874, 1867, 1871, 1868, 1875, 1875) 60 | X <- c(52, 41, 55, 49, 39, 39) 61 | alpha <- X # vector of posterior parameters 62 | beta <- n - X # vector of posterior parameters 63 | N <- 10^5 # replications 64 | theta <- matrix(0.0, nrow = N, ncol = 6) 65 | for (j in 1:6) 66 | { 67 | theta[, j] <- rbeta(N, alpha[j], beta[j]) 68 | } 69 | probBest <- numeric(6) # vector for results 70 | best <- apply(theta, 1, max) # maximum of each row 71 | for (j in 1:6) 72 | { 73 | probBest[j] <- mean(theta[, j] == best) 74 | } 75 | 76 | #probBest contains probabilities of each of the six arms 77 | #being best 78 | 79 | df <- data.frame(theta[1:10^4,]) 80 | names(df) 81 | ggplot(df, aes(x = X1, y = X3)) + geom_point(size = .5) + 82 | geom_abline(slope = 1, intercept = 0) + 83 | annotate("text", x = 0.037, y = 0.042, parse = TRUE, 84 | label = "theta[3] > theta[1]") + 85 | annotate("text", x = 0.042, y = 0.037, parse = TRUE, 86 | label = "theta[1] > theta[3]") + 87 | labs(x = expression(theta[1]), y = expression(theta[3])) 88 | 89 | #---------------------------------------- 90 | probBest 91 | # -------------------------------------------------------------------------------- /Edition3/RScripts/c12_ANOVA.R: -------------------------------------------------------------------------------- 1 | #Chapter 12 ANOVA 2 | library(resampledata3) 3 | 
library(ggplot2) 4 | library(dplyr) 5 | 6 | #Example 12.1 7 | anova(lm(Weight ~ MothersAge, data = ILBoys)) 8 | anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] # extract the F statistic 9 | 10 | summary(aov(Weight ~ MothersAge, data = ILBoys)) # same 11 | 12 | #Section 12.1.2 Permutation Test Approach 13 | #Checking the normality condition 14 | ggplot(ILBoys, aes(sample = Weight)) + geom_qq() + 15 | geom_qq_line() + facet_wrap(. ~ MothersAge) 16 | 17 | #Permutation test 18 | observed <- anova(lm(Weight ~ MothersAge, data = ILBoys))$F[1] 19 | n <- length(ILBoys$Weight) 20 | N <- 10^4 - 1 21 | results <- numeric(N) 22 | for (i in 1:N) 23 | { 24 | index <- sample(n) 25 | Wt.perm <- ILBoys$Weight[index] 26 | results[i] <- anova(lm(Wt.perm ~ MothersAge, data = ILBoys))$F[1] 27 | } 28 | 29 | (sum(results >= observed) + 1) / (N + 1) # P-value 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mathematical Statistics with Resampling and R 2 | 3 | Data sets, R code, supplementary materials, and errata for the textbook 4 | *Mathematical Statistics with Resampling and R* 5 | by 6 | [Laura Chihara](https://lchihara.people.sites.carleton.edu) 7 | and 8 | [Tim Hesterberg](https://www.timhesterberg.net). 9 | 10 | 11 | Current: [Third Edition (2022)](Edition3) 12 | 13 | 14 | Older: 15 | [Second Edition (2018)](Edition2), 16 | [First Edition (2011)](Edition1). 17 | -------------------------------------------------------------------------------- /readme-MathStatsResamplingR.txt: -------------------------------------------------------------------------------- 1 | PLEASE IGNORE THIS FILE. 2 | It contains working notes by Chihara and Hesterberg. 3 | 4 | Some .pdf and .R files listed here are compiled from .tex or .Rmd files in 5 | MathStatsTextbook/trunk/StudentWebMaterials/ 6 | 7 | Some .R files are copied (and possibly edited) from one of 8 | MathStatsTextbook/trunk/R/ 9 | MathStatsTextbook/trunk/StudentWebMaterials/ 10 | --------------------------------------------------------------------------------