├── Data ├── Diabetes.csv ├── bikesharing_test.csv ├── bikesharing_train.csv ├── daily-total-female-births.csv └── iris_all.csv ├── Notebooks ├── 01-XGBoost_BikeRental_Data_Preparation.ipynb ├── 02-XGBoost_Regression_BikeRental.ipynb ├── 03-XGBoost_Binary_Classification_Diabetes_Dataset.ipynb ├── 04-XGBoost_Course_Prepare_Iris_Dataset.ipynb ├── 05-XGBoost_Course_Multiclass_Classification_Iris_Dataset.ipynb ├── 06-XGBoost-TimeSeries.ipynb ├── 07-XGBoost_Feature_Importance_Selection_Diabetes_Dataset.ipynb ├── 08-XGBoost_Hyperparameter_Tuning_Diabetes_Dataset.ipynb ├── 09-AWS_XGBoost_Train_Host_Predict.ipynb └── 10-AWS_XGBoost_Invoke_Endpoint_Predict.ipynb └── README.md /Data/Diabetes.csv: -------------------------------------------------------------------------------- 1 | Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome 2 | 6,148.0,72,35,,33.6,0.627,50.0,1 3 | 1,85.0,66,29,,26.6,0.35100000000000003,31.0,0 4 | 8,183.0,64,0,,23.3,0.672,32.0,1 5 | 1,89.0,66,23,94.0,28.1,0.16699999999999998,21.0,0 6 | 0,137.0,40,35,168.0,43.1,2.2880000000000003,33.0,1 7 | 5,116.0,74,0,,25.6,0.201,30.0,0 8 | 3,78.0,50,32,88.0,31.0,0.248,26.0,1 9 | 10,115.0,0,0,,35.3,0.134,29.0,0 10 | 2,197.0,70,45,543.0,30.5,0.158,53.0,1 11 | 8,125.0,96,0,,,0.23199999999999998,54.0,1 12 | 4,110.0,92,0,,37.6,0.191,30.0,0 13 | 10,168.0,74,0,,38.0,0.537,34.0,1 14 | 10,139.0,80,0,,27.1,1.4409999999999998,57.0,0 15 | 1,189.0,60,23,846.0,30.1,0.39799999999999996,59.0,1 16 | 5,166.0,72,19,175.0,25.8,0.5870000000000001,51.0,1 17 | 7,100.0,0,0,,30.0,0.484,32.0,1 18 | 0,118.0,84,47,230.0,45.8,0.551,31.0,1 19 | 7,107.0,74,0,,29.6,0.254,31.0,1 20 | 1,103.0,30,38,83.0,43.3,0.183,33.0,0 21 | 1,115.0,70,30,96.0,34.6,0.529,32.0,1 22 | 3,126.0,88,41,235.0,39.3,0.7040000000000001,27.0,0 23 | 8,99.0,84,0,,35.4,0.38799999999999996,50.0,0 24 | 7,196.0,90,0,,39.8,0.451,41.0,1 25 | 9,119.0,80,35,,29.0,0.263,29.0,1 26 | 11,143.0,94,33,146.0,36.6,0.254,51.0,1 27 | 10,125.0,70,26,115.0,31.1,0.205,41.0,1 28 | 7,147.0,76,0,,39.4,0.257,43.0,1 29 | 1,97.0,66,15,140.0,23.2,0.48700000000000004,22.0,0 30 | 13,145.0,82,19,110.0,22.2,0.245,57.0,0 31 | 5,117.0,92,0,,34.1,0.337,38.0,0 32 | 5,109.0,75,26,,36.0,0.546,60.0,0 33 | 3,158.0,76,36,245.0,31.6,0.851,28.0,1 34 | 3,88.0,58,11,54.0,24.8,0.267,22.0,0 35 | 6,92.0,92,0,,19.9,0.188,28.0,0 36 | 10,122.0,78,31,,27.6,0.512,45.0,0 37 | 4,103.0,60,33,192.0,24.0,0.966,33.0,0 38 | 11,138.0,76,0,,33.2,0.42,35.0,0 39 | 9,102.0,76,37,,32.9,0.665,46.0,1 40 | 2,90.0,68,42,,38.2,0.503,27.0,1 41 | 4,111.0,72,47,207.0,37.1,1.39,56.0,1 42 | 3,180.0,64,25,70.0,34.0,0.271,26.0,0 43 | 7,133.0,84,0,,40.2,0.696,37.0,0 44 | 7,106.0,92,18,,22.7,0.235,48.0,0 45 | 9,171.0,110,24,240.0,45.4,0.721,54.0,1 46 | 7,159.0,64,0,,27.4,0.294,40.0,0 47 | 0,180.0,66,39,,42.0,1.893,25.0,1 48 | 1,146.0,56,0,,29.7,0.564,29.0,0 49 | 2,71.0,70,27,,28.0,0.586,22.0,0 50 | 7,103.0,66,32,,39.1,0.344,31.0,1 51 | 7,105.0,0,0,,,0.305,24.0,0 52 | 1,103.0,80,11,82.0,19.4,0.491,22.0,0 53 | 1,101.0,50,15,36.0,24.2,0.526,26.0,0 54 | 5,88.0,66,21,23.0,24.4,0.342,30.0,0 55 | 8,176.0,90,34,300.0,33.7,0.467,58.0,1 56 | 7,150.0,66,42,342.0,34.7,0.718,42.0,0 57 | 1,73.0,50,10,,23.0,0.248,21.0,0 58 | 7,187.0,68,39,304.0,37.7,0.254,41.0,1 59 | 0,100.0,88,60,110.0,46.8,0.9620000000000001,31.0,0 60 | 0,146.0,82,0,,40.5,1.781,44.0,0 61 | 0,105.0,64,41,142.0,41.5,0.17300000000000001,22.0,0 62 | 2,84.0,0,0,,,0.304,21.0,0 63 | 8,133.0,72,0,,32.9,0.27,39.0,1 64 | 5,44.0,62,0,,25.0,0.5870000000000001,36.0,0 65 | 
2,141.0,58,34,128.0,25.4,0.6990000000000001,24.0,0 66 | 7,114.0,66,0,,32.8,0.258,42.0,1 67 | 5,99.0,74,27,,29.0,0.203,32.0,0 68 | 0,109.0,88,30,,32.5,0.855,38.0,1 69 | 2,109.0,92,0,,42.7,0.845,54.0,0 70 | 1,95.0,66,13,38.0,19.6,0.33399999999999996,25.0,0 71 | 4,146.0,85,27,100.0,28.9,0.18899999999999997,27.0,0 72 | 2,100.0,66,20,90.0,32.9,0.867,28.0,1 73 | 5,139.0,64,35,140.0,28.6,0.41100000000000003,26.0,0 74 | 13,126.0,90,0,,43.4,0.583,42.0,1 75 | 4,129.0,86,20,270.0,35.1,0.231,23.0,0 76 | 1,79.0,75,30,,32.0,0.396,22.0,0 77 | 1,,48,20,,24.7,0.14,22.0,0 78 | 7,62.0,78,0,,32.6,0.391,41.0,0 79 | 5,95.0,72,33,,37.7,0.37,27.0,0 80 | 0,131.0,0,0,,43.2,0.27,26.0,1 81 | 2,112.0,66,22,,25.0,0.307,24.0,0 82 | 3,113.0,44,13,,22.4,0.14,22.0,0 83 | 2,74.0,0,0,,,0.102,22.0,0 84 | 7,83.0,78,26,71.0,29.3,0.767,36.0,0 85 | 0,101.0,65,28,,24.6,0.237,22.0,0 86 | 5,137.0,108,0,,48.8,0.22699999999999998,37.0,1 87 | 2,110.0,74,29,125.0,32.4,0.698,27.0,0 88 | 13,106.0,72,54,,36.6,0.17800000000000002,45.0,0 89 | 2,100.0,68,25,71.0,38.5,0.324,26.0,0 90 | 15,136.0,70,32,110.0,37.1,0.153,43.0,1 91 | 1,107.0,68,19,,26.5,0.165,24.0,0 92 | 1,80.0,55,0,,19.1,0.258,21.0,0 93 | 4,123.0,80,15,176.0,32.0,0.44299999999999995,34.0,0 94 | 7,81.0,78,40,48.0,46.7,0.261,42.0,0 95 | 4,134.0,72,0,,23.8,0.27699999999999997,60.0,1 96 | 2,142.0,82,18,64.0,24.7,0.7609999999999999,21.0,0 97 | 6,144.0,72,27,228.0,33.9,0.255,40.0,0 98 | 2,92.0,62,28,,31.6,0.13,24.0,0 99 | 1,71.0,48,18,76.0,20.4,0.32299999999999995,22.0,0 100 | 6,93.0,50,30,64.0,28.7,0.35600000000000004,23.0,0 101 | 1,122.0,90,51,220.0,49.7,0.325,31.0,1 102 | 1,163.0,72,0,,39.0,1.222,33.0,1 103 | 1,151.0,60,0,,26.1,0.179,22.0,0 104 | 0,125.0,96,0,,22.5,0.262,21.0,0 105 | 1,81.0,72,18,40.0,26.6,0.28300000000000003,24.0,0 106 | 2,85.0,65,0,,39.6,0.93,27.0,0 107 | 1,126.0,56,29,152.0,28.7,0.8009999999999999,21.0,0 108 | 1,96.0,122,0,,22.4,0.207,27.0,0 109 | 4,144.0,58,28,140.0,29.5,0.287,37.0,0 110 | 3,83.0,58,31,18.0,34.3,0.336,25.0,0 111 | 0,95.0,85,25,36.0,37.4,0.247,24.0,1 112 | 3,171.0,72,33,135.0,33.3,0.19899999999999998,24.0,1 113 | 8,155.0,62,26,495.0,34.0,0.5429999999999999,46.0,1 114 | 1,89.0,76,34,37.0,31.2,0.192,23.0,0 115 | 4,76.0,62,0,,34.0,0.391,25.0,0 116 | 7,160.0,54,32,175.0,30.5,0.588,39.0,1 117 | 4,146.0,92,0,,31.2,0.539,61.0,1 118 | 5,124.0,74,0,,34.0,0.22,38.0,1 119 | 5,78.0,48,0,,33.7,0.654,25.0,0 120 | 4,97.0,60,23,,28.2,0.44299999999999995,22.0,0 121 | 4,99.0,76,15,51.0,23.2,0.223,21.0,0 122 | 0,162.0,76,56,100.0,53.2,0.759,25.0,1 123 | 6,111.0,64,39,,34.2,0.26,24.0,0 124 | 2,107.0,74,30,100.0,33.6,0.40399999999999997,23.0,0 125 | 5,132.0,80,0,,26.8,0.18600000000000003,69.0,0 126 | 0,113.0,76,0,,33.3,0.278,23.0,1 127 | 1,88.0,30,42,99.0,55.0,0.496,26.0,1 128 | 3,120.0,70,30,135.0,42.9,0.452,30.0,0 129 | 1,118.0,58,36,94.0,33.3,0.261,23.0,0 130 | 1,117.0,88,24,145.0,34.5,0.40299999999999997,40.0,1 131 | 0,105.0,84,0,,27.9,0.741,62.0,1 132 | 4,173.0,70,14,168.0,29.7,0.361,33.0,1 133 | 9,122.0,56,0,,33.3,1.114,33.0,1 134 | 3,170.0,64,37,225.0,34.5,0.35600000000000004,30.0,1 135 | 8,84.0,74,31,,38.3,0.457,39.0,0 136 | 2,96.0,68,13,49.0,21.1,0.647,26.0,0 137 | 2,125.0,60,20,140.0,33.8,0.08800000000000001,31.0,0 138 | 0,100.0,70,26,50.0,30.8,0.597,21.0,0 139 | 0,93.0,60,25,92.0,28.7,0.532,22.0,0 140 | 0,129.0,80,0,,31.2,0.703,29.0,0 141 | 5,105.0,72,29,325.0,36.9,0.159,28.0,0 142 | 3,128.0,78,0,,21.1,0.268,55.0,0 143 | 5,106.0,82,30,,39.5,0.28600000000000003,38.0,0 144 | 2,108.0,52,26,63.0,32.5,0.318,22.0,0 145 | 10,108.0,66,0,,32.4,0.272,42.0,1 146 | 
4,154.0,62,31,284.0,32.8,0.237,23.0,0 147 | 0,102.0,75,23,,,0.5720000000000001,21.0,0 148 | 9,57.0,80,37,,32.8,0.096,41.0,0 149 | 2,106.0,64,35,119.0,30.5,1.4,34.0,0 150 | 5,147.0,78,0,,33.7,0.218,65.0,0 151 | 2,90.0,70,17,,27.3,0.085,22.0,0 152 | 1,136.0,74,50,204.0,37.4,0.39899999999999997,24.0,0 153 | 4,114.0,65,0,,21.9,0.43200000000000005,37.0,0 154 | 9,156.0,86,28,155.0,34.3,1.189,42.0,1 155 | 1,153.0,82,42,485.0,40.6,0.687,23.0,0 156 | 8,188.0,78,0,,47.9,0.13699999999999998,43.0,1 157 | 7,152.0,88,44,,50.0,0.337,36.0,1 158 | 2,99.0,52,15,94.0,24.6,0.637,21.0,0 159 | 1,109.0,56,21,135.0,25.2,0.833,23.0,0 160 | 2,88.0,74,19,53.0,29.0,0.22899999999999998,22.0,0 161 | 17,163.0,72,41,114.0,40.9,0.8170000000000001,47.0,1 162 | 4,151.0,90,38,,29.7,0.294,36.0,0 163 | 7,102.0,74,40,105.0,37.2,0.204,45.0,0 164 | 0,114.0,80,34,285.0,44.2,0.16699999999999998,27.0,0 165 | 2,100.0,64,23,,29.7,0.368,21.0,0 166 | 0,131.0,88,0,,31.6,0.743,32.0,1 167 | 6,104.0,74,18,156.0,29.9,0.722,41.0,1 168 | 3,148.0,66,25,,32.5,0.256,22.0,0 169 | 4,120.0,68,0,,29.6,0.7090000000000001,34.0,0 170 | 4,110.0,66,0,,31.9,0.47100000000000003,29.0,0 171 | 3,111.0,90,12,78.0,28.4,0.495,29.0,0 172 | 6,102.0,82,0,,30.8,0.18,36.0,1 173 | 6,134.0,70,23,130.0,35.4,0.542,29.0,1 174 | 2,87.0,0,23,,28.9,0.773,25.0,0 175 | 1,79.0,60,42,48.0,43.5,0.6779999999999999,23.0,0 176 | 2,75.0,64,24,55.0,29.7,0.37,33.0,0 177 | 8,179.0,72,42,130.0,32.7,0.7190000000000001,36.0,1 178 | 6,85.0,78,0,,31.2,0.382,42.0,0 179 | 0,129.0,110,46,130.0,67.1,0.319,26.0,1 180 | 5,143.0,78,0,,45.0,0.19,47.0,0 181 | 5,130.0,82,0,,39.1,0.956,37.0,1 182 | 6,87.0,80,0,,23.2,0.084,32.0,0 183 | 0,119.0,64,18,92.0,34.9,0.725,23.0,0 184 | 1,,74,20,23.0,27.7,0.299,21.0,0 185 | 5,73.0,60,0,,26.8,0.268,27.0,0 186 | 4,141.0,74,0,,27.6,0.244,40.0,0 187 | 7,194.0,68,28,,35.9,0.745,41.0,1 188 | 8,181.0,68,36,495.0,30.1,0.615,60.0,1 189 | 1,128.0,98,41,58.0,32.0,1.321,33.0,1 190 | 8,109.0,76,39,114.0,27.9,0.64,31.0,1 191 | 5,139.0,80,35,160.0,31.6,0.361,25.0,1 192 | 3,111.0,62,0,,22.6,0.142,21.0,0 193 | 9,123.0,70,44,94.0,33.1,0.374,40.0,0 194 | 7,159.0,66,0,,30.4,0.38299999999999995,36.0,1 195 | 11,135.0,0,0,,52.3,0.578,40.0,1 196 | 8,85.0,55,20,,24.4,0.136,42.0,0 197 | 5,158.0,84,41,210.0,39.4,0.395,29.0,1 198 | 1,105.0,58,0,,24.3,0.187,21.0,0 199 | 3,107.0,62,13,48.0,22.9,0.6779999999999999,23.0,1 200 | 4,109.0,64,44,99.0,34.8,0.905,26.0,1 201 | 4,148.0,60,27,318.0,30.9,0.15,29.0,1 202 | 0,113.0,80,16,,31.0,0.8740000000000001,21.0,0 203 | 1,138.0,82,0,,40.1,0.23600000000000002,28.0,0 204 | 0,108.0,68,20,,27.3,0.787,32.0,0 205 | 2,99.0,70,16,44.0,20.4,0.235,27.0,0 206 | 6,103.0,72,32,190.0,37.7,0.324,55.0,0 207 | 5,111.0,72,28,,23.9,0.40700000000000003,27.0,0 208 | 8,196.0,76,29,280.0,37.5,0.605,57.0,1 209 | 5,162.0,104,0,,37.7,0.151,52.0,1 210 | 1,96.0,64,27,87.0,33.2,0.289,21.0,0 211 | 7,184.0,84,33,,35.5,0.355,41.0,1 212 | 2,81.0,60,22,,27.7,0.29,25.0,0 213 | 0,147.0,85,54,,42.8,0.375,24.0,0 214 | 7,179.0,95,31,,34.2,0.16399999999999998,60.0,0 215 | 0,140.0,65,26,130.0,42.6,0.431,24.0,1 216 | 9,112.0,82,32,175.0,34.2,0.26,36.0,1 217 | 12,151.0,70,40,271.0,41.8,0.742,38.0,1 218 | 5,109.0,62,41,129.0,35.8,0.514,25.0,1 219 | 6,125.0,68,30,120.0,30.0,0.46399999999999997,32.0,0 220 | 5,85.0,74,22,,29.0,1.224,32.0,1 221 | 5,112.0,66,0,,37.8,0.261,41.0,1 222 | 0,177.0,60,29,478.0,34.6,1.072,21.0,1 223 | 2,158.0,90,0,,31.6,0.805,66.0,1 224 | 7,119.0,0,0,,25.2,0.209,37.0,0 225 | 7,142.0,60,33,190.0,28.8,0.687,61.0,0 226 | 1,100.0,66,15,56.0,23.6,0.6659999999999999,26.0,0 227 | 
1,87.0,78,27,32.0,34.6,0.10099999999999999,22.0,0 228 | 0,101.0,76,0,,35.7,0.198,26.0,0 229 | 3,162.0,52,38,,37.2,0.652,24.0,1 230 | 4,197.0,70,39,744.0,36.7,2.329,31.0,0 231 | 0,117.0,80,31,53.0,45.2,0.08900000000000001,24.0,0 232 | 4,142.0,86,0,,44.0,0.645,22.0,1 233 | 6,134.0,80,37,370.0,46.2,0.23800000000000002,46.0,1 234 | 1,79.0,80,25,37.0,25.4,0.583,22.0,0 235 | 4,122.0,68,0,,35.0,0.39399999999999996,29.0,0 236 | 3,74.0,68,28,45.0,29.7,0.293,23.0,0 237 | 4,171.0,72,0,,43.6,0.479,26.0,1 238 | 7,181.0,84,21,192.0,35.9,0.586,51.0,1 239 | 0,179.0,90,27,,44.1,0.6859999999999999,23.0,1 240 | 9,164.0,84,21,,30.8,0.831,32.0,1 241 | 0,104.0,76,0,,18.4,0.5820000000000001,27.0,0 242 | 1,91.0,64,24,,29.2,0.192,21.0,0 243 | 4,91.0,70,32,88.0,33.1,0.446,22.0,0 244 | 3,139.0,54,0,,25.6,0.402,22.0,1 245 | 6,119.0,50,22,176.0,27.1,1.318,33.0,1 246 | 2,146.0,76,35,194.0,38.2,0.32899999999999996,29.0,0 247 | 9,184.0,85,15,,30.0,1.213,49.0,1 248 | 10,122.0,68,0,,31.2,0.258,41.0,0 249 | 0,165.0,90,33,680.0,52.3,0.42700000000000005,23.0,0 250 | 9,124.0,70,33,402.0,35.4,0.282,34.0,0 251 | 1,111.0,86,19,,30.1,0.14300000000000002,23.0,0 252 | 9,106.0,52,0,,31.2,0.38,42.0,0 253 | 2,129.0,84,0,,28.0,0.284,27.0,0 254 | 2,90.0,80,14,55.0,24.4,0.249,24.0,0 255 | 0,86.0,68,32,,35.8,0.23800000000000002,25.0,0 256 | 12,92.0,62,7,258.0,27.6,0.9259999999999999,44.0,1 257 | 1,113.0,64,35,,33.6,0.5429999999999999,21.0,1 258 | 3,111.0,56,39,,30.1,0.557,30.0,0 259 | 2,114.0,68,22,,28.7,0.092,25.0,0 260 | 1,193.0,50,16,375.0,25.9,0.655,24.0,0 261 | 11,155.0,76,28,150.0,33.3,1.3530000000000002,51.0,1 262 | 3,191.0,68,15,130.0,30.9,0.299,34.0,0 263 | 3,141.0,0,0,,30.0,0.7609999999999999,27.0,1 264 | 4,95.0,70,32,,32.1,0.612,24.0,0 265 | 3,142.0,80,15,,32.4,0.2,63.0,0 266 | 4,123.0,62,0,,32.0,0.226,35.0,1 267 | 5,96.0,74,18,67.0,33.6,0.997,43.0,0 268 | 0,138.0,0,0,,36.3,0.9329999999999999,25.0,1 269 | 2,128.0,64,42,,40.0,1.101,24.0,0 270 | 0,102.0,52,0,,25.1,0.078,21.0,0 271 | 2,146.0,0,0,,27.5,0.24,28.0,1 272 | 10,101.0,86,37,,45.6,1.136,38.0,1 273 | 2,108.0,62,32,56.0,25.2,0.128,21.0,0 274 | 3,122.0,78,0,,23.0,0.254,40.0,0 275 | 1,71.0,78,50,45.0,33.2,0.42200000000000004,21.0,0 276 | 13,106.0,70,0,,34.2,0.251,52.0,0 277 | 2,100.0,70,52,57.0,40.5,0.677,25.0,0 278 | 7,106.0,60,24,,26.5,0.29600000000000004,29.0,1 279 | 0,104.0,64,23,116.0,27.8,0.45399999999999996,23.0,0 280 | 5,114.0,74,0,,24.9,0.7440000000000001,57.0,0 281 | 2,108.0,62,10,278.0,25.3,0.8809999999999999,22.0,0 282 | 0,146.0,70,0,,37.9,0.33399999999999996,28.0,1 283 | 10,129.0,76,28,122.0,35.9,0.28,39.0,0 284 | 7,133.0,88,15,155.0,32.4,0.262,37.0,0 285 | 7,161.0,86,0,,30.4,0.165,47.0,1 286 | 2,108.0,80,0,,27.0,0.259,52.0,1 287 | 7,136.0,74,26,135.0,26.0,0.647,51.0,0 288 | 5,155.0,84,44,545.0,38.7,0.619,34.0,0 289 | 1,119.0,86,39,220.0,45.6,0.8079999999999999,29.0,1 290 | 4,96.0,56,17,49.0,20.8,0.34,26.0,0 291 | 5,108.0,72,43,75.0,36.1,0.263,33.0,0 292 | 0,78.0,88,29,40.0,36.9,0.434,21.0,0 293 | 0,107.0,62,30,74.0,36.6,0.757,25.0,1 294 | 2,128.0,78,37,182.0,43.3,1.224,31.0,1 295 | 1,128.0,48,45,194.0,40.5,0.613,24.0,1 296 | 0,161.0,50,0,,21.9,0.254,65.0,0 297 | 6,151.0,62,31,120.0,35.5,0.6920000000000001,28.0,0 298 | 2,146.0,70,38,360.0,28.0,0.337,29.0,1 299 | 0,126.0,84,29,215.0,30.7,0.52,24.0,0 300 | 14,100.0,78,25,184.0,36.6,0.41200000000000003,46.0,1 301 | 8,112.0,72,0,,23.6,0.84,58.0,0 302 | 0,167.0,0,0,,32.3,0.8390000000000001,30.0,1 303 | 2,144.0,58,33,135.0,31.6,0.42200000000000004,25.0,1 304 | 5,77.0,82,41,42.0,35.8,0.156,35.0,0 305 | 
5,115.0,98,0,,52.9,0.209,28.0,1 306 | 3,150.0,76,0,,21.0,0.207,37.0,0 307 | 2,120.0,76,37,105.0,39.7,0.215,29.0,0 308 | 10,161.0,68,23,132.0,25.5,0.326,47.0,1 309 | 0,137.0,68,14,148.0,24.8,0.14300000000000002,21.0,0 310 | 0,128.0,68,19,180.0,30.5,1.391,25.0,1 311 | 2,124.0,68,28,205.0,32.9,0.875,30.0,1 312 | 6,80.0,66,30,,26.2,0.313,41.0,0 313 | 0,106.0,70,37,148.0,39.4,0.605,22.0,0 314 | 2,155.0,74,17,96.0,26.6,0.433,27.0,1 315 | 3,113.0,50,10,85.0,29.5,0.626,25.0,0 316 | 7,109.0,80,31,,35.9,1.127,43.0,1 317 | 2,112.0,68,22,94.0,34.1,0.315,26.0,0 318 | 3,99.0,80,11,64.0,19.3,0.284,30.0,0 319 | 3,182.0,74,0,,30.5,0.345,29.0,1 320 | 3,115.0,66,39,140.0,38.1,0.15,28.0,0 321 | 6,194.0,78,0,,23.5,0.129,59.0,1 322 | 4,129.0,60,12,231.0,27.5,0.527,31.0,0 323 | 3,112.0,74,30,,31.6,0.19699999999999998,25.0,1 324 | 0,124.0,70,20,,27.4,0.254,36.0,1 325 | 13,152.0,90,33,29.0,26.8,0.731,43.0,1 326 | 2,112.0,75,32,,35.7,0.14800000000000002,21.0,0 327 | 1,157.0,72,21,168.0,25.6,0.12300000000000001,24.0,0 328 | 1,122.0,64,32,156.0,35.1,0.6920000000000001,30.0,1 329 | 10,179.0,70,0,,35.1,0.2,37.0,0 330 | 2,102.0,86,36,120.0,45.5,0.127,23.0,1 331 | 6,105.0,70,32,68.0,30.8,0.122,37.0,0 332 | 8,118.0,72,19,,23.1,1.476,46.0,0 333 | 2,87.0,58,16,52.0,32.7,0.166,25.0,0 334 | 1,180.0,0,0,,43.3,0.282,41.0,1 335 | 12,106.0,80,0,,23.6,0.13699999999999998,44.0,0 336 | 1,95.0,60,18,58.0,23.9,0.26,22.0,0 337 | 0,165.0,76,43,255.0,47.9,0.259,26.0,0 338 | 0,117.0,0,0,,33.8,0.932,44.0,0 339 | 5,115.0,76,0,,31.2,0.34299999999999997,44.0,1 340 | 9,152.0,78,34,171.0,34.2,0.893,33.0,1 341 | 7,178.0,84,0,,39.9,0.331,41.0,1 342 | 1,130.0,70,13,105.0,25.9,0.47200000000000003,22.0,0 343 | 1,95.0,74,21,73.0,25.9,0.6729999999999999,36.0,0 344 | 1,,68,35,,32.0,0.389,22.0,0 345 | 5,122.0,86,0,,34.7,0.29,33.0,0 346 | 8,95.0,72,0,,36.8,0.485,57.0,0 347 | 8,126.0,88,36,108.0,38.5,0.349,49.0,0 348 | 1,139.0,46,19,83.0,28.7,0.654,22.0,0 349 | 3,116.0,0,0,,23.5,0.187,23.0,0 350 | 3,99.0,62,19,74.0,21.8,0.27899999999999997,26.0,0 351 | 5,,80,32,,41.0,0.34600000000000003,37.0,1 352 | 4,92.0,80,0,,42.2,0.237,29.0,0 353 | 4,137.0,84,0,,31.2,0.252,30.0,0 354 | 3,61.0,82,28,,34.4,0.243,46.0,0 355 | 1,90.0,62,12,43.0,27.2,0.58,24.0,0 356 | 3,90.0,78,0,,42.7,0.5589999999999999,21.0,0 357 | 9,165.0,88,0,,30.4,0.302,49.0,1 358 | 1,125.0,50,40,167.0,33.3,0.9620000000000001,28.0,1 359 | 13,129.0,0,30,,39.9,0.569,44.0,1 360 | 12,88.0,74,40,54.0,35.3,0.37799999999999995,48.0,0 361 | 1,196.0,76,36,249.0,36.5,0.875,29.0,1 362 | 5,189.0,64,33,325.0,31.2,0.583,29.0,1 363 | 5,158.0,70,0,,29.8,0.207,63.0,0 364 | 5,103.0,108,37,,39.2,0.305,65.0,0 365 | 4,146.0,78,0,,38.5,0.52,67.0,1 366 | 4,147.0,74,25,293.0,34.9,0.385,30.0,0 367 | 5,99.0,54,28,83.0,34.0,0.499,30.0,0 368 | 6,124.0,72,0,,27.6,0.368,29.0,1 369 | 0,101.0,64,17,,21.0,0.252,21.0,0 370 | 3,81.0,86,16,66.0,27.5,0.306,22.0,0 371 | 1,133.0,102,28,140.0,32.8,0.23399999999999999,45.0,1 372 | 3,173.0,82,48,465.0,38.4,2.137,25.0,1 373 | 0,118.0,64,23,89.0,,1.7309999999999999,21.0,0 374 | 0,84.0,64,22,66.0,35.8,0.545,21.0,0 375 | 2,105.0,58,40,94.0,34.9,0.225,25.0,0 376 | 2,122.0,52,43,158.0,36.2,0.816,28.0,0 377 | 12,140.0,82,43,325.0,39.2,0.528,58.0,1 378 | 0,98.0,82,15,84.0,25.2,0.299,22.0,0 379 | 1,87.0,60,37,75.0,37.2,0.509,22.0,0 380 | 4,156.0,75,0,,48.3,0.23800000000000002,32.0,1 381 | 0,93.0,100,39,72.0,43.4,1.021,35.0,0 382 | 1,107.0,72,30,82.0,30.8,0.821,24.0,0 383 | 0,105.0,68,22,,20.0,0.23600000000000002,22.0,0 384 | 1,109.0,60,8,182.0,25.4,0.9470000000000001,21.0,0 385 | 
1,90.0,62,18,59.0,25.1,1.268,25.0,0 386 | 1,125.0,70,24,110.0,24.3,0.221,25.0,0 387 | 1,119.0,54,13,50.0,22.3,0.205,24.0,0 388 | 5,116.0,74,29,,32.3,0.66,35.0,1 389 | 8,105.0,100,36,,43.3,0.239,45.0,1 390 | 5,144.0,82,26,285.0,32.0,0.452,58.0,1 391 | 3,100.0,68,23,81.0,31.6,0.9490000000000001,28.0,0 392 | 1,100.0,66,29,196.0,32.0,0.444,42.0,0 393 | 5,166.0,76,0,,45.7,0.34,27.0,1 394 | 1,131.0,64,14,415.0,23.7,0.389,21.0,0 395 | 4,116.0,72,12,87.0,22.1,0.46299999999999997,37.0,0 396 | 4,158.0,78,0,,32.9,0.8029999999999999,31.0,1 397 | 2,127.0,58,24,275.0,27.7,1.6,25.0,0 398 | 3,96.0,56,34,115.0,24.7,0.9440000000000001,39.0,0 399 | 0,131.0,66,40,,34.3,0.196,22.0,1 400 | 3,82.0,70,0,,21.1,0.389,25.0,0 401 | 3,193.0,70,31,,34.9,0.24100000000000002,25.0,1 402 | 4,95.0,64,0,,32.0,0.161,31.0,1 403 | 6,137.0,61,0,,24.2,0.151,55.0,0 404 | 5,136.0,84,41,88.0,35.0,0.28600000000000003,35.0,1 405 | 9,72.0,78,25,,31.6,0.28,38.0,0 406 | 5,168.0,64,0,,32.9,0.135,41.0,1 407 | 2,123.0,48,32,165.0,42.1,0.52,26.0,0 408 | 4,115.0,72,0,,28.9,0.376,46.0,1 409 | 0,101.0,62,0,,21.9,0.336,25.0,0 410 | 8,197.0,74,0,,25.9,1.1909999999999998,39.0,1 411 | 1,172.0,68,49,579.0,42.4,0.7020000000000001,28.0,1 412 | 6,102.0,90,39,,35.7,0.674,28.0,0 413 | 1,112.0,72,30,176.0,34.4,0.528,25.0,0 414 | 1,143.0,84,23,310.0,42.4,1.0759999999999998,22.0,0 415 | 1,143.0,74,22,61.0,26.2,0.256,21.0,0 416 | 0,138.0,60,35,167.0,34.6,0.534,21.0,1 417 | 3,173.0,84,33,474.0,35.7,0.258,22.0,1 418 | 1,97.0,68,21,,27.2,1.095,22.0,0 419 | 4,144.0,82,32,,38.5,0.5539999999999999,37.0,1 420 | 1,83.0,68,0,,18.2,0.624,27.0,0 421 | 3,129.0,64,29,115.0,26.4,0.21899999999999997,28.0,1 422 | 1,119.0,88,41,170.0,45.3,0.507,26.0,0 423 | 2,94.0,68,18,76.0,26.0,0.561,21.0,0 424 | 0,102.0,64,46,78.0,40.6,0.496,21.0,0 425 | 2,115.0,64,22,,30.8,0.42100000000000004,21.0,0 426 | 8,151.0,78,32,210.0,42.9,0.516,36.0,1 427 | 4,184.0,78,39,277.0,37.0,0.264,31.0,1 428 | 0,94.0,0,0,,,0.256,25.0,0 429 | 1,181.0,64,30,180.0,34.1,0.32799999999999996,38.0,1 430 | 0,135.0,94,46,145.0,40.6,0.284,26.0,0 431 | 1,95.0,82,25,180.0,35.0,0.233,43.0,1 432 | 2,99.0,0,0,,22.2,0.10800000000000001,23.0,0 433 | 3,89.0,74,16,85.0,30.4,0.551,38.0,0 434 | 1,80.0,74,11,60.0,30.0,0.527,22.0,0 435 | 2,139.0,75,0,,25.6,0.16699999999999998,29.0,0 436 | 1,90.0,68,8,,24.5,1.138,36.0,0 437 | 0,141.0,0,0,,42.4,0.205,29.0,1 438 | 12,140.0,85,33,,37.4,0.244,41.0,0 439 | 5,147.0,75,0,,29.9,0.434,28.0,0 440 | 1,97.0,70,15,,18.2,0.147,21.0,0 441 | 6,107.0,88,0,,36.8,0.727,31.0,0 442 | 0,189.0,104,25,,34.3,0.435,41.0,1 443 | 2,83.0,66,23,50.0,32.2,0.49700000000000005,22.0,0 444 | 4,117.0,64,27,120.0,33.2,0.23,24.0,0 445 | 8,108.0,70,0,,30.5,0.955,33.0,1 446 | 4,117.0,62,12,,29.7,0.38,30.0,1 447 | 0,180.0,78,63,14.0,59.4,2.42,25.0,1 448 | 1,100.0,72,12,70.0,25.3,0.6579999999999999,28.0,0 449 | 0,95.0,80,45,92.0,36.5,0.33,26.0,0 450 | 0,104.0,64,37,64.0,33.6,0.51,22.0,1 451 | 0,120.0,74,18,63.0,30.5,0.285,26.0,0 452 | 1,82.0,64,13,95.0,21.2,0.415,23.0,0 453 | 2,134.0,70,0,,28.9,0.542,23.0,1 454 | 0,91.0,68,32,210.0,39.9,0.381,25.0,0 455 | 2,119.0,0,0,,19.6,0.8320000000000001,72.0,0 456 | 2,100.0,54,28,105.0,37.8,0.498,24.0,0 457 | 14,175.0,62,30,,33.6,0.212,38.0,1 458 | 1,135.0,54,0,,26.7,0.687,62.0,0 459 | 5,86.0,68,28,71.0,30.2,0.364,24.0,0 460 | 10,148.0,84,48,237.0,37.6,1.001,51.0,1 461 | 9,134.0,74,33,60.0,25.9,0.46,81.0,0 462 | 9,120.0,72,22,56.0,20.8,0.733,48.0,0 463 | 1,71.0,62,0,,21.8,0.41600000000000004,26.0,0 464 | 8,74.0,70,40,49.0,35.3,0.705,39.0,0 465 | 5,88.0,78,30,,27.6,0.258,37.0,0 466 | 
10,115.0,98,0,,24.0,1.022,34.0,0 467 | 0,124.0,56,13,105.0,21.8,0.452,21.0,0 468 | 0,74.0,52,10,36.0,27.8,0.26899999999999996,22.0,0 469 | 0,97.0,64,36,100.0,36.8,0.6,25.0,0 470 | 8,120.0,0,0,,30.0,0.183,38.0,1 471 | 6,154.0,78,41,140.0,46.1,0.5710000000000001,27.0,0 472 | 1,144.0,82,40,,41.3,0.607,28.0,0 473 | 0,137.0,70,38,,33.2,0.17,22.0,0 474 | 0,119.0,66,27,,38.8,0.259,22.0,0 475 | 7,136.0,90,0,,29.9,0.21,50.0,0 476 | 4,114.0,64,0,,28.9,0.126,24.0,0 477 | 0,137.0,84,27,,27.3,0.231,59.0,0 478 | 2,105.0,80,45,191.0,33.7,0.711,29.0,1 479 | 7,114.0,76,17,110.0,23.8,0.466,31.0,0 480 | 8,126.0,74,38,75.0,25.9,0.162,39.0,0 481 | 4,132.0,86,31,,28.0,0.419,63.0,0 482 | 3,158.0,70,30,328.0,35.5,0.344,35.0,1 483 | 0,123.0,88,37,,35.2,0.19699999999999998,29.0,0 484 | 4,85.0,58,22,49.0,27.8,0.306,28.0,0 485 | 0,84.0,82,31,125.0,38.2,0.233,23.0,0 486 | 0,145.0,0,0,,44.2,0.63,31.0,1 487 | 0,135.0,68,42,250.0,42.3,0.365,24.0,1 488 | 1,139.0,62,41,480.0,40.7,0.536,21.0,0 489 | 0,173.0,78,32,265.0,46.5,1.159,58.0,0 490 | 4,99.0,72,17,,25.6,0.294,28.0,0 491 | 8,194.0,80,0,,26.1,0.551,67.0,0 492 | 2,83.0,65,28,66.0,36.8,0.629,24.0,0 493 | 2,89.0,90,30,,33.5,0.292,42.0,0 494 | 4,99.0,68,38,,32.8,0.145,33.0,0 495 | 4,125.0,70,18,122.0,28.9,1.1440000000000001,45.0,1 496 | 3,80.0,0,0,,,0.174,22.0,0 497 | 6,166.0,74,0,,26.6,0.304,66.0,0 498 | 5,110.0,68,0,,26.0,0.292,30.0,0 499 | 2,81.0,72,15,76.0,30.1,0.547,25.0,0 500 | 7,195.0,70,33,145.0,25.1,0.163,55.0,1 501 | 6,154.0,74,32,193.0,29.3,0.8390000000000001,39.0,0 502 | 2,117.0,90,19,71.0,25.2,0.313,21.0,0 503 | 3,84.0,72,32,,37.2,0.267,28.0,0 504 | 6,,68,41,,39.0,0.727,41.0,1 505 | 7,94.0,64,25,79.0,33.3,0.738,41.0,0 506 | 3,96.0,78,39,,37.3,0.23800000000000002,40.0,0 507 | 10,75.0,82,0,,33.3,0.263,38.0,0 508 | 0,180.0,90,26,90.0,36.5,0.314,35.0,1 509 | 1,130.0,60,23,170.0,28.6,0.6920000000000001,21.0,0 510 | 2,84.0,50,23,76.0,30.4,0.968,21.0,0 511 | 8,120.0,78,0,,25.0,0.409,64.0,0 512 | 12,84.0,72,31,,29.7,0.297,46.0,1 513 | 0,139.0,62,17,210.0,22.1,0.207,21.0,0 514 | 9,91.0,68,0,,24.2,0.2,58.0,0 515 | 2,91.0,62,0,,27.3,0.525,22.0,0 516 | 3,99.0,54,19,86.0,25.6,0.154,24.0,0 517 | 3,163.0,70,18,105.0,31.6,0.268,28.0,1 518 | 9,145.0,88,34,165.0,30.3,0.7709999999999999,53.0,1 519 | 7,125.0,86,0,,37.6,0.304,51.0,0 520 | 13,76.0,60,0,,32.8,0.18,41.0,0 521 | 6,129.0,90,7,326.0,19.6,0.5820000000000001,60.0,0 522 | 2,68.0,70,32,66.0,25.0,0.187,25.0,0 523 | 3,124.0,80,33,130.0,33.2,0.305,26.0,0 524 | 6,114.0,0,0,,,0.18899999999999997,26.0,0 525 | 9,130.0,70,0,,34.2,0.652,45.0,1 526 | 3,125.0,58,0,,31.6,0.151,24.0,0 527 | 3,87.0,60,18,,21.8,0.444,21.0,0 528 | 1,97.0,64,19,82.0,18.2,0.299,21.0,0 529 | 3,116.0,74,15,105.0,26.3,0.107,24.0,0 530 | 0,117.0,66,31,188.0,30.8,0.493,22.0,0 531 | 0,111.0,65,0,,24.6,0.66,31.0,0 532 | 2,122.0,60,18,106.0,29.8,0.7170000000000001,22.0,0 533 | 0,107.0,76,0,,45.3,0.6859999999999999,24.0,0 534 | 1,86.0,66,52,65.0,41.3,0.917,29.0,0 535 | 6,91.0,0,0,,29.8,0.501,31.0,0 536 | 1,77.0,56,30,56.0,33.3,1.251,24.0,0 537 | 4,132.0,0,0,,32.9,0.302,23.0,1 538 | 0,105.0,90,0,,29.6,0.19699999999999998,46.0,0 539 | 0,57.0,60,0,,21.7,0.735,67.0,0 540 | 0,127.0,80,37,210.0,36.3,0.804,23.0,0 541 | 3,129.0,92,49,155.0,36.4,0.968,32.0,1 542 | 8,100.0,74,40,215.0,39.4,0.6609999999999999,43.0,1 543 | 3,128.0,72,25,190.0,32.4,0.5489999999999999,27.0,1 544 | 10,90.0,85,32,,34.9,0.825,56.0,1 545 | 4,84.0,90,23,56.0,39.5,0.159,25.0,0 546 | 1,88.0,78,29,76.0,32.0,0.365,29.0,0 547 | 8,186.0,90,35,225.0,34.5,0.423,37.0,1 548 | 
5,187.0,76,27,207.0,43.6,1.034,53.0,1 549 | 4,131.0,68,21,166.0,33.1,0.16,28.0,0 550 | 1,164.0,82,43,67.0,32.8,0.341,50.0,0 551 | 4,189.0,110,31,,28.5,0.68,37.0,0 552 | 1,116.0,70,28,,27.4,0.204,21.0,0 553 | 3,84.0,68,30,106.0,31.9,0.591,25.0,0 554 | 6,114.0,88,0,,27.8,0.247,66.0,0 555 | 1,88.0,62,24,44.0,29.9,0.42200000000000004,23.0,0 556 | 1,84.0,64,23,115.0,36.9,0.47100000000000003,28.0,0 557 | 7,124.0,70,33,215.0,25.5,0.161,37.0,0 558 | 1,97.0,70,40,,38.1,0.218,30.0,0 559 | 8,110.0,76,0,,27.8,0.237,58.0,0 560 | 11,103.0,68,40,,46.2,0.126,42.0,0 561 | 11,85.0,74,0,,30.1,0.3,35.0,0 562 | 6,125.0,76,0,,33.8,0.121,54.0,1 563 | 0,198.0,66,32,274.0,41.3,0.502,28.0,1 564 | 1,87.0,68,34,77.0,37.6,0.401,24.0,0 565 | 6,99.0,60,19,54.0,26.9,0.49700000000000005,32.0,0 566 | 0,91.0,80,0,,32.4,0.601,27.0,0 567 | 2,95.0,54,14,88.0,26.1,0.748,22.0,0 568 | 1,99.0,72,30,18.0,38.6,0.41200000000000003,21.0,0 569 | 6,92.0,62,32,126.0,32.0,0.085,46.0,0 570 | 4,154.0,72,29,126.0,31.3,0.33799999999999997,37.0,0 571 | 0,121.0,66,30,165.0,34.3,0.203,33.0,1 572 | 3,78.0,70,0,,32.5,0.27,39.0,0 573 | 2,130.0,96,0,,22.6,0.268,21.0,0 574 | 3,111.0,58,31,44.0,29.5,0.43,22.0,0 575 | 2,98.0,60,17,120.0,34.7,0.198,22.0,0 576 | 1,143.0,86,30,330.0,30.1,0.892,23.0,0 577 | 1,119.0,44,47,63.0,35.5,0.28,25.0,0 578 | 6,108.0,44,20,130.0,24.0,0.813,35.0,0 579 | 2,118.0,80,0,,42.9,0.693,21.0,1 580 | 10,133.0,68,0,,27.0,0.245,36.0,0 581 | 2,197.0,70,99,,34.7,0.575,62.0,1 582 | 0,151.0,90,46,,42.1,0.371,21.0,1 583 | 6,109.0,60,27,,25.0,0.20600000000000002,27.0,0 584 | 12,121.0,78,17,,26.5,0.259,62.0,0 585 | 8,100.0,76,0,,38.7,0.19,42.0,0 586 | 8,124.0,76,24,600.0,28.7,0.687,52.0,1 587 | 1,93.0,56,11,,22.5,0.41700000000000004,22.0,0 588 | 8,143.0,66,0,,34.9,0.129,41.0,1 589 | 6,103.0,66,0,,24.3,0.249,29.0,0 590 | 3,176.0,86,27,156.0,33.3,1.1540000000000001,52.0,1 591 | 0,73.0,0,0,,21.1,0.342,25.0,0 592 | 11,111.0,84,40,,46.8,0.925,45.0,1 593 | 2,112.0,78,50,140.0,39.4,0.175,24.0,0 594 | 3,132.0,80,0,,34.4,0.402,44.0,1 595 | 2,82.0,52,22,115.0,28.5,1.699,25.0,0 596 | 6,123.0,72,45,230.0,33.6,0.733,34.0,0 597 | 0,188.0,82,14,185.0,32.0,0.682,22.0,1 598 | 0,67.0,76,0,,45.3,0.19399999999999998,46.0,0 599 | 1,89.0,24,19,25.0,27.8,0.5589999999999999,21.0,0 600 | 1,173.0,74,0,,36.8,0.08800000000000001,38.0,1 601 | 1,109.0,38,18,120.0,23.1,0.40700000000000003,26.0,0 602 | 1,108.0,88,19,,27.1,0.4,24.0,0 603 | 6,96.0,0,0,,23.7,0.19,28.0,0 604 | 1,124.0,74,36,,27.8,0.1,30.0,0 605 | 7,150.0,78,29,126.0,35.2,0.6920000000000001,54.0,1 606 | 4,183.0,0,0,,28.4,0.212,36.0,1 607 | 1,124.0,60,32,,35.8,0.514,21.0,0 608 | 1,181.0,78,42,293.0,40.0,1.258,22.0,1 609 | 1,92.0,62,25,41.0,19.5,0.48200000000000004,25.0,0 610 | 0,152.0,82,39,272.0,41.5,0.27,27.0,0 611 | 1,111.0,62,13,182.0,24.0,0.138,23.0,0 612 | 3,106.0,54,21,158.0,30.9,0.292,24.0,0 613 | 3,174.0,58,22,194.0,32.9,0.593,36.0,1 614 | 7,168.0,88,42,321.0,38.2,0.787,40.0,1 615 | 6,105.0,80,28,,32.5,0.878,26.0,0 616 | 11,138.0,74,26,144.0,36.1,0.557,50.0,1 617 | 3,106.0,72,0,,25.8,0.207,27.0,0 618 | 6,117.0,96,0,,28.7,0.157,30.0,0 619 | 2,68.0,62,13,15.0,20.1,0.257,23.0,0 620 | 9,112.0,82,24,,28.2,1.2819999999999998,50.0,1 621 | 0,119.0,0,0,,32.4,0.141,24.0,1 622 | 2,112.0,86,42,160.0,38.4,0.24600000000000002,28.0,0 623 | 2,92.0,76,20,,24.2,1.6980000000000002,28.0,0 624 | 6,183.0,94,0,,40.8,1.4609999999999999,45.0,0 625 | 0,94.0,70,27,115.0,43.5,0.34700000000000003,21.0,0 626 | 2,108.0,64,0,,30.8,0.158,21.0,0 627 | 4,90.0,88,47,54.0,37.7,0.36200000000000004,29.0,0 628 | 
0,125.0,68,0,,24.7,0.20600000000000002,21.0,0 629 | 0,132.0,78,0,,32.4,0.39299999999999996,21.0,0 630 | 5,128.0,80,0,,34.6,0.14400000000000002,45.0,0 631 | 4,94.0,65,22,,24.7,0.14800000000000002,21.0,0 632 | 7,114.0,64,0,,27.4,0.732,34.0,1 633 | 0,102.0,78,40,90.0,34.5,0.23800000000000002,24.0,0 634 | 2,111.0,60,0,,26.2,0.34299999999999997,23.0,0 635 | 1,128.0,82,17,183.0,27.5,0.115,22.0,0 636 | 10,92.0,62,0,,25.9,0.16699999999999998,31.0,0 637 | 13,104.0,72,0,,31.2,0.465,38.0,1 638 | 5,104.0,74,0,,28.8,0.153,48.0,0 639 | 2,94.0,76,18,66.0,31.6,0.649,23.0,0 640 | 7,97.0,76,32,91.0,40.9,0.871,32.0,1 641 | 1,100.0,74,12,46.0,19.5,0.149,28.0,0 642 | 0,102.0,86,17,105.0,29.3,0.695,27.0,0 643 | 4,128.0,70,0,,34.3,0.303,24.0,0 644 | 6,147.0,80,0,,29.5,0.17800000000000002,50.0,1 645 | 4,90.0,0,0,,28.0,0.61,31.0,0 646 | 3,103.0,72,30,152.0,27.6,0.73,27.0,0 647 | 2,157.0,74,35,440.0,39.4,0.134,30.0,0 648 | 1,167.0,74,17,144.0,23.4,0.447,33.0,1 649 | 0,179.0,50,36,159.0,37.8,0.455,22.0,1 650 | 11,136.0,84,35,130.0,28.3,0.26,42.0,1 651 | 0,107.0,60,25,,26.4,0.133,23.0,0 652 | 1,91.0,54,25,100.0,25.2,0.23399999999999999,23.0,0 653 | 1,117.0,60,23,106.0,33.8,0.466,27.0,0 654 | 5,123.0,74,40,77.0,34.1,0.26899999999999996,28.0,0 655 | 2,120.0,54,0,,26.8,0.455,27.0,0 656 | 1,106.0,70,28,135.0,34.2,0.142,22.0,0 657 | 2,155.0,52,27,540.0,38.7,0.24,25.0,1 658 | 2,101.0,58,35,90.0,21.8,0.155,22.0,0 659 | 1,120.0,80,48,200.0,38.9,1.162,41.0,0 660 | 11,127.0,106,0,,39.0,0.19,51.0,0 661 | 3,80.0,82,31,70.0,34.2,1.2919999999999998,27.0,1 662 | 10,162.0,84,0,,27.7,0.182,54.0,0 663 | 1,199.0,76,43,,42.9,1.3940000000000001,22.0,1 664 | 8,167.0,106,46,231.0,37.6,0.165,43.0,1 665 | 9,145.0,80,46,130.0,37.9,0.637,40.0,1 666 | 6,115.0,60,39,,33.7,0.245,40.0,1 667 | 1,112.0,80,45,132.0,34.8,0.217,24.0,0 668 | 4,145.0,82,18,,32.5,0.235,70.0,1 669 | 10,111.0,70,27,,27.5,0.141,40.0,1 670 | 6,98.0,58,33,190.0,34.0,0.43,43.0,0 671 | 9,154.0,78,30,100.0,30.9,0.16399999999999998,45.0,0 672 | 6,165.0,68,26,168.0,33.6,0.631,49.0,0 673 | 1,99.0,58,10,,25.4,0.551,21.0,0 674 | 10,68.0,106,23,49.0,35.5,0.285,47.0,0 675 | 3,123.0,100,35,240.0,57.3,0.88,22.0,0 676 | 8,91.0,82,0,,35.6,0.5870000000000001,68.0,0 677 | 6,195.0,70,0,,30.9,0.32799999999999996,31.0,1 678 | 9,156.0,86,0,,24.8,0.23,53.0,1 679 | 0,93.0,60,0,,35.3,0.263,25.0,0 680 | 3,121.0,52,0,,36.0,0.127,25.0,1 681 | 2,101.0,58,17,265.0,24.2,0.614,23.0,0 682 | 2,56.0,56,28,45.0,24.2,0.332,22.0,0 683 | 0,162.0,76,36,,49.6,0.364,26.0,1 684 | 0,95.0,64,39,105.0,44.6,0.366,22.0,0 685 | 4,125.0,80,0,,32.3,0.536,27.0,1 686 | 5,136.0,82,0,,,0.64,69.0,0 687 | 2,129.0,74,26,205.0,33.2,0.591,25.0,0 688 | 3,130.0,64,0,,23.1,0.314,22.0,0 689 | 1,107.0,50,19,,28.3,0.18100000000000002,29.0,0 690 | 1,140.0,74,26,180.0,24.1,0.828,23.0,0 691 | 1,144.0,82,46,180.0,46.1,0.335,46.0,1 692 | 8,107.0,80,0,,24.6,0.856,34.0,0 693 | 13,158.0,114,0,,42.3,0.257,44.0,1 694 | 2,121.0,70,32,95.0,39.1,0.8859999999999999,23.0,0 695 | 7,129.0,68,49,125.0,38.5,0.439,43.0,1 696 | 2,90.0,60,0,,23.5,0.191,25.0,0 697 | 7,142.0,90,24,480.0,30.4,0.128,43.0,1 698 | 3,169.0,74,19,125.0,29.9,0.268,31.0,1 699 | 0,99.0,0,0,,25.0,0.253,22.0,0 700 | 4,127.0,88,11,155.0,34.5,0.598,28.0,0 701 | 4,118.0,70,0,,44.5,0.904,26.0,0 702 | 2,122.0,76,27,200.0,35.9,0.483,26.0,0 703 | 6,125.0,78,31,,27.6,0.565,49.0,1 704 | 1,168.0,88,29,,35.0,0.905,52.0,1 705 | 2,129.0,0,0,,38.5,0.304,41.0,0 706 | 4,110.0,76,20,100.0,28.4,0.11800000000000001,27.0,0 707 | 6,80.0,80,36,,39.8,0.177,28.0,0 708 | 10,115.0,0,0,,,0.261,30.0,1 709 | 
2,127.0,46,21,335.0,34.4,0.17600000000000002,22.0,0 710 | 9,164.0,78,0,,32.8,0.14800000000000002,45.0,1 711 | 2,93.0,64,32,160.0,38.0,0.674,23.0,1 712 | 3,158.0,64,13,387.0,31.2,0.295,24.0,0 713 | 5,126.0,78,27,22.0,29.6,0.439,40.0,0 714 | 10,129.0,62,36,,41.2,0.441,38.0,1 715 | 0,134.0,58,20,291.0,26.4,0.35200000000000004,21.0,0 716 | 3,102.0,74,0,,29.5,0.121,32.0,0 717 | 7,187.0,50,33,392.0,33.9,0.826,34.0,1 718 | 3,173.0,78,39,185.0,33.8,0.97,31.0,1 719 | 10,94.0,72,18,,23.1,0.595,56.0,0 720 | 1,108.0,60,46,178.0,35.5,0.415,24.0,0 721 | 5,97.0,76,27,,35.6,0.37799999999999995,52.0,1 722 | 4,83.0,86,19,,29.3,0.317,34.0,0 723 | 1,114.0,66,36,200.0,38.1,0.289,21.0,0 724 | 1,149.0,68,29,127.0,29.3,0.349,42.0,1 725 | 5,117.0,86,30,105.0,39.1,0.251,42.0,0 726 | 1,111.0,94,0,,32.8,0.265,45.0,0 727 | 4,112.0,78,40,,39.4,0.23600000000000002,38.0,0 728 | 1,116.0,78,29,180.0,36.1,0.496,25.0,0 729 | 0,141.0,84,26,,32.4,0.433,22.0,0 730 | 2,175.0,88,0,,22.9,0.326,22.0,0 731 | 2,92.0,52,0,,30.1,0.141,22.0,0 732 | 3,130.0,78,23,79.0,28.4,0.32299999999999995,34.0,1 733 | 8,120.0,86,0,,28.4,0.259,22.0,1 734 | 2,174.0,88,37,120.0,44.5,0.6459999999999999,24.0,1 735 | 2,106.0,56,27,165.0,29.0,0.426,22.0,0 736 | 2,105.0,75,0,,23.3,0.56,53.0,0 737 | 4,95.0,60,32,,35.4,0.284,28.0,0 738 | 0,126.0,86,27,120.0,27.4,0.515,21.0,0 739 | 8,65.0,72,23,,32.0,0.6,42.0,0 740 | 2,99.0,60,17,160.0,36.6,0.45299999999999996,21.0,0 741 | 1,102.0,74,0,,39.5,0.293,42.0,1 742 | 11,120.0,80,37,150.0,42.3,0.785,48.0,1 743 | 3,102.0,44,20,94.0,30.8,0.4,26.0,0 744 | 1,109.0,58,18,116.0,28.5,0.21899999999999997,22.0,0 745 | 9,140.0,94,0,,32.7,0.7340000000000001,45.0,1 746 | 13,153.0,88,37,140.0,40.6,1.1740000000000002,39.0,0 747 | 12,100.0,84,33,105.0,30.0,0.488,46.0,0 748 | 1,147.0,94,41,,49.3,0.358,27.0,1 749 | 1,81.0,74,41,57.0,46.3,1.0959999999999999,32.0,0 750 | 3,187.0,70,22,200.0,36.4,0.408,36.0,1 751 | 6,162.0,62,0,,24.3,0.17800000000000002,50.0,1 752 | 4,136.0,70,0,,31.2,1.182,22.0,1 753 | 1,121.0,78,39,74.0,39.0,0.261,28.0,0 754 | 3,108.0,62,24,,26.0,0.223,25.0,0 755 | 0,181.0,88,44,510.0,43.3,0.222,26.0,1 756 | 8,154.0,78,32,,32.4,0.44299999999999995,45.0,1 757 | 1,128.0,88,39,110.0,36.5,1.057,37.0,1 758 | 7,137.0,90,41,,32.0,0.391,39.0,0 759 | 0,123.0,72,0,,36.3,0.258,52.0,1 760 | 1,106.0,76,0,,37.5,0.19699999999999998,26.0,0 761 | 6,190.0,92,0,,35.5,0.278,66.0,1 762 | 2,88.0,58,26,16.0,28.4,0.7659999999999999,22.0,0 763 | 9,170.0,74,31,,44.0,0.40299999999999997,43.0,1 764 | 9,89.0,62,0,,22.5,0.142,33.0,0 765 | 10,101.0,76,48,180.0,32.9,0.171,63.0,0 766 | 2,122.0,70,27,,36.8,0.34,27.0,0 767 | 5,121.0,72,23,112.0,26.2,0.245,30.0,0 768 | 1,126.0,60,0,,30.1,0.349,47.0,1 769 | 1,93.0,70,31,,30.4,0.315,23.0,0 770 | -------------------------------------------------------------------------------- /Data/daily-total-female-births.csv: -------------------------------------------------------------------------------- 1 | "Date","Births" 2 | "1959-01-01",35 3 | "1959-01-02",32 4 | "1959-01-03",30 5 | "1959-01-04",31 6 | "1959-01-05",44 7 | "1959-01-06",29 8 | "1959-01-07",45 9 | "1959-01-08",43 10 | "1959-01-09",38 11 | "1959-01-10",27 12 | "1959-01-11",38 13 | "1959-01-12",33 14 | "1959-01-13",55 15 | "1959-01-14",47 16 | "1959-01-15",45 17 | "1959-01-16",37 18 | "1959-01-17",50 19 | "1959-01-18",43 20 | "1959-01-19",41 21 | "1959-01-20",52 22 | "1959-01-21",34 23 | "1959-01-22",53 24 | "1959-01-23",39 25 | "1959-01-24",32 26 | "1959-01-25",37 27 | "1959-01-26",43 28 | "1959-01-27",39 29 | "1959-01-28",35 30 | "1959-01-29",44 31 | 
"1959-01-30",38 32 | "1959-01-31",24 33 | "1959-02-01",23 34 | "1959-02-02",31 35 | "1959-02-03",44 36 | "1959-02-04",38 37 | "1959-02-05",50 38 | "1959-02-06",38 39 | "1959-02-07",51 40 | "1959-02-08",31 41 | "1959-02-09",31 42 | "1959-02-10",51 43 | "1959-02-11",36 44 | "1959-02-12",45 45 | "1959-02-13",51 46 | "1959-02-14",34 47 | "1959-02-15",52 48 | "1959-02-16",47 49 | "1959-02-17",45 50 | "1959-02-18",46 51 | "1959-02-19",39 52 | "1959-02-20",48 53 | "1959-02-21",37 54 | "1959-02-22",35 55 | "1959-02-23",52 56 | "1959-02-24",42 57 | "1959-02-25",45 58 | "1959-02-26",39 59 | "1959-02-27",37 60 | "1959-02-28",30 61 | "1959-03-01",35 62 | "1959-03-02",28 63 | "1959-03-03",45 64 | "1959-03-04",34 65 | "1959-03-05",36 66 | "1959-03-06",50 67 | "1959-03-07",44 68 | "1959-03-08",39 69 | "1959-03-09",32 70 | "1959-03-10",39 71 | "1959-03-11",45 72 | "1959-03-12",43 73 | "1959-03-13",39 74 | "1959-03-14",31 75 | "1959-03-15",27 76 | "1959-03-16",30 77 | "1959-03-17",42 78 | "1959-03-18",46 79 | "1959-03-19",41 80 | "1959-03-20",36 81 | "1959-03-21",45 82 | "1959-03-22",46 83 | "1959-03-23",43 84 | "1959-03-24",38 85 | "1959-03-25",34 86 | "1959-03-26",35 87 | "1959-03-27",56 88 | "1959-03-28",36 89 | "1959-03-29",32 90 | "1959-03-30",50 91 | "1959-03-31",41 92 | "1959-04-01",39 93 | "1959-04-02",41 94 | "1959-04-03",47 95 | "1959-04-04",34 96 | "1959-04-05",36 97 | "1959-04-06",33 98 | "1959-04-07",35 99 | "1959-04-08",38 100 | "1959-04-09",38 101 | "1959-04-10",34 102 | "1959-04-11",53 103 | "1959-04-12",34 104 | "1959-04-13",34 105 | "1959-04-14",38 106 | "1959-04-15",35 107 | "1959-04-16",32 108 | "1959-04-17",42 109 | "1959-04-18",34 110 | "1959-04-19",46 111 | "1959-04-20",30 112 | "1959-04-21",46 113 | "1959-04-22",45 114 | "1959-04-23",54 115 | "1959-04-24",34 116 | "1959-04-25",37 117 | "1959-04-26",35 118 | "1959-04-27",40 119 | "1959-04-28",42 120 | "1959-04-29",58 121 | "1959-04-30",51 122 | "1959-05-01",32 123 | "1959-05-02",35 124 | "1959-05-03",38 125 | "1959-05-04",33 126 | "1959-05-05",39 127 | "1959-05-06",47 128 | "1959-05-07",38 129 | "1959-05-08",52 130 | "1959-05-09",30 131 | "1959-05-10",34 132 | "1959-05-11",40 133 | "1959-05-12",35 134 | "1959-05-13",42 135 | "1959-05-14",41 136 | "1959-05-15",42 137 | "1959-05-16",38 138 | "1959-05-17",24 139 | "1959-05-18",34 140 | "1959-05-19",43 141 | "1959-05-20",36 142 | "1959-05-21",55 143 | "1959-05-22",41 144 | "1959-05-23",45 145 | "1959-05-24",41 146 | "1959-05-25",37 147 | "1959-05-26",43 148 | "1959-05-27",39 149 | "1959-05-28",33 150 | "1959-05-29",43 151 | "1959-05-30",40 152 | "1959-05-31",38 153 | "1959-06-01",45 154 | "1959-06-02",46 155 | "1959-06-03",34 156 | "1959-06-04",35 157 | "1959-06-05",48 158 | "1959-06-06",51 159 | "1959-06-07",36 160 | "1959-06-08",33 161 | "1959-06-09",46 162 | "1959-06-10",42 163 | "1959-06-11",48 164 | "1959-06-12",34 165 | "1959-06-13",41 166 | "1959-06-14",35 167 | "1959-06-15",40 168 | "1959-06-16",34 169 | "1959-06-17",30 170 | "1959-06-18",36 171 | "1959-06-19",40 172 | "1959-06-20",39 173 | "1959-06-21",45 174 | "1959-06-22",38 175 | "1959-06-23",47 176 | "1959-06-24",33 177 | "1959-06-25",30 178 | "1959-06-26",42 179 | "1959-06-27",43 180 | "1959-06-28",41 181 | "1959-06-29",41 182 | "1959-06-30",59 183 | "1959-07-01",43 184 | "1959-07-02",45 185 | "1959-07-03",38 186 | "1959-07-04",37 187 | "1959-07-05",45 188 | "1959-07-06",42 189 | "1959-07-07",57 190 | "1959-07-08",46 191 | "1959-07-09",51 192 | "1959-07-10",41 193 | "1959-07-11",47 194 | "1959-07-12",26 195 | 
"1959-07-13",35 196 | "1959-07-14",44 197 | "1959-07-15",41 198 | "1959-07-16",42 199 | "1959-07-17",36 200 | "1959-07-18",45 201 | "1959-07-19",45 202 | "1959-07-20",45 203 | "1959-07-21",47 204 | "1959-07-22",38 205 | "1959-07-23",42 206 | "1959-07-24",35 207 | "1959-07-25",36 208 | "1959-07-26",39 209 | "1959-07-27",45 210 | "1959-07-28",43 211 | "1959-07-29",47 212 | "1959-07-30",36 213 | "1959-07-31",41 214 | "1959-08-01",50 215 | "1959-08-02",39 216 | "1959-08-03",41 217 | "1959-08-04",46 218 | "1959-08-05",64 219 | "1959-08-06",45 220 | "1959-08-07",34 221 | "1959-08-08",38 222 | "1959-08-09",44 223 | "1959-08-10",48 224 | "1959-08-11",46 225 | "1959-08-12",44 226 | "1959-08-13",37 227 | "1959-08-14",39 228 | "1959-08-15",44 229 | "1959-08-16",45 230 | "1959-08-17",33 231 | "1959-08-18",44 232 | "1959-08-19",38 233 | "1959-08-20",46 234 | "1959-08-21",46 235 | "1959-08-22",40 236 | "1959-08-23",39 237 | "1959-08-24",44 238 | "1959-08-25",48 239 | "1959-08-26",50 240 | "1959-08-27",41 241 | "1959-08-28",42 242 | "1959-08-29",51 243 | "1959-08-30",41 244 | "1959-08-31",44 245 | "1959-09-01",38 246 | "1959-09-02",68 247 | "1959-09-03",40 248 | "1959-09-04",42 249 | "1959-09-05",51 250 | "1959-09-06",44 251 | "1959-09-07",45 252 | "1959-09-08",36 253 | "1959-09-09",57 254 | "1959-09-10",44 255 | "1959-09-11",42 256 | "1959-09-12",53 257 | "1959-09-13",42 258 | "1959-09-14",34 259 | "1959-09-15",40 260 | "1959-09-16",56 261 | "1959-09-17",44 262 | "1959-09-18",53 263 | "1959-09-19",55 264 | "1959-09-20",39 265 | "1959-09-21",59 266 | "1959-09-22",55 267 | "1959-09-23",73 268 | "1959-09-24",55 269 | "1959-09-25",44 270 | "1959-09-26",43 271 | "1959-09-27",40 272 | "1959-09-28",47 273 | "1959-09-29",51 274 | "1959-09-30",56 275 | "1959-10-01",49 276 | "1959-10-02",54 277 | "1959-10-03",56 278 | "1959-10-04",47 279 | "1959-10-05",44 280 | "1959-10-06",43 281 | "1959-10-07",42 282 | "1959-10-08",45 283 | "1959-10-09",50 284 | "1959-10-10",48 285 | "1959-10-11",43 286 | "1959-10-12",40 287 | "1959-10-13",59 288 | "1959-10-14",41 289 | "1959-10-15",42 290 | "1959-10-16",51 291 | "1959-10-17",49 292 | "1959-10-18",45 293 | "1959-10-19",43 294 | "1959-10-20",42 295 | "1959-10-21",38 296 | "1959-10-22",47 297 | "1959-10-23",38 298 | "1959-10-24",36 299 | "1959-10-25",42 300 | "1959-10-26",35 301 | "1959-10-27",28 302 | "1959-10-28",44 303 | "1959-10-29",36 304 | "1959-10-30",45 305 | "1959-10-31",46 306 | "1959-11-01",48 307 | "1959-11-02",49 308 | "1959-11-03",43 309 | "1959-11-04",42 310 | "1959-11-05",59 311 | "1959-11-06",45 312 | "1959-11-07",52 313 | "1959-11-08",46 314 | "1959-11-09",42 315 | "1959-11-10",40 316 | "1959-11-11",40 317 | "1959-11-12",45 318 | "1959-11-13",35 319 | "1959-11-14",35 320 | "1959-11-15",40 321 | "1959-11-16",39 322 | "1959-11-17",33 323 | "1959-11-18",42 324 | "1959-11-19",47 325 | "1959-11-20",51 326 | "1959-11-21",44 327 | "1959-11-22",40 328 | "1959-11-23",57 329 | "1959-11-24",49 330 | "1959-11-25",45 331 | "1959-11-26",49 332 | "1959-11-27",51 333 | "1959-11-28",46 334 | "1959-11-29",44 335 | "1959-11-30",52 336 | "1959-12-01",45 337 | "1959-12-02",32 338 | "1959-12-03",46 339 | "1959-12-04",41 340 | "1959-12-05",34 341 | "1959-12-06",33 342 | "1959-12-07",36 343 | "1959-12-08",49 344 | "1959-12-09",43 345 | "1959-12-10",43 346 | "1959-12-11",34 347 | "1959-12-12",39 348 | "1959-12-13",35 349 | "1959-12-14",52 350 | "1959-12-15",47 351 | "1959-12-16",52 352 | "1959-12-17",39 353 | "1959-12-18",40 354 | "1959-12-19",42 355 | "1959-12-20",42 356 | 
"1959-12-21",53 357 | "1959-12-22",39 358 | "1959-12-23",40 359 | "1959-12-24",38 360 | "1959-12-25",44 361 | "1959-12-26",34 362 | "1959-12-27",37 363 | "1959-12-28",52 364 | "1959-12-29",48 365 | "1959-12-30",55 366 | "1959-12-31",50 -------------------------------------------------------------------------------- /Data/iris_all.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,class 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3.0,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5.0,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5.0,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3.0,1.4,0.1,Iris-setosa 15 | 4.3,3.0,1.1,0.1,Iris-setosa 16 | 5.8,4.0,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1.0,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5.0,3.0,1.6,0.2,Iris-setosa 28 | 5.0,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5.0,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3.0,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5.0,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5.0,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3.0,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5.0,3.3,1.4,0.2,Iris-setosa 52 | 7.0,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 5.5,2.3,4.0,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1.0,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5.0,2.0,3.5,1.0,Iris-versicolor 63 | 5.9,3.0,4.2,1.5,Iris-versicolor 64 | 6.0,2.2,4.0,1.0,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3.0,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1.0,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4.0,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3.0,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3.0,5.0,1.7,Iris-versicolor 80 | 6.0,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1.0,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1.0,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6.0,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3.0,4.5,1.5,Iris-versicolor 87 | 6.0,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 
5.6,3.0,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4.0,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3.0,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4.0,1.2,Iris-versicolor 95 | 5.0,2.3,3.3,1.0,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3.0,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3.0,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6.0,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3.0,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3.0,5.8,2.2,Iris-virginica 107 | 7.6,3.0,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2.0,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3.0,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5.0,2.0,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3.0,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6.0,2.2,5.0,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2.0,Iris-virginica 124 | 7.7,2.8,6.7,2.0,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6.0,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3.0,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3.0,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2.0,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3.0,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6.0,3.0,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3.0,5.2,2.3,Iris-virginica 148 | 6.3,2.5,5.0,1.9,Iris-virginica 149 | 6.5,3.0,5.2,2.0,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3.0,5.1,1.8,Iris-virginica -------------------------------------------------------------------------------- /Notebooks/01-XGBoost_BikeRental_Data_Preparation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "\n", 13 | "from pandas.plotting import register_matplotlib_converters\n", 14 | "register_matplotlib_converters()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "
You are provided hourly rental data spanning two years. For this competition, the training set is comprised of the first 19 days of each month, while the test set is the 20th to the end of the month. You must predict the total count of bikes rented during each hour covered by the test set, using only information available prior to the rental period (Ref: Kaggle.com)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# Example\n", 49 | "# Converts to log1p(count)\n", 50 | "# Print original count back using expm1\n", 51 | "print('Test log and exp')\n", 52 | "test_count = 100\n", 53 | "print('original value', test_count)\n", 54 | "x = np.log1p(test_count) # log (x+1)\n", 55 | "print('log1p', x)\n", 56 | "print('expm1', np.expm1(x)) # exp(x) - 1" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "columns = ['count', 'season', 'holiday', 'workingday', 'weather', 'temp',\n", 66 | " 'atemp', 'humidity', 'windspeed', 'year', 'month', 'day', 'dayofweek','hour']" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "df = pd.read_csv('../Data/bikesharing_train.csv', parse_dates=['datetime'],index_col=0)\n", 76 | "df_test = pd.read_csv('../Data/bikesharing_test.csv', parse_dates=['datetime'],index_col=0)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# We need to convert datetime to numeric for training.\n", 86 | "# Let's extract key features into separate numeric columns\n", 87 | "def add_features(df):\n", 88 | " df['year'] = df.index.year\n", 89 | " df['month'] = df.index.month\n", 90 | " df['day'] = df.index.day\n", 91 | " df['dayofweek'] = df.index.dayofweek\n", 92 | " df['hour'] = df.index.hour" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "add_features(df)\n", 102 | "add_features(df_test)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "plt.plot(df['2011']['count'],label='2011')\n", 112 | "plt.plot(df['2012']['count'],label='2012')\n", 113 | "plt.xticks(fontsize=14, rotation=45)\n", 114 | "plt.xlabel('Date')\n", 115 | "plt.ylabel('Rental Count')\n", 116 | "plt.title('2011 and 2012 Rentals (Year to Year)')\n", 117 | "plt.legend()\n", 118 | "plt.show()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "plt.plot(df['2011']['count'].map(np.log1p),label='2011')\n", 128 | "plt.plot(df['2012']['count'].map(np.log1p),label='2012')\n", 129 | "plt.xticks(fontsize=14, rotation=45)\n", 130 | "plt.xlabel('Date')\n", 131 | "plt.ylabel('Log(Rental Count)')\n", 132 | "plt.title('2011 and 2012 Rentals (Year to Year)')\n", 133 | "plt.legend()\n", 134 | "plt.show()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "plt.boxplot([df['count']], labels=['count'])\n", 144 | "plt.title('Box Plot - Count')\n", 145 | "plt.ylabel('Target')\n", 146 | "plt.grid(True)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": 
{}, 153 | "outputs": [], 154 | "source": [ 155 | "# Let's see how the data distribution changes with log1p\n", 156 | "# Evenly distributed\n", 157 | "plt.boxplot([df['count'].map(np.log1p)], labels=['log1p(count)'])\n", 158 | "plt.title('Box Plot - log1p(Count)')\n", 159 | "plt.ylabel('Target')\n", 160 | "plt.grid(True)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "df[\"count\"] = df[\"count\"].map(np.log1p)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "df.head()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "df_test.head()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": {}, 194 | "outputs": [], 195 | "source": [ 196 | "df.dtypes" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "# Save all data\n", 206 | "df.to_csv('../Data/bike_all.csv',index=True,index_label='datetime',columns=columns)" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Training and Validation Set\n", 214 | "### Target Variable as first column followed by input features\n", 215 | "### Training, Validation files do not have a column header" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "# Training = 70% of the data\n", 225 | "# Validation = 30% of the data\n", 226 | "# Randomize the datset\n", 227 | "np.random.seed(5)\n", 228 | "l = list(df.index)\n", 229 | "np.random.shuffle(l)\n", 230 | "df = df.loc[l]" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "rows = df.shape[0]\n", 240 | "train = int(.7 * rows)\n", 241 | "test = rows-train" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "rows, train, test" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "columns" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "# Write Training Set\n", 269 | "df.iloc[:train].to_csv('../Data/bike_train.csv'\n", 270 | " ,index=False,header=False\n", 271 | " ,columns=columns)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "# Write Validation Set\n", 281 | "df.iloc[train:].to_csv('../Data/bike_validation.csv'\n", 282 | " ,index=False,header=False\n", 283 | " ,columns=columns)" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "# Test Data has only input features\n", 293 | "df_test.to_csv('../Data/bike_test.csv',index=True,index_label='datetime')" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": null, 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "print(','.join(columns))" 303 | ] 
304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "# Write Column List\n", 312 | "with open('../Data/bike_train_column_list.txt','w') as f:\n", 313 | " f.write(','.join(columns))" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": {}, 320 | "outputs": [], 321 | "source": [] 322 | } 323 | ], 324 | "metadata": { 325 | "kernelspec": { 326 | "display_name": "Python 3", 327 | "language": "python", 328 | "name": "python3" 329 | }, 330 | "language_info": { 331 | "codemirror_mode": { 332 | "name": "ipython", 333 | "version": 3 334 | }, 335 | "file_extension": ".py", 336 | "mimetype": "text/x-python", 337 | "name": "python", 338 | "nbconvert_exporter": "python", 339 | "pygments_lexer": "ipython3", 340 | "version": "3.7.6" 341 | } 342 | }, 343 | "nbformat": 4, 344 | "nbformat_minor": 1 345 | } 346 | -------------------------------------------------------------------------------- /Notebooks/02-XGBoost_Regression_BikeRental.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Train a model with bike rental data using XGBoost algorithm\n", 8 | "### Training log1p(count) dataset\n", 9 | "### Model is trained with XGBoost installed in notebook instance\n", 10 | "### In the later examples, we will train using SageMaker's XGBoost algorithm" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "# Install xgboost in notebook instance.\n", 20 | "#### Command to install xgboost\n", 21 | "# !pip install xgboost==0.90" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import sys\n", 31 | "import numpy as np\n", 32 | "import pandas as pd\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "from sklearn.metrics import mean_squared_error, mean_absolute_error\n", 35 | "\n", 36 | "# XGBoost \n", 37 | "import xgboost as xgb\n", 38 | "\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "\n", 41 | "from pandas.plotting import register_matplotlib_converters\n", 42 | "register_matplotlib_converters()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "
You are provided hourly rental data spanning two years. For this competition, the training set is comprised of the first 19 days of each month, while the test set is the 20th to the end of the month. You must predict the total count of bikes rented during each hour covered by the test set, using only information available prior to the rental period (Ref: Kaggle.com)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "column_list_file = '../Data/bike_train_column_list.txt'\n", 77 | "train_file = '../Data/bike_train.csv'\n", 78 | "validation_file = '../Data/bike_validation.csv'\n", 79 | "test_file = '../Data/bike_test.csv'" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "columns = ''\n", 89 | "with open(column_list_file,'r') as f:\n", 90 | " columns = f.read().split(',')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "columns" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "# Specify the column names as the file does not have column header\n", 109 | "df_train = pd.read_csv(train_file,names=columns)\n", 110 | "df_validation = pd.read_csv(validation_file,names=columns)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "df_train.head()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "df_validation.head()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "X_train = df_train.iloc[:,1:] # Features: 1st column onwards \n", 138 | "y_train = df_train.iloc[:,0].ravel() # Target: 0th column\n", 139 | "\n", 140 | "X_validation = df_validation.iloc[:,1:]\n", 141 | "y_validation = df_validation.iloc[:,0].ravel()" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# XGBoost Training Parameter Reference: \n", 151 | "# https://github.com/dmlc/xgboost/blob/master/doc/parameter.md\n", 152 | "#regressor = xgb.XGBRegressor(max_depth=5,eta=0.1,subsample=0.7,num_round=150)\n", 153 | "regressor = xgb.XGBRegressor(max_depth=5,n_estimators=150)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "regressor" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "regressor.fit(X_train,y_train, eval_set = [(X_train, y_train), (X_validation, y_validation)])" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "df_train['count'].describe()" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "metadata": {}, 187 | "outputs": [], 188 | "source": [ 189 | "eval_result = regressor.evals_result()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | 
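The commented-out constructor above uses the native XGBoost parameter names (`eta`, `num_round`), while the scikit-learn wrapper used here expects `learning_rate` and `n_estimators`. A hedged sketch of that intended configuration expressed with the wrapper's names (values copied from the comment, not tuned):

```python
# Sketch: the commented-out settings expressed with sklearn-wrapper parameter names.
import xgboost as xgb

regressor = xgb.XGBRegressor(
    max_depth=5,        # tree depth, as in the notebook
    learning_rate=0.1,  # native name: eta
    subsample=0.7,      # fraction of rows sampled per boosting round
    n_estimators=150,   # native name: num_round (number of boosting rounds)
)
```

The notebook's actual call, `xgb.XGBRegressor(max_depth=5, n_estimators=150)`, simply leaves `learning_rate` and `subsample` at their defaults.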
"source": [ 198 | "training_rounds = range(len(eval_result['validation_0']['rmse']))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "plt.scatter(x=training_rounds,y=eval_result['validation_0']['rmse'],label='Training Error')\n", 208 | "plt.scatter(x=training_rounds,y=eval_result['validation_1']['rmse'],label='Validation Error')\n", 209 | "plt.grid(True)\n", 210 | "plt.xlabel('Iteration')\n", 211 | "plt.ylabel('RMSE')\n", 212 | "plt.title('Training Vs Validation Error')\n", 213 | "plt.legend()\n", 214 | "plt.show()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "xgb.plot_importance(regressor)\n", 224 | "plt.show()" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "# Updated - Changed to validation dataset\n", 234 | "# Compare actual vs predicted performance with dataset not seen by the model before\n", 235 | "df = pd.read_csv(validation_file,names=columns)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "df.head()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "X_test = df.iloc[:,1:]\n", 254 | "print(X_test[:5])" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "result = regressor.predict(X_test)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "result[:5]" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "df.head()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "df['count_predicted'] = result" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": {}, 297 | "outputs": [], 298 | "source": [ 299 | "df.head()" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "# Negative Values are predicted\n", 309 | "df['count_predicted'].describe()" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": null, 315 | "metadata": {}, 316 | "outputs": [], 317 | "source": [ 318 | "df[df['count_predicted'] < 0]" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "def adjust_count(x):\n", 328 | " if x < 0:\n", 329 | " return 0\n", 330 | " else:\n", 331 | " return x" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "df['count_predicted'] = df['count_predicted'].map(adjust_count)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "df[df['count_predicted'] < 0]" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | 
"metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "df['count'] = df['count'].map(np.expm1)\n", 359 | "df['count_predicted'] = df['count_predicted'].map(np.expm1)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "# Actual Vs Predicted\n", 369 | "plt.plot(df['count'], label='Actual')\n", 370 | "plt.plot(df['count_predicted'],label='Predicted')\n", 371 | "plt.xlabel('Sample')\n", 372 | "plt.ylabel('Count')\n", 373 | "plt.xlim([100,150])\n", 374 | "plt.title('Validation Dataset - Predicted Vs. Actual')\n", 375 | "plt.legend()\n", 376 | "plt.show()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "# Over prediction and Under Prediction needs to be balanced\n", 386 | "# Training Data Residuals\n", 387 | "residuals = (df['count'] - df['count_predicted'])\n", 388 | "\n", 389 | "plt.hist(residuals)\n", 390 | "plt.grid(True)\n", 391 | "plt.xlabel('Actual - Predicted')\n", 392 | "plt.ylabel('Count')\n", 393 | "plt.title('Residuals Distribution')\n", 394 | "plt.axvline(color='r')\n", 395 | "plt.show()" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": {}, 402 | "outputs": [], 403 | "source": [ 404 | "value_counts = (residuals > 0).value_counts(sort=False)\n", 405 | "print(' Under Estimation: {0:.2f}'.format(value_counts[True]/len(residuals)))\n", 406 | "print(' Over Estimation: {0:.2f}'.format(value_counts[False]/len(residuals)))" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "import sklearn.metrics as metrics\n", 416 | "print(\"RMSE: {0:.2f}\".format(metrics.mean_squared_error(df['count'],\n", 417 | " df['count_predicted'])**.5))" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": {}, 424 | "outputs": [], 425 | "source": [ 426 | "# Metric Use By Kaggle\n", 427 | "def compute_rmsle(y_true, y_pred):\n", 428 | " if type(y_true) != np.ndarray:\n", 429 | " y_true = np.array(y_true)\n", 430 | " \n", 431 | " if type(y_pred) != np.ndarray:\n", 432 | " y_pred = np.array(y_pred)\n", 433 | " \n", 434 | " return(np.average((np.log1p(y_pred) - np.log1p(y_true))**2)**.5)" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "print(\"RMSLE: {0:.2f}\".format(compute_rmsle(df['count'],df['count_predicted'])))" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "# Prepare Data for Submission to Kaggle\n", 453 | "df_test = pd.read_csv(test_file,parse_dates=['datetime'])" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "df_test.head()" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [ 471 | "X_test = df_test.iloc[:,1:] # Exclude datetime for prediction" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "X_test.head()" 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "execution_count": null, 486 | "metadata": {}, 487 
| "outputs": [], 488 | "source": [ 489 | "result = regressor.predict(X_test)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "result[:5]" 499 | ] 500 | }, 501 | { 502 | "cell_type": "code", 503 | "execution_count": null, 504 | "metadata": {}, 505 | "outputs": [], 506 | "source": [ 507 | "np.expm1(result)" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": null, 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "# Convert result to actual count\n", 517 | "df_test[\"count\"] = np.expm1(result)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": null, 523 | "metadata": {}, 524 | "outputs": [], 525 | "source": [ 526 | "df_test.head()" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": null, 532 | "metadata": {}, 533 | "outputs": [], 534 | "source": [ 535 | "df_test[df_test[\"count\"] < 0]" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": {}, 542 | "outputs": [], 543 | "source": [ 544 | "df_test[['datetime','count']].to_csv('../Data/predicted_count.csv',index=False)" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": null, 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [] 553 | } 554 | ], 555 | "metadata": { 556 | "kernelspec": { 557 | "display_name": "Python 3", 558 | "language": "python", 559 | "name": "python3" 560 | }, 561 | "language_info": { 562 | "codemirror_mode": { 563 | "name": "ipython", 564 | "version": 3 565 | }, 566 | "file_extension": ".py", 567 | "mimetype": "text/x-python", 568 | "name": "python", 569 | "nbconvert_exporter": "python", 570 | "pygments_lexer": "ipython3", 571 | "version": "3.7.6" 572 | } 573 | }, 574 | "nbformat": 4, 575 | "nbformat_minor": 2 576 | } 577 | -------------------------------------------------------------------------------- /Notebooks/03-XGBoost_Binary_Classification_Diabetes_Dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "nbpresent": { 7 | "id": "782a07bf-08de-4030-88e1-6731c4ac956e" 8 | } 9 | }, 10 | "source": [ 11 | "## Diabetes dataset \n", 12 | "### Predict if a person is at risk of developing diabetes\n", 13 | "\n", 14 | "### This Dataset is Freely Available\n", 15 | "\n", 16 | "### Overview:\n", 17 | "The data was collected and made available by the \"National Institute of Diabetes and Digestive and Kidney Diseases\" as part of the Pima Indians Diabetes Database. \n", 18 | "\n", 19 | "`Diabetes.csv` is available [from Kaggle](https://www.kaggle.com/uciml/pima-indians-diabetes-database). 
We have several questions: which measurements are most correlated with a positive diagnosis, and, if we could ask a patient only two questions, which should they be and how would we estimate that patient's risk of diabetes.\n", 20 | "\n", 21 | "++++++++++++++++++++++++++++++++++++\n", 22 | "\n", 23 | "The following features have been provided to help us predict whether a person is diabetic or not:\n", 24 | "* **Pregnancies:** Number of times pregnant\n", 25 | "* **Glucose:** Plasma glucose concentration over 2 hours in an oral glucose tolerance test\n", 26 | "* **BloodPressure:** Diastolic blood pressure (mm Hg)\n", 27 | "* **SkinThickness:** Triceps skin fold thickness (mm)\n", 28 | "* **Insulin:** 2-Hour serum insulin (mu U/ml)\n", 29 | "* **BMI:** Body mass index (weight in kg/(height in m)2)\n", 30 | "* **DiabetesPedigreeFunction:** Diabetes pedigree function (a function which scores likelihood of diabetes based on family history)\n", 31 | "* **Age:** Age (years)\n", 32 | "* **Outcome:** Class variable (0 if non-diabetic, 1 if diabetic)\n", 33 | "\n", 34 | "### Binary Classification problem - XGBoost" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "nbpresent": { 42 | "id": "6c6a8672-d428-410a-82fa-7f587c9ef2ae" 43 | } 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "# Install xgboost in notebook instance.\n", 48 | "#### Command to install xgboost\n", 49 | "#!pip install xgboost==0.90" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "nbpresent": { 57 | "id": "652b58d4-3b75-405f-9f11-24d0cd1f9656" 58 | } 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "import sys\n", 63 | "import numpy as np\n", 64 | "import pandas as pd\n", 65 | "import matplotlib.pyplot as plt\n", 66 | "import itertools\n", 67 | "\n", 68 | "import xgboost as xgb\n", 69 | "from sklearn.metrics import classification_report, confusion_matrix" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "nbpresent": { 77 | "id": "a3946273-d086-4564-b0f1-6adc225191c3" 78 | } 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "data = pd.read_csv(\"../Data/Diabetes.csv\")" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "data.describe()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "data.info()" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "## Only keep rows where none of the columns has a 0 value (except the first and last columns)\n", 110 | "data = data[~(data[data.columns[1:-1]] == 0).any(axis=1)]\n", 111 | "data.reset_index(inplace=True, drop = True)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Dealing with Missing Values" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "# using isnull() function \n", 128 | "# print(data.isnull().any().sum())\n", 129 | "print(data.isnull().sum())\n", 130 | "#data.isnull()" 131 | ] 132 | },
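The filtering cell above drops every row that contains a zero in any of the middle columns, which discards a large share of the data; zeros in Glucose, BloodPressure, SkinThickness or BMI are physiologically implausible and effectively mark missing measurements. An alternative, sketched below under the assumption that `data` is the freshly loaded DataFrame, is to treat those zeros as missing and impute them rather than drop the rows:

```python
# Sketch only: mark implausible zeros as missing, then median-impute, instead of dropping rows.
import numpy as np

zero_means_missing = ['Glucose', 'BloodPressure', 'SkinThickness', 'BMI']  # assumption: 0 here means "not recorded"
data[zero_means_missing] = data[zero_means_missing].replace(0, np.nan)
data[zero_means_missing] = data[zero_means_missing].fillna(data[zero_means_missing].median())
```

This keeps more training rows; the median `fillna` a few cells below would then also cover the values that are already missing in the CSV.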
"data.reset_index(inplace=True, drop = True)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "### Replace missing values in each column with the mean or median of that column\n", 150 | "#data.fillna(data.mean())\n", 151 | "data.fillna(data.median(), inplace=True)\n", 152 | "\n", 153 | "### Drop all rows that contain missing values?\n", 154 | "#data = data.dropna()\n", 155 | "#data.reset_index(inplace=True, drop = True)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Split Data" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# Training = 70% of the data\n", 172 | "# Validation = 30% of the data\n", 173 | "# Randomize the datset\n", 174 | "np.random.seed(5)\n", 175 | "l = list(data.index)\n", 176 | "np.random.shuffle(l)\n", 177 | "data = data.iloc[l]\n", 178 | "data.reset_index(inplace=True, drop = True)\n", 179 | "data" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "rows = data.shape[0]\n", 189 | "train = int(.7 * rows)\n", 190 | "test = rows - train" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "rows, train, test" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "# Training Set\n", 209 | "df_train = data[:train]\n", 210 | "#df_train" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "# validation Set\n", 220 | "df_validation = data[train:]\n", 221 | "#df_validation" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": { 228 | "nbpresent": { 229 | "id": "a195ae30-1962-4427-859b-73a013dc10d6" 230 | } 231 | }, 232 | "outputs": [], 233 | "source": [ 234 | "df_train.head()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "377 * 8" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": { 250 | "nbpresent": { 251 | "id": "e30e8aeb-1ca2-4851-bc2d-1bdee29ab1cf" 252 | } 253 | }, 254 | "outputs": [], 255 | "source": [ 256 | "df_validation.head()" 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": { 263 | "nbpresent": { 264 | "id": "3b240613-803d-4fa9-93cf-53ef68df7b93" 265 | } 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "X_train = df_train.iloc[:,:-1] # Features: all columns excep last\n", 270 | "y_train = df_train.iloc[:,-1].ravel() # Target: last column\n", 271 | "\n", 272 | "X_validation = df_validation.iloc[:,:-1]\n", 273 | "y_validation = df_validation.iloc[:,-1].ravel()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "y_validation.shape" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": { 289 | "nbpresent": { 290 | "id": "9edc89e7-45d3-4350-9eb4-3e0938c3c55e" 291 | } 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "# 
Launch a classifier\n", 296 | "# XGBoost Training Parameter Reference: \n", 297 | "# https://xgboost.readthedocs.io/en/latest/parameter.html\n", 298 | "classifier = xgb.XGBClassifier (objective=\"binary:logistic\")" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "metadata": { 305 | "nbpresent": { 306 | "id": "348296fb-8c9b-4598-ad2e-d1fe8e10f76a" 307 | } 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "classifier" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "nbpresent": { 319 | "id": "9839d7ce-e791-4d93-bc5f-28604ffde022" 320 | } 321 | }, 322 | "outputs": [], 323 | "source": [ 324 | "classifier.fit(X_train,\n", 325 | " y_train, \n", 326 | " eval_set = [(X_train, y_train), (X_validation, y_validation)], \n", 327 | " eval_metric=['logloss'],\n", 328 | " early_stopping_rounds=20)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "metadata": { 335 | "nbpresent": { 336 | "id": "e08f22c1-4346-4e2d-96a2-9974ed5c59ff" 337 | } 338 | }, 339 | "outputs": [], 340 | "source": [ 341 | "eval_result = classifier.evals_result()" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "nbpresent": { 349 | "id": "092776c3-a611-4f40-91e2-664b3b99d05e" 350 | } 351 | }, 352 | "outputs": [], 353 | "source": [ 354 | "training_rounds = range(len(eval_result['validation_0']['logloss']))" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": { 361 | "nbpresent": { 362 | "id": "2e9af3f7-fb85-4c52-83d5-ff9cae457294" 363 | } 364 | }, 365 | "outputs": [], 366 | "source": [ 367 | "print(training_rounds)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "nbpresent": { 375 | "id": "5e71239a-e321-43ba-ac2c-993b57b3be3a" 376 | } 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "plt.scatter(x=training_rounds,y=eval_result['validation_0']['logloss'],label='Training Error')\n", 381 | "plt.scatter(x=training_rounds,y=eval_result['validation_1']['logloss'],label='Validation Error')\n", 382 | "plt.grid(True)\n", 383 | "plt.xlabel('Iteration')\n", 384 | "plt.ylabel('LogLoss')\n", 385 | "plt.title('Training Vs Validation Error')\n", 386 | "plt.legend()\n", 387 | "plt.show()" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "#### Notice:\n", 395 | "* Model is not generalising well, low train error but high validation error\n", 396 | "* Model has high variance!" 
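The notebook flags high variance: training logloss keeps improving while validation logloss stalls. Besides relying on the early-stopped best iteration, a common response is to constrain the trees more aggressively. The snippet below is a sketch of that idea with illustrative, untuned values, reusing the `X_train`/`y_train` and `X_validation`/`y_validation` frames defined above:

```python
# Sketch: a more constrained classifier to reduce variance; hyperparameter values are illustrative, not tuned.
import xgboost as xgb

classifier = xgb.XGBClassifier(
    objective="binary:logistic",
    max_depth=3,           # shallower trees
    min_child_weight=5,    # require more samples per leaf
    subsample=0.8,         # row subsampling per boosting round
    colsample_bytree=0.8,  # feature subsampling per tree
    reg_lambda=2.0,        # stronger L2 regularization
)
classifier.fit(
    X_train, y_train,
    eval_set=[(X_train, y_train), (X_validation, y_validation)],
    eval_metric=["logloss"],
    early_stopping_rounds=20,
)
```

Whether this actually narrows the train/validation gap would need to be checked against the logloss curves plotted above.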
397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "nbpresent": { 404 | "id": "f144f315-6d38-429e-8c17-06c17a446198" 405 | } 406 | }, 407 | "outputs": [], 408 | "source": [ 409 | "xgb.plot_importance(classifier)\n", 410 | "plt.show()" 411 | ] 412 | }, 413 | { 414 | "cell_type": "markdown", 415 | "metadata": { 416 | "nbpresent": { 417 | "id": "3312675d-307c-4eff-b835-34f0e7f57924" 418 | } 419 | }, 420 | "source": [ 421 | "#### Predict the Validation Set" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": null, 427 | "metadata": { 428 | "nbpresent": { 429 | "id": "9b5cb70d-6069-4511-810e-fd17e72667dd" 430 | } 431 | }, 432 | "outputs": [], 433 | "source": [ 434 | "X_test = df_validation.iloc[:,:-1]" 435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": null, 440 | "metadata": { 441 | "nbpresent": { 442 | "id": "f611c852-50e3-4a1a-9134-c1c6e82ad780" 443 | } 444 | }, 445 | "outputs": [], 446 | "source": [ 447 | "result = classifier.predict(X_test)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "result[:5]" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "nbpresent": { 464 | "id": "2c573c2b-4143-4e01-b107-e6b871ce0249" 465 | } 466 | }, 467 | "outputs": [], 468 | "source": [ 469 | "df_validation['predicted_class'] = result" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": { 476 | "nbpresent": { 477 | "id": "5ad0fa04-6896-46b5-bc23-40d61480d7ca" 478 | } 479 | }, 480 | "outputs": [], 481 | "source": [ 482 | "df_validation.head()" 483 | ] 484 | }, 485 | { 486 | "cell_type": "markdown", 487 | "metadata": {}, 488 | "source": [ 489 | "## Binary Classifier Metrics" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "# Reference: https://scikit-learn.org/stable/modules/model_evaluation.html\n", 499 | "# Explicitly stating labels. 
Pass=1, Fail=0\n", 500 | "def true_positive(y_true, y_pred): \n", 501 | " return confusion_matrix(y_true, y_pred,labels=[1,0])[0, 0]\n", 502 | "\n", 503 | "def true_negative(y_true, y_pred): \n", 504 | " return confusion_matrix(y_true,y_pred,labels=[1,0])[1, 1]\n", 505 | "\n", 506 | "def false_positive(y_true, y_pred): \n", 507 | " return confusion_matrix(y_true, y_pred,labels=[1,0])[1, 0]\n", 508 | "\n", 509 | "def false_negative(y_true, y_pred): \n", 510 | " return confusion_matrix(y_true, y_pred,labels=[1,0])[0, 1]" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": null, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "# Compute Binary Classifier Metrics\n", 520 | "# Returns a dictionary {\"MetricName\":Value,...}\n", 521 | "\n", 522 | "def binary_classifier_metrics(y_true, y_pred):\n", 523 | " metrics = {}\n", 524 | "\n", 525 | " # References: \n", 526 | " # https://docs.aws.amazon.com/machine-learning/latest/dg/binary-classification.html\n", 527 | " # https://en.wikipedia.org/wiki/Confusion_matrix\n", 528 | " \n", 529 | " # Definition:\n", 530 | " # true positive = tp = how many samples were correctly classified as positive (count)\n", 531 | " # true negative = tn = how many samples were correctly classified as negative (count)\n", 532 | " # false positive = fp = how many negative samples were mis-classified as positive (count)\n", 533 | " # false_negative = fn = how many positive samples were mis-classified as negative (count)\n", 534 | " \n", 535 | " # positive = number of positive samples (count)\n", 536 | " # = true positive + false negative\n", 537 | " # negative = number of negative samples (count)\n", 538 | " # = true negative + false positive\n", 539 | " \n", 540 | " tp = true_positive(y_true, y_pred)\n", 541 | " tn = true_negative(y_true, y_pred)\n", 542 | " fp = false_positive(y_true, y_pred)\n", 543 | " fn = false_negative(y_true, y_pred)\n", 544 | " \n", 545 | " positive = tp + fn\n", 546 | " negative = tn + fp\n", 547 | " \n", 548 | " metrics['TruePositive'] = tp\n", 549 | " metrics['TrueNegative'] = tn\n", 550 | " metrics['FalsePositive'] = fp\n", 551 | " metrics['FalseNegative'] = fn\n", 552 | " \n", 553 | " metrics['Positive'] = positive\n", 554 | " metrics['Negative'] = negative\n", 555 | " \n", 556 | " # True Positive Rate (TPR, Recall) = true positive/positive\n", 557 | " # How many positives were correctly classified? (fraction)\n", 558 | " # Recall value closer to 1 is better. closer to 0 is worse\n", 559 | " if tp == 0:\n", 560 | " recall = 0\n", 561 | " else:\n", 562 | " recall = tp/positive\n", 563 | " \n", 564 | " metrics['Recall'] = recall\n", 565 | " \n", 566 | " # True Negative Rate = True Negative/negative\n", 567 | " # How many negatives were correctly classified? (fraction)\n", 568 | " # True Negative Rate value closer to 1 is better. closer to 0 is worse\n", 569 | " if tn == 0:\n", 570 | " tnr = 0\n", 571 | " else:\n", 572 | " tnr = tn/(negative)\n", 573 | " metrics['TrueNegativeRate'] = tnr\n", 574 | " \n", 575 | " # Precision = True Positive/(True Positive + False Positive)\n", 576 | " # How many positives classified by the algorithm are really positives? (fraction)\n", 577 | " # Precision value closer to 1 is better. 
closer to 0 is worse\n", 578 | " if tp == 0:\n", 579 | " precision = 0\n", 580 | " else:\n", 581 | " precision = tp/(tp + fp)\n", 582 | " metrics['Precision'] = precision\n", 583 | " \n", 584 | " # Accuracy = (True Positive + True Negative)/(total positive + total negative)\n", 585 | " # How many positives and negatives were correctly classified? (fraction)\n", 586 | " # Accuracy value closer to 1 is better. closer to 0 is worse\n", 587 | " accuracy = (tp + tn)/(positive + negative)\n", 588 | " metrics['Accuracy'] = accuracy\n", 589 | " \n", 590 | " # False Positive Rate (FPR, False Alarm) = False Positive/(total negative)\n", 591 | " # How many negatives were mis-classified as positives (fraction)\n", 592 | " # False Positive Rate value closer to 0 is better. closer to 1 is worse\n", 593 | " if fp == 0:\n", 594 | " fpr = 0\n", 595 | " else:\n", 596 | " fpr = fp/(negative)\n", 597 | " metrics['FalsePositiveRate'] = fpr\n", 598 | " \n", 599 | " # False Negative Rate (FNR, Misses) = False Negative/(total Positive)\n", 600 | " # How many positives were mis-classified as negative (fraction)\n", 601 | " # False Negative Rate value closer to 0 is better. closer to 1 is worse\n", 602 | " fnr = fn/(positive)\n", 603 | " metrics['FalseNegativeRate'] = fnr\n", 604 | " \n", 605 | " # F1 Score = harmonic mean of Precision and Recall\n", 606 | " # F1 Score closer to 1 is better. Closer to 0 is worse.\n", 607 | " if precision == 0 or recall == 0:\n", 608 | " f1 = 0\n", 609 | " else: \n", 610 | " f1 = 2*precision*recall/(precision+recall)\n", 611 | "\n", 612 | " metrics['F1'] = f1\n", 613 | " \n", 614 | " return metrics" 615 | ] 616 | }, 617 | { 618 | "cell_type": "code", 619 | "execution_count": null, 620 | "metadata": {}, 621 | "outputs": [], 622 | "source": [ 623 | "# Reference: \n", 624 | "# https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html\n", 625 | "def plot_confusion_matrix(cm, classes,\n", 626 | " normalize=False,\n", 627 | " title='Confusion matrix',\n", 628 | " cmap=plt.cm.Blues):\n", 629 | " \"\"\"\n", 630 | " This function prints and plots the confusion matrix.\n", 631 | " Normalization can be applied by setting `normalize=True`.\n", 632 | " \"\"\"\n", 633 | " if normalize:\n", 634 | " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 635 | " #print(\"Normalized confusion matrix\")\n", 636 | " #else:\n", 637 | " # print('Confusion matrix, without normalization')\n", 638 | "\n", 639 | " #print(cm)\n", 640 | "\n", 641 | " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", 642 | " plt.title(title)\n", 643 | " plt.colorbar()\n", 644 | " tick_marks = np.arange(len(classes))\n", 645 | " plt.xticks(tick_marks, classes, rotation=45)\n", 646 | " plt.yticks(tick_marks, classes)\n", 647 | "\n", 648 | " fmt = '.2f' if normalize else 'd'\n", 649 | " thresh = cm.max() / 2.\n", 650 | " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", 651 | " plt.text(j, i, format(cm[i, j], fmt),\n", 652 | " horizontalalignment=\"center\",\n", 653 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", 654 | "\n", 655 | " plt.ylabel('True label')\n", 656 | " plt.xlabel('Predicted label')\n", 657 | " plt.tight_layout()" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "# Compute confusion matrix\n", 667 | "cnf_matrix = confusion_matrix(df_validation['Outcome'], df_validation['predicted_class'],labels=[1,0])" 668 | ] 669 | }, 670 | { 671 | 
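The hand-rolled `binary_classifier_metrics` function above derives recall, precision, accuracy and F1 from confusion-matrix counts. As a sanity check (a sketch, assuming the `Outcome` and `predicted_class` columns of `df_validation` created earlier), scikit-learn's own helpers should report the same numbers:

```python
# Sketch: cross-check the hand-computed metrics against scikit-learn's implementations.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_true = df_validation['Outcome']
y_pred = df_validation['predicted_class']

print('Accuracy :', round(accuracy_score(y_true, y_pred), 2))
print('Precision:', round(precision_score(y_true, y_pred), 2))  # positive class = 1 (Diabetic)
print('Recall   :', round(recall_score(y_true, y_pred), 2))
print('F1       :', round(f1_score(y_true, y_pred), 2))
```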
"cell_type": "code", 672 | "execution_count": null, 673 | "metadata": {}, 674 | "outputs": [], 675 | "source": [ 676 | "# Plot confusion matrix\n", 677 | "plt.figure()\n", 678 | "plot_confusion_matrix(cnf_matrix, classes=['Diabetic','Normal'],\n", 679 | " title='Confusion Matrix')" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": null, 685 | "metadata": {}, 686 | "outputs": [], 687 | "source": [ 688 | "# Plot confusion matrix\n", 689 | "plt.figure()\n", 690 | "plot_confusion_matrix(cnf_matrix, classes=['Diabetic','Normal'],\n", 691 | " title='Confusion Matrix - Fraction', normalize=True)" 692 | ] 693 | }, 694 | { 695 | "cell_type": "code", 696 | "execution_count": null, 697 | "metadata": {}, 698 | "outputs": [], 699 | "source": [ 700 | "metrics = [binary_classifier_metrics(df_validation['Outcome'], df_validation['predicted_class'])]\n", 701 | "df_metrics=pd.DataFrame.from_dict(metrics)\n", 702 | "df_metrics.index = ['Model']" 703 | ] 704 | }, 705 | { 706 | "cell_type": "code", 707 | "execution_count": null, 708 | "metadata": {}, 709 | "outputs": [], 710 | "source": [ 711 | "df_metrics" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": null, 717 | "metadata": {}, 718 | "outputs": [], 719 | "source": [ 720 | "print('Counts')\n", 721 | "print(df_metrics[['TruePositive',\n", 722 | " 'FalseNegative',\n", 723 | " 'FalsePositive',\n", 724 | " 'TrueNegative',]].round(2))\n", 725 | "print()\n", 726 | "print('Fractions')\n", 727 | "print(df_metrics[['Recall',\n", 728 | " 'FalseNegativeRate',\n", 729 | " 'FalsePositiveRate',\n", 730 | " 'TrueNegativeRate',]].round(2))\n", 731 | "print()\n", 732 | "\n", 733 | "print(df_metrics[['Precision',\n", 734 | " 'Accuracy',\n", 735 | " 'F1']].round(2))" 736 | ] 737 | }, 738 | { 739 | "cell_type": "code", 740 | "execution_count": null, 741 | "metadata": {}, 742 | "outputs": [], 743 | "source": [ 744 | "print(classification_report(\n", 745 | " df_validation['Outcome'],\n", 746 | " df_validation['predicted_class'],\n", 747 | " labels=[1,0],\n", 748 | " target_names=['Diabetic','Normal']))" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | "#### Model Performance not Good Enough?\n", 756 | "#### Debug your Data before you debug your Model!" 
757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": null, 762 | "metadata": {}, 763 | "outputs": [], 764 | "source": [] 765 | } 766 | ], 767 | "metadata": { 768 | "kernelspec": { 769 | "display_name": "Python 3", 770 | "language": "python", 771 | "name": "python3" 772 | }, 773 | "language_info": { 774 | "codemirror_mode": { 775 | "name": "ipython", 776 | "version": 3 777 | }, 778 | "file_extension": ".py", 779 | "mimetype": "text/x-python", 780 | "name": "python", 781 | "nbconvert_exporter": "python", 782 | "pygments_lexer": "ipython3", 783 | "version": "3.7.6" 784 | } 785 | }, 786 | "nbformat": 4, 787 | "nbformat_minor": 2 788 | } 789 | -------------------------------------------------------------------------------- /Notebooks/04-XGBoost_Course_Prepare_Iris_Dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "from sklearn import preprocessing" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "