├── LICENSE ├── README.md ├── projects ├── boston_housing │ ├── .ipynb_checkpoints │ │ └── boston_housing-checkpoint.ipynb │ ├── README.md │ ├── boston_housing.html │ ├── boston_housing.ipynb │ ├── decision tree regressor.png │ ├── housing.csv │ ├── visuals.py │ └── visuals.pyc ├── creating_customer_segments │ ├── .ipynb_checkpoints │ │ └── customer_segments-checkpoint.ipynb │ ├── README.md │ ├── customer_segments.html │ ├── customer_segments.ipynb │ ├── customers.csv │ ├── debug.log │ ├── pca.png │ ├── renders.py │ └── renders.pyc ├── smartcab │ ├── Project 4 final report.pdf │ ├── Q_learning_new_state_space.png │ ├── README.md │ ├── images │ │ ├── car-black.png │ │ ├── car-blue.png │ │ ├── car-cyan.png │ │ ├── car-green.png │ │ ├── car-magenta.png │ │ ├── car-orange.png │ │ ├── car-red.png │ │ ├── car-white.png │ │ └── car-yellow.png │ ├── smartcab │ │ ├── .ipynb_checkpoints │ │ │ └── Test-checkpoint.ipynb │ │ ├── Q-learning_over50.png │ │ ├── Q-learning_over_100.1.png │ │ ├── Q-learning_over_100.2.png │ │ ├── Q_learning_new_state_space.png │ │ ├── Q_learning_with_epsilon_decay_final_parameter.png │ │ ├── Test.ipynb │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── analysis.py │ │ ├── analysis.pyc │ │ ├── environment.py │ │ ├── environment.pyc │ │ ├── planner.py │ │ ├── planner.pyc │ │ ├── random_trial_50.png │ │ ├── simulator.py │ │ └── simulator.pyc │ └── smartcabpic.jpg ├── student_intervention │ ├── .ipynb_checkpoints │ │ └── student_intervention-checkpoint.ipynb │ ├── README.md │ ├── data2d.png │ ├── data_2d_to_3d_hyperplane.png │ ├── data_in_R3.png │ ├── debug.log │ ├── student-data.csv │ ├── student_intervention.html │ ├── student_intervention.ipynb │ └── table.png └── titanic_survival_exploration │ ├── .ipynb_checkpoints │ └── Titanic_Survival_Exploration-checkpoint.ipynb │ ├── README.md │ ├── Titanic_Survival_Exploration.html │ ├── Titanic_Survival_Exploration.ipynb │ ├── debug.log │ ├── gender.png │ ├── titanic_data.csv │ ├── titanic_visualizations.py │ └── titanic_visualizations.pyc └── verified certificate.png /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Tahsin Mayeesha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Udacity-Machine-Learning-Nanodegree 2 | 3 | ![](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/blob/master/verified%20certificate.png) 4 | 5 | ## About 6 | 7 | This repo contains the coursework from Udacity's Machine Learning Nanodegree, completed from June 2016 to February 2017. 8 | 9 | 10 | 11 | ## Projects 12 | 13 | * P0 - [Exploring Titanic Survival (Optional)](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/titanic_survival_exploration) 14 | * P1 - [Predicting Boston Housing Prices](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/boston_housing) 15 | * P2 - [Building a Student Intervention System for Struggling Students](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/student_intervention) 16 | * P3 - [Creating Customer Segments from Wholesale Distributor Data](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/creating_customer_segments) 17 | * P4 - [Training a Smartcab to drive with Q-Learning](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/smartcab) 18 | 19 | ## Coursework 20 | 21 | * [Intro to Data Science](https://www.udacity.com/course/intro-to-data-science--ud359) 22 | * [Intro to Machine Learning](https://www.udacity.com/course/intro-to-machine-learning--ud120) 23 | * [Georgia Tech: Machine Learning](https://www.udacity.com/course/machine-learning--ud262) 24 | * [Reinforcement Learning](https://www.udacity.com/course/reinforcement-learning--ud600) 25 | 26 | -------------------------------------------------------------------------------- /projects/boston_housing/README.md: -------------------------------------------------------------------------------- 1 | # Project 1: Model Evaluation & Validation 2 | ## Predicting Boston Housing Prices 3 | 4 | This project uses supervised learning techniques to predict the prices of houses in the Boston area from the provided features. It's a classic dataset, available from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Housing) and included in many Python libraries. I used a DecisionTreeRegressor model with varying depth to compare model performance and ultimately compared it against a k-nearest neighbors model after tuning the max_depth of the decision tree and the n_neighbors of the k-nearest neighbors model. The best performing model is a decision tree with a max_depth of 4, which yields a score of about 0.8. The evaluation metric for this project is R^2.
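For reference, a minimal sketch of the tuning step described above might look like the following. This is an illustration rather than the notebook's exact code: the parameter grid, the 10-fold cross-validation, and the GridSearchCV import handling are assumptions.

```python
# Hypothetical sketch (not the project notebook's code): grid-search the
# max_depth of a DecisionTreeRegressor using the R^2 metric on housing.csv.
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer, r2_score
try:
    from sklearn.model_selection import GridSearchCV   # newer scikit-learn releases
except ImportError:
    from sklearn.grid_search import GridSearchCV       # older scikit-learn releases

data = pd.read_csv("housing.csv")
X, y = data.drop("MDEV", axis=1), data["MDEV"]

# Assumed search space and 10-fold cross-validation
params = {"max_depth": list(range(1, 11))}
grid = GridSearchCV(DecisionTreeRegressor(random_state=0), params,
                    scoring=make_scorer(r2_score), cv=10)
grid.fit(X, y)
print(grid.best_params_)   # the report above found max_depth = 4 to perform best
print(grid.best_score_)    # cross-validated R^2 of the best tree
```

The same pattern applies to the k-nearest neighbors comparison, with `n_neighbors` as the tuned parameter.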
5 | 6 | ![](decision tree regressor.png) 7 | 8 | 9 | 10 | ### Install 11 | 12 | This project requires **Python 2.7** and the following Python libraries installed: 13 | 14 | - [NumPy](http://www.numpy.org/) 15 | - [matplotlib](http://matplotlib.org/) 16 | - [scikit-learn](http://scikit-learn.org/stable/) 17 | 18 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 19 | 20 | ### Run 21 | 22 | In a terminal or command window, navigate to the top-level project directory `boston_housing/` (that contains this README) and run one of the following commands: 23 | 24 | ```ipython notebook boston_housing.ipynb``` 25 | ```jupyter notebook boston_housing.ipynb``` 26 | 27 | This will open the iPython Notebook software and project file in your browser. 28 | -------------------------------------------------------------------------------- /projects/boston_housing/decision tree regressor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/boston_housing/decision tree regressor.png -------------------------------------------------------------------------------- /projects/boston_housing/housing.csv: -------------------------------------------------------------------------------- 1 | RM,LSTAT,PTRATIO,MDEV 2 | 6.575,4.98,15.3,504000.0 3 | 6.421,9.14,17.8,453600.0 4 | 7.185,4.03,17.8,728700.0 5 | 6.998,2.94,18.7,701400.0 6 | 7.147,5.33,18.7,760200.0 7 | 6.43,5.21,18.7,602700.0 8 | 6.012,12.43,15.2,480900.0 9 | 6.172,19.15,15.2,569100.0 10 | 5.631,29.93,15.2,346500.0 11 | 6.004,17.1,15.2,396900.0 12 | 6.377,20.45,15.2,315000.0 13 | 6.009,13.27,15.2,396900.0 14 | 5.889,15.71,15.2,455700.0 15 | 5.949,8.26,21.0,428400.0 16 | 6.096,10.26,21.0,382200.0 17 | 5.834,8.47,21.0,417900.0 18 | 5.935,6.58,21.0,485100.0 19 | 5.99,14.67,21.0,367500.0 20 | 5.456,11.69,21.0,424200.0 21 | 5.727,11.28,21.0,382200.0 22 | 5.57,21.02,21.0,285600.0 23 | 5.965,13.83,21.0,411600.0 24 | 6.142,18.72,21.0,319200.0 25 | 5.813,19.88,21.0,304500.0 26 | 5.924,16.3,21.0,327600.0 27 | 5.599,16.51,21.0,291900.0 28 | 5.813,14.81,21.0,348600.0 29 | 6.047,17.28,21.0,310800.0 30 | 6.495,12.8,21.0,386400.0 31 | 6.674,11.98,21.0,441000.0 32 | 5.713,22.6,21.0,266700.0 33 | 6.072,13.04,21.0,304500.0 34 | 5.95,27.71,21.0,277200.0 35 | 5.701,18.35,21.0,275100.0 36 | 6.096,20.34,21.0,283500.0 37 | 5.933,9.68,19.2,396900.0 38 | 5.841,11.41,19.2,420000.0 39 | 5.85,8.77,19.2,441000.0 40 | 5.966,10.13,19.2,518700.0 41 | 6.595,4.32,18.3,646800.0 42 | 7.024,1.98,18.3,732900.0 43 | 6.77,4.84,17.9,558600.0 44 | 6.169,5.81,17.9,531300.0 45 | 6.211,7.44,17.9,518700.0 46 | 6.069,9.55,17.9,445200.0 47 | 5.682,10.21,17.9,405300.0 48 | 5.786,14.15,17.9,420000.0 49 | 6.03,18.8,17.9,348600.0 50 | 5.399,30.81,17.9,302400.0 51 | 5.602,16.2,17.9,407400.0 52 | 5.963,13.45,16.8,413700.0 53 | 6.115,9.43,16.8,430500.0 54 | 6.511,5.28,16.8,525000.0 55 | 5.998,8.43,16.8,491400.0 56 | 5.888,14.8,21.1,396900.0 57 | 7.249,4.81,17.9,743400.0 58 | 6.383,5.77,17.3,518700.0 59 | 6.816,3.95,15.1,663600.0 60 | 6.145,6.86,19.7,489300.0 61 | 5.927,9.22,19.7,411600.0 62 | 5.741,13.15,19.7,392700.0 63 | 5.966,14.44,19.7,336000.0 64 | 6.456,6.73,19.7,466200.0 65 | 6.762,9.5,19.7,525000.0 66 | 7.104,8.05,18.6,693000.0 67 | 6.29,4.67,16.1,493500.0 68 | 5.787,10.24,16.1,407400.0 69 | 5.878,8.1,18.9,462000.0 70 | 5.594,13.09,18.9,365400.0 71 | 
5.885,8.79,18.9,438900.0 72 | 6.417,6.72,19.2,508200.0 73 | 5.961,9.88,19.2,455700.0 74 | 6.065,5.52,19.2,478800.0 75 | 6.245,7.54,19.2,491400.0 76 | 6.273,6.78,18.7,506100.0 77 | 6.286,8.94,18.7,449400.0 78 | 6.279,11.97,18.7,420000.0 79 | 6.14,10.27,18.7,436800.0 80 | 6.232,12.34,18.7,445200.0 81 | 5.874,9.1,18.7,426300.0 82 | 6.727,5.29,19.0,588000.0 83 | 6.619,7.22,19.0,501900.0 84 | 6.302,6.72,19.0,520800.0 85 | 6.167,7.51,19.0,480900.0 86 | 6.389,9.62,18.5,501900.0 87 | 6.63,6.53,18.5,558600.0 88 | 6.015,12.86,18.5,472500.0 89 | 6.121,8.44,18.5,466200.0 90 | 7.007,5.5,17.8,495600.0 91 | 7.079,5.7,17.8,602700.0 92 | 6.417,8.81,17.8,474600.0 93 | 6.405,8.2,17.8,462000.0 94 | 6.442,8.16,18.2,480900.0 95 | 6.211,6.21,18.2,525000.0 96 | 6.249,10.59,18.2,432600.0 97 | 6.625,6.65,18.0,596400.0 98 | 6.163,11.34,18.0,449400.0 99 | 8.069,4.21,18.0,812700.0 100 | 7.82,3.57,18.0,919800.0 101 | 7.416,6.19,18.0,697200.0 102 | 6.727,9.42,20.9,577500.0 103 | 6.781,7.67,20.9,556500.0 104 | 6.405,10.63,20.9,390600.0 105 | 6.137,13.44,20.9,405300.0 106 | 6.167,12.33,20.9,422100.0 107 | 5.851,16.47,20.9,409500.0 108 | 5.836,18.66,20.9,409500.0 109 | 6.127,14.09,20.9,428400.0 110 | 6.474,12.27,20.9,415800.0 111 | 6.229,15.55,20.9,407400.0 112 | 6.195,13.0,20.9,455700.0 113 | 6.715,10.16,17.8,478800.0 114 | 5.913,16.21,17.8,394800.0 115 | 6.092,17.09,17.8,392700.0 116 | 6.254,10.45,17.8,388500.0 117 | 5.928,15.76,17.8,384300.0 118 | 6.176,12.04,17.8,445200.0 119 | 6.021,10.3,17.8,403200.0 120 | 5.872,15.37,17.8,428400.0 121 | 5.731,13.61,17.8,405300.0 122 | 5.87,14.37,19.1,462000.0 123 | 6.004,14.27,19.1,426300.0 124 | 5.961,17.93,19.1,430500.0 125 | 5.856,25.41,19.1,363300.0 126 | 5.879,17.58,19.1,394800.0 127 | 5.986,14.81,19.1,449400.0 128 | 5.613,27.26,19.1,329700.0 129 | 5.693,17.19,21.2,340200.0 130 | 6.431,15.39,21.2,378000.0 131 | 5.637,18.34,21.2,300300.0 132 | 6.458,12.6,21.2,403200.0 133 | 6.326,12.26,21.2,411600.0 134 | 6.372,11.12,21.2,483000.0 135 | 5.822,15.03,21.2,386400.0 136 | 5.757,17.31,21.2,327600.0 137 | 6.335,16.96,21.2,380100.0 138 | 5.942,16.9,21.2,365400.0 139 | 6.454,14.59,21.2,359100.0 140 | 5.857,21.32,21.2,279300.0 141 | 6.151,18.46,21.2,373800.0 142 | 6.174,24.16,21.2,294000.0 143 | 5.019,34.41,21.2,302400.0 144 | 5.403,26.82,14.7,281400.0 145 | 5.468,26.42,14.7,327600.0 146 | 4.903,29.29,14.7,247800.0 147 | 6.13,27.8,14.7,289800.0 148 | 5.628,16.65,14.7,327600.0 149 | 4.926,29.53,14.7,306600.0 150 | 5.186,28.32,14.7,373800.0 151 | 5.597,21.45,14.7,323400.0 152 | 6.122,14.1,14.7,451500.0 153 | 5.404,13.28,14.7,411600.0 154 | 5.012,12.12,14.7,321300.0 155 | 5.709,15.79,14.7,407400.0 156 | 6.129,15.12,14.7,357000.0 157 | 6.152,15.02,14.7,327600.0 158 | 5.272,16.14,14.7,275100.0 159 | 6.943,4.59,14.7,867300.0 160 | 6.066,6.43,14.7,510300.0 161 | 6.51,7.39,14.7,489300.0 162 | 6.25,5.5,14.7,567000.0 163 | 5.854,11.64,14.7,476700.0 164 | 6.101,9.81,14.7,525000.0 165 | 5.877,12.14,14.7,499800.0 166 | 6.319,11.1,14.7,499800.0 167 | 6.402,11.32,14.7,468300.0 168 | 5.875,14.43,14.7,365400.0 169 | 5.88,12.03,14.7,401100.0 170 | 5.572,14.69,16.6,485100.0 171 | 6.416,9.04,16.6,495600.0 172 | 5.859,9.64,16.6,474600.0 173 | 6.546,5.33,16.6,617400.0 174 | 6.02,10.11,16.6,487200.0 175 | 6.315,6.29,16.6,516600.0 176 | 6.86,6.92,16.6,627900.0 177 | 6.98,5.04,17.8,781200.0 178 | 7.765,7.56,17.8,835800.0 179 | 6.144,9.45,17.8,760200.0 180 | 7.155,4.82,17.8,795900.0 181 | 6.563,5.68,17.8,682500.0 182 | 5.604,13.98,17.8,554400.0 183 | 6.153,13.15,17.8,621600.0 184 | 6.782,6.68,15.2,672000.0 185 | 
6.556,4.56,15.2,625800.0 186 | 7.185,5.39,15.2,732900.0 187 | 6.951,5.1,15.2,777000.0 188 | 6.739,4.69,15.2,640500.0 189 | 7.178,2.87,15.2,764400.0 190 | 6.8,5.03,15.6,653100.0 191 | 6.604,4.38,15.6,611100.0 192 | 7.287,4.08,12.6,699300.0 193 | 7.107,8.61,12.6,636300.0 194 | 7.274,6.62,12.6,726600.0 195 | 6.975,4.56,17.0,732900.0 196 | 7.135,4.45,17.0,690900.0 197 | 6.162,7.43,14.7,506100.0 198 | 7.61,3.11,14.7,888300.0 199 | 7.853,3.81,14.7,1018500.0 200 | 5.891,10.87,18.6,474600.0 201 | 6.326,10.97,18.6,512400.0 202 | 5.783,18.06,18.6,472500.0 203 | 6.064,14.66,18.6,512400.0 204 | 5.344,23.09,18.6,420000.0 205 | 5.96,17.27,18.6,455700.0 206 | 5.404,23.98,18.6,405300.0 207 | 5.807,16.03,18.6,470400.0 208 | 6.375,9.38,18.6,590100.0 209 | 5.412,29.55,18.6,497700.0 210 | 6.182,9.47,18.6,525000.0 211 | 5.888,13.51,16.4,489300.0 212 | 6.642,9.69,16.4,602700.0 213 | 5.951,17.92,16.4,451500.0 214 | 6.373,10.5,16.4,483000.0 215 | 6.951,9.71,17.4,560700.0 216 | 6.164,21.46,17.4,455700.0 217 | 6.879,9.93,17.4,577500.0 218 | 6.618,7.6,17.4,632100.0 219 | 8.266,4.14,17.4,940800.0 220 | 8.04,3.13,17.4,789600.0 221 | 7.163,6.36,17.4,663600.0 222 | 7.686,3.92,17.4,980700.0 223 | 6.552,3.76,17.4,661500.0 224 | 5.981,11.65,17.4,510300.0 225 | 7.412,5.25,17.4,665700.0 226 | 8.337,2.47,17.4,875700.0 227 | 8.247,3.95,17.4,1014300.0 228 | 6.726,8.05,17.4,609000.0 229 | 6.086,10.88,17.4,504000.0 230 | 6.631,9.54,17.4,527100.0 231 | 7.358,4.73,17.4,661500.0 232 | 6.481,6.36,16.6,497700.0 233 | 6.606,7.37,16.6,489300.0 234 | 6.897,11.38,16.6,462000.0 235 | 6.095,12.4,16.6,422100.0 236 | 6.358,11.22,16.6,466200.0 237 | 6.393,5.19,16.6,497700.0 238 | 5.593,12.5,19.1,369600.0 239 | 5.605,18.46,19.1,388500.0 240 | 6.108,9.16,19.1,510300.0 241 | 6.226,10.15,19.1,430500.0 242 | 6.433,9.52,19.1,514500.0 243 | 6.718,6.56,19.1,550200.0 244 | 6.487,5.9,19.1,512400.0 245 | 6.438,3.59,19.1,520800.0 246 | 6.957,3.53,19.1,621600.0 247 | 8.259,3.54,19.1,898800.0 248 | 6.108,6.57,16.4,459900.0 249 | 5.876,9.25,16.4,438900.0 250 | 7.454,3.11,15.9,924000.0 251 | 7.333,7.79,13.0,756000.0 252 | 6.842,6.9,13.0,632100.0 253 | 7.203,9.59,13.0,709800.0 254 | 7.52,7.26,13.0,905100.0 255 | 8.398,5.91,13.0,1024800.0 256 | 7.327,11.25,13.0,651000.0 257 | 7.206,8.1,13.0,766500.0 258 | 5.56,10.45,13.0,478800.0 259 | 7.014,14.79,13.0,644700.0 260 | 7.47,3.16,13.0,913500.0 261 | 5.92,13.65,18.6,434700.0 262 | 5.856,13.0,18.6,443100.0 263 | 6.24,6.59,18.6,529200.0 264 | 6.538,7.73,18.6,512400.0 265 | 7.691,6.58,18.6,739200.0 266 | 6.758,3.53,17.6,680400.0 267 | 6.854,2.98,17.6,672000.0 268 | 7.267,6.05,17.6,697200.0 269 | 6.826,4.16,17.6,695100.0 270 | 6.482,7.19,17.6,611100.0 271 | 6.812,4.85,14.9,737100.0 272 | 7.82,3.76,14.9,953400.0 273 | 6.968,4.59,14.9,743400.0 274 | 7.645,3.01,14.9,966000.0 275 | 7.088,7.85,15.3,676200.0 276 | 6.453,8.23,15.3,462000.0 277 | 6.23,12.93,18.2,422100.0 278 | 6.209,7.14,16.6,487200.0 279 | 6.315,7.6,16.6,468300.0 280 | 6.565,9.51,16.6,520800.0 281 | 6.861,3.33,19.2,598500.0 282 | 7.148,3.56,19.2,783300.0 283 | 6.63,4.7,19.2,585900.0 284 | 6.127,8.58,16.0,501900.0 285 | 6.009,10.4,16.0,455700.0 286 | 6.678,6.27,16.0,600600.0 287 | 6.549,7.39,16.0,569100.0 288 | 5.79,15.84,16.0,426300.0 289 | 6.345,4.97,14.8,472500.0 290 | 7.041,4.74,14.8,609000.0 291 | 6.871,6.07,14.8,520800.0 292 | 6.59,9.5,16.1,462000.0 293 | 6.495,8.67,16.1,554400.0 294 | 6.982,4.86,16.1,695100.0 295 | 7.236,6.93,18.4,758100.0 296 | 6.616,8.93,18.4,596400.0 297 | 7.42,6.47,18.4,701400.0 298 | 6.849,7.53,18.4,592200.0 299 | 
6.635,4.54,18.4,478800.0 300 | 5.972,9.97,18.4,426300.0 301 | 4.973,12.64,18.4,338100.0 302 | 6.122,5.98,18.4,464100.0 303 | 6.023,11.72,18.4,407400.0 304 | 6.266,7.9,18.4,453600.0 305 | 6.567,9.28,18.4,499800.0 306 | 5.705,11.5,18.4,340200.0 307 | 5.914,18.33,18.4,373800.0 308 | 5.782,15.94,18.4,415800.0 309 | 6.382,10.36,18.4,485100.0 310 | 6.113,12.73,18.4,441000.0 311 | 6.426,7.2,19.6,499800.0 312 | 6.376,6.87,19.6,485100.0 313 | 6.041,7.7,19.6,428400.0 314 | 5.708,11.74,19.6,388500.0 315 | 6.415,6.12,19.6,525000.0 316 | 6.431,5.08,19.6,516600.0 317 | 6.312,6.15,19.6,483000.0 318 | 6.083,12.79,19.6,466200.0 319 | 5.868,9.97,16.9,405300.0 320 | 6.333,7.34,16.9,474600.0 321 | 6.144,9.09,16.9,415800.0 322 | 5.706,12.43,16.9,359100.0 323 | 6.031,7.83,16.9,407400.0 324 | 6.316,5.68,20.2,466200.0 325 | 6.31,6.75,20.2,434700.0 326 | 6.037,8.01,20.2,443100.0 327 | 5.869,9.8,20.2,409500.0 328 | 5.895,10.56,20.2,388500.0 329 | 6.059,8.51,20.2,432600.0 330 | 5.985,9.74,20.2,399000.0 331 | 5.968,9.29,20.2,392700.0 332 | 7.241,5.49,15.5,686700.0 333 | 6.54,8.65,15.9,346500.0 334 | 6.696,7.18,17.6,501900.0 335 | 6.874,4.61,17.6,655200.0 336 | 6.014,10.53,18.8,367500.0 337 | 5.898,12.67,18.8,361200.0 338 | 6.516,6.36,17.9,485100.0 339 | 6.635,5.99,17.0,514500.0 340 | 6.939,5.89,19.7,558600.0 341 | 6.49,5.98,19.7,480900.0 342 | 6.579,5.49,18.3,506100.0 343 | 5.884,7.79,18.3,390600.0 344 | 6.728,4.5,17.0,632100.0 345 | 5.663,8.05,22.0,382200.0 346 | 5.936,5.57,22.0,432600.0 347 | 6.212,17.6,20.2,373800.0 348 | 6.395,13.27,20.2,455700.0 349 | 6.127,11.48,20.2,476700.0 350 | 6.112,12.67,20.2,474600.0 351 | 6.398,7.79,20.2,525000.0 352 | 6.251,14.19,20.2,417900.0 353 | 5.362,10.19,20.2,436800.0 354 | 5.803,14.64,20.2,352800.0 355 | 3.561,7.12,20.2,577500.0 356 | 4.963,14.0,20.2,459900.0 357 | 3.863,13.33,20.2,485100.0 358 | 4.906,34.77,20.2,289800.0 359 | 4.138,37.97,20.2,289800.0 360 | 7.313,13.44,20.2,315000.0 361 | 6.649,23.24,20.2,291900.0 362 | 6.794,21.24,20.2,279300.0 363 | 6.38,23.69,20.2,275100.0 364 | 6.223,21.78,20.2,214200.0 365 | 6.968,17.21,20.2,218400.0 366 | 6.545,21.08,20.2,228900.0 367 | 5.536,23.6,20.2,237300.0 368 | 5.52,24.56,20.2,258300.0 369 | 4.368,30.63,20.2,184800.0 370 | 5.277,30.81,20.2,151200.0 371 | 4.652,28.28,20.2,220500.0 372 | 5.0,31.99,20.2,155400.0 373 | 4.88,30.62,20.2,214200.0 374 | 5.39,20.85,20.2,241500.0 375 | 5.713,17.11,20.2,317100.0 376 | 6.051,18.76,20.2,487200.0 377 | 5.036,25.68,20.2,203700.0 378 | 6.193,15.17,20.2,289800.0 379 | 5.887,16.35,20.2,266700.0 380 | 6.471,17.12,20.2,275100.0 381 | 6.405,19.37,20.2,262500.0 382 | 5.747,19.92,20.2,178500.0 383 | 5.453,30.59,20.2,105000.0 384 | 5.852,29.97,20.2,132300.0 385 | 5.987,26.77,20.2,117600.0 386 | 6.343,20.32,20.2,151200.0 387 | 6.404,20.31,20.2,254100.0 388 | 5.349,19.77,20.2,174300.0 389 | 5.531,27.38,20.2,178500.0 390 | 5.683,22.98,20.2,105000.0 391 | 4.138,23.34,20.2,249900.0 392 | 5.608,12.13,20.2,585900.0 393 | 5.617,26.4,20.2,361200.0 394 | 6.852,19.78,20.2,577500.0 395 | 5.757,10.11,20.2,315000.0 396 | 6.657,21.22,20.2,361200.0 397 | 4.628,34.37,20.2,375900.0 398 | 5.155,20.08,20.2,342300.0 399 | 4.519,36.98,20.2,147000.0 400 | 6.434,29.05,20.2,151200.0 401 | 6.782,25.79,20.2,157500.0 402 | 5.304,26.64,20.2,218400.0 403 | 5.957,20.62,20.2,184800.0 404 | 6.824,22.74,20.2,176400.0 405 | 6.411,15.02,20.2,350700.0 406 | 6.006,15.7,20.2,298200.0 407 | 5.648,14.1,20.2,436800.0 408 | 6.103,23.29,20.2,281400.0 409 | 5.565,17.16,20.2,245700.0 410 | 5.896,24.39,20.2,174300.0 411 | 5.837,15.69,20.2,214200.0 
412 | 6.202,14.52,20.2,228900.0 413 | 6.193,21.52,20.2,231000.0 414 | 6.38,24.08,20.2,199500.0 415 | 6.348,17.64,20.2,304500.0 416 | 6.833,19.69,20.2,296100.0 417 | 6.425,12.03,20.2,338100.0 418 | 6.436,16.22,20.2,300300.0 419 | 6.208,15.17,20.2,245700.0 420 | 6.629,23.27,20.2,281400.0 421 | 6.461,18.05,20.2,201600.0 422 | 6.152,26.45,20.2,182700.0 423 | 5.935,34.02,20.2,176400.0 424 | 5.627,22.88,20.2,268800.0 425 | 5.818,22.11,20.2,220500.0 426 | 6.406,19.52,20.2,359100.0 427 | 6.219,16.59,20.2,386400.0 428 | 6.485,18.85,20.2,323400.0 429 | 5.854,23.79,20.2,226800.0 430 | 6.459,23.98,20.2,247800.0 431 | 6.341,17.79,20.2,312900.0 432 | 6.251,16.44,20.2,264600.0 433 | 6.185,18.13,20.2,296100.0 434 | 6.417,19.31,20.2,273000.0 435 | 6.749,17.44,20.2,281400.0 436 | 6.655,17.73,20.2,319200.0 437 | 6.297,17.27,20.2,338100.0 438 | 7.393,16.74,20.2,373800.0 439 | 6.728,18.71,20.2,312900.0 440 | 6.525,18.13,20.2,296100.0 441 | 5.976,19.01,20.2,266700.0 442 | 5.936,16.94,20.2,283500.0 443 | 6.301,16.23,20.2,312900.0 444 | 6.081,14.7,20.2,420000.0 445 | 6.701,16.42,20.2,344400.0 446 | 6.376,14.65,20.2,371700.0 447 | 6.317,13.99,20.2,409500.0 448 | 6.513,10.29,20.2,424200.0 449 | 6.209,13.22,20.2,449400.0 450 | 5.759,14.13,20.2,417900.0 451 | 5.952,17.15,20.2,399000.0 452 | 6.003,21.32,20.2,401100.0 453 | 5.926,18.13,20.2,401100.0 454 | 5.713,14.76,20.2,422100.0 455 | 6.167,16.29,20.2,417900.0 456 | 6.229,12.87,20.2,411600.0 457 | 6.437,14.36,20.2,487200.0 458 | 6.98,11.66,20.2,625800.0 459 | 5.427,18.14,20.2,289800.0 460 | 6.162,24.1,20.2,279300.0 461 | 6.484,18.68,20.2,350700.0 462 | 5.304,24.91,20.2,252000.0 463 | 6.185,18.03,20.2,306600.0 464 | 6.229,13.11,20.2,449400.0 465 | 6.242,10.74,20.2,483000.0 466 | 6.75,7.74,20.2,497700.0 467 | 7.061,7.01,20.2,525000.0 468 | 5.762,10.42,20.2,457800.0 469 | 5.871,13.34,20.2,432600.0 470 | 6.312,10.58,20.2,445200.0 471 | 6.114,14.98,20.2,401100.0 472 | 5.905,11.45,20.2,432600.0 473 | 5.454,18.06,20.1,319200.0 474 | 5.414,23.97,20.1,147000.0 475 | 5.093,29.68,20.1,170100.0 476 | 5.983,18.07,20.1,285600.0 477 | 5.983,13.35,20.1,422100.0 478 | 5.707,12.01,19.2,457800.0 479 | 5.926,13.59,19.2,514500.0 480 | 5.67,17.6,19.2,485100.0 481 | 5.39,21.14,19.2,413700.0 482 | 5.794,14.1,19.2,384300.0 483 | 6.019,12.92,19.2,445200.0 484 | 5.569,15.1,19.2,367500.0 485 | 6.027,14.33,19.2,352800.0 486 | 6.593,9.67,21.0,470400.0 487 | 6.12,9.08,21.0,432600.0 488 | 6.976,5.64,21.0,501900.0 489 | 6.794,6.48,21.0,462000.0 490 | 6.03,7.88,21.0,249900.0 491 | -------------------------------------------------------------------------------- /projects/boston_housing/visuals.py: -------------------------------------------------------------------------------- 1 | ########################################### 2 | # Suppress matplotlib user warnings 3 | # Necessary for newer version of matplotlib 4 | import warnings 5 | warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib") 6 | ########################################### 7 | 8 | import matplotlib.pyplot as pl 9 | import numpy as np 10 | import sklearn.learning_curve as curves 11 | from sklearn.tree import DecisionTreeRegressor 12 | from sklearn.cross_validation import ShuffleSplit, train_test_split 13 | 14 | def ModelLearning(X, y): 15 | """ Calculates the performance of several models with varying sizes of training data. 16 | The learning and testing scores for each model are then plotted. 
""" 17 | 18 | # Create 10 cross-validation sets for training and testing 19 | cv = ShuffleSplit(X.shape[0], n_iter = 10, test_size = 0.2, random_state = 0) 20 | 21 | # Generate the training set sizes increasing by 50 22 | train_sizes = np.rint(np.linspace(1, X.shape[0]*0.8 - 1, 9)).astype(int) 23 | 24 | # Create the figure window 25 | fig = pl.figure(figsize=(10,7)) 26 | 27 | # Create three different models based on max_depth 28 | for k, depth in enumerate([1,3,6,10]): 29 | 30 | # Create a Decision tree regressor at max_depth = depth 31 | regressor = DecisionTreeRegressor(max_depth = depth) 32 | 33 | # Calculate the training and testing scores 34 | sizes, train_scores, test_scores = curves.learning_curve(regressor, X, y, \ 35 | cv = cv, train_sizes = train_sizes, scoring = 'r2') 36 | 37 | # Find the mean and standard deviation for smoothing 38 | train_std = np.std(train_scores, axis = 1) 39 | train_mean = np.mean(train_scores, axis = 1) 40 | test_std = np.std(test_scores, axis = 1) 41 | test_mean = np.mean(test_scores, axis = 1) 42 | 43 | # Subplot the learning curve 44 | ax = fig.add_subplot(2, 2, k+1) 45 | ax.plot(sizes, train_mean, 'o-', color = 'r', label = 'Training Score') 46 | ax.plot(sizes, test_mean, 'o-', color = 'g', label = 'Testing Score') 47 | ax.fill_between(sizes, train_mean - train_std, \ 48 | train_mean + train_std, alpha = 0.15, color = 'r') 49 | ax.fill_between(sizes, test_mean - test_std, \ 50 | test_mean + test_std, alpha = 0.15, color = 'g') 51 | 52 | # Labels 53 | ax.set_title('max_depth = %s'%(depth)) 54 | ax.set_xlabel('Number of Training Points') 55 | ax.set_ylabel('Score') 56 | ax.set_xlim([0, X.shape[0]*0.8]) 57 | ax.set_ylim([-0.05, 1.05]) 58 | 59 | # Visual aesthetics 60 | ax.legend(bbox_to_anchor=(1.05, 2.05), loc='lower left', borderaxespad = 0.) 61 | fig.suptitle('Decision Tree Regressor Learning Performances', fontsize = 16, y = 1.03) 62 | fig.tight_layout() 63 | fig.show() 64 | 65 | 66 | def ModelComplexity(X, y): 67 | """ Calculates the performance of the model as model complexity increases. 68 | The learning and testing errors rates are then plotted. 
""" 69 | 70 | # Create 10 cross-validation sets for training and testing 71 | cv = ShuffleSplit(X.shape[0], n_iter = 10, test_size = 0.2, random_state = 0) 72 | 73 | # Vary the max_depth parameter from 1 to 10 74 | max_depth = np.arange(1,11) 75 | 76 | # Calculate the training and testing scores 77 | train_scores, test_scores = curves.validation_curve(DecisionTreeRegressor(), X, y, \ 78 | param_name = "max_depth", param_range = max_depth, cv = cv, scoring = 'r2') 79 | 80 | # Find the mean and standard deviation for smoothing 81 | train_mean = np.mean(train_scores, axis=1) 82 | train_std = np.std(train_scores, axis=1) 83 | test_mean = np.mean(test_scores, axis=1) 84 | test_std = np.std(test_scores, axis=1) 85 | 86 | # Plot the validation curve 87 | pl.figure(figsize=(7, 5)) 88 | pl.title('Decision Tree Regressor Complexity Performance') 89 | pl.plot(max_depth, train_mean, 'o-', color = 'r', label = 'Training Score') 90 | pl.plot(max_depth, test_mean, 'o-', color = 'g', label = 'Validation Score') 91 | pl.fill_between(max_depth, train_mean - train_std, \ 92 | train_mean + train_std, alpha = 0.15, color = 'r') 93 | pl.fill_between(max_depth, test_mean - test_std, \ 94 | test_mean + test_std, alpha = 0.15, color = 'g') 95 | 96 | # Visual aesthetics 97 | pl.legend(loc = 'lower right') 98 | pl.xlabel('Maximum Depth') 99 | pl.ylabel('Score') 100 | pl.ylim([-0.05,1.05]) 101 | pl.show() 102 | 103 | 104 | def PredictTrials(X, y, fitter, data): 105 | """ Performs trials of fitting and predicting data. """ 106 | 107 | # Store the predicted prices 108 | prices = [] 109 | 110 | for k in range(10): 111 | # Split the data 112 | X_train, X_test, y_train, y_test = train_test_split(X, y, \ 113 | test_size = 0.2, random_state = k) 114 | 115 | # Fit the data 116 | reg = fitter(X_train, y_train) 117 | 118 | # Make a prediction 119 | pred = reg.predict([data[0]])[0] 120 | prices.append(pred) 121 | 122 | # Result 123 | print "Trial {}: ${:,.2f}".format(k+1, pred) 124 | 125 | # Display price range 126 | print "\nRange in prices: ${:,.2f}".format(max(prices) - min(prices)) -------------------------------------------------------------------------------- /projects/boston_housing/visuals.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/boston_housing/visuals.pyc -------------------------------------------------------------------------------- /projects/creating_customer_segments/README.md: -------------------------------------------------------------------------------- 1 | # Project 3: Unsupervised Learning 2 | ## Creating Customer Segments 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** and the following Python libraries installed: 7 | 8 | - [NumPy](http://www.numpy.org/) 9 | - [Pandas](http://pandas.pydata.org) 10 | - [matplotlib](http://matplotlib.org/) 11 | - [scikit-learn](http://scikit-learn.org/stable/) 12 | 13 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 14 | 15 | ### Overview 16 | 17 | This project uses unsupervised machine learning techniques to segment the customers by applying PCA for dimensionality reduction and using the PCA components to cluster the customers into different segments. 18 | 19 | ![](pca.png) 20 | 21 | 22 | 23 | ### Code 24 | 25 | Template code is provided in the notebook `customer_segments.ipynb` notebook file. 
Additional supporting code can be found in `renders.py`. While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. 26 | 27 | ### Run 28 | 29 | In a terminal or command window, navigate to the top-level project directory `creating_customer_segments/` (that contains this README) and run one of the following commands: 30 | 31 | ```ipython notebook customer_segments.ipynb``` 32 | ```jupyter notebook customer_segments.ipynb``` 33 | 34 | This will open the iPython Notebook software and project file in your browser. 35 | 36 | ## Data 37 | 38 | The dataset used in this project is included as `customers.csv`. You can find more information on this dataset on the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Wholesale+customers) page. 39 | -------------------------------------------------------------------------------- /projects/creating_customer_segments/customers.csv: -------------------------------------------------------------------------------- 1 | Channel,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicatessen 2 | 2,3,12669,9656,7561,214,2674,1338 3 | 2,3,7057,9810,9568,1762,3293,1776 4 | 2,3,6353,8808,7684,2405,3516,7844 5 | 1,3,13265,1196,4221,6404,507,1788 6 | 2,3,22615,5410,7198,3915,1777,5185 7 | 2,3,9413,8259,5126,666,1795,1451 8 | 2,3,12126,3199,6975,480,3140,545 9 | 2,3,7579,4956,9426,1669,3321,2566 10 | 1,3,5963,3648,6192,425,1716,750 11 | 2,3,6006,11093,18881,1159,7425,2098 12 | 2,3,3366,5403,12974,4400,5977,1744 13 | 2,3,13146,1124,4523,1420,549,497 14 | 2,3,31714,12319,11757,287,3881,2931 15 | 2,3,21217,6208,14982,3095,6707,602 16 | 2,3,24653,9465,12091,294,5058,2168 17 | 1,3,10253,1114,3821,397,964,412 18 | 2,3,1020,8816,12121,134,4508,1080 19 | 1,3,5876,6157,2933,839,370,4478 20 | 2,3,18601,6327,10099,2205,2767,3181 21 | 1,3,7780,2495,9464,669,2518,501 22 | 2,3,17546,4519,4602,1066,2259,2124 23 | 1,3,5567,871,2010,3383,375,569 24 | 1,3,31276,1917,4469,9408,2381,4334 25 | 2,3,26373,36423,22019,5154,4337,16523 26 | 2,3,22647,9776,13792,2915,4482,5778 27 | 2,3,16165,4230,7595,201,4003,57 28 | 1,3,9898,961,2861,3151,242,833 29 | 1,3,14276,803,3045,485,100,518 30 | 2,3,4113,20484,25957,1158,8604,5206 31 | 1,3,43088,2100,2609,1200,1107,823 32 | 1,3,18815,3610,11107,1148,2134,2963 33 | 1,3,2612,4339,3133,2088,820,985 34 | 1,3,21632,1318,2886,266,918,405 35 | 1,3,29729,4786,7326,6130,361,1083 36 | 1,3,1502,1979,2262,425,483,395 37 | 2,3,688,5491,11091,833,4239,436 38 | 1,3,29955,4362,5428,1729,862,4626 39 | 2,3,15168,10556,12477,1920,6506,714 40 | 2,3,4591,15729,16709,33,6956,433 41 | 1,3,56159,555,902,10002,212,2916 42 | 1,3,24025,4332,4757,9510,1145,5864 43 | 1,3,19176,3065,5956,2033,2575,2802 44 | 2,3,10850,7555,14961,188,6899,46 45 | 2,3,630,11095,23998,787,9529,72 46 | 2,3,9670,7027,10471,541,4618,65 47 | 2,3,5181,22044,21531,1740,7353,4985 48 | 2,3,3103,14069,21955,1668,6792,1452 49 | 2,3,44466,54259,55571,7782,24171,6465 50 | 2,3,11519,6152,10868,584,5121,1476 51 | 2,3,4967,21412,28921,1798,13583,1163 52 | 1,3,6269,1095,1980,3860,609,2162 53 | 1,3,3347,4051,6996,239,1538,301 54 | 2,3,40721,3916,5876,532,2587,1278 55 | 2,3,491,10473,11532,744,5611,224 56 | 1,3,27329,1449,1947,2436,204,1333 57 | 1,3,5264,3683,5005,1057,2024,1130 58 | 2,3,4098,29892,26866,2616,17740,1340 59 | 2,3,5417,9933,10487,38,7572,1282 60 | 1,3,13779,1970,1648,596,227,436 61 | 1,3,6137,5360,8040,129,3084,1603 62 | 2,3,8590,3045,7854,96,4095,225 63 | 
2,3,35942,38369,59598,3254,26701,2017 64 | 2,3,7823,6245,6544,4154,4074,964 65 | 2,3,9396,11601,15775,2896,7677,1295 66 | 1,3,4760,1227,3250,3724,1247,1145 67 | 2,3,85,20959,45828,36,24231,1423 68 | 1,3,9,1534,7417,175,3468,27 69 | 2,3,19913,6759,13462,1256,5141,834 70 | 1,3,2446,7260,3993,5870,788,3095 71 | 1,3,8352,2820,1293,779,656,144 72 | 1,3,16705,2037,3202,10643,116,1365 73 | 1,3,18291,1266,21042,5373,4173,14472 74 | 1,3,4420,5139,2661,8872,1321,181 75 | 2,3,19899,5332,8713,8132,764,648 76 | 2,3,8190,6343,9794,1285,1901,1780 77 | 1,3,20398,1137,3,4407,3,975 78 | 1,3,717,3587,6532,7530,529,894 79 | 2,3,12205,12697,28540,869,12034,1009 80 | 1,3,10766,1175,2067,2096,301,167 81 | 1,3,1640,3259,3655,868,1202,1653 82 | 1,3,7005,829,3009,430,610,529 83 | 2,3,219,9540,14403,283,7818,156 84 | 2,3,10362,9232,11009,737,3537,2342 85 | 1,3,20874,1563,1783,2320,550,772 86 | 2,3,11867,3327,4814,1178,3837,120 87 | 2,3,16117,46197,92780,1026,40827,2944 88 | 2,3,22925,73498,32114,987,20070,903 89 | 1,3,43265,5025,8117,6312,1579,14351 90 | 1,3,7864,542,4042,9735,165,46 91 | 1,3,24904,3836,5330,3443,454,3178 92 | 1,3,11405,596,1638,3347,69,360 93 | 1,3,12754,2762,2530,8693,627,1117 94 | 2,3,9198,27472,32034,3232,18906,5130 95 | 1,3,11314,3090,2062,35009,71,2698 96 | 2,3,5626,12220,11323,206,5038,244 97 | 1,3,3,2920,6252,440,223,709 98 | 2,3,23,2616,8118,145,3874,217 99 | 1,3,403,254,610,774,54,63 100 | 1,3,503,112,778,895,56,132 101 | 1,3,9658,2182,1909,5639,215,323 102 | 2,3,11594,7779,12144,3252,8035,3029 103 | 2,3,1420,10810,16267,1593,6766,1838 104 | 2,3,2932,6459,7677,2561,4573,1386 105 | 1,3,56082,3504,8906,18028,1480,2498 106 | 1,3,14100,2132,3445,1336,1491,548 107 | 1,3,15587,1014,3970,910,139,1378 108 | 2,3,1454,6337,10704,133,6830,1831 109 | 2,3,8797,10646,14886,2471,8969,1438 110 | 2,3,1531,8397,6981,247,2505,1236 111 | 2,3,1406,16729,28986,673,836,3 112 | 1,3,11818,1648,1694,2276,169,1647 113 | 2,3,12579,11114,17569,805,6457,1519 114 | 1,3,19046,2770,2469,8853,483,2708 115 | 1,3,14438,2295,1733,3220,585,1561 116 | 1,3,18044,1080,2000,2555,118,1266 117 | 1,3,11134,793,2988,2715,276,610 118 | 1,3,11173,2521,3355,1517,310,222 119 | 1,3,6990,3880,5380,1647,319,1160 120 | 1,3,20049,1891,2362,5343,411,933 121 | 1,3,8258,2344,2147,3896,266,635 122 | 1,3,17160,1200,3412,2417,174,1136 123 | 1,3,4020,3234,1498,2395,264,255 124 | 1,3,12212,201,245,1991,25,860 125 | 2,3,11170,10769,8814,2194,1976,143 126 | 1,3,36050,1642,2961,4787,500,1621 127 | 1,3,76237,3473,7102,16538,778,918 128 | 1,3,19219,1840,1658,8195,349,483 129 | 2,3,21465,7243,10685,880,2386,2749 130 | 1,3,140,8847,3823,142,1062,3 131 | 1,3,42312,926,1510,1718,410,1819 132 | 1,3,7149,2428,699,6316,395,911 133 | 1,3,2101,589,314,346,70,310 134 | 1,3,14903,2032,2479,576,955,328 135 | 1,3,9434,1042,1235,436,256,396 136 | 1,3,7388,1882,2174,720,47,537 137 | 1,3,6300,1289,2591,1170,199,326 138 | 1,3,4625,8579,7030,4575,2447,1542 139 | 1,3,3087,8080,8282,661,721,36 140 | 1,3,13537,4257,5034,155,249,3271 141 | 1,3,5387,4979,3343,825,637,929 142 | 1,3,17623,4280,7305,2279,960,2616 143 | 1,3,30379,13252,5189,321,51,1450 144 | 1,3,37036,7152,8253,2995,20,3 145 | 1,3,10405,1596,1096,8425,399,318 146 | 1,3,18827,3677,1988,118,516,201 147 | 2,3,22039,8384,34792,42,12591,4430 148 | 1,3,7769,1936,2177,926,73,520 149 | 1,3,9203,3373,2707,1286,1082,526 150 | 1,3,5924,584,542,4052,283,434 151 | 1,3,31812,1433,1651,800,113,1440 152 | 1,3,16225,1825,1765,853,170,1067 153 | 1,3,1289,3328,2022,531,255,1774 154 | 1,3,18840,1371,3135,3001,352,184 155 | 
1,3,3463,9250,2368,779,302,1627 156 | 1,3,622,55,137,75,7,8 157 | 2,3,1989,10690,19460,233,11577,2153 158 | 2,3,3830,5291,14855,317,6694,3182 159 | 1,3,17773,1366,2474,3378,811,418 160 | 2,3,2861,6570,9618,930,4004,1682 161 | 2,3,355,7704,14682,398,8077,303 162 | 2,3,1725,3651,12822,824,4424,2157 163 | 1,3,12434,540,283,1092,3,2233 164 | 1,3,15177,2024,3810,2665,232,610 165 | 2,3,5531,15726,26870,2367,13726,446 166 | 2,3,5224,7603,8584,2540,3674,238 167 | 2,3,15615,12653,19858,4425,7108,2379 168 | 2,3,4822,6721,9170,993,4973,3637 169 | 1,3,2926,3195,3268,405,1680,693 170 | 1,3,5809,735,803,1393,79,429 171 | 1,3,5414,717,2155,2399,69,750 172 | 2,3,260,8675,13430,1116,7015,323 173 | 2,3,200,25862,19816,651,8773,6250 174 | 1,3,955,5479,6536,333,2840,707 175 | 2,3,514,7677,19805,937,9836,716 176 | 1,3,286,1208,5241,2515,153,1442 177 | 2,3,2343,7845,11874,52,4196,1697 178 | 1,3,45640,6958,6536,7368,1532,230 179 | 1,3,12759,7330,4533,1752,20,2631 180 | 1,3,11002,7075,4945,1152,120,395 181 | 1,3,3157,4888,2500,4477,273,2165 182 | 1,3,12356,6036,8887,402,1382,2794 183 | 1,3,112151,29627,18148,16745,4948,8550 184 | 1,3,694,8533,10518,443,6907,156 185 | 1,3,36847,43950,20170,36534,239,47943 186 | 1,3,327,918,4710,74,334,11 187 | 1,3,8170,6448,1139,2181,58,247 188 | 1,3,3009,521,854,3470,949,727 189 | 1,3,2438,8002,9819,6269,3459,3 190 | 2,3,8040,7639,11687,2758,6839,404 191 | 2,3,834,11577,11522,275,4027,1856 192 | 1,3,16936,6250,1981,7332,118,64 193 | 1,3,13624,295,1381,890,43,84 194 | 1,3,5509,1461,2251,547,187,409 195 | 2,3,180,3485,20292,959,5618,666 196 | 1,3,7107,1012,2974,806,355,1142 197 | 1,3,17023,5139,5230,7888,330,1755 198 | 1,1,30624,7209,4897,18711,763,2876 199 | 2,1,2427,7097,10391,1127,4314,1468 200 | 1,1,11686,2154,6824,3527,592,697 201 | 1,1,9670,2280,2112,520,402,347 202 | 2,1,3067,13240,23127,3941,9959,731 203 | 2,1,4484,14399,24708,3549,14235,1681 204 | 1,1,25203,11487,9490,5065,284,6854 205 | 1,1,583,685,2216,469,954,18 206 | 1,1,1956,891,5226,1383,5,1328 207 | 2,1,1107,11711,23596,955,9265,710 208 | 1,1,6373,780,950,878,288,285 209 | 2,1,2541,4737,6089,2946,5316,120 210 | 1,1,1537,3748,5838,1859,3381,806 211 | 2,1,5550,12729,16767,864,12420,797 212 | 1,1,18567,1895,1393,1801,244,2100 213 | 2,1,12119,28326,39694,4736,19410,2870 214 | 1,1,7291,1012,2062,1291,240,1775 215 | 1,1,3317,6602,6861,1329,3961,1215 216 | 2,1,2362,6551,11364,913,5957,791 217 | 1,1,2806,10765,15538,1374,5828,2388 218 | 2,1,2532,16599,36486,179,13308,674 219 | 1,1,18044,1475,2046,2532,130,1158 220 | 2,1,18,7504,15205,1285,4797,6372 221 | 1,1,4155,367,1390,2306,86,130 222 | 1,1,14755,899,1382,1765,56,749 223 | 1,1,5396,7503,10646,91,4167,239 224 | 1,1,5041,1115,2856,7496,256,375 225 | 2,1,2790,2527,5265,5612,788,1360 226 | 1,1,7274,659,1499,784,70,659 227 | 1,1,12680,3243,4157,660,761,786 228 | 2,1,20782,5921,9212,1759,2568,1553 229 | 1,1,4042,2204,1563,2286,263,689 230 | 1,1,1869,577,572,950,4762,203 231 | 1,1,8656,2746,2501,6845,694,980 232 | 2,1,11072,5989,5615,8321,955,2137 233 | 1,1,2344,10678,3828,1439,1566,490 234 | 1,1,25962,1780,3838,638,284,834 235 | 1,1,964,4984,3316,937,409,7 236 | 1,1,15603,2703,3833,4260,325,2563 237 | 1,1,1838,6380,2824,1218,1216,295 238 | 1,1,8635,820,3047,2312,415,225 239 | 1,1,18692,3838,593,4634,28,1215 240 | 1,1,7363,475,585,1112,72,216 241 | 1,1,47493,2567,3779,5243,828,2253 242 | 1,1,22096,3575,7041,11422,343,2564 243 | 1,1,24929,1801,2475,2216,412,1047 244 | 1,1,18226,659,2914,3752,586,578 245 | 1,1,11210,3576,5119,561,1682,2398 246 | 1,1,6202,7775,10817,1183,3143,1970 
247 | 2,1,3062,6154,13916,230,8933,2784 248 | 1,1,8885,2428,1777,1777,430,610 249 | 1,1,13569,346,489,2077,44,659 250 | 1,1,15671,5279,2406,559,562,572 251 | 1,1,8040,3795,2070,6340,918,291 252 | 1,1,3191,1993,1799,1730,234,710 253 | 2,1,6134,23133,33586,6746,18594,5121 254 | 1,1,6623,1860,4740,7683,205,1693 255 | 1,1,29526,7961,16966,432,363,1391 256 | 1,1,10379,17972,4748,4686,1547,3265 257 | 1,1,31614,489,1495,3242,111,615 258 | 1,1,11092,5008,5249,453,392,373 259 | 1,1,8475,1931,1883,5004,3593,987 260 | 1,1,56083,4563,2124,6422,730,3321 261 | 1,1,53205,4959,7336,3012,967,818 262 | 1,1,9193,4885,2157,327,780,548 263 | 1,1,7858,1110,1094,6818,49,287 264 | 1,1,23257,1372,1677,982,429,655 265 | 1,1,2153,1115,6684,4324,2894,411 266 | 2,1,1073,9679,15445,61,5980,1265 267 | 1,1,5909,23527,13699,10155,830,3636 268 | 2,1,572,9763,22182,2221,4882,2563 269 | 1,1,20893,1222,2576,3975,737,3628 270 | 2,1,11908,8053,19847,1069,6374,698 271 | 1,1,15218,258,1138,2516,333,204 272 | 1,1,4720,1032,975,5500,197,56 273 | 1,1,2083,5007,1563,1120,147,1550 274 | 1,1,514,8323,6869,529,93,1040 275 | 1,3,36817,3045,1493,4802,210,1824 276 | 1,3,894,1703,1841,744,759,1153 277 | 1,3,680,1610,223,862,96,379 278 | 1,3,27901,3749,6964,4479,603,2503 279 | 1,3,9061,829,683,16919,621,139 280 | 1,3,11693,2317,2543,5845,274,1409 281 | 2,3,17360,6200,9694,1293,3620,1721 282 | 1,3,3366,2884,2431,977,167,1104 283 | 2,3,12238,7108,6235,1093,2328,2079 284 | 1,3,49063,3965,4252,5970,1041,1404 285 | 1,3,25767,3613,2013,10303,314,1384 286 | 1,3,68951,4411,12609,8692,751,2406 287 | 1,3,40254,640,3600,1042,436,18 288 | 1,3,7149,2247,1242,1619,1226,128 289 | 1,3,15354,2102,2828,8366,386,1027 290 | 1,3,16260,594,1296,848,445,258 291 | 1,3,42786,286,471,1388,32,22 292 | 1,3,2708,2160,2642,502,965,1522 293 | 1,3,6022,3354,3261,2507,212,686 294 | 1,3,2838,3086,4329,3838,825,1060 295 | 2,2,3996,11103,12469,902,5952,741 296 | 1,2,21273,2013,6550,909,811,1854 297 | 2,2,7588,1897,5234,417,2208,254 298 | 1,2,19087,1304,3643,3045,710,898 299 | 2,2,8090,3199,6986,1455,3712,531 300 | 2,2,6758,4560,9965,934,4538,1037 301 | 1,2,444,879,2060,264,290,259 302 | 2,2,16448,6243,6360,824,2662,2005 303 | 2,2,5283,13316,20399,1809,8752,172 304 | 2,2,2886,5302,9785,364,6236,555 305 | 2,2,2599,3688,13829,492,10069,59 306 | 2,2,161,7460,24773,617,11783,2410 307 | 2,2,243,12939,8852,799,3909,211 308 | 2,2,6468,12867,21570,1840,7558,1543 309 | 1,2,17327,2374,2842,1149,351,925 310 | 1,2,6987,1020,3007,416,257,656 311 | 2,2,918,20655,13567,1465,6846,806 312 | 1,2,7034,1492,2405,12569,299,1117 313 | 1,2,29635,2335,8280,3046,371,117 314 | 2,2,2137,3737,19172,1274,17120,142 315 | 1,2,9784,925,2405,4447,183,297 316 | 1,2,10617,1795,7647,1483,857,1233 317 | 2,2,1479,14982,11924,662,3891,3508 318 | 1,2,7127,1375,2201,2679,83,1059 319 | 1,2,1182,3088,6114,978,821,1637 320 | 1,2,11800,2713,3558,2121,706,51 321 | 2,2,9759,25071,17645,1128,12408,1625 322 | 1,2,1774,3696,2280,514,275,834 323 | 1,2,9155,1897,5167,2714,228,1113 324 | 1,2,15881,713,3315,3703,1470,229 325 | 1,2,13360,944,11593,915,1679,573 326 | 1,2,25977,3587,2464,2369,140,1092 327 | 1,2,32717,16784,13626,60869,1272,5609 328 | 1,2,4414,1610,1431,3498,387,834 329 | 1,2,542,899,1664,414,88,522 330 | 1,2,16933,2209,3389,7849,210,1534 331 | 1,2,5113,1486,4583,5127,492,739 332 | 1,2,9790,1786,5109,3570,182,1043 333 | 2,2,11223,14881,26839,1234,9606,1102 334 | 1,2,22321,3216,1447,2208,178,2602 335 | 2,2,8565,4980,67298,131,38102,1215 336 | 2,2,16823,928,2743,11559,332,3486 337 | 2,2,27082,6817,10790,1365,4111,2139 
338 | 1,2,13970,1511,1330,650,146,778 339 | 1,2,9351,1347,2611,8170,442,868 340 | 1,2,3,333,7021,15601,15,550 341 | 1,2,2617,1188,5332,9584,573,1942 342 | 2,3,381,4025,9670,388,7271,1371 343 | 2,3,2320,5763,11238,767,5162,2158 344 | 1,3,255,5758,5923,349,4595,1328 345 | 2,3,1689,6964,26316,1456,15469,37 346 | 1,3,3043,1172,1763,2234,217,379 347 | 1,3,1198,2602,8335,402,3843,303 348 | 2,3,2771,6939,15541,2693,6600,1115 349 | 2,3,27380,7184,12311,2809,4621,1022 350 | 1,3,3428,2380,2028,1341,1184,665 351 | 2,3,5981,14641,20521,2005,12218,445 352 | 1,3,3521,1099,1997,1796,173,995 353 | 2,3,1210,10044,22294,1741,12638,3137 354 | 1,3,608,1106,1533,830,90,195 355 | 2,3,117,6264,21203,228,8682,1111 356 | 1,3,14039,7393,2548,6386,1333,2341 357 | 1,3,190,727,2012,245,184,127 358 | 1,3,22686,134,218,3157,9,548 359 | 2,3,37,1275,22272,137,6747,110 360 | 1,3,759,18664,1660,6114,536,4100 361 | 1,3,796,5878,2109,340,232,776 362 | 1,3,19746,2872,2006,2601,468,503 363 | 1,3,4734,607,864,1206,159,405 364 | 1,3,2121,1601,2453,560,179,712 365 | 1,3,4627,997,4438,191,1335,314 366 | 1,3,2615,873,1524,1103,514,468 367 | 2,3,4692,6128,8025,1619,4515,3105 368 | 1,3,9561,2217,1664,1173,222,447 369 | 1,3,3477,894,534,1457,252,342 370 | 1,3,22335,1196,2406,2046,101,558 371 | 1,3,6211,337,683,1089,41,296 372 | 2,3,39679,3944,4955,1364,523,2235 373 | 1,3,20105,1887,1939,8164,716,790 374 | 1,3,3884,3801,1641,876,397,4829 375 | 2,3,15076,6257,7398,1504,1916,3113 376 | 1,3,6338,2256,1668,1492,311,686 377 | 1,3,5841,1450,1162,597,476,70 378 | 2,3,3136,8630,13586,5641,4666,1426 379 | 1,3,38793,3154,2648,1034,96,1242 380 | 1,3,3225,3294,1902,282,68,1114 381 | 2,3,4048,5164,10391,130,813,179 382 | 1,3,28257,944,2146,3881,600,270 383 | 1,3,17770,4591,1617,9927,246,532 384 | 1,3,34454,7435,8469,2540,1711,2893 385 | 1,3,1821,1364,3450,4006,397,361 386 | 1,3,10683,21858,15400,3635,282,5120 387 | 1,3,11635,922,1614,2583,192,1068 388 | 1,3,1206,3620,2857,1945,353,967 389 | 1,3,20918,1916,1573,1960,231,961 390 | 1,3,9785,848,1172,1677,200,406 391 | 1,3,9385,1530,1422,3019,227,684 392 | 1,3,3352,1181,1328,5502,311,1000 393 | 1,3,2647,2761,2313,907,95,1827 394 | 1,3,518,4180,3600,659,122,654 395 | 1,3,23632,6730,3842,8620,385,819 396 | 1,3,12377,865,3204,1398,149,452 397 | 1,3,9602,1316,1263,2921,841,290 398 | 2,3,4515,11991,9345,2644,3378,2213 399 | 1,3,11535,1666,1428,6838,64,743 400 | 1,3,11442,1032,582,5390,74,247 401 | 1,3,9612,577,935,1601,469,375 402 | 1,3,4446,906,1238,3576,153,1014 403 | 1,3,27167,2801,2128,13223,92,1902 404 | 1,3,26539,4753,5091,220,10,340 405 | 1,3,25606,11006,4604,127,632,288 406 | 1,3,18073,4613,3444,4324,914,715 407 | 1,3,6884,1046,1167,2069,593,378 408 | 1,3,25066,5010,5026,9806,1092,960 409 | 2,3,7362,12844,18683,2854,7883,553 410 | 2,3,8257,3880,6407,1646,2730,344 411 | 1,3,8708,3634,6100,2349,2123,5137 412 | 1,3,6633,2096,4563,1389,1860,1892 413 | 1,3,2126,3289,3281,1535,235,4365 414 | 1,3,97,3605,12400,98,2970,62 415 | 1,3,4983,4859,6633,17866,912,2435 416 | 1,3,5969,1990,3417,5679,1135,290 417 | 2,3,7842,6046,8552,1691,3540,1874 418 | 2,3,4389,10940,10908,848,6728,993 419 | 1,3,5065,5499,11055,364,3485,1063 420 | 2,3,660,8494,18622,133,6740,776 421 | 1,3,8861,3783,2223,633,1580,1521 422 | 1,3,4456,5266,13227,25,6818,1393 423 | 2,3,17063,4847,9053,1031,3415,1784 424 | 1,3,26400,1377,4172,830,948,1218 425 | 2,3,17565,3686,4657,1059,1803,668 426 | 2,3,16980,2884,12232,874,3213,249 427 | 1,3,11243,2408,2593,15348,108,1886 428 | 1,3,13134,9347,14316,3141,5079,1894 429 | 
1,3,31012,16687,5429,15082,439,1163 430 | 1,3,3047,5970,4910,2198,850,317 431 | 1,3,8607,1750,3580,47,84,2501 432 | 1,3,3097,4230,16483,575,241,2080 433 | 1,3,8533,5506,5160,13486,1377,1498 434 | 1,3,21117,1162,4754,269,1328,395 435 | 1,3,1982,3218,1493,1541,356,1449 436 | 1,3,16731,3922,7994,688,2371,838 437 | 1,3,29703,12051,16027,13135,182,2204 438 | 1,3,39228,1431,764,4510,93,2346 439 | 2,3,14531,15488,30243,437,14841,1867 440 | 1,3,10290,1981,2232,1038,168,2125 441 | 1,3,2787,1698,2510,65,477,52 442 | -------------------------------------------------------------------------------- /projects/creating_customer_segments/debug.log: -------------------------------------------------------------------------------- 1 | [0308/005334.831:ERROR:crash_report_database_win.cc(426)] unexpected header 2 | -------------------------------------------------------------------------------- /projects/creating_customer_segments/pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/creating_customer_segments/pca.png -------------------------------------------------------------------------------- /projects/creating_customer_segments/renders.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.cm as cm 3 | import pandas as pd 4 | import numpy as np 5 | from sklearn.decomposition import pca 6 | 7 | def pca_results(good_data, pca): 8 | ''' 9 | Create a DataFrame of the PCA results 10 | Includes dimension feature weights and explained variance 11 | Visualizes the PCA results 12 | ''' 13 | 14 | # Dimension indexing 15 | dimensions = dimensions = ['Dimension {}'.format(i) for i in range(1,len(pca.components_)+1)] 16 | 17 | # PCA components 18 | components = pd.DataFrame(np.round(pca.components_, 4), columns = good_data.keys()) 19 | components.index = dimensions 20 | 21 | # PCA explained variance 22 | ratios = pca.explained_variance_ratio_.reshape(len(pca.components_), 1) 23 | variance_ratios = pd.DataFrame(np.round(ratios, 4), columns = ['Explained Variance']) 24 | variance_ratios.index = dimensions 25 | 26 | # Create a bar plot visualization 27 | fig, ax = plt.subplots(figsize = (14,8)) 28 | 29 | # Plot the feature weights as a function of the components 30 | components.plot(ax = ax, kind = 'bar'); 31 | ax.set_ylabel("Feature Weights") 32 | ax.set_xticklabels(dimensions, rotation=0) 33 | 34 | 35 | # Display the explained variance ratios 36 | for i, ev in enumerate(pca.explained_variance_ratio_): 37 | ax.text(i-0.40, ax.get_ylim()[1] + 0.05, "Explained Variance\n %.4f"%(ev)) 38 | 39 | # Return a concatenated DataFrame 40 | return pd.concat([variance_ratios, components], axis = 1) 41 | 42 | def cluster_results(reduced_data, preds, centers, pca_samples): 43 | ''' 44 | Visualizes the PCA-reduced cluster data in two dimensions 45 | Adds cues for cluster centers and student-selected sample data 46 | ''' 47 | 48 | predictions = pd.DataFrame(preds, columns = ['Cluster']) 49 | plot_data = pd.concat([predictions, reduced_data], axis = 1) 50 | 51 | # Generate the cluster plot 52 | fig, ax = plt.subplots(figsize = (14,8)) 53 | 54 | # Color map 55 | cmap = cm.get_cmap('gist_rainbow') 56 | 57 | # Color the points based on assigned cluster 58 | for i, cluster in plot_data.groupby('Cluster'): 59 | cluster.plot(ax = ax, kind = 'scatter', x = 'Dimension 1', y = 
'Dimension 2', \ 60 | color = cmap((i)*1.0/(len(centers)-1)), label = 'Cluster %i'%(i), s=30); 61 | 62 | # Plot centers with indicators 63 | for i, c in enumerate(centers): 64 | ax.scatter(x = c[0], y = c[1], color = 'white', edgecolors = 'black', \ 65 | alpha = 1, linewidth = 2, marker = 'o', s=200); 66 | ax.scatter(x = c[0], y = c[1], marker='$%d$'%(i), alpha = 1, s=100); 67 | 68 | # Plot transformed sample points 69 | ax.scatter(x = pca_samples[:,0], y = pca_samples[:,1], \ 70 | s = 150, linewidth = 4, color = 'black', marker = 'x'); 71 | 72 | # Set plot title 73 | ax.set_title("Cluster Learning on PCA-Reduced Data - Centroids Marked by Number\nTransformed Sample Data Marked by Black Cross"); 74 | 75 | 76 | def channel_results(reduced_data, outliers, pca_samples): 77 | ''' 78 | Visualizes the PCA-reduced cluster data in two dimensions using the full dataset 79 | Data is labeled by "Channel" and cues added for student-selected sample data 80 | ''' 81 | 82 | # Check that the dataset is loadable 83 | try: 84 | full_data = pd.read_csv("customers.csv") 85 | except: 86 | print "Dataset could not be loaded. Is the file missing?" 87 | return False 88 | 89 | # Create the Channel DataFrame 90 | channel = pd.DataFrame(full_data['Channel'], columns = ['Channel']) 91 | channel = channel.drop(channel.index[outliers]).reset_index(drop = True) 92 | labeled = pd.concat([reduced_data, channel], axis = 1) 93 | 94 | # Generate the cluster plot 95 | fig, ax = plt.subplots(figsize = (14,8)) 96 | 97 | # Color map 98 | cmap = cm.get_cmap('gist_rainbow') 99 | 100 | # Color the points based on assigned Channel 101 | labels = ['Hotel/Restaurant/Cafe', 'Retailer'] 102 | grouped = labeled.groupby('Channel') 103 | for i, channel in grouped: 104 | channel.plot(ax = ax, kind = 'scatter', x = 'Dimension 1', y = 'Dimension 2', \ 105 | color = cmap((i-1)*1.0/2), label = labels[i-1], s=30); 106 | 107 | # Plot transformed sample points 108 | for i, sample in enumerate(pca_samples): 109 | ax.scatter(x = sample[0], y = sample[1], \ 110 | s = 200, linewidth = 3, color = 'black', marker = 'o', facecolors = 'none'); 111 | ax.scatter(x = sample[0]+0.25, y = sample[1]+0.3, marker='$%d$'%(i), alpha = 1, s=125); 112 | 113 | # Set plot title 114 | ax.set_title("PCA-Reduced Data Labeled by 'Channel'\nTransformed Sample Data Circled"); -------------------------------------------------------------------------------- /projects/creating_customer_segments/renders.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/creating_customer_segments/renders.pyc -------------------------------------------------------------------------------- /projects/smartcab/Project 4 final report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/Project 4 final report.pdf -------------------------------------------------------------------------------- /projects/smartcab/Q_learning_new_state_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/Q_learning_new_state_space.png 
-------------------------------------------------------------------------------- /projects/smartcab/README.md: -------------------------------------------------------------------------------- 1 | # Project 4: Reinforcement Learning 2 | ## Train a Smartcab How to Drive 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** with the [pygame](https://www.pygame.org/wiki/GettingStarted) library installed. 7 | 8 | ### Overview 9 | 10 | ![](smartcabpic.jpg) 11 | 12 | 13 | 14 | The smartcab operates in a grid-like city with other agents. In each trial the smartcab is assigned a goal end point and moves toward it from a given random start point. I've implemented the Q-learning algorithm to teach the smartcab how to reach the goal state while avoiding other agents and penalties. A penalty is given when the smartcab violates a traffic law or collides with another agent. The final Q-learning model with tuned hyper-parameters achieves 99% accuracy. 15 | 16 | ![](Q_learning_new_state_space.png) 17 | 18 | ### Code 19 | 20 | Template code is provided in the `smartcab/agent.py` Python file. Additional supporting Python code can be found in `smartcab/environment.py`, `smartcab/planner.py`, and `smartcab/simulator.py`. Supporting images for the graphical user interface can be found in the `images` folder. 21 | 22 | ### Run 23 | 24 | In a terminal or command window, navigate to the top-level project directory `smartcab/` (that contains this README) and run one of the following commands: 25 | 26 | ```python smartcab/agent.py``` 27 | ```python -m smartcab.agent``` 28 | 29 | This will run the `agent.py` file and execute your agent code. 30 | -------------------------------------------------------------------------------- /projects/smartcab/images/car-black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-black.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-blue.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-cyan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-cyan.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-green.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-magenta.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-magenta.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-orange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-orange.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-red.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-white.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-yellow.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/.ipynb_checkpoints/Test-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import random" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "actions = ['left','right','forward',None]\n", 23 | "light = ['red','green']\n", 24 | "oncoming = ['left','right','forward',None]\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "Q = {}" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 44, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "random_key = (actions[0],light[0],oncoming[1])" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 45, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "if random_key not in Q:\n", 58 | " Q[random_key] = {action : random.randint(20,26) for action in actions}" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 46, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "{('left', 'green', 'forward'): {None: 20,\n", 72 | " 'forward': 20,\n", 73 | " 'left': 20,\n", 74 | " 'right': 20},\n", 75 | " ('left', 'red', 'forward'): {None: 20,\n", 76 | " 'forward': 23,\n", 77 | " 'left': 21,\n", 78 | " 'right': 24},\n", 79 | " ('left', 'red', 
'right'): {None: 26, 'forward': 21, 'left': 21, 'right': 26}}" 80 | ] 81 | }, 82 | "execution_count": 46, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "Q" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 47, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "action = {a:v for a,v in Q[random_key].items() if v==max(Q[random_key].values())}" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 48, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "{None: 26, 'right': 26}" 113 | ] 114 | }, 115 | "execution_count": 48, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "action" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 2", 137 | "language": "python", 138 | "name": "python2" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 2 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython2", 150 | "version": "2.7.12" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 0 155 | } 156 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q-learning_over50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q-learning_over50.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q-learning_over_100.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q-learning_over_100.1.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q-learning_over_100.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q-learning_over_100.2.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q_learning_new_state_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q_learning_new_state_space.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q_learning_with_epsilon_decay_final_parameter.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q_learning_with_epsilon_decay_final_parameter.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import random" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 3, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "actions = ['left','right','forward',None]\n", 23 | "light = ['red','green']\n", 24 | "oncoming = ['left','right','forward',None]\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "Q = {}" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 5, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "random_key = (actions[0],light[0],oncoming[1])" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "if random_key not in Q:\n", 58 | " Q[random_key] = {action : random.randint(20,26) for action in actions}" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 7, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "{('left', 'red', 'right'): {None: 24, 'forward': 26, 'left': 22, 'right': 21}}" 72 | ] 73 | }, 74 | "execution_count": 7, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "Q" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 8, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "action = {a:v for a,v in Q[random_key].items() if v==max(Q[random_key].values())}" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 9, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "{'forward': 26}" 105 | ] 106 | }, 107 | "execution_count": 9, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "action" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 10, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "'forward'" 127 | ] 128 | }, 129 | "execution_count": 10, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "random.choice(action.keys())" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Reporter.__init__(): Initialized with metrics: ['reward', 'flubber']\n", 150 | "Summary (2 metrics):-\n", 151 | "Name: reward, samples: 100, type: float64\n", 152 | "Mean: 0.465168053464, s.d.: 0.283774983872\n", 153 | "Name: flubber, samples: 10, type: float64\n", 154 | "Mean: 2.03614942375, s.d.: 
0.579376033361\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "%run analysis.py" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 12, 165 | "metadata": { 166 | "collapsed": false 167 | }, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "26" 173 | ] 174 | }, 175 | "execution_count": 12, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "max(Q[('left', 'red', 'right')].values())" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "collapsed": true 189 | }, 190 | "outputs": [], 191 | "source": [] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 2", 197 | "language": "python", 198 | "name": "python2" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 2 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython2", 210 | "version": "2.7.12" 211 | } 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 0 215 | } 216 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/__init__.py -------------------------------------------------------------------------------- /projects/smartcab/smartcab/agent.py: -------------------------------------------------------------------------------- 1 | import random 2 | from environment import Agent, Environment 3 | from planner import RoutePlanner 4 | from simulator import Simulator 5 | 6 | class LearningAgent(Agent): 7 | """An agent that learns to drive in the smartcab world.""" 8 | 9 | def __init__(self, env): 10 | super(LearningAgent, self).__init__(env) # sets self.env = env, state = None, next_waypoint = None, and a default color 11 | self.color = 'red' # override color 12 | self.planner = RoutePlanner(self.env, self) # simple route planner to get next_waypoint 13 | 14 | # TODO: Initialize any additional variables here 15 | self.correct_actions = ["forward","left","right",None] 16 | 17 | # add the previous state, action and reward variables for updating Q-values 18 | 19 | self.previous_state = None 20 | self.previous_action = None 21 | self.previous_reward = None 22 | 23 | # Add parameters for the Q-table 24 | 25 | self.gamma = 0.3 26 | self.alpha = 0.5 27 | self.epsilon = 0.1 28 | self.Q = {} 29 | self.default_Q_val = 0 30 | self.step_number = 0 31 | 32 | 33 | 34 | def reset(self, destination=None): 35 | self.planner.route_to(destination) 36 | # TODO: Prepare for a new trip; reset any variables here, if required 37 | 38 | self.previous_state = None 39 | self.previous_action = None 40 | self.previous_reward = None 41 | self.step_number+=1 42 | 43 | def update(self, t): 44 | # Gather inputs 45 | self.next_waypoint = self.planner.next_waypoint() # from route planner, also displayed by simulator 46 | inputs = self.env.sense(self) 47 | deadline = self.env.get_deadline(self) 48 | 49 | # TODO: Update state 50 | 51 | 52 | 53 | # self.state = (("light",inputs["light"]),("oncoming",inputs["oncoming"]),("waypoint",self.next_waypoint)) 54 | 55 | self.state = 
(("light",inputs["light"]),("oncoming",inputs["oncoming"]),("waypoint",self.next_waypoint),("left",inputs["left"]),("right",inputs["right"])) 56 | 57 | #1st Q. Implement a basic agent that chooses action randomly 58 | # action = random.choice(self.correct_actions) 59 | # what if the next_waypoint was chosen as the action? 60 | #action = self.next_waypoint 61 | 62 | 63 | # TO DO: Choose action on basis of Q-learning 64 | 65 | if self.state in self.Q: # if we have been into this state before 66 | if random.random() > self.epsilon/self.step_number: # epsilon should be a small number so that we use the learned values of Q-table most of the time 67 | # choose the action that has the max Q-value, can be greater than one if most of the actions has just been initialized 68 | potential_actions = {a:v for a,v in self.Q[self.state].items() if v==max(self.Q[self.state].values())} 69 | action = random.choice(potential_actions.keys()) 70 | else: 71 | action = random.choice(self.correct_actions) 72 | 73 | 74 | else: 75 | # if we have not been into this state before then initialize the Q-values for each valid action 76 | self.Q[self.state] = {action:self.default_Q_val for action in self.correct_actions} 77 | # then choose a random action from the set of valid action 78 | action = random.choice(self.correct_actions) 79 | 80 | 81 | 82 | 83 | # Execute action and get reward 84 | reward = self.env.act(self, action) 85 | 86 | # TODO: Learn policy based on state, action, reward/ Updating Q-table values 87 | 88 | 89 | 90 | if self.previous_state != None: 91 | # Complete the equation after rewatching the videos 92 | self.Q[self.previous_state][self.previous_action] = (1-self.alpha)*self.Q[self.previous_state][self.previous_action] + \ 93 | self.alpha*(self.previous_reward + self.gamma*max(self.Q[self.state].values())) 94 | 95 | 96 | 97 | 98 | self.previous_state = self.state 99 | self.previous_action = action 100 | self.previous_reward = reward 101 | 102 | 103 | 104 | 105 | print "LearningAgent.update(): deadline = {}, inputs = {}, action = {}, reward = {}".format(deadline, inputs, action, reward) #[debug] 106 | print "State = {}".format(self.state) 107 | 108 | def run(): 109 | """Run the agent for a finite number of trials.""" 110 | 111 | # Set up environment and agent 112 | e = Environment() # create environment (also adds some dummy traffic) 113 | a = e.create_agent(LearningAgent) # create agent 114 | e.set_primary_agent(a, enforce_deadline=True) # specify agent to track 115 | # NOTE: You can set enforce_deadline=False while debugging to allow longer trials 116 | 117 | # Now simulate it 118 | sim = Simulator(e, update_delay=0.1, display=False,live_plot = True) # create simulator (uses pygame when display=True, if available) 119 | # NOTE: To speed up simulation, reduce update_delay and/or set display=False 120 | 121 | sim.run(n_trials=100) # run for a specified number of trials 122 | # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line 123 | 124 | 125 | if __name__ == '__main__': 126 | run() 127 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/analysis.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | 8 | class Metric(object): 9 | """Named sequence of x and y values, with optional plotting helpers.""" 10 | 11 | def __init__(self, 
name): 12 | self.name = name 13 | self.reset() 14 | 15 | def collect(self, x, y): 16 | self.xdata.append(x) 17 | self.ydata.append(y) 18 | 19 | def plot(self, ax): 20 | self.plot_obj, = ax.plot(self.xdata, self.ydata, 'o-', label=self.name) 21 | 22 | def refresh(self): 23 | self.plot_obj.set_data(self.xdata, self.ydata) 24 | 25 | def reset(self): 26 | self.xdata = [] 27 | self.ydata = [] 28 | 29 | 30 | class Reporter(object): 31 | """Collect metrics, analyze and report summary statistics.""" 32 | 33 | def __init__(self, metrics=[], live_plot=False): 34 | self.metrics = OrderedDict() 35 | self.live_plot = live_plot 36 | 37 | for name in metrics: 38 | self.metrics[name] = Metric(name) 39 | 40 | if self.live_plot: 41 | if not plt.isinteractive(): 42 | plt.ion() 43 | self.plot() 44 | 45 | print "Reporter.__init__(): Initialized with metrics: {}".format(metrics) # [debug] 46 | 47 | def collect(self, name, x, y): 48 | if not name in self.metrics: 49 | self.metrics[name] = Metric(name) 50 | if self.live_plot: 51 | self.metrics[name].plot(self.ax) 52 | self.ax.legend() # add new metric to legend 53 | print "Reporter.collect(): New metric added: {}".format(name) # [debug] 54 | self.metrics[name].collect(x, y) 55 | if self.live_plot: 56 | self.metrics[name].refresh() 57 | 58 | def plot(self): 59 | if not hasattr(self, 'fig') or not hasattr(self, 'ax'): 60 | self.fig, self.ax = plt.subplots() 61 | for name in self.metrics: 62 | self.metrics[name].plot(self.ax) 63 | #self.ax.set_autoscalex_on(True) 64 | #self.ax.set_autoscaley_on(True) 65 | self.ax.grid() 66 | self.ax.legend() 67 | else: 68 | for name in self.metrics: 69 | self.metrics[name].refresh() 70 | self.refresh_plot() 71 | 72 | def refresh_plot(self): 73 | self.ax.relim() 74 | self.ax.autoscale_view() 75 | self.fig.canvas.draw() 76 | self.fig.canvas.flush_events() 77 | plt.draw() 78 | 79 | def summary(self): 80 | return [pd.Series(metric.ydata, index=metric.xdata, name=name) for name, metric in self.metrics.iteritems()] 81 | 82 | 83 | def show_plot(self): 84 | if plt.isinteractive(): 85 | plt.ioff() 86 | self.plot() 87 | summary1 = self.summary() 88 | print "Summary ({} metrics):-".format(len(summary1)) 89 | for metric in summary1: 90 | print "Name: {}, samples: {}, type: {}".format(metric.name, len(metric), metric.dtype) 91 | if metric.name == "success": 92 | print "Total : {} success out of {} samples".format(sum(metric),len(metric)) 93 | else: 94 | print "Mean: {}, s.d.: {}".format(metric.mean(), metric.std()) 95 | #print metric[:5] # [debug] 96 | 97 | 98 | plt.show() 99 | 100 | 101 | def reset(self): 102 | for name in self.metrics: 103 | self.metrics[name].reset() 104 | if self.live_plot: 105 | self.metrics[name].refresh() 106 | 107 | 108 | def test_reporter(): 109 | plt.ion() 110 | rep = Reporter(metrics=['reward', 'flubber'], live_plot=True) 111 | for i in xrange(100): 112 | rep.collect('reward', i, np.random.random()) 113 | if i % 10 == 1: 114 | rep.collect('flubber', i, np.random.random() * 2 + 1) 115 | rep.refresh_plot() 116 | time.sleep(0.01) 117 | rep.plot() 118 | summary = rep.summary() 119 | print "Summary ({} metrics):-".format(len(summary)) 120 | for metric in summary: 121 | print "Name: {}, samples: {}, type: {}".format(metric.name, len(metric), metric.dtype) 122 | print "Mean: {}, s.d.: {}".format(metric.mean(), metric.std()) 123 | #print metric[:5] # [debug] 124 | plt.ioff() 125 | plt.show() 126 | 127 | 128 | if __name__ == '__main__': 129 | test_reporter() 130 | 
-------------------------------------------------------------------------------- /projects/smartcab/smartcab/analysis.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/analysis.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcab/environment.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | from collections import OrderedDict 4 | 5 | from simulator import Simulator 6 | 7 | class TrafficLight(object): 8 | """A traffic light that switches periodically.""" 9 | 10 | valid_states = [True, False] # True = NS open, False = EW open 11 | 12 | def __init__(self, state=None, period=None): 13 | self.state = state if state is not None else random.choice(self.valid_states) 14 | self.period = period if period is not None else random.choice([3, 4, 5]) 15 | self.last_updated = 0 16 | 17 | def reset(self): 18 | self.last_updated = 0 19 | 20 | def update(self, t): 21 | if t - self.last_updated >= self.period: 22 | self.state = not self.state # assuming state is boolean 23 | self.last_updated = t 24 | 25 | 26 | class Environment(object): 27 | """Environment within which all agents operate.""" 28 | 29 | valid_actions = [None, 'forward', 'left', 'right'] 30 | valid_inputs = {'light': TrafficLight.valid_states, 'oncoming': valid_actions, 'left': valid_actions, 'right': valid_actions} 31 | valid_headings = [(1, 0), (0, -1), (-1, 0), (0, 1)] # ENWS 32 | hard_time_limit = -100 # even if enforce_deadline is False, end trial when deadline reaches this value (to avoid deadlocks) 33 | 34 | def __init__(self, num_dummies=3): 35 | self.num_dummies = num_dummies # no. 
of dummy agents 36 | 37 | # Initialize simulation variables 38 | self.done = False 39 | self.t = 0 40 | self.agent_states = OrderedDict() 41 | self.status_text = "" 42 | 43 | 44 | 45 | 46 | # Road network 47 | self.grid_size = (8, 6) # (cols, rows) 48 | self.bounds = (1, 1, self.grid_size[0], self.grid_size[1]) 49 | self.block_size = 100 50 | self.intersections = OrderedDict() 51 | self.roads = [] 52 | for x in xrange(self.bounds[0], self.bounds[2] + 1): 53 | for y in xrange(self.bounds[1], self.bounds[3] + 1): 54 | self.intersections[(x, y)] = TrafficLight() # a traffic light at each intersection 55 | 56 | for a in self.intersections: 57 | for b in self.intersections: 58 | if a == b: 59 | continue 60 | if (abs(a[0] - b[0]) + abs(a[1] - b[1])) == 1: # L1 distance = 1 61 | self.roads.append((a, b)) 62 | 63 | # Dummy agents 64 | for i in xrange(self.num_dummies): 65 | self.create_agent(DummyAgent) 66 | 67 | # Primary agent and associated parameters 68 | self.primary_agent = None # to be set explicitly 69 | self.enforce_deadline = False 70 | 71 | # Step data (updated after each environment step) 72 | self.step_data = { 73 | 't': 0, 74 | 'deadline': 0, 75 | 'waypoint': None, 76 | 'inputs': None, 77 | 'action': None, 78 | 'reward': 0.0 79 | } 80 | 81 | # Trial data (updated at the end of each trial) 82 | self.trial_data = { 83 | 'net_reward': 0.0, # total reward earned in current trial 84 | 'final_deadline': None, # deadline value (time remaining) 85 | 'success': 0 # whether the agent reached the destination in time 86 | } 87 | 88 | def create_agent(self, agent_class, *args, **kwargs): 89 | agent = agent_class(self, *args, **kwargs) 90 | self.agent_states[agent] = {'location': random.choice(self.intersections.keys()), 'heading': (0, 1)} 91 | return agent 92 | 93 | def set_primary_agent(self, agent, enforce_deadline=False): 94 | self.primary_agent = agent 95 | self.enforce_deadline = enforce_deadline 96 | 97 | def reset(self): 98 | self.done = False 99 | self.t = 0 100 | self.success = 0 101 | 102 | # Reset traffic lights 103 | for traffic_light in self.intersections.itervalues(): 104 | traffic_light.reset() 105 | 106 | # Pick a start and a destination 107 | start = random.choice(self.intersections.keys()) 108 | destination = random.choice(self.intersections.keys()) 109 | 110 | # Ensure starting location and destination are not too close 111 | while self.compute_dist(start, destination) < 4: 112 | start = random.choice(self.intersections.keys()) 113 | destination = random.choice(self.intersections.keys()) 114 | 115 | start_heading = random.choice(self.valid_headings) 116 | deadline = self.compute_dist(start, destination) * 5 117 | print "Environment.reset(): Trial set up with start = {}, destination = {}, deadline = {}".format(start, destination, deadline) 118 | 119 | # Initialize agent(s) 120 | for agent in self.agent_states.iterkeys(): 121 | self.agent_states[agent] = { 122 | 'location': start if agent is self.primary_agent else random.choice(self.intersections.keys()), 123 | 'heading': start_heading if agent is self.primary_agent else random.choice(self.valid_headings), 124 | 'destination': destination if agent is self.primary_agent else None, 125 | 'deadline': deadline if agent is self.primary_agent else None} 126 | agent.reset(destination=(destination if agent is self.primary_agent else None)) 127 | if agent is self.primary_agent: 128 | # Reset metrics for this trial (step data will be set during the step) 129 | self.trial_data['net_reward'] = 0.0 130 | self.trial_data['final_deadline'] = 
deadline 131 | self.trial_data['success'] = 0 132 | 133 | def step(self): 134 | #print "Environment.step(): t = {}".format(self.t) # [debug] 135 | 136 | # Update traffic lights 137 | for intersection, traffic_light in self.intersections.iteritems(): 138 | traffic_light.update(self.t) 139 | 140 | # Update agents 141 | for agent in self.agent_states.iterkeys(): 142 | agent.update(self.t) 143 | 144 | if self.done: 145 | return # primary agent might have reached destination 146 | 147 | if self.primary_agent is not None: 148 | agent_deadline = self.agent_states[self.primary_agent]['deadline'] 149 | if agent_deadline <= self.hard_time_limit: 150 | self.done = True 151 | print "Environment.step(): Primary agent hit hard time limit ({})! Trial aborted.".format(self.hard_time_limit) 152 | elif self.enforce_deadline and agent_deadline <= 0: 153 | self.done = True 154 | print "Environment.step(): Primary agent ran out of time! Trial aborted." 155 | self.agent_states[self.primary_agent]['deadline'] = agent_deadline - 1 156 | 157 | self.t += 1 158 | 159 | def sense(self, agent): 160 | assert agent in self.agent_states, "Unknown agent!" 161 | 162 | state = self.agent_states[agent] 163 | location = state['location'] 164 | heading = state['heading'] 165 | light = 'green' if (self.intersections[location].state and heading[1] != 0) or ((not self.intersections[location].state) and heading[0] != 0) else 'red' 166 | 167 | # Populate oncoming, left, right 168 | oncoming = None 169 | left = None 170 | right = None 171 | for other_agent, other_state in self.agent_states.iteritems(): 172 | if agent == other_agent or location != other_state['location'] or (heading[0] == other_state['heading'][0] and heading[1] == other_state['heading'][1]): 173 | continue 174 | other_heading = other_agent.get_next_waypoint() 175 | if (heading[0] * other_state['heading'][0] + heading[1] * other_state['heading'][1]) == -1: 176 | if oncoming != 'left': # we don't want to override oncoming == 'left' 177 | oncoming = other_heading 178 | elif (heading[1] == other_state['heading'][0] and -heading[0] == other_state['heading'][1]): 179 | if right != 'forward' and right != 'left': # we don't want to override right == 'forward or 'left' 180 | right = other_heading 181 | else: 182 | if left != 'forward': # we don't want to override left == 'forward' 183 | left = other_heading 184 | 185 | return {'light': light, 'oncoming': oncoming, 'left': left, 'right': right} 186 | 187 | def get_deadline(self, agent): 188 | return self.agent_states[agent]['deadline'] if agent is self.primary_agent else None 189 | 190 | def act(self, agent, action): 191 | assert agent in self.agent_states, "Unknown agent!" 192 | assert action in self.valid_actions, "Invalid action!" 
193 | 194 | state = self.agent_states[agent] 195 | location = state['location'] 196 | heading = state['heading'] 197 | light = 'green' if (self.intersections[location].state and heading[1] != 0) or ((not self.intersections[location].state) and heading[0] != 0) else 'red' 198 | inputs = self.sense(agent) 199 | 200 | # Move agent if within bounds and obeys traffic rules 201 | reward = 0 # reward/penalty 202 | move_okay = True 203 | if action == 'forward': 204 | if light != 'green': 205 | move_okay = False 206 | elif action == 'left': 207 | if light == 'green' and (inputs['oncoming'] == None or inputs['oncoming'] == 'left'): 208 | heading = (heading[1], -heading[0]) 209 | else: 210 | move_okay = False 211 | elif action == 'right': 212 | if light == 'green' or (inputs['oncoming'] != 'left' and inputs['left'] != 'forward'): 213 | heading = (-heading[1], heading[0]) 214 | else: 215 | move_okay = False 216 | 217 | if move_okay: 218 | # Valid move (could be null) 219 | if action is not None: 220 | # Valid non-null move 221 | location = ((location[0] + heading[0] - self.bounds[0]) % (self.bounds[2] - self.bounds[0] + 1) + self.bounds[0], 222 | (location[1] + heading[1] - self.bounds[1]) % (self.bounds[3] - self.bounds[1] + 1) + self.bounds[1]) # wrap-around 223 | #if self.bounds[0] <= location[0] <= self.bounds[2] and self.bounds[1] <= location[1] <= self.bounds[3]: # bounded 224 | state['location'] = location 225 | state['heading'] = heading 226 | reward = 2.0 if action == agent.get_next_waypoint() else -0.5 # valid, but is it correct? (as per waypoint) 227 | else: 228 | # Valid null move 229 | reward = 0.0 230 | else: 231 | # Invalid move 232 | reward = -1.0 233 | 234 | if agent is self.primary_agent: 235 | if state['location'] == state['destination']: 236 | if state['deadline'] >= 0: 237 | reward += 10 # bonus 238 | self.trial_data['success'] = 1 239 | self.done = True 240 | print "Environment.act(): Primary agent has reached destination!" 
# [debug] 241 | 242 | self.status_text = "state: {}\naction: {}\nreward: {}".format(agent.get_state(), action, reward) 243 | #print "Environment.act() [POST]: location: {}, heading: {}, action: {}, reward: {}".format(location, heading, action, reward) # [debug] 244 | 245 | # Update metrics 246 | self.step_data['t'] = self.t 247 | self.trial_data['final_deadline'] = self.step_data['deadline'] = state['deadline'] 248 | self.step_data['waypoint'] = agent.get_next_waypoint() 249 | self.step_data['inputs'] = inputs 250 | self.step_data['action'] = action 251 | self.step_data['reward'] = reward 252 | self.trial_data['net_reward'] += reward 253 | print "Environment.act(): Step data: {}".format(self.step_data) # [debug] 254 | 255 | return reward 256 | 257 | def compute_dist(self, a, b): 258 | """L1 distance between two points.""" 259 | return abs(b[0] - a[0]) + abs(b[1] - a[1]) 260 | 261 | def stats_trial(): 262 | return self.success 263 | 264 | 265 | class Agent(object): 266 | """Base class for all agents.""" 267 | 268 | def __init__(self, env): 269 | self.env = env 270 | self.state = None 271 | self.next_waypoint = None 272 | self.color = 'cyan' 273 | 274 | def reset(self, destination=None): 275 | pass 276 | 277 | def update(self, t): 278 | pass 279 | 280 | def get_state(self): 281 | return self.state 282 | 283 | def get_next_waypoint(self): 284 | return self.next_waypoint 285 | 286 | 287 | class DummyAgent(Agent): 288 | color_choices = ['blue', 'cyan', 'magenta', 'orange'] 289 | 290 | def __init__(self, env): 291 | super(DummyAgent, self).__init__(env) # sets self.env = env, state = None, next_waypoint = None, and a default color 292 | self.next_waypoint = random.choice(Environment.valid_actions[1:]) 293 | self.color = random.choice(self.color_choices) 294 | 295 | def update(self, t): 296 | inputs = self.env.sense(self) 297 | 298 | action_okay = True 299 | if self.next_waypoint == 'right': 300 | if inputs['light'] == 'red' and inputs['left'] == 'forward': 301 | action_okay = False 302 | elif self.next_waypoint == 'forward': 303 | if inputs['light'] == 'red': 304 | action_okay = False 305 | elif self.next_waypoint == 'left': 306 | if inputs['light'] == 'red' or (inputs['oncoming'] == 'forward' or inputs['oncoming'] == 'right'): 307 | action_okay = False 308 | 309 | action = None 310 | if action_okay: 311 | action = self.next_waypoint 312 | self.next_waypoint = random.choice(Environment.valid_actions[1:]) 313 | reward = self.env.act(self, action) 314 | #print "DummyAgent.update(): t = {}, inputs = {}, action = {}, reward = {}".format(t, inputs, action, reward) # [debug] 315 | #print "DummyAgent.update(): next_waypoint = {}".format(self.next_waypoint) # [debug] 316 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/environment.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/environment.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcab/planner.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class RoutePlanner(object): 4 | """Silly route planner that is meant for a perpendicular grid network.""" 5 | 6 | def __init__(self, env, agent): 7 | self.env = env 8 | self.agent = agent 9 | self.destination = None 10 | 11 | def 
route_to(self, destination=None): 12 | self.destination = destination if destination is not None else random.choice(self.env.intersections.keys()) 13 | print "RoutePlanner.route_to(): destination = {}".format(destination) # [debug] 14 | 15 | def next_waypoint(self): 16 | location = self.env.agent_states[self.agent]['location'] 17 | heading = self.env.agent_states[self.agent]['heading'] 18 | delta = (self.destination[0] - location[0], self.destination[1] - location[1]) 19 | if delta[0] == 0 and delta[1] == 0: 20 | return None 21 | elif delta[0] != 0: # EW difference 22 | if delta[0] * heading[0] > 0: # facing correct EW direction 23 | return 'forward' 24 | elif delta[0] * heading[0] < 0: # facing opposite EW direction 25 | return 'right' # long U-turn 26 | elif delta[0] * heading[1] > 0: 27 | return 'left' 28 | else: 29 | return 'right' 30 | elif delta[1] != 0: # NS difference (turn logic is slightly different) 31 | if delta[1] * heading[1] > 0: # facing correct NS direction 32 | return 'forward' 33 | elif delta[1] * heading[1] < 0: # facing opposite NS direction 34 | return 'right' # long U-turn 35 | elif delta[1] * heading[0] > 0: 36 | return 'right' 37 | else: 38 | return 'left' 39 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/planner.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/planner.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcab/random_trial_50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/random_trial_50.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/simulator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import random 4 | import importlib 5 | 6 | import numpy as np 7 | 8 | from analysis import Reporter 9 | 10 | class Simulator(object): 11 | """Simulates agents in a dynamic smartcab environment. 12 | 13 | Uses PyGame to display GUI, if available. 
14 | """ 15 | 16 | colors = { 17 | 'black' : ( 0, 0, 0), 18 | 'white' : (255, 255, 255), 19 | 'red' : (255, 0, 0), 20 | 'green' : ( 0, 255, 0), 21 | 'blue' : ( 0, 0, 255), 22 | 'cyan' : ( 0, 200, 200), 23 | 'magenta' : (200, 0, 200), 24 | 'yellow' : (255, 255, 0), 25 | 'orange' : (255, 128, 0) 26 | } 27 | 28 | def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False): 29 | self.env = env 30 | self.size = size if size is not None else ((self.env.grid_size[0] + 1) * self.env.block_size, (self.env.grid_size[1] + 1) * self.env.block_size) 31 | self.width, self.height = self.size 32 | 33 | self.bg_color = self.colors['white'] 34 | self.road_width = 5 35 | self.road_color = self.colors['black'] 36 | 37 | self.quit = False 38 | self.start_time = None 39 | self.current_time = 0.0 40 | self.last_updated = 0.0 41 | self.update_delay = update_delay # duration between each step (in secs) 42 | 43 | self.display = display 44 | if self.display: 45 | try: 46 | self.pygame = importlib.import_module('pygame') 47 | self.pygame.init() 48 | self.screen = self.pygame.display.set_mode(self.size) 49 | 50 | self.frame_delay = max(1, int(self.update_delay * 1000)) # delay between GUI frames in ms (min: 1) 51 | self.agent_sprite_size = (32, 32) 52 | self.agent_circle_radius = 10 # radius of circle, when using simple representation 53 | for agent in self.env.agent_states: 54 | agent._sprite = self.pygame.transform.smoothscale(self.pygame.image.load(os.path.join("../images", "car-{}.png".format(agent.color))), self.agent_sprite_size) 55 | agent._sprite_size = (agent._sprite.get_width(), agent._sprite.get_height()) 56 | 57 | self.font = self.pygame.font.Font(None, 28) 58 | self.paused = False 59 | except ImportError as e: 60 | self.display = False 61 | print "Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(e.__class__.__name__, e) 62 | except Exception as e: 63 | self.display = False 64 | print "Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(e.__class__.__name__, e) 65 | 66 | # Setup metrics to report 67 | self.live_plot = live_plot 68 | self.rep = Reporter(metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'], live_plot=self.live_plot) 69 | self.avg_net_reward_window = 10 70 | 71 | 72 | def run(self, n_trials=1): 73 | self.quit = False 74 | self.rep.reset() 75 | for trial in xrange(n_trials): 76 | print "Simulator.run(): Trial {}".format(trial) # [debug] 77 | self.env.reset() 78 | self.current_time = 0.0 79 | self.last_updated = 0.0 80 | self.start_time = time.time() 81 | while True: 82 | try: 83 | # Update current time 84 | self.current_time = time.time() - self.start_time 85 | #print "Simulator.run(): current_time = {:.3f}".format(self.current_time) 86 | 87 | # Handle GUI events 88 | if self.display: 89 | for event in self.pygame.event.get(): 90 | if event.type == self.pygame.QUIT: 91 | self.quit = True 92 | elif event.type == self.pygame.KEYDOWN: 93 | if event.key == 27: # Esc 94 | self.quit = True 95 | elif event.unicode == u' ': 96 | self.paused = True 97 | 98 | if self.paused: 99 | self.pause() 100 | 101 | # Update environment 102 | if self.current_time - self.last_updated >= self.update_delay: 103 | self.env.step() 104 | # TODO: Log step data 105 | self.last_updated = self.current_time 106 | 107 | # Render GUI and sleep 108 | if self.display: 109 | self.render() 110 | self.pygame.time.wait(self.frame_delay) 111 | except KeyboardInterrupt: 112 | self.quit = True 113 | finally: 114 | if self.quit or 
self.env.done: 115 | break 116 | 117 | if self.quit: 118 | break 119 | 120 | # Collect/update metrics 121 | self.rep.collect('net_reward', trial, self.env.trial_data['net_reward']) # total reward obtained in this trial 122 | self.rep.collect('avg_net_reward', trial, np.mean(self.rep.metrics['net_reward'].ydata[-self.avg_net_reward_window:])) # rolling mean of reward 123 | self.rep.collect('final_deadline', trial, self.env.trial_data['final_deadline']) # final deadline value (time remaining) 124 | self.rep.collect('success', trial, self.env.trial_data['success']) 125 | if self.live_plot: 126 | self.rep.refresh_plot() # autoscales axes, draws stuff and flushes events 127 | 128 | 129 | 130 | 131 | # Report final metrics 132 | if self.display: 133 | self.pygame.display.quit() # need to shutdown pygame before showing metrics plot 134 | # TODO: Figure out why having both game and plot displays makes things crash! 135 | 136 | if self.live_plot: 137 | self.rep.show_plot() # holds till user closes plot window 138 | 139 | 140 | 141 | def render(self): 142 | # Clear screen 143 | self.screen.fill(self.bg_color) 144 | 145 | # Draw elements 146 | # * Static elements 147 | for road in self.env.roads: 148 | self.pygame.draw.line(self.screen, self.road_color, (road[0][0] * self.env.block_size, road[0][1] * self.env.block_size), (road[1][0] * self.env.block_size, road[1][1] * self.env.block_size), self.road_width) 149 | 150 | for intersection, traffic_light in self.env.intersections.iteritems(): 151 | self.pygame.draw.circle(self.screen, self.road_color, (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size), 10) 152 | if traffic_light.state: # North-South is open 153 | self.pygame.draw.line(self.screen, self.colors['green'], 154 | (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size - 15), 155 | (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size + 15), self.road_width) 156 | else: # East-West is open 157 | self.pygame.draw.line(self.screen, self.colors['green'], 158 | (intersection[0] * self.env.block_size - 15, intersection[1] * self.env.block_size), 159 | (intersection[0] * self.env.block_size + 15, intersection[1] * self.env.block_size), self.road_width) 160 | 161 | # * Dynamic elements 162 | for agent, state in self.env.agent_states.iteritems(): 163 | # Compute precise agent location here (back from the intersection some) 164 | agent_offset = (2 * state['heading'][0] * self.agent_circle_radius, 2 * state['heading'][1] * self.agent_circle_radius) 165 | agent_pos = (state['location'][0] * self.env.block_size - agent_offset[0], state['location'][1] * self.env.block_size - agent_offset[1]) 166 | agent_color = self.colors[agent.color] 167 | if hasattr(agent, '_sprite') and agent._sprite is not None: 168 | # Draw agent sprite (image), properly rotated 169 | rotated_sprite = agent._sprite if state['heading'] == (1, 0) else self.pygame.transform.rotate(agent._sprite, 180 if state['heading'][0] == -1 else state['heading'][1] * -90) 170 | self.screen.blit(rotated_sprite, 171 | self.pygame.rect.Rect(agent_pos[0] - agent._sprite_size[0] / 2, agent_pos[1] - agent._sprite_size[1] / 2, 172 | agent._sprite_size[0], agent._sprite_size[1])) 173 | else: 174 | # Draw simple agent (circle with a short line segment poking out to indicate heading) 175 | self.pygame.draw.circle(self.screen, agent_color, agent_pos, self.agent_circle_radius) 176 | self.pygame.draw.line(self.screen, agent_color, agent_pos, state['location'], self.road_width) 177 | if 
agent.get_next_waypoint() is not None: 178 | self.screen.blit(self.font.render(agent.get_next_waypoint(), True, agent_color, self.bg_color), (agent_pos[0] + 10, agent_pos[1] + 10)) 179 | if state['destination'] is not None: 180 | self.pygame.draw.circle(self.screen, agent_color, (state['destination'][0] * self.env.block_size, state['destination'][1] * self.env.block_size), 6) 181 | self.pygame.draw.circle(self.screen, agent_color, (state['destination'][0] * self.env.block_size, state['destination'][1] * self.env.block_size), 15, 2) 182 | 183 | # * Overlays 184 | text_y = 10 185 | for text in self.env.status_text.split('\n'): 186 | self.screen.blit(self.font.render(text, True, self.colors['red'], self.bg_color), (100, text_y)) 187 | text_y += 20 188 | 189 | # Flip buffers 190 | self.pygame.display.flip() 191 | 192 | def pause(self): 193 | abs_pause_time = time.time() 194 | pause_text = "[PAUSED] Press any key to continue..." 195 | self.screen.blit(self.font.render(pause_text, True, self.colors['cyan'], self.bg_color), (100, self.height - 40)) 196 | self.pygame.display.flip() 197 | print pause_text # [debug] 198 | while self.paused: 199 | for event in self.pygame.event.get(): 200 | if event.type == self.pygame.KEYDOWN: 201 | self.paused = False 202 | self.pygame.time.wait(self.frame_delay) 203 | self.screen.blit(self.font.render(pause_text, True, self.bg_color, self.bg_color), (100, self.height - 40)) 204 | self.start_time += (time.time() - abs_pause_time) 205 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/simulator.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/simulator.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcabpic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcabpic.jpg -------------------------------------------------------------------------------- /projects/student_intervention/README.md: -------------------------------------------------------------------------------- 1 | # Project 2: Supervised Learning 2 | ## Building a Student Intervention System 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** and the following Python libraries installed: 7 | 8 | - [NumPy](http://www.numpy.org/) 9 | - [Pandas](http://pandas.pydata.org) 10 | - [scikit-learn](http://scikit-learn.org/stable/) 11 | 12 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 13 | 14 | Udacity recommends our students install [Anaconda](https://www.continuum.io/downloads), a pre-packaged Python distribution that contains all of the necessary libraries and software for this project. 15 | 16 | ### Overview 17 | 18 | This project uses supervised machine learning techniques to predict which students should be flagged for intervention as high risk students and compares different classifiers. 19 | 20 | ![](table.png) 21 | 22 | ### Code 23 | 24 | Template code is provided in the notebook `student_intervention.ipynb` notebook file. 
While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. 25 | 26 | ### Run 27 | 28 | In a terminal or command window, navigate to the top-level project directory `student_intervention/` (that contains this README) and run one of the following commands: 29 | 30 | ```ipython notebook student_intervention.ipynb``` 31 | ```jupyter notebook student_intervention.ipynb``` 32 | 33 | This will open the iPython Notebook software and project file in your browser. 34 | 35 | ## Data 36 | 37 | The dataset used in this project is included as `student-data.csv`. This dataset has the following attributes: 38 | 39 | - `school` : student's school (binary: "GP" or "MS") 40 | - `sex` : student's sex (binary: "F" - female or "M" - male) 41 | - `age` : student's age (numeric: from 15 to 22) 42 | - `address` : student's home address type (binary: "U" - urban or "R" - rural) 43 | - `famsize` : family size (binary: "LE3" - less or equal to 3 or "GT3" - greater than 3) 44 | - `Pstatus` : parent's cohabitation status (binary: "T" - living together or "A" - apart) 45 | - `Medu` : mother's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education) 46 | - `Fedu` : father's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education) 47 | - `Mjob` : mother's job (nominal: "teacher", "health" care related, civil "services" (e.g. administrative or police), "at_home" or "other") 48 | - `Fjob` : father's job (nominal: "teacher", "health" care related, civil "services" (e.g. administrative or police), "at_home" or "other") 49 | - `reason` : reason to choose this school (nominal: close to "home", school "reputation", "course" preference or "other") 50 | - `guardian` : student's guardian (nominal: "mother", "father" or "other") 51 | - `traveltime` : home to school travel time (numeric: 1 - <15 min., 2 - 15 to 30 min., 3 - 30 min. 
to 1 hour, or 4 - >1 hour) 52 | - `studytime` : weekly study time (numeric: 1 - <2 hours, 2 - 2 to 5 hours, 3 - 5 to 10 hours, or 4 - >10 hours) 53 | - `failures` : number of past class failures (numeric: n if 1<=n<3, else 4) 54 | - `schoolsup` : extra educational support (binary: yes or no) 55 | - `famsup` : family educational support (binary: yes or no) 56 | - `paid` : extra paid classes within the course subject (Math or Portuguese) (binary: yes or no) 57 | - `activities` : extra-curricular activities (binary: yes or no) 58 | - `nursery` : attended nursery school (binary: yes or no) 59 | - `higher` : wants to take higher education (binary: yes or no) 60 | - `internet` : Internet access at home (binary: yes or no) 61 | - `romantic` : with a romantic relationship (binary: yes or no) 62 | - `famrel` : quality of family relationships (numeric: from 1 - very bad to 5 - excellent) 63 | - `freetime` : free time after school (numeric: from 1 - very low to 5 - very high) 64 | - `goout` : going out with friends (numeric: from 1 - very low to 5 - very high) 65 | - `Dalc` : workday alcohol consumption (numeric: from 1 - very low to 5 - very high) 66 | - `Walc` : weekend alcohol consumption (numeric: from 1 - very low to 5 - very high) 67 | - `health` : current health status (numeric: from 1 - very bad to 5 - very good) 68 | - `absences` : number of school absences (numeric: from 0 to 93) 69 | - `passed` : did the student pass the final exam (binary: yes or no) 70 | -------------------------------------------------------------------------------- /projects/student_intervention/data2d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/data2d.png -------------------------------------------------------------------------------- /projects/student_intervention/data_2d_to_3d_hyperplane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/data_2d_to_3d_hyperplane.png -------------------------------------------------------------------------------- /projects/student_intervention/data_in_R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/data_in_R3.png -------------------------------------------------------------------------------- /projects/student_intervention/debug.log: -------------------------------------------------------------------------------- 1 | [0308/005733.265:ERROR:crash_report_database_win.cc(426)] unexpected header 2 | -------------------------------------------------------------------------------- /projects/student_intervention/student-data.csv: -------------------------------------------------------------------------------- 1 | school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,passed 2 | GP,F,18,U,GT3,A,4,4,at_home,teacher,course,mother,2,2,0,yes,no,no,no,yes,yes,no,no,4,3,4,1,1,3,6,no 3 | 
GP,F,17,U,GT3,T,1,1,at_home,other,course,father,1,2,0,no,yes,no,no,no,yes,yes,no,5,3,3,1,1,3,4,no 4 | GP,F,15,U,LE3,T,1,1,at_home,other,other,mother,1,2,3,yes,no,yes,no,yes,yes,yes,no,4,3,2,2,3,3,10,yes 5 | GP,F,15,U,GT3,T,4,2,health,services,home,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,3,2,2,1,1,5,2,yes 6 | GP,F,16,U,GT3,T,3,3,other,other,home,father,1,2,0,no,yes,yes,no,yes,yes,no,no,4,3,2,1,2,5,4,yes 7 | GP,M,16,U,LE3,T,4,3,services,other,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,4,2,1,2,5,10,yes 8 | GP,M,16,U,LE3,T,2,2,other,other,home,mother,1,2,0,no,no,no,no,yes,yes,yes,no,4,4,4,1,1,3,0,yes 9 | GP,F,17,U,GT3,A,4,4,other,teacher,home,mother,2,2,0,yes,yes,no,no,yes,yes,no,no,4,1,4,1,1,1,6,no 10 | GP,M,15,U,LE3,A,3,2,services,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,2,2,1,1,1,0,yes 11 | GP,M,15,U,GT3,T,3,4,other,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,5,1,1,1,5,0,yes 12 | GP,F,15,U,GT3,T,4,4,teacher,health,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,3,3,3,1,2,2,0,no 13 | GP,F,15,U,GT3,T,2,1,services,other,reputation,father,3,3,0,no,yes,no,yes,yes,yes,yes,no,5,2,2,1,1,4,4,yes 14 | GP,M,15,U,LE3,T,4,4,health,services,course,father,1,1,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,3,5,2,yes 15 | GP,M,15,U,GT3,T,4,3,teacher,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,5,4,3,1,2,3,2,yes 16 | GP,M,15,U,GT3,A,2,2,other,other,home,other,1,3,0,no,yes,no,no,yes,yes,yes,yes,4,5,2,1,1,3,0,yes 17 | GP,F,16,U,GT3,T,4,4,health,other,home,mother,1,1,0,no,yes,no,no,yes,yes,yes,no,4,4,4,1,2,2,4,yes 18 | GP,F,16,U,GT3,T,4,4,services,services,reputation,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,3,2,3,1,2,2,6,yes 19 | GP,F,16,U,GT3,T,3,3,other,other,reputation,mother,3,2,0,yes,yes,no,yes,yes,yes,no,no,5,3,2,1,1,4,4,yes 20 | GP,M,17,U,GT3,T,3,2,services,services,course,mother,1,1,3,no,yes,no,yes,yes,yes,yes,no,5,5,5,2,4,5,16,no 21 | GP,M,16,U,LE3,T,4,3,health,other,home,father,1,1,0,no,no,yes,yes,yes,yes,yes,no,3,1,3,1,3,5,4,yes 22 | GP,M,15,U,GT3,T,4,3,teacher,other,reputation,mother,1,2,0,no,no,no,no,yes,yes,yes,no,4,4,1,1,1,1,0,yes 23 | GP,M,15,U,GT3,T,4,4,health,health,other,father,1,1,0,no,yes,yes,no,yes,yes,yes,no,5,4,2,1,1,5,0,yes 24 | GP,M,16,U,LE3,T,4,2,teacher,other,course,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,4,5,1,1,3,5,2,yes 25 | GP,M,16,U,LE3,T,2,2,other,other,reputation,mother,2,2,0,no,yes,no,yes,yes,yes,yes,no,5,4,4,2,4,5,0,yes 26 | GP,F,15,R,GT3,T,2,4,services,health,course,mother,1,3,0,yes,yes,yes,yes,yes,yes,yes,no,4,3,2,1,1,5,2,no 27 | GP,F,16,U,GT3,T,2,2,services,services,home,mother,1,1,2,no,yes,yes,no,no,yes,yes,no,1,2,2,1,3,5,14,no 28 | GP,M,15,U,GT3,T,2,2,other,other,home,mother,1,1,0,no,yes,yes,no,yes,yes,yes,no,4,2,2,1,2,5,2,yes 29 | GP,M,15,U,GT3,T,4,2,health,services,other,mother,1,1,0,no,no,yes,no,yes,yes,yes,no,2,2,4,2,4,1,4,yes 30 | GP,M,16,U,LE3,A,3,4,services,other,home,mother,1,2,0,yes,yes,no,yes,yes,yes,yes,no,5,3,3,1,1,5,4,yes 31 | GP,M,16,U,GT3,T,4,4,teacher,teacher,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,5,5,5,5,16,yes 32 | GP,M,15,U,GT3,T,4,4,health,services,home,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,5,4,2,3,4,5,0,yes 33 | GP,M,15,U,GT3,T,4,4,services,services,reputation,mother,2,2,0,no,yes,no,yes,yes,yes,yes,no,4,3,1,1,1,5,0,yes 34 | GP,M,15,R,GT3,T,4,3,teacher,at_home,course,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,4,5,2,1,1,5,0,yes 35 | GP,M,15,U,LE3,T,3,3,other,other,course,mother,1,2,0,no,no,no,yes,no,yes,yes,no,5,3,2,1,1,2,0,yes 36 | 
GP,M,16,U,GT3,T,3,2,other,other,home,mother,1,1,0,no,yes,yes,no,no,yes,yes,no,5,4,3,1,1,5,0,yes 37 | GP,F,15,U,GT3,T,2,3,other,other,other,father,2,1,0,no,yes,no,yes,yes,yes,no,no,3,5,1,1,1,5,0,no 38 | GP,M,15,U,LE3,T,4,3,teacher,services,home,mother,1,3,0,no,yes,no,yes,yes,yes,yes,no,5,4,3,1,1,4,2,yes 39 | GP,M,16,R,GT3,A,4,4,other,teacher,reputation,mother,2,3,0,no,yes,no,yes,yes,yes,yes,yes,2,4,3,1,1,5,7,yes 40 | GP,F,15,R,GT3,T,3,4,services,health,course,mother,1,3,0,yes,yes,yes,yes,yes,yes,yes,no,4,3,2,1,1,5,2,yes 41 | GP,F,15,R,GT3,T,2,2,at_home,other,reputation,mother,1,1,0,yes,yes,yes,yes,yes,yes,no,no,4,3,1,1,1,2,8,yes 42 | GP,F,16,U,LE3,T,2,2,other,other,home,mother,2,2,1,no,yes,no,yes,no,yes,yes,yes,3,3,3,1,2,3,25,yes 43 | GP,M,15,U,LE3,T,4,4,teacher,other,home,other,1,1,0,no,yes,no,no,no,yes,yes,yes,5,4,3,2,4,5,8,yes 44 | GP,M,15,U,GT3,T,4,4,services,teacher,course,father,1,2,0,no,yes,no,yes,yes,yes,yes,no,4,3,3,1,1,5,2,yes 45 | GP,M,15,U,GT3,T,2,2,services,services,course,father,1,1,0,yes,yes,no,no,yes,yes,yes,no,5,4,1,1,1,1,0,yes 46 | GP,F,16,U,LE3,T,2,2,other,at_home,course,father,2,2,1,yes,no,no,yes,yes,yes,yes,no,4,3,3,2,2,5,14,no 47 | GP,F,15,U,LE3,A,4,3,other,other,course,mother,1,2,0,yes,yes,yes,yes,yes,yes,yes,yes,5,2,2,1,1,5,8,no 48 | GP,F,16,U,LE3,A,3,3,other,services,home,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,2,3,5,1,4,3,12,yes 49 | GP,M,16,U,GT3,T,4,3,health,services,reputation,mother,1,4,0,no,no,no,yes,yes,yes,yes,no,4,2,2,1,1,2,4,yes 50 | GP,M,15,U,GT3,T,4,2,teacher,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,no,no,4,3,3,2,2,5,2,yes 51 | GP,F,15,U,GT3,T,4,4,services,teacher,other,father,1,2,1,yes,yes,no,yes,no,yes,yes,no,4,4,4,1,1,3,2,no 52 | GP,F,16,U,LE3,T,2,2,services,services,course,mother,3,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,2,3,4,2,yes 53 | GP,F,15,U,LE3,T,4,2,health,other,other,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,5,2,yes 54 | GP,M,15,U,LE3,A,4,2,health,health,other,father,2,1,1,no,no,no,no,yes,yes,no,no,5,5,5,3,4,5,6,yes 55 | GP,F,15,U,GT3,T,4,4,services,services,course,mother,1,1,0,yes,yes,yes,no,yes,yes,yes,no,3,3,4,2,3,5,0,yes 56 | GP,F,15,U,LE3,A,3,3,other,other,other,mother,1,1,0,no,no,yes,no,yes,yes,yes,no,5,3,4,4,4,1,6,yes 57 | GP,F,16,U,GT3,A,2,1,other,other,other,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,5,3,4,1,1,2,8,yes 58 | GP,F,15,U,GT3,A,4,3,services,services,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,3,2,1,1,1,0,yes 59 | GP,M,15,U,GT3,T,4,4,teacher,health,reputation,mother,1,2,0,no,yes,no,yes,yes,yes,no,no,3,2,2,1,1,5,4,yes 60 | GP,M,15,U,LE3,T,1,2,other,at_home,home,father,1,2,0,yes,yes,no,yes,yes,yes,yes,no,4,3,2,1,1,5,2,no 61 | GP,F,16,U,GT3,T,4,2,services,other,course,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,4,2,3,1,1,5,2,yes 62 | GP,F,16,R,GT3,T,4,4,health,teacher,other,mother,1,2,0,no,yes,no,yes,yes,yes,no,no,2,4,4,2,3,4,6,yes 63 | GP,F,16,U,GT3,T,1,1,services,services,course,father,4,1,0,yes,yes,no,yes,no,yes,yes,yes,5,5,5,5,5,5,6,yes 64 | GP,F,16,U,LE3,T,1,2,other,services,reputation,father,1,2,0,yes,no,no,yes,yes,yes,yes,no,4,4,3,1,1,1,4,no 65 | GP,F,16,U,GT3,T,4,3,teacher,health,home,mother,1,3,0,yes,yes,yes,yes,yes,yes,yes,no,3,4,4,2,4,4,2,no 66 | GP,F,15,U,LE3,T,4,3,services,services,reputation,father,1,2,0,yes,no,no,yes,yes,yes,yes,yes,4,4,4,2,4,2,0,yes 67 | GP,F,16,U,LE3,T,4,3,teacher,services,course,mother,3,2,0,no,yes,no,yes,yes,yes,yes,no,5,4,3,1,2,1,2,yes 68 | GP,M,15,U,GT3,A,4,4,other,services,reputation,mother,1,4,0,no,yes,no,yes,no,yes,yes,yes,1,3,3,5,5,3,4,yes 69 | 
GP,F,16,U,GT3,T,3,1,services,other,course,mother,1,4,0,yes,yes,yes,no,yes,yes,yes,no,4,3,3,1,2,5,4,no 70 | GP,F,15,R,LE3,T,2,2,health,services,reputation,mother,2,2,0,yes,yes,yes,no,yes,yes,yes,no,4,1,3,1,3,4,2,no 71 | GP,F,15,R,LE3,T,3,1,other,other,reputation,father,2,4,0,no,yes,no,no,no,yes,yes,no,4,4,2,2,3,3,12,yes 72 | GP,M,16,U,GT3,T,3,1,other,other,reputation,father,2,4,0,no,yes,yes,no,yes,yes,yes,no,4,3,2,1,1,5,0,yes 73 | GP,M,15,U,GT3,T,4,2,other,other,course,mother,1,4,0,no,no,no,no,yes,yes,yes,no,3,3,3,1,1,3,0,yes 74 | GP,F,15,R,GT3,T,1,1,other,other,reputation,mother,1,2,2,yes,yes,no,no,no,yes,yes,yes,3,3,4,2,4,5,2,no 75 | GP,M,16,U,GT3,T,3,1,other,other,reputation,mother,1,1,0,no,no,no,yes,yes,yes,no,no,5,3,2,2,2,5,2,yes 76 | GP,F,16,U,GT3,T,3,3,other,services,home,mother,1,2,0,yes,yes,yes,yes,yes,yes,yes,no,4,3,3,2,4,5,54,yes 77 | GP,M,15,U,GT3,T,4,3,teacher,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,2,3,5,6,yes 78 | GP,M,15,U,GT3,T,4,0,teacher,other,course,mother,2,4,0,no,no,no,yes,yes,yes,yes,no,3,4,3,1,1,1,8,yes 79 | GP,F,16,U,GT3,T,2,2,other,other,reputation,mother,1,4,0,no,no,yes,no,yes,yes,yes,yes,5,2,3,1,3,3,0,yes 80 | GP,M,17,U,GT3,T,2,1,other,other,home,mother,2,1,3,yes,yes,no,yes,yes,no,yes,no,4,5,1,1,1,3,2,yes 81 | GP,F,16,U,GT3,T,3,4,at_home,other,course,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,2,4,3,1,2,3,12,no 82 | GP,M,15,U,GT3,T,2,3,other,services,course,father,1,1,0,yes,yes,yes,yes,no,yes,yes,yes,3,2,2,1,3,3,2,yes 83 | GP,M,15,U,GT3,T,2,3,other,other,home,mother,1,3,0,yes,no,yes,no,no,yes,yes,no,5,3,2,1,2,5,4,yes 84 | GP,F,15,U,LE3,T,3,2,services,other,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,4,4,1,1,5,10,no 85 | GP,M,15,U,LE3,T,2,2,services,services,home,mother,2,2,0,no,no,yes,yes,yes,yes,yes,no,5,3,3,1,3,4,4,yes 86 | GP,F,15,U,GT3,T,1,1,other,other,home,father,1,2,0,no,yes,no,yes,no,yes,yes,no,4,3,2,2,3,4,2,yes 87 | GP,F,15,U,GT3,T,4,4,services,services,reputation,father,2,2,2,no,no,yes,no,yes,yes,yes,yes,4,4,4,2,3,5,6,no 88 | GP,F,16,U,LE3,T,2,2,at_home,other,course,mother,1,2,0,no,yes,no,no,yes,yes,no,no,4,3,4,1,2,2,4,no 89 | GP,F,15,U,GT3,T,4,2,other,other,reputation,mother,1,3,0,no,yes,no,yes,yes,yes,yes,no,5,3,3,1,3,1,4,yes 90 | GP,M,16,U,GT3,T,2,2,services,other,reputation,father,2,2,1,no,no,yes,yes,no,yes,yes,no,4,4,2,1,1,3,12,yes 91 | GP,M,16,U,LE3,A,4,4,teacher,health,reputation,mother,1,2,0,no,yes,no,no,yes,yes,no,no,4,1,3,3,5,5,18,no 92 | GP,F,16,U,GT3,T,3,3,other,other,home,mother,1,3,0,no,yes,yes,no,yes,yes,yes,yes,4,3,3,1,3,4,0,no 93 | GP,F,15,U,GT3,T,4,3,services,other,reputation,mother,1,1,0,no,no,yes,yes,yes,yes,yes,no,4,5,5,1,3,1,4,yes 94 | GP,F,16,U,LE3,T,3,1,other,other,home,father,1,2,0,yes,yes,no,no,yes,yes,no,no,3,3,3,2,3,2,4,no 95 | GP,F,16,U,GT3,T,4,2,teacher,services,home,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,5,3,3,1,1,1,0,yes 96 | GP,M,15,U,LE3,T,2,2,services,health,reputation,mother,1,4,0,no,yes,no,yes,yes,yes,yes,no,4,3,4,1,1,4,6,yes 97 | GP,F,15,R,GT3,T,1,1,at_home,other,home,mother,2,4,1,yes,yes,yes,yes,yes,yes,yes,no,3,1,2,1,1,1,2,yes 98 | GP,M,16,R,GT3,T,4,3,services,other,reputation,mother,2,1,0,yes,yes,no,yes,no,yes,yes,no,3,3,3,1,1,4,2,yes 99 | GP,F,16,U,GT3,T,2,1,other,other,course,mother,1,2,0,no,yes,yes,no,yes,yes,no,yes,4,3,5,1,1,5,2,yes 100 | GP,F,16,U,GT3,T,4,4,other,other,reputation,mother,1,1,0,no,no,no,yes,no,yes,yes,no,5,3,4,1,2,1,6,yes 101 | GP,F,16,U,GT3,T,4,3,other,at_home,course,mother,1,3,0,yes,yes,yes,no,yes,yes,yes,no,5,3,5,1,1,3,0,no 102 | 
GP,M,16,U,GT3,T,4,4,services,services,other,mother,1,1,0,yes,yes,yes,yes,yes,yes,yes,no,4,5,5,5,5,4,14,no 103 | GP,M,16,U,GT3,T,4,4,services,teacher,other,father,1,3,0,no,yes,no,yes,yes,yes,yes,yes,4,4,3,1,1,4,0,yes 104 | GP,M,15,U,GT3,T,4,4,services,other,course,mother,1,1,0,no,yes,no,yes,no,yes,yes,no,5,3,3,1,1,5,4,yes 105 | GP,F,15,U,GT3,T,3,2,services,other,home,mother,2,2,0,yes,yes,yes,no,yes,yes,yes,no,4,3,5,1,1,2,26,no 106 | GP,M,15,U,GT3,A,3,4,services,other,course,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,4,4,1,1,1,0,yes 107 | GP,F,15,U,GT3,A,3,3,other,health,reputation,father,1,4,0,yes,no,no,no,yes,yes,no,no,4,3,3,1,1,4,10,yes 108 | GP,F,15,U,GT3,T,2,2,other,other,course,mother,1,4,0,yes,yes,yes,no,yes,yes,yes,no,5,1,2,1,1,3,8,no 109 | GP,M,16,U,GT3,T,3,3,services,other,home,father,1,3,0,no,yes,no,yes,yes,yes,yes,no,5,3,3,1,1,5,2,yes 110 | GP,M,15,R,GT3,T,4,4,other,other,home,father,4,4,0,no,yes,yes,yes,yes,yes,yes,yes,1,3,5,3,5,1,6,yes 111 | GP,F,16,U,LE3,T,4,4,health,health,other,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,5,4,5,1,1,4,4,yes 112 | GP,M,15,U,LE3,A,4,4,teacher,teacher,course,mother,1,1,0,no,no,no,yes,yes,yes,yes,no,5,5,3,1,1,4,6,yes 113 | GP,F,16,R,GT3,T,3,3,services,other,reputation,father,1,3,1,yes,yes,no,yes,yes,yes,yes,no,4,1,2,1,1,2,0,yes 114 | GP,F,16,U,GT3,T,2,2,at_home,other,home,mother,1,2,1,yes,no,no,yes,yes,yes,yes,no,3,1,2,1,1,5,6,yes 115 | GP,M,15,U,LE3,T,4,2,teacher,other,course,mother,1,1,0,no,no,no,no,yes,yes,yes,no,3,5,2,1,1,3,10,yes 116 | GP,M,15,R,GT3,T,2,1,health,services,reputation,mother,1,2,0,no,no,no,yes,yes,yes,yes,yes,5,4,2,1,1,5,8,no 117 | GP,M,16,U,GT3,T,4,4,teacher,teacher,course,father,1,2,0,no,yes,no,yes,yes,yes,yes,no,5,4,4,1,2,5,2,yes 118 | GP,M,15,U,GT3,T,4,4,other,teacher,reputation,father,2,2,0,no,yes,no,yes,yes,yes,no,no,4,4,3,1,1,2,2,yes 119 | GP,M,16,U,GT3,T,3,3,other,services,home,father,2,1,0,no,no,no,yes,yes,yes,yes,no,5,4,2,1,1,5,0,yes 120 | GP,M,17,R,GT3,T,1,3,other,other,course,father,3,2,1,no,yes,no,yes,yes,yes,yes,no,5,2,4,1,4,5,20,no 121 | GP,M,15,U,GT3,T,3,4,other,other,reputation,father,1,1,0,no,no,no,no,yes,yes,yes,no,3,4,3,1,2,4,6,yes 122 | GP,F,15,U,GT3,T,1,2,at_home,services,course,mother,1,2,0,no,no,no,no,no,yes,yes,no,3,2,3,1,2,1,2,yes 123 | GP,M,15,U,GT3,T,2,2,services,services,home,father,1,4,0,no,yes,yes,yes,yes,yes,yes,no,5,5,4,1,2,5,6,yes 124 | GP,F,16,U,LE3,T,2,4,other,health,course,father,2,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,2,2,1,2,5,2,yes 125 | GP,M,16,U,GT3,T,4,4,health,other,course,mother,1,1,0,no,yes,no,yes,yes,yes,yes,no,3,4,4,1,4,5,18,yes 126 | GP,F,16,U,GT3,T,2,2,other,other,home,mother,1,2,0,no,no,yes,no,yes,yes,yes,yes,5,4,4,1,1,5,0,no 127 | GP,M,15,U,GT3,T,3,4,services,services,home,father,1,1,0,yes,no,no,no,yes,yes,yes,no,5,5,5,3,2,5,0,yes 128 | GP,F,15,U,LE3,A,3,4,other,other,home,mother,1,2,0,yes,no,no,yes,yes,yes,yes,yes,5,3,2,1,1,1,0,yes 129 | GP,F,19,U,GT3,T,0,1,at_home,other,course,other,1,2,3,no,yes,no,no,no,no,no,no,3,4,2,1,1,5,2,no 130 | GP,M,18,R,GT3,T,2,2,services,other,reputation,mother,1,1,2,no,yes,no,yes,yes,yes,yes,no,3,3,3,1,2,4,0,no 131 | GP,M,16,R,GT3,T,4,4,teacher,teacher,course,mother,1,1,0,no,no,yes,yes,yes,yes,yes,no,3,5,5,2,5,4,8,yes 132 | GP,F,15,R,GT3,T,3,4,services,teacher,course,father,2,3,2,no,yes,no,no,yes,yes,yes,yes,4,2,2,2,2,5,0,no 133 | GP,F,15,U,GT3,T,1,1,at_home,other,course,mother,3,1,0,no,yes,no,yes,no,yes,yes,yes,4,3,3,1,2,4,0,no 134 | GP,F,17,U,LE3,T,2,2,other,other,course,father,1,1,0,no,yes,no,no,yes,yes,yes,yes,3,4,4,1,3,5,12,yes 135 | 
GP,F,16,U,GT3,A,3,4,services,other,course,father,1,1,0,no,no,no,no,yes,yes,yes,no,3,2,1,1,4,5,16,yes 136 | GP,M,15,R,GT3,T,3,4,at_home,teacher,course,mother,4,2,0,no,yes,no,no,yes,yes,no,yes,5,3,3,1,1,5,0,no 137 | GP,F,15,U,GT3,T,4,4,services,at_home,course,mother,1,3,0,no,yes,no,yes,yes,yes,yes,yes,4,3,3,1,1,5,0,no 138 | GP,M,17,R,GT3,T,3,4,at_home,other,course,mother,3,2,0,no,no,no,no,yes,yes,no,no,5,4,5,2,4,5,0,no 139 | GP,F,16,U,GT3,A,3,3,other,other,course,other,2,1,2,no,yes,no,yes,no,yes,yes,yes,4,3,2,1,1,5,0,no 140 | GP,M,16,U,LE3,T,1,1,services,other,course,mother,1,2,1,no,no,no,no,yes,yes,no,yes,4,4,4,1,3,5,0,yes 141 | GP,F,15,U,GT3,T,4,4,teacher,teacher,course,mother,2,1,0,no,no,no,yes,yes,yes,yes,no,4,3,2,1,1,5,0,yes 142 | GP,M,15,U,GT3,T,4,3,teacher,services,course,father,2,4,0,yes,yes,no,no,yes,yes,yes,no,2,2,2,1,1,3,0,no 143 | GP,M,16,U,LE3,T,2,2,services,services,reputation,father,2,1,2,no,yes,no,yes,yes,yes,yes,no,2,3,3,2,2,2,8,no 144 | GP,F,15,U,GT3,T,4,4,teacher,services,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,4,2,2,1,1,5,2,yes 145 | GP,F,16,U,LE3,T,1,1,at_home,at_home,course,mother,1,1,0,no,no,no,no,yes,yes,yes,no,3,4,4,3,3,1,2,yes 146 | GP,M,17,U,GT3,T,2,1,other,other,home,mother,1,1,3,no,yes,no,no,yes,yes,yes,no,5,4,5,1,2,5,0,no 147 | GP,F,15,U,GT3,T,1,1,other,services,course,father,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,4,2,1,2,5,0,yes 148 | GP,F,15,U,GT3,T,3,2,health,services,home,father,1,2,3,no,yes,no,no,yes,yes,yes,no,3,3,2,1,1,3,0,no 149 | GP,F,15,U,GT3,T,1,2,at_home,other,course,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,4,3,2,1,1,5,2,yes 150 | GP,M,16,U,GT3,T,4,4,teacher,teacher,course,mother,1,1,0,no,yes,no,no,yes,no,yes,yes,3,3,2,2,1,5,0,no 151 | GP,M,15,U,LE3,A,2,1,services,other,course,mother,4,1,3,no,no,no,no,yes,yes,yes,no,4,5,5,2,5,5,0,yes 152 | GP,M,18,U,LE3,T,1,1,other,other,course,mother,1,1,3,no,no,no,no,yes,no,yes,yes,2,3,5,2,5,4,0,no 153 | GP,M,16,U,LE3,T,2,1,at_home,other,course,mother,1,1,1,no,no,no,yes,yes,yes,no,yes,4,4,4,3,5,5,6,yes 154 | GP,F,15,R,GT3,T,3,3,services,services,reputation,other,2,3,2,no,yes,yes,yes,yes,yes,yes,yes,4,2,1,2,3,3,8,yes 155 | GP,M,19,U,GT3,T,3,2,services,at_home,home,mother,1,1,3,no,yes,no,no,yes,no,yes,yes,4,5,4,1,1,4,0,no 156 | GP,F,17,U,GT3,T,4,4,other,teacher,course,mother,1,1,0,yes,yes,no,no,yes,yes,no,yes,4,2,1,1,1,4,0,yes 157 | GP,M,15,R,GT3,T,2,3,at_home,services,course,mother,1,2,0,yes,no,yes,yes,yes,yes,no,no,4,4,4,1,1,1,2,no 158 | GP,M,17,R,LE3,T,1,2,other,other,reputation,mother,1,1,0,no,no,no,no,yes,yes,no,no,2,2,2,3,3,5,8,yes 159 | GP,F,18,R,GT3,T,1,1,at_home,other,course,mother,3,1,3,no,yes,no,yes,no,yes,no,no,5,2,5,1,5,4,6,yes 160 | GP,M,16,R,GT3,T,2,2,at_home,other,course,mother,3,1,0,no,no,no,no,no,yes,no,no,4,2,2,1,2,3,2,yes 161 | GP,M,16,U,GT3,T,3,3,other,services,course,father,1,2,1,no,yes,yes,no,yes,yes,yes,yes,4,5,5,4,4,5,4,yes 162 | GP,M,17,R,LE3,T,2,1,at_home,other,course,mother,2,1,2,no,no,no,yes,yes,no,yes,yes,3,3,2,2,2,5,0,no 163 | GP,M,15,R,GT3,T,3,2,other,other,course,mother,2,2,2,yes,yes,no,no,yes,yes,yes,yes,4,4,4,1,4,3,6,no 164 | GP,M,16,U,LE3,T,1,2,other,other,course,mother,2,1,1,no,no,no,yes,yes,yes,no,no,4,4,4,2,4,5,0,no 165 | GP,M,17,U,GT3,T,1,3,at_home,services,course,father,1,1,0,no,no,no,no,yes,no,yes,no,5,3,3,1,4,2,2,yes 166 | GP,M,17,R,LE3,T,1,1,other,services,course,mother,4,2,3,no,no,no,yes,yes,no,no,yes,5,3,5,1,5,5,0,no 167 | GP,M,16,U,GT3,T,3,2,services,services,course,mother,2,1,1,no,yes,no,yes,no,no,no,no,4,5,2,1,1,2,16,yes 168 | 
GP,M,16,U,GT3,T,2,2,other,other,course,father,1,2,0,no,no,no,no,yes,no,yes,no,4,3,5,2,4,4,4,yes 169 | GP,F,16,U,GT3,T,4,2,health,services,home,father,1,2,0,no,no,yes,no,yes,yes,yes,yes,4,2,3,1,1,3,0,yes 170 | GP,F,16,U,GT3,T,2,2,other,other,home,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,5,1,5,1,1,4,0,no 171 | GP,F,16,U,GT3,T,4,4,health,health,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,4,4,2,1,1,3,0,yes 172 | GP,M,16,U,GT3,T,3,4,other,other,course,father,3,1,2,no,yes,no,yes,no,yes,yes,no,3,4,5,2,4,2,0,no 173 | GP,M,16,U,GT3,T,1,0,other,other,reputation,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,2,1,1,3,2,yes 174 | GP,M,17,U,LE3,T,4,4,teacher,other,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,4,4,1,3,5,0,yes 175 | GP,F,16,U,GT3,T,1,3,at_home,services,home,mother,1,2,3,no,no,no,yes,no,yes,yes,yes,4,3,5,1,1,3,0,no 176 | GP,F,16,U,LE3,T,3,3,other,other,reputation,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,4,5,1,1,4,4,no 177 | GP,M,17,U,LE3,T,4,3,teacher,other,course,mother,2,2,0,no,no,yes,yes,yes,yes,yes,no,4,4,4,4,4,4,4,no 178 | GP,F,16,U,GT3,T,2,2,services,other,reputation,mother,2,2,0,no,no,yes,yes,no,yes,yes,no,3,4,4,1,4,5,2,yes 179 | GP,M,17,U,GT3,T,3,3,other,other,reputation,father,1,2,0,no,no,no,yes,no,yes,yes,no,4,3,4,1,4,4,4,no 180 | GP,M,16,R,GT3,T,4,2,teacher,services,other,mother,1,1,0,no,yes,no,yes,yes,yes,yes,yes,4,3,3,3,4,3,10,no 181 | GP,M,17,U,GT3,T,4,3,other,other,course,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,5,2,3,1,1,2,4,yes 182 | GP,M,16,U,GT3,T,4,3,teacher,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,3,4,3,2,3,3,10,no 183 | GP,M,16,U,GT3,T,3,3,services,other,home,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,4,2,3,1,2,3,2,yes 184 | GP,F,17,U,GT3,T,2,4,services,services,reputation,father,1,2,0,no,yes,no,yes,yes,yes,no,no,5,4,2,2,3,5,0,yes 185 | GP,F,17,U,LE3,T,3,3,other,other,reputation,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,5,3,3,2,3,1,56,no 186 | GP,F,16,U,GT3,T,3,2,other,other,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,1,2,2,1,2,1,14,yes 187 | GP,M,17,U,GT3,T,3,3,services,services,other,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,4,3,4,2,3,4,12,yes 188 | GP,M,16,U,GT3,T,1,2,services,services,other,mother,1,1,0,no,yes,yes,yes,yes,yes,yes,yes,3,3,3,1,2,3,2,yes 189 | GP,M,16,U,LE3,T,2,1,other,other,course,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,4,2,3,1,2,5,0,yes 190 | GP,F,17,U,GT3,A,3,3,health,other,reputation,mother,1,2,0,no,yes,no,no,no,yes,yes,yes,3,3,3,1,3,3,6,no 191 | GP,M,17,R,GT3,T,1,2,at_home,other,home,mother,1,2,0,no,no,no,no,yes,yes,no,no,3,1,3,1,5,3,4,yes 192 | GP,F,16,U,GT3,T,2,3,services,services,course,mother,1,2,0,no,no,no,no,yes,yes,yes,no,4,3,3,1,1,2,10,yes 193 | GP,F,17,U,GT3,T,1,1,at_home,services,course,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,5,3,3,1,1,3,0,no 194 | GP,M,17,U,GT3,T,1,2,at_home,services,other,other,2,2,0,no,no,yes,yes,no,yes,yes,no,4,4,4,4,5,5,12,no 195 | GP,M,16,R,GT3,T,3,3,services,services,reputation,mother,1,1,0,no,yes,no,yes,yes,yes,yes,no,4,3,2,3,4,5,8,yes 196 | GP,M,16,U,GT3,T,2,3,other,other,home,father,2,1,0,no,no,no,no,yes,yes,yes,no,5,3,3,1,1,3,0,yes 197 | GP,F,17,U,LE3,T,2,4,services,services,course,father,1,2,0,no,no,no,yes,yes,yes,yes,yes,4,3,2,1,1,5,0,yes 198 | GP,M,17,U,GT3,T,4,4,services,teacher,home,mother,1,1,0,no,no,no,no,yes,yes,yes,no,5,2,3,1,2,5,4,yes 199 | GP,M,16,R,LE3,T,3,3,teacher,other,home,father,3,1,0,no,yes,yes,yes,yes,yes,yes,no,3,3,4,3,5,3,8,yes 200 | GP,F,17,U,GT3,T,4,4,services,teacher,home,mother,2,1,1,no,yes,no,no,yes,yes,yes,no,4,2,4,2,3,2,24,yes 
201 | GP,F,16,U,LE3,T,4,4,teacher,teacher,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,5,2,1,2,3,0,yes 202 | GP,F,16,U,GT3,T,4,3,health,other,home,mother,1,2,0,no,yes,no,yes,yes,yes,yes,no,4,3,5,1,5,2,2,yes 203 | GP,F,16,U,GT3,T,2,3,other,other,reputation,mother,1,2,0,yes,yes,yes,yes,yes,yes,no,no,4,4,3,1,3,4,6,yes 204 | GP,F,17,U,GT3,T,1,1,other,other,course,mother,1,2,0,no,yes,yes,no,no,yes,no,no,4,4,4,1,3,1,4,yes 205 | GP,F,17,R,GT3,T,2,2,other,other,reputation,mother,1,1,0,no,yes,no,no,yes,yes,yes,no,5,3,2,1,2,3,18,no 206 | GP,F,16,R,GT3,T,2,2,services,services,reputation,mother,2,4,0,no,yes,yes,yes,no,yes,yes,no,5,3,5,1,1,5,6,yes 207 | GP,F,17,U,GT3,T,3,4,at_home,services,home,mother,1,3,1,no,yes,yes,no,yes,yes,yes,yes,4,4,3,3,4,5,28,no 208 | GP,F,16,U,GT3,A,3,1,services,other,course,mother,1,2,3,no,yes,yes,no,yes,yes,yes,no,2,3,3,2,2,4,5,no 209 | GP,F,16,U,GT3,T,4,3,teacher,other,other,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,1,3,2,1,1,1,10,yes 210 | GP,F,16,U,GT3,T,1,1,at_home,other,home,mother,2,1,0,no,yes,yes,no,yes,yes,no,no,4,3,2,1,4,5,6,yes 211 | GP,F,17,R,GT3,T,4,3,teacher,other,reputation,mother,2,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,2,1,1,4,6,no 212 | GP,F,19,U,GT3,T,3,3,other,other,reputation,other,1,4,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,2,3,10,no 213 | GP,M,17,U,LE3,T,4,4,services,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,5,3,5,4,5,3,13,yes 214 | GP,F,16,U,GT3,A,2,2,other,other,reputation,mother,1,2,0,yes,yes,yes,no,yes,yes,yes,no,3,3,4,1,1,4,0,yes 215 | GP,M,18,U,GT3,T,2,2,services,other,home,mother,1,2,1,no,yes,yes,yes,yes,yes,yes,no,4,4,4,2,4,5,15,no 216 | GP,F,17,R,LE3,T,4,4,services,other,other,mother,1,1,0,no,yes,yes,no,yes,yes,no,no,5,2,1,1,2,3,12,yes 217 | GP,F,17,U,LE3,T,3,2,other,other,reputation,mother,2,2,0,no,no,yes,no,yes,yes,yes,no,4,4,4,1,3,1,2,yes 218 | GP,F,17,U,GT3,T,4,3,other,other,reputation,mother,1,2,2,no,no,yes,no,yes,yes,yes,yes,3,4,5,2,4,1,22,no 219 | GP,M,18,U,LE3,T,3,3,services,health,home,father,1,2,1,no,yes,yes,no,yes,yes,yes,no,3,2,4,2,4,4,13,no 220 | GP,F,17,U,GT3,T,2,3,at_home,other,home,father,2,1,0,no,yes,yes,no,yes,yes,no,no,3,3,3,1,4,3,3,no 221 | GP,F,17,U,GT3,T,2,2,at_home,at_home,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,1,4,4,yes 222 | GP,F,17,R,GT3,T,2,1,at_home,services,reputation,mother,2,2,0,no,yes,no,yes,yes,yes,yes,no,4,2,5,1,2,5,2,no 223 | GP,F,17,U,GT3,T,1,1,at_home,other,reputation,mother,1,3,1,no,yes,no,yes,yes,yes,no,yes,4,3,4,1,1,5,0,no 224 | GP,F,16,U,GT3,T,2,3,services,teacher,other,mother,1,2,0,yes,no,no,no,yes,yes,yes,no,2,3,1,1,1,3,2,yes 225 | GP,M,18,U,GT3,T,2,2,other,other,home,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,3,3,3,5,5,4,0,yes 226 | GP,F,16,U,GT3,T,4,4,teacher,services,home,mother,1,3,0,no,yes,no,yes,no,yes,yes,no,5,3,2,1,1,5,0,yes 227 | GP,F,18,R,GT3,T,3,1,other,other,reputation,mother,1,2,1,no,no,no,yes,yes,yes,yes,yes,5,3,3,1,1,4,16,no 228 | GP,F,17,U,GT3,T,3,2,other,other,course,mother,1,2,0,no,no,no,yes,no,yes,yes,no,5,3,4,1,3,3,10,yes 229 | GP,M,17,U,LE3,T,2,3,services,services,reputation,father,1,2,0,no,yes,yes,no,no,yes,yes,no,5,3,3,1,3,3,2,yes 230 | GP,M,18,U,LE3,T,2,1,at_home,other,course,mother,4,2,0,yes,yes,yes,yes,yes,yes,yes,yes,4,3,2,4,5,3,14,no 231 | GP,F,17,U,GT3,A,2,1,other,other,course,mother,2,3,0,no,no,no,yes,yes,yes,yes,yes,3,2,3,1,2,3,10,yes 232 | GP,F,17,U,LE3,T,4,3,health,other,reputation,father,1,2,0,no,no,no,yes,yes,yes,yes,yes,3,2,3,1,2,3,14,yes 233 | 
GP,M,17,R,GT3,T,2,2,other,other,course,father,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,5,2,1,1,1,4,yes 234 | GP,M,17,U,GT3,T,4,4,teacher,teacher,reputation,mother,1,2,0,yes,yes,no,yes,yes,yes,yes,yes,4,5,5,1,3,2,14,no 235 | GP,M,16,U,GT3,T,4,4,health,other,reputation,father,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,2,4,2,4,1,2,yes 236 | GP,M,16,U,LE3,T,1,1,other,other,home,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,3,4,2,1,1,5,18,no 237 | GP,M,16,U,GT3,T,3,2,at_home,other,reputation,mother,2,3,0,no,no,no,yes,yes,yes,yes,yes,5,3,3,1,3,2,10,yes 238 | GP,M,17,U,LE3,T,2,2,other,other,home,father,1,2,0,no,no,yes,yes,no,yes,yes,yes,4,4,2,5,5,4,4,yes 239 | GP,F,16,U,GT3,T,2,1,other,other,home,mother,1,1,0,no,no,no,no,yes,yes,yes,yes,4,5,2,1,1,5,20,yes 240 | GP,F,17,R,GT3,T,2,1,at_home,services,course,mother,3,2,0,no,no,no,yes,yes,yes,no,no,2,1,1,1,1,3,2,yes 241 | GP,M,18,U,GT3,T,2,2,other,services,reputation,father,1,2,1,no,no,no,no,yes,no,yes,no,5,5,4,3,5,2,0,no 242 | GP,M,17,U,LE3,T,4,3,health,other,course,mother,2,2,0,no,no,no,yes,yes,yes,yes,yes,2,5,5,1,4,5,14,yes 243 | GP,M,17,R,LE3,A,4,4,teacher,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,3,3,3,2,3,4,2,yes 244 | GP,M,16,U,LE3,T,4,3,teacher,other,course,mother,1,1,0,no,no,no,yes,no,yes,yes,no,5,4,5,1,1,3,0,no 245 | GP,M,16,U,GT3,T,4,4,services,services,course,mother,1,1,0,no,no,no,yes,yes,yes,yes,no,5,3,2,1,2,5,0,yes 246 | GP,F,18,U,GT3,T,2,1,other,other,course,other,2,3,0,no,yes,yes,no,no,yes,yes,yes,4,4,4,1,1,3,0,no 247 | GP,M,16,U,GT3,T,2,1,other,other,course,mother,3,1,0,no,no,no,no,yes,yes,yes,no,4,3,3,1,1,4,6,yes 248 | GP,M,17,U,GT3,T,2,3,other,other,course,father,2,1,0,no,no,no,no,yes,yes,yes,no,5,2,2,1,1,2,4,yes 249 | GP,M,22,U,GT3,T,3,1,services,services,other,mother,1,1,3,no,no,no,no,no,no,yes,yes,5,4,5,5,5,1,16,no 250 | GP,M,18,R,LE3,T,3,3,other,services,course,mother,1,2,1,no,yes,no,no,yes,yes,yes,yes,4,3,3,1,3,5,8,no 251 | GP,M,16,U,GT3,T,0,2,other,other,other,mother,1,1,0,no,no,yes,no,no,yes,yes,no,4,3,2,2,4,5,0,yes 252 | GP,M,18,U,GT3,T,3,2,services,other,course,mother,2,1,1,no,no,no,no,yes,no,yes,no,4,4,5,2,4,5,0,no 253 | GP,M,16,U,GT3,T,3,3,at_home,other,reputation,other,3,2,0,yes,yes,no,no,no,yes,yes,no,5,3,3,1,3,2,6,yes 254 | GP,M,18,U,GT3,T,2,1,services,services,other,mother,1,1,1,no,no,no,no,no,no,yes,no,3,2,5,2,5,5,4,no 255 | GP,M,16,R,GT3,T,2,1,other,other,course,mother,2,1,0,no,no,no,yes,no,yes,no,no,3,3,2,1,3,3,0,no 256 | GP,M,17,R,GT3,T,2,1,other,other,course,mother,1,1,0,no,no,no,no,no,yes,yes,no,4,4,2,2,4,5,0,yes 257 | GP,M,17,U,LE3,T,1,1,health,other,course,mother,2,1,1,no,yes,no,yes,yes,yes,yes,no,4,4,4,1,2,5,2,no 258 | GP,F,17,U,LE3,T,4,2,teacher,services,reputation,mother,1,4,0,no,yes,yes,yes,yes,yes,yes,no,4,2,3,1,1,4,6,yes 259 | GP,M,19,U,LE3,A,4,3,services,at_home,reputation,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,4,3,1,1,1,1,12,yes 260 | GP,M,18,U,GT3,T,2,1,other,other,home,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,5,2,4,1,2,4,8,yes 261 | GP,F,17,U,LE3,T,2,2,services,services,course,father,1,4,0,no,no,yes,yes,yes,yes,yes,yes,3,4,1,1,1,2,0,no 262 | GP,F,18,U,GT3,T,4,3,services,other,home,father,1,2,0,no,yes,yes,no,yes,yes,yes,yes,3,1,2,1,3,2,21,yes 263 | GP,M,18,U,GT3,T,4,3,teacher,other,course,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,4,3,2,1,1,3,2,no 264 | GP,M,18,R,GT3,T,3,2,other,other,course,mother,1,3,0,no,no,no,yes,no,yes,no,no,5,3,2,1,1,3,1,yes 265 | GP,F,17,U,GT3,T,3,3,other,other,home,mother,1,3,0,no,no,no,yes,no,yes,no,no,3,2,3,1,1,4,4,no 266 | 
GP,F,18,U,GT3,T,2,2,at_home,services,home,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,3,1,1,3,0,no 267 | GP,M,18,R,LE3,A,3,4,other,other,reputation,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,2,5,3,4,1,13,yes 268 | GP,M,17,U,GT3,T,3,1,services,other,other,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,5,4,4,3,4,5,2,yes 269 | GP,F,18,R,GT3,T,4,4,teacher,other,reputation,mother,2,2,0,no,no,yes,yes,yes,yes,yes,no,4,3,4,2,2,4,8,yes 270 | GP,M,18,U,GT3,T,4,2,health,other,reputation,father,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,5,4,5,1,3,5,10,yes 271 | GP,F,18,R,GT3,T,2,1,other,other,reputation,mother,2,2,0,no,yes,no,no,yes,no,yes,yes,4,3,5,1,2,3,0,no 272 | GP,F,19,U,GT3,T,3,3,other,services,home,other,1,2,2,no,yes,yes,yes,yes,yes,yes,no,4,3,5,3,3,5,15,no 273 | GP,F,18,U,GT3,T,2,3,other,services,reputation,father,1,4,0,no,yes,yes,yes,yes,yes,yes,yes,4,5,5,1,3,2,4,yes 274 | GP,F,18,U,LE3,T,1,1,other,other,home,mother,2,2,0,no,yes,yes,no,no,yes,no,no,4,4,3,1,1,3,2,yes 275 | GP,M,17,R,GT3,T,1,2,at_home,at_home,home,mother,1,2,0,no,yes,yes,yes,no,yes,no,yes,3,5,2,2,2,1,2,yes 276 | GP,F,17,U,GT3,T,2,4,at_home,health,reputation,mother,2,2,0,no,yes,yes,no,yes,yes,yes,yes,4,3,3,1,1,1,2,yes 277 | GP,F,17,U,LE3,T,2,2,services,other,course,mother,2,2,0,yes,yes,yes,no,yes,yes,yes,yes,4,4,4,2,3,5,6,yes 278 | GP,F,18,R,GT3,A,3,2,other,services,home,mother,2,2,0,no,no,no,no,no,no,yes,yes,4,1,1,1,1,5,75,no 279 | GP,M,18,U,GT3,T,4,4,teacher,services,home,mother,2,1,0,no,no,yes,yes,yes,yes,yes,no,3,2,4,1,4,3,22,no 280 | GP,F,18,U,GT3,T,4,4,health,health,reputation,father,1,2,1,yes,yes,no,yes,yes,yes,yes,yes,2,4,4,1,1,4,15,no 281 | GP,M,18,U,LE3,T,4,3,teacher,services,course,mother,2,1,0,no,no,yes,yes,yes,yes,yes,no,4,2,3,1,2,1,8,yes 282 | GP,M,17,U,LE3,A,4,1,services,other,home,mother,2,1,0,no,no,yes,yes,yes,yes,yes,yes,4,5,4,2,4,5,30,no 283 | GP,M,17,U,LE3,A,3,2,teacher,services,home,mother,1,1,1,no,no,no,no,yes,yes,yes,no,4,4,4,3,4,3,19,yes 284 | GP,F,18,R,LE3,T,1,1,at_home,other,reputation,mother,2,4,0,no,yes,yes,yes,yes,yes,no,no,5,2,2,1,1,3,1,yes 285 | GP,F,18,U,GT3,T,1,1,other,other,home,mother,2,2,0,yes,no,no,yes,yes,yes,yes,no,5,4,4,1,1,4,4,yes 286 | GP,F,17,U,GT3,T,2,2,other,other,course,mother,1,2,0,no,yes,no,no,no,yes,yes,no,5,4,5,1,2,5,4,yes 287 | GP,M,17,U,GT3,T,1,1,other,other,reputation,father,1,2,0,no,no,yes,no,no,yes,yes,no,4,3,3,1,2,4,2,yes 288 | GP,F,18,U,GT3,T,2,2,at_home,at_home,other,mother,1,3,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,2,2,5,yes 289 | GP,F,17,U,GT3,T,1,1,services,teacher,reputation,mother,1,3,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,3,6,yes 290 | GP,M,18,U,GT3,T,2,1,services,services,reputation,mother,1,3,0,no,no,yes,yes,yes,yes,yes,no,4,2,4,1,3,2,6,yes 291 | GP,M,18,U,LE3,A,4,4,teacher,teacher,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,4,3,1,1,2,9,yes 292 | GP,M,18,U,GT3,T,4,2,teacher,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,2,1,4,5,11,yes 293 | GP,F,17,U,GT3,T,4,3,health,services,reputation,mother,1,3,0,no,yes,yes,no,yes,yes,yes,no,4,2,2,1,2,3,0,yes 294 | GP,F,18,U,LE3,T,2,1,services,at_home,reputation,mother,1,2,1,no,no,no,no,yes,yes,yes,yes,5,4,3,1,1,5,12,yes 295 | GP,F,17,R,LE3,T,3,1,services,other,reputation,mother,2,4,0,no,yes,yes,no,yes,yes,no,no,3,1,2,1,1,3,6,yes 296 | GP,M,18,R,LE3,T,3,2,services,other,reputation,mother,2,3,0,no,yes,yes,yes,yes,yes,yes,no,5,4,2,1,1,4,8,yes 297 | GP,M,17,U,GT3,T,3,3,health,other,home,mother,1,1,0,no,yes,yes,no,yes,yes,yes,no,4,4,3,1,3,5,4,yes 298 | 
GP,F,19,U,GT3,T,4,4,health,other,reputation,other,2,2,0,no,yes,yes,yes,yes,yes,yes,no,2,3,4,2,3,2,0,no 299 | GP,F,18,U,LE3,T,4,3,other,other,home,other,2,2,0,no,yes,yes,no,yes,yes,yes,yes,4,4,5,1,2,2,10,no 300 | GP,F,18,U,GT3,T,4,3,other,other,reputation,father,1,4,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,3,0,yes 301 | GP,M,18,U,LE3,T,4,4,teacher,teacher,home,mother,1,1,0,no,yes,yes,no,yes,yes,yes,yes,1,4,2,2,2,1,5,yes 302 | GP,F,18,U,LE3,A,4,4,health,other,home,mother,1,2,0,no,yes,no,no,yes,yes,yes,yes,4,2,4,1,1,4,14,yes 303 | GP,M,17,U,LE3,T,4,4,other,teacher,home,father,2,1,0,no,no,yes,no,yes,yes,yes,no,4,1,1,2,2,5,0,yes 304 | GP,F,17,U,GT3,T,4,2,other,other,reputation,mother,2,3,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,3,0,yes 305 | GP,F,17,U,GT3,T,3,2,health,health,reputation,father,1,4,0,no,yes,yes,yes,no,yes,yes,no,5,2,2,1,2,5,0,yes 306 | GP,M,19,U,GT3,T,3,3,other,other,home,other,1,2,1,no,yes,no,yes,yes,yes,yes,yes,4,4,4,1,1,3,20,yes 307 | GP,F,18,U,GT3,T,2,4,services,at_home,reputation,other,1,2,1,no,yes,yes,yes,yes,yes,yes,no,4,4,3,1,1,3,8,yes 308 | GP,M,20,U,GT3,A,3,2,services,other,course,other,1,1,0,no,no,no,yes,yes,yes,no,no,5,5,3,1,1,5,0,yes 309 | GP,M,19,U,GT3,T,4,4,teacher,services,reputation,other,2,1,1,no,yes,yes,no,yes,yes,yes,yes,4,3,4,1,1,4,38,no 310 | GP,M,19,R,GT3,T,3,3,other,services,reputation,father,1,2,1,no,no,no,yes,yes,yes,no,yes,4,5,3,1,2,5,0,yes 311 | GP,F,19,U,LE3,T,1,1,at_home,other,reputation,other,1,2,1,yes,yes,no,yes,no,yes,yes,no,4,4,3,1,3,3,18,yes 312 | GP,F,19,U,LE3,T,1,2,services,services,home,other,1,2,1,no,no,no,yes,no,yes,no,yes,4,2,4,2,2,3,0,no 313 | GP,F,19,U,GT3,T,2,1,at_home,other,other,other,3,2,0,no,yes,no,no,yes,no,yes,yes,3,4,1,1,1,2,20,yes 314 | GP,M,19,U,GT3,T,1,2,other,services,course,other,1,2,1,no,no,no,no,no,yes,yes,no,4,5,2,2,2,4,3,yes 315 | GP,F,19,U,LE3,T,3,2,services,other,reputation,other,2,2,1,no,yes,yes,no,no,yes,yes,yes,4,2,2,1,2,1,22,yes 316 | GP,F,19,U,GT3,T,1,1,at_home,health,home,other,1,3,2,no,no,no,no,no,yes,yes,yes,4,1,2,1,1,3,14,yes 317 | GP,F,19,R,GT3,T,2,3,other,other,reputation,other,1,3,1,no,no,no,no,yes,yes,yes,yes,4,1,2,1,1,3,40,yes 318 | GP,F,18,U,GT3,T,2,1,services,other,course,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,5,3,3,1,2,1,0,no 319 | GP,F,18,U,GT3,T,4,3,other,other,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,4,1,1,5,9,no 320 | GP,F,17,R,GT3,T,3,4,at_home,services,course,father,1,3,0,no,yes,yes,yes,no,yes,yes,no,4,3,4,2,5,5,0,yes 321 | GP,F,18,U,GT3,T,4,4,teacher,other,course,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,4,4,3,3,5,2,yes 322 | GP,F,17,U,GT3,A,4,3,services,services,course,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,5,2,2,1,2,5,23,yes 323 | GP,F,17,U,GT3,T,2,2,other,other,course,mother,1,2,0,no,yes,no,no,yes,yes,no,yes,4,2,2,1,1,3,12,no 324 | GP,F,17,R,LE3,T,2,2,services,services,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,3,3,2,2,2,3,3,yes 325 | GP,F,17,U,GT3,T,3,1,services,services,course,father,1,3,0,no,yes,no,no,no,yes,yes,no,3,4,3,2,3,5,1,yes 326 | GP,F,17,U,LE3,T,0,2,at_home,at_home,home,father,2,3,0,no,no,no,no,yes,yes,yes,no,3,3,3,2,3,2,0,yes 327 | GP,M,18,U,GT3,T,4,4,other,other,course,mother,1,3,0,no,no,no,yes,yes,yes,yes,no,4,3,3,2,2,3,3,yes 328 | GP,M,17,U,GT3,T,3,3,other,services,reputation,mother,1,1,0,no,no,no,yes,no,yes,yes,no,4,3,5,3,5,5,3,yes 329 | GP,M,17,R,GT3,T,2,2,services,other,course,mother,4,1,0,no,yes,no,no,yes,yes,yes,no,4,4,5,5,5,4,8,yes 330 | GP,F,17,U,GT3,T,4,4,teacher,services,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,5,4,4,1,3,4,7,no 
331 | GP,F,17,U,GT3,T,4,4,teacher,teacher,course,mother,2,3,0,no,yes,yes,no,no,yes,yes,yes,4,3,3,1,2,4,4,yes 332 | GP,M,18,U,LE3,T,2,2,other,other,course,mother,1,4,0,no,yes,no,yes,yes,yes,yes,no,4,5,5,2,4,5,2,no 333 | GP,F,17,R,GT3,T,2,4,at_home,other,course,father,1,3,0,no,yes,no,no,yes,yes,yes,yes,4,4,3,1,1,5,7,yes 334 | GP,F,18,U,GT3,T,3,3,services,services,home,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,5,3,4,1,1,4,0,no 335 | GP,F,18,U,LE3,T,2,2,other,other,home,other,1,2,0,no,no,no,yes,no,yes,yes,yes,4,3,3,1,1,2,0,no 336 | GP,F,18,R,GT3,T,2,2,at_home,other,course,mother,2,4,0,no,no,no,yes,yes,yes,no,no,4,4,4,1,1,4,0,no 337 | GP,F,17,U,GT3,T,3,4,services,other,course,mother,1,3,0,no,no,no,no,yes,yes,yes,no,4,4,5,1,3,5,16,yes 338 | GP,F,19,R,GT3,A,3,1,services,at_home,home,other,1,3,1,no,no,yes,no,yes,yes,no,no,5,4,3,1,2,5,12,yes 339 | GP,F,17,U,GT3,T,3,2,other,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,4,3,2,2,3,2,0,no 340 | GP,F,18,U,LE3,T,3,3,services,services,home,mother,1,4,0,no,yes,no,no,yes,yes,yes,no,5,3,3,1,1,1,7,yes 341 | GP,F,17,R,GT3,A,3,2,other,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,2,3,2,4,yes 342 | GP,F,19,U,GT3,T,2,1,services,services,home,other,1,3,1,no,no,yes,yes,yes,yes,yes,yes,4,3,4,1,3,3,4,yes 343 | GP,M,18,U,GT3,T,4,4,teacher,services,home,father,1,2,1,no,yes,no,yes,yes,yes,yes,no,4,3,3,2,2,2,0,no 344 | GP,M,18,U,LE3,T,3,4,services,other,home,mother,1,2,0,no,no,no,yes,yes,yes,yes,yes,4,3,3,1,3,5,11,yes 345 | GP,F,17,U,GT3,A,2,2,at_home,at_home,home,father,1,2,1,no,yes,no,no,yes,yes,yes,yes,3,3,1,1,2,4,0,no 346 | GP,F,18,U,GT3,T,2,3,at_home,other,course,mother,1,3,0,no,yes,no,no,yes,yes,yes,no,4,3,3,1,2,3,4,yes 347 | GP,F,18,U,GT3,T,3,2,other,services,other,mother,1,3,0,no,no,no,no,yes,yes,yes,yes,5,4,3,2,3,1,7,yes 348 | GP,M,18,R,GT3,T,4,3,teacher,services,course,mother,1,3,0,no,no,no,no,yes,yes,yes,yes,5,3,2,1,2,4,9,yes 349 | GP,M,18,U,GT3,T,4,3,teacher,other,course,mother,1,3,0,no,yes,yes,no,yes,yes,yes,yes,5,4,5,2,3,5,0,no 350 | GP,F,17,U,GT3,T,4,3,health,other,reputation,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,3,1,3,4,0,yes 351 | MS,M,18,R,GT3,T,3,2,other,other,course,mother,2,1,1,no,yes,no,no,no,yes,yes,no,2,5,5,5,5,5,10,yes 352 | MS,M,19,R,GT3,T,1,1,other,services,home,other,3,2,3,no,no,no,no,yes,yes,yes,no,5,4,4,3,3,2,8,no 353 | MS,M,17,U,GT3,T,3,3,health,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,4,5,4,2,3,3,2,yes 354 | MS,M,18,U,LE3,T,1,3,at_home,services,course,mother,1,1,1,no,no,no,no,yes,no,yes,yes,4,3,3,2,3,3,7,no 355 | MS,M,19,R,GT3,T,1,1,other,other,home,other,3,1,1,no,yes,no,no,yes,yes,yes,no,4,4,4,3,3,5,4,no 356 | MS,M,17,R,GT3,T,4,3,services,other,home,mother,2,2,0,no,yes,yes,yes,no,yes,yes,yes,4,5,5,1,3,2,4,yes 357 | MS,F,18,U,GT3,T,3,3,services,services,course,father,1,2,0,no,yes,no,no,yes,yes,no,yes,5,3,4,1,1,5,0,no 358 | MS,F,17,R,GT3,T,4,4,teacher,services,other,father,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,2,5,4,yes 359 | MS,F,17,U,LE3,A,3,2,services,other,reputation,mother,2,2,0,no,no,no,no,yes,yes,no,yes,1,2,3,1,2,5,2,yes 360 | MS,M,18,U,LE3,T,1,1,other,services,home,father,2,1,0,no,no,no,no,no,yes,yes,yes,3,3,2,1,2,3,4,yes 361 | MS,F,18,U,LE3,T,1,1,at_home,services,course,father,2,3,0,no,no,no,no,yes,yes,yes,no,5,3,2,1,1,4,0,yes 362 | MS,F,18,R,LE3,A,1,4,at_home,other,course,mother,3,2,0,no,no,no,no,yes,yes,no,yes,4,3,4,1,4,5,0,yes 363 | MS,M,18,R,LE3,T,1,1,at_home,other,other,mother,2,2,1,no,no,no,yes,no,no,no,no,4,4,3,2,3,5,2,yes 364 | 
MS,F,18,U,GT3,T,3,3,services,services,other,mother,2,2,0,no,yes,no,no,yes,yes,yes,yes,4,3,2,1,3,3,0,yes 365 | MS,F,17,U,LE3,T,4,4,at_home,at_home,course,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,2,3,4,1,1,1,0,yes 366 | MS,F,17,R,GT3,T,1,2,other,services,course,father,2,2,0,no,no,no,no,no,yes,no,no,3,2,2,1,2,3,0,yes 367 | MS,M,18,R,GT3,T,1,3,at_home,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,no,no,3,3,4,2,4,3,4,yes 368 | MS,M,18,U,LE3,T,4,4,teacher,services,other,mother,2,3,0,no,no,yes,no,yes,yes,yes,yes,4,2,2,2,2,5,0,yes 369 | MS,F,17,R,GT3,T,1,1,other,services,reputation,mother,3,1,1,no,yes,yes,no,yes,yes,yes,yes,5,2,1,1,2,1,0,no 370 | MS,F,18,U,GT3,T,2,3,at_home,services,course,father,2,1,0,no,yes,yes,no,yes,yes,yes,yes,5,2,3,1,2,4,0,yes 371 | MS,F,18,R,GT3,T,4,4,other,teacher,other,father,3,2,0,no,yes,yes,no,no,yes,yes,yes,3,2,2,4,2,5,10,yes 372 | MS,F,19,U,LE3,T,3,2,services,services,home,other,2,2,2,no,no,no,yes,yes,yes,no,yes,3,2,2,1,1,3,4,no 373 | MS,M,18,R,LE3,T,1,2,at_home,services,other,father,3,1,0,no,yes,yes,yes,yes,no,yes,yes,4,3,3,2,3,3,3,yes 374 | MS,F,17,U,GT3,T,2,2,other,at_home,home,mother,1,3,0,no,no,no,yes,yes,yes,no,yes,3,4,3,1,1,3,8,yes 375 | MS,F,17,R,GT3,T,1,2,other,other,course,mother,1,1,0,no,no,no,yes,yes,yes,yes,no,3,5,5,1,3,1,14,no 376 | MS,F,18,R,LE3,T,4,4,other,other,reputation,mother,2,3,0,no,no,no,no,yes,yes,yes,no,5,4,4,1,1,1,0,yes 377 | MS,F,18,R,GT3,T,1,1,other,other,home,mother,4,3,0,no,no,no,no,yes,yes,yes,no,4,3,2,1,2,4,2,yes 378 | MS,F,20,U,GT3,T,4,2,health,other,course,other,2,3,2,no,yes,yes,no,no,yes,yes,yes,5,4,3,1,1,3,4,yes 379 | MS,F,18,R,LE3,T,4,4,teacher,services,course,mother,1,2,0,no,no,yes,yes,yes,yes,yes,no,5,4,3,3,4,2,4,yes 380 | MS,F,18,U,GT3,T,3,3,other,other,home,mother,1,2,0,no,no,yes,no,yes,yes,yes,yes,4,1,3,1,2,1,0,yes 381 | MS,F,17,R,GT3,T,3,1,at_home,other,reputation,mother,1,2,0,no,yes,yes,yes,no,yes,yes,no,4,5,4,2,3,1,17,yes 382 | MS,M,18,U,GT3,T,4,4,teacher,teacher,home,father,1,2,0,no,no,yes,yes,no,yes,yes,no,3,2,4,1,4,2,4,yes 383 | MS,M,18,R,GT3,T,2,1,other,other,other,mother,2,1,0,no,no,no,yes,no,yes,yes,yes,4,4,3,1,3,5,5,no 384 | MS,M,17,U,GT3,T,2,3,other,services,home,father,2,2,0,no,no,no,yes,yes,yes,yes,no,4,4,3,1,1,3,2,yes 385 | MS,M,19,R,GT3,T,1,1,other,services,other,mother,2,1,1,no,no,no,no,yes,yes,no,no,4,3,2,1,3,5,0,no 386 | MS,M,18,R,GT3,T,4,2,other,other,home,father,2,1,1,no,no,yes,no,yes,yes,no,no,5,4,3,4,3,3,14,no 387 | MS,F,18,R,GT3,T,2,2,at_home,other,other,mother,2,3,0,no,no,yes,no,yes,yes,no,no,5,3,3,1,3,4,2,yes 388 | MS,F,18,R,GT3,T,4,4,teacher,at_home,reputation,mother,3,1,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,3,2,2,5,7,no 389 | MS,F,19,R,GT3,T,2,3,services,other,course,mother,1,3,1,no,no,no,yes,no,yes,yes,no,5,4,2,1,2,5,0,no 390 | MS,F,18,U,LE3,T,3,1,teacher,services,course,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,4,1,1,1,0,no 391 | MS,F,18,U,GT3,T,1,1,other,other,course,mother,2,2,1,no,no,no,yes,yes,yes,no,no,1,1,1,1,1,5,0,no 392 | MS,M,20,U,LE3,A,2,2,services,services,course,other,1,2,2,no,yes,yes,no,yes,yes,no,no,5,5,4,4,5,4,11,no 393 | MS,M,17,U,LE3,T,3,1,services,services,course,mother,2,1,0,no,no,no,no,no,yes,yes,no,2,4,5,3,4,2,3,yes 394 | MS,M,21,R,GT3,T,1,1,other,other,course,other,1,1,3,no,no,no,no,no,yes,no,no,5,5,3,3,3,3,3,no 395 | MS,M,18,R,LE3,T,3,2,services,other,course,mother,3,1,0,no,no,no,no,no,yes,yes,no,4,4,1,3,4,5,0,yes 396 | MS,M,19,U,LE3,T,1,1,other,at_home,course,father,1,1,0,no,no,no,no,yes,yes,yes,no,3,2,3,3,3,5,5,no 397 | 
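The CSV above is the raw dataset described by the attribute list in the project README. As a minimal, illustrative sketch (not part of the original repository), the snippet below shows one way the file could be loaded and its class balance and categorical columns inspected with pandas; it assumes `student-data.csv` sits in the working directory and mirrors the yes/no and dummy-variable encoding used later in the notebook.

```python
import pandas as pd

# Load the student data (assumes student-data.csv is in the working directory)
student_data = pd.read_csv("student-data.csv")
print(student_data.shape)                     # (395, 31): 30 feature columns plus 'passed'

# Class balance of the target label (265 'yes' vs 130 'no' per the notebook output)
print(student_data["passed"].value_counts())

# Separate features from the target and encode non-numeric columns:
# pure yes/no columns are mapped to 1/0, and multi-valued categorical columns
# such as 'Mjob' or 'reason' are expanded into dummy (one-hot) columns.
X_all = student_data.drop("passed", axis=1)
y_all = student_data["passed"]

binary_cols = [col for col in X_all.columns
               if X_all[col].dtype == object and set(X_all[col].unique()) <= {"yes", "no"}]
X_all[binary_cols] = (X_all[binary_cols] == "yes").astype(int)
X_all = pd.get_dummies(X_all)
print(X_all.shape)                            # 48 encoded feature columns, as in the notebook
```

The 48-column figure matches the processed feature count reported by the notebook's own `preprocess_features` routine further below.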
-------------------------------------------------------------------------------- /projects/student_intervention/student_intervention.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Machine Learning Engineer Nanodegree\n", 8 | "## Supervised Learning\n", 9 | "## Project 2: Building a Student Intervention System" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Welcome to the second project of the Machine Learning Engineer Nanodegree! In this notebook, some template code has already been provided for you, and it will be your job to implement the additional functionality necessary to successfully complete this project. Sections that begin with **'Implementation'** in the header indicate that the following block of code will require additional functionality which you must provide. Instructions will be provided for each section and the specifics of the implementation are marked in the code block with a `'TODO'` statement. Please be sure to read the instructions carefully!\n", 17 | "\n", 18 | "In addition to implementing code, there will be questions that you must answer which relate to the project and your implementation. Each section where you will answer a question is preceded by a **'Question X'** header. Carefully read each question and provide thorough answers in the following text boxes that begin with **'Answer:'**. Your project submission will be evaluated based on your answers to each of the questions and the implementation you provide. \n", 19 | "\n", 20 | ">**Note:** Code and Markdown cells can be executed using the **Shift + Enter** keyboard shortcut. In addition, Markdown cells can typically be edited by double-clicking the cell to enter edit mode." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Question 1 - Classification vs. Regression\n", 28 | "*Your goal for this project is to identify students who might need early intervention before they fail to graduate. Which type of supervised learning problem is this, classification or regression? Why?*" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "**Answer: **\n", 36 | "\n", 37 | "We want to identify students who might need early intervention before they fail to graduate, so we have to separate them into two classes based on whether they are likely to pass or fail. This is a classification problem because we are predicting discrete labels instead of a continuous output." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Exploring the Data\n", 45 | "Run the code cell below to load necessary Python libraries and load the student data. Note that the last column from this dataset, `'passed'`, will be our target label (whether the student graduated or didn't graduate). All other columns are features about each student."
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 1, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Student data read successfully!\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "# Import libraries\n", 65 | "import numpy as np\n", 66 | "import pandas as pd\n", 67 | "from time import time\n", 68 | "from sklearn.metrics import f1_score\n", 69 | "\n", 70 | "# Read student data\n", 71 | "student_data = pd.read_csv(\"student-data.csv\")\n", 72 | "print \"Student data read successfully!\"" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 2, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/html": [ 85 | "
\n", 86 | "\n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | "
schoolsexageaddressfamsizePstatusMeduFeduMjobFjob...internetromanticfamrelfreetimegooutDalcWalchealthabsencespassed
0GPF18UGT3A44at_hometeacher...nono4341136no
1GPF17UGT3T11at_homeother...yesno5331134no
2GPF15ULE3T11at_homeother...yesno43223310yes
3GPF15UGT3T42healthservices...yesyes3221152yes
4GPF16UGT3T33otherother...nono4321254yes
\n", 236 | "

5 rows × 31 columns

\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " school sex age address famsize Pstatus Medu Fedu Mjob Fjob \\\n", 241 | "0 GP F 18 U GT3 A 4 4 at_home teacher \n", 242 | "1 GP F 17 U GT3 T 1 1 at_home other \n", 243 | "2 GP F 15 U LE3 T 1 1 at_home other \n", 244 | "3 GP F 15 U GT3 T 4 2 health services \n", 245 | "4 GP F 16 U GT3 T 3 3 other other \n", 246 | "\n", 247 | " ... internet romantic famrel freetime goout Dalc Walc health absences \\\n", 248 | "0 ... no no 4 3 4 1 1 3 6 \n", 249 | "1 ... yes no 5 3 3 1 1 3 4 \n", 250 | "2 ... yes no 4 3 2 2 3 3 10 \n", 251 | "3 ... yes yes 3 2 2 1 1 5 2 \n", 252 | "4 ... no no 4 3 2 1 2 5 4 \n", 253 | "\n", 254 | " passed \n", 255 | "0 no \n", 256 | "1 no \n", 257 | "2 yes \n", 258 | "3 yes \n", 259 | "4 yes \n", 260 | "\n", 261 | "[5 rows x 31 columns]" 262 | ] 263 | }, 264 | "execution_count": 2, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "student_data.head()\n", 271 | "\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 51, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/plain": [ 284 | "yes 265\n", 285 | "no 130\n", 286 | "Name: passed, dtype: int64" 287 | ] 288 | }, 289 | "execution_count": 51, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "student_data[\"passed\"].value_counts()" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "### Implementation: Data Exploration\n", 303 | "Let's begin by investigating the dataset to determine how many students we have information on, and learn about the graduation rate among these students. In the code cell below, you will need to compute the following:\n", 304 | "- The total number of students, `n_students`.\n", 305 | "- The total number of features for each student, `n_features`.\n", 306 | "- The number of those students who passed, `n_passed`.\n", 307 | "- The number of those students who failed, `n_failed`.\n", 308 | "- The graduation rate of the class, `grad_rate`, in percent (%).\n" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 3, 314 | "metadata": { 315 | "collapsed": false 316 | }, 317 | "outputs": [ 318 | { 319 | "name": "stdout", 320 | "output_type": "stream", 321 | "text": [ 322 | "Total number of students: 395\n", 323 | "Number of features: 30\n", 324 | "Number of students who passed: 265\n", 325 | "Number of students who failed: 130\n", 326 | "Graduation rate of the class: 67.09%\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "# TODO: Calculate number of students\n", 332 | "n_students = student_data.shape[0]\n", 333 | "\n", 334 | "# TODO: Calculate number of features\n", 335 | "n_features = student_data.shape[1] - 1\n", 336 | "\n", 337 | "# TODO: Calculate passing students\n", 338 | "n_passed = student_data[\"passed\"].value_counts()[\"yes\"]\n", 339 | "\n", 340 | "# TODO: Calculate failing students\n", 341 | "n_failed = student_data[\"passed\"].value_counts()[\"no\"]\n", 342 | "\n", 343 | "# TODO: Calculate graduation rate\n", 344 | "grad_rate = (265/395.0)*100\n", 345 | "\n", 346 | "# Print the results\n", 347 | "print \"Total number of students: {}\".format(n_students)\n", 348 | "print \"Number of features: {}\".format(n_features)\n", 349 | "print \"Number of students who passed: {}\".format(n_passed)\n", 350 | "print \"Number of students who failed: {}\".format(n_failed)\n", 351 | "print \"Graduation rate 
of the class: {:.2f}%\".format(grad_rate)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "## Preparing the Data\n", 359 | "In this section, we will prepare the data for modeling, training and testing.\n", 360 | "\n", 361 | "### Identify feature and target columns\n", 362 | "It is often the case that the data you obtain contains non-numeric features. This can be a problem, as most machine learning algorithms expect numeric data to perform computations with.\n", 363 | "\n", 364 | "Run the code cell below to separate the student data into feature and target columns to see if any features are non-numeric." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 53, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [ 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "Feature columns:\n", 379 | "['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu', 'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences']\n", 380 | "\n", 381 | "Target column: passed\n", 382 | "\n", 383 | "Feature values:\n", 384 | " school sex age address famsize Pstatus Medu Fedu Mjob Fjob \\\n", 385 | "0 GP F 18 U GT3 A 4 4 at_home teacher \n", 386 | "1 GP F 17 U GT3 T 1 1 at_home other \n", 387 | "2 GP F 15 U LE3 T 1 1 at_home other \n", 388 | "3 GP F 15 U GT3 T 4 2 health services \n", 389 | "4 GP F 16 U GT3 T 3 3 other other \n", 390 | "\n", 391 | " ... higher internet romantic famrel freetime goout Dalc Walc health \\\n", 392 | "0 ... yes no no 4 3 4 1 1 3 \n", 393 | "1 ... yes yes no 5 3 3 1 1 3 \n", 394 | "2 ... yes yes no 4 3 2 2 3 3 \n", 395 | "3 ... yes yes yes 3 2 2 1 1 5 \n", 396 | "4 ... yes no no 4 3 2 1 2 5 \n", 397 | "\n", 398 | " absences \n", 399 | "0 6 \n", 400 | "1 4 \n", 401 | "2 10 \n", 402 | "3 2 \n", 403 | "4 4 \n", 404 | "\n", 405 | "[5 rows x 30 columns]\n" 406 | ] 407 | } 408 | ], 409 | "source": [ 410 | "# Extract feature columns\n", 411 | "feature_cols = list(student_data.columns[:-1])\n", 412 | "\n", 413 | "# Extract target column 'passed'\n", 414 | "target_col = student_data.columns[-1] \n", 415 | "\n", 416 | "# Show the list of columns\n", 417 | "print \"Feature columns:\\n{}\".format(feature_cols)\n", 418 | "print \"\\nTarget column: {}\".format(target_col)\n", 419 | "\n", 420 | "# Separate the data into feature data and target data (X_all and y_all, respectively)\n", 421 | "X_all = student_data[feature_cols]\n", 422 | "y_all = student_data[target_col]\n", 423 | "\n", 424 | "# Show the feature information by printing the first five rows\n", 425 | "print \"\\nFeature values:\"\n", 426 | "print X_all.head()" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "### Preprocess Feature Columns\n", 434 | "\n", 435 | "As you can see, there are several non-numeric columns that need to be converted! Many of them are simply `yes`/`no`, e.g. `internet`. These can be reasonably converted into `1`/`0` (binary) values.\n", 436 | "\n", 437 | "Other columns, like `Mjob` and `Fjob`, have more than two values, and are known as _categorical variables_. The recommended way to handle such a column is to create as many columns as possible values (e.g. 
`Fjob_teacher`, `Fjob_other`, `Fjob_services`, etc.), and assign a `1` to one of them and `0` to all others.\n", 438 | "\n", 439 | "These generated columns are sometimes called _dummy variables_, and we will use the [`pandas.get_dummies()`](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html?highlight=get_dummies#pandas.get_dummies) function to perform this transformation. Run the code cell below to perform the preprocessing routine discussed in this section." 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 54, 445 | "metadata": { 446 | "collapsed": false 447 | }, 448 | "outputs": [ 449 | { 450 | "name": "stdout", 451 | "output_type": "stream", 452 | "text": [ 453 | "Processed feature columns (48 total features):\n", 454 | "['school_GP', 'school_MS', 'sex_F', 'sex_M', 'age', 'address_R', 'address_U', 'famsize_GT3', 'famsize_LE3', 'Pstatus_A', 'Pstatus_T', 'Medu', 'Fedu', 'Mjob_at_home', 'Mjob_health', 'Mjob_other', 'Mjob_services', 'Mjob_teacher', 'Fjob_at_home', 'Fjob_health', 'Fjob_other', 'Fjob_services', 'Fjob_teacher', 'reason_course', 'reason_home', 'reason_other', 'reason_reputation', 'guardian_father', 'guardian_mother', 'guardian_other', 'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences']\n" 455 | ] 456 | } 457 | ], 458 | "source": [ 459 | "def preprocess_features(X):\n", 460 | " ''' Preprocesses the student data and converts non-numeric binary variables into\n", 461 | " binary (0/1) variables. Converts categorical variables into dummy variables. '''\n", 462 | " \n", 463 | " # Initialize new output DataFrame\n", 464 | " output = pd.DataFrame(index = X.index)\n", 465 | "\n", 466 | " # Investigate each feature column for the data\n", 467 | " for col, col_data in X.iteritems():\n", 468 | " \n", 469 | " # If data type is non-numeric, replace all yes/no values with 1/0\n", 470 | " if col_data.dtype == object:\n", 471 | " col_data = col_data.replace(['yes', 'no'], [1, 0])\n", 472 | "\n", 473 | " # If data type is categorical, convert to dummy variables\n", 474 | " if col_data.dtype == object:\n", 475 | " # Example: 'school' => 'school_GP' and 'school_MS'\n", 476 | " col_data = pd.get_dummies(col_data, prefix = col) \n", 477 | " \n", 478 | " # Collect the revised columns\n", 479 | " output = output.join(col_data)\n", 480 | " \n", 481 | " return output\n", 482 | "\n", 483 | "X_all = preprocess_features(X_all)\n", 484 | "print \"Processed feature columns ({} total features):\\n{}\".format(len(X_all.columns), list(X_all.columns))" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "### Implementation: Training and Testing Data Split\n", 492 | "So far, we have converted all _categorical_ features into numeric values. For the next step, we split the data (both features and corresponding labels) into training and test sets. In the following code cell below, you will need to implement the following:\n", 493 | "- Randomly shuffle and split the data (`X_all`, `y_all`) into training and testing subsets.\n", 494 | " - Use 300 training points (approximately 75%) and 95 testing points (approximately 25%).\n", 495 | " - Set a `random_state` for the function(s) you use, if provided.\n", 496 | " - Store the results in `X_train`, `X_test`, `y_train`, and `y_test`." 
497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 68, 502 | "metadata": { 503 | "collapsed": false 504 | }, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "Training set has 300 samples.\n", 511 | "Testing set has 95 samples.\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "# TODO: Import any additional functionality you may need here\n", 517 | "\n", 518 | "from sklearn.cross_validation import train_test_split\n", 519 | "\n", 520 | "# TODO: Set the number of training points\n", 521 | "num_train = 300\n", 522 | "\n", 523 | "# Set the number of testing points\n", 524 | "num_test = X_all.shape[0] - num_train\n", 525 | "\n", 526 | "# TODO: Shuffle and split the dataset into the number of training and testing points above\n", 527 | "X_train,X_test,y_train, y_test = train_test_split(X_all,y_all,test_size = num_test, random_state = 0)\n", 528 | "\n", 529 | "# Show the results of the split\n", 530 | "print \"Training set has {} samples.\".format(X_train.shape[0])\n", 531 | "print \"Testing set has {} samples.\".format(X_test.shape[0])" 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "metadata": {}, 537 | "source": [ 538 | "## Training and Evaluating Models\n", 539 | "In this section, you will choose 3 supervised learning models that are appropriate for this problem and available in `scikit-learn`. You will first discuss the reasoning behind choosing these three models by considering what you know about the data and each model's strengths and weaknesses. You will then fit the model to varying sizes of training data (100 data points, 200 data points, and 300 data points) and measure the F1 score. You will need to produce three tables (one for each model) that show the training set size, training time, prediction time, F1 score on the training set, and F1 score on the testing set." 540 | ] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "### Question 2 - Model Application\n", 547 | "*List three supervised learning models that are appropriate for this problem. What are the general applications of each model? What are their strengths and weaknesses? Given what you know about the data, why did you choose these models to be applied?*" 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "**Answer: **\n", 555 | "\n", 556 | "The three supervised learning models that I've chosen are:\n", 557 | "1. Decision Trees\n", 558 | "2. Support Vector Machines\n", 559 | "3. K-Nearest Neighbors\n", 560 | "\n", 561 | "**1. Decision Trees:**\n", 562 | "\n", 563 | "Decision Trees are widely used for both classification and regression across several industries, including medicine (e.g. classifying diseases from patient features), biomedical research, financial analysis (fraud detection, credit default prediction), astronomy, and social media (predicting engagement and ad clicks), largely because of their interpretability and versatility.\n", 564 | "\n", 565 | "* Strengths:\n", 566 | " 1. Decision Trees are interpretable and easy to visualize.\n", 567 | " 2. Decision Trees can handle both categorical and numerical data.\n", 568 | " 3. 
All else being equal, decision trees prefer shorter trees to longer trees by splitting on the \"best features\" (using information gain or the gini impurity index), so it's easy to understand which features in a dataset are the most important\n", 569 | "\n", 570 | "\n", 571 | "* Weaknesses :\n", 572 | "\n", 573 | " 1. Decision trees can grow very large as the number of instances and features increases\n", 574 | " 2. Decision trees overfit very easily as they pick up subtle variances in the data set. However, over-fitting can be minimized by tuning the maximum depth of the tree and the minimum number of samples to split per node, and by pruning the tree after it is created. Random forests, another model based on decision trees, are incredibly popular as they reduce error by ensembling over many decision trees.\n", 575 | "\n", 576 | "* Reasons for choosing this model : \n", 577 | " 1. This dataset has many features, and a problem like predicting which students need intervention is unlikely to be a completely linear relationship, as many details influence a student's learning.\n", 578 | " 2. Most features in this data set are binary, which is easy for a decision tree to handle with conditionals, and the resulting tree will be easier to interpret and thus to turn into a course of action that improves a student's learning.\n", 579 | " \n", 580 | "\n", 581 | "** 2. Support Vector Machine :** \n", 582 | "\n", 583 | "Support vector machines classify data by finding the maximum margin hyperplane that separates the class labels. Like the other two models (decision trees and K-nearest neighbors) it is very popular and is used in industry for classification and regression tasks. Support Vector Machines have been successfully used on high dimensional data such as genetic data (protein structure prediction), music (song genre classification, music retrieval), image classification (histogram based), image retrieval, etc.\n", 584 | "\n", 585 | "* Strengths :\n", 586 | " 1. As a Support Vector Machine tries to find the separating hyperplane that has the maximum distance between the classes, it's not prone to overfitting.\n", 587 | " 2. A linear SVM produces a line as the decision boundary, but SVM is also effective with high dimensional data by using the kernel trick (mapping the data points to higher dimensional spaces to find the appropriate class labels) \n", 588 | " \n", 589 | "* Weaknesses :\n", 590 | " 1. Performance depends on the choice of kernel. Large data sets may take a lot of time to train.\n", 591 | " 2. SVM works really well for datasets that have a clear margin of separation, but performs poorly on noisy datasets.\n", 592 | " \n", 593 | "* Reasons for choosing this model :\n", 594 | " 1. As there are many features in this dataset, if this dataset has a good margin of separation SVM would be able to pick it up. \n", 595 | " 2. SVM also works well with high dimensional data via the kernel trick, and this dataset has many features.\n", 596 | "\n", 597 | "\n", 598 | "**3. K Nearest Neighbor** :\n", 599 | "\n", 600 | "K nearest neighbor is a method of 'instance based learning'/lazy learning, as the computation begins only when we start predicting; it's also a non-parametric method. K nearest neighbor tries to find similar instances for each query and predicts based on their majority vote (for classification) or average (for regression). It can be used in many different cases, including content retrieval for photos, videos and text, recommending products, etc. 
It's one of the most popular methods in data mining.\n", 601 | "\n", 602 | "* Strengths :\n", 603 | " 1. It does not make any assumptions about the distribution of the data. Rather, it simply tries to find the most similar k neighbors for each query based on some distance metric/similarity measurement, and uses the whole data set for each query.\n", 604 | " 2. After choosing the number of neighbors k and the similarity metric d, the algorithm is simple to implement in production.\n", 605 | " 3. It's possible to weight the contribution of the neighbors when predicting labels (weighting the nearest neighbors highest and the distant ones lower) for higher accuracy. For datasets that don't follow a general pattern, K nearest neighbor is often a really good choice.\n", 606 | " \n", 607 | "* Weaknesses: \n", 608 | " 1. K-nearest neighbor requires the entire dataset to be preserved in memory. Unlike a parametric model like linear regression, where we just have to train once to find the parameters and can then throw away the data set, we cannot discard the data, and this can make the space requirement incredibly high.\n", 609 | " 2. It's important to use domain knowledge and grid search techniques to find a good similarity measure and a good k; in practice there can be many variations of distance metrics which can yield different performances.\n", 610 | " 3. It's less interpretable than models like decision trees, where we can understand which features are the strongest.\n", 611 | " \n", 612 | "* Reasons for choosing this model:\n", 613 | " 1. Students who are failing may have similar patterns, such as a similar amount of time invested in recreation over studying, a similar number of family members and similar income, or a similar geographic region, which K nearest neighbor can deal with easily.\n", 614 | " " 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "### Setup\n", 622 | "Run the code cell below to initialize three helper functions which you can use for training and testing the three supervised learning models you've chosen above. The functions are as follows:\n", 623 | "- `train_classifier` - takes as input a classifier and training data and fits the classifier to the data.\n", 624 | "- `predict_labels` - takes as input a fit classifier, features, and a target labeling and makes predictions using the F1 score.\n", 625 | "- `train_predict` - takes as input a classifier, and the training and testing data, and performs `train_classifier` and `predict_labels`.\n", 626 | " - This function will report the F1 score for both the training and testing data separately." 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 83, 632 | "metadata": { 633 | "collapsed": false 634 | }, 635 | "outputs": [], 636 | "source": [ 637 | "def train_classifier(clf, X_train, y_train):\n", 638 | " ''' Fits a classifier to the training data. '''\n", 639 | " \n", 640 | " # Start the clock, train the classifier, then stop the clock\n", 641 | " start = time()\n", 642 | " clf.fit(X_train, y_train)\n", 643 | " end = time()\n", 644 | " \n", 645 | " # Print the results\n", 646 | " print \"Trained model in {:.4f} seconds\".format(end - start)\n", 647 | "\n", 648 | " \n", 649 | "def predict_labels(clf, features, target):\n", 650 | " ''' Makes predictions using a fit classifier based on F1 score. 
'''\n", 651 | " \n", 652 | " # Start the clock, make predictions, then stop the clock\n", 653 | " start = time()\n", 654 | " y_pred = clf.predict(features)\n", 655 | " end = time()\n", 656 | " \n", 657 | " # Print and return results\n", 658 | " print \"Made predictions in {:.4f} seconds.\".format(end - start)\n", 659 | " return f1_score(target.values, y_pred, pos_label='yes')\n", 660 | "\n", 661 | "\n", 662 | "def train_predict(clf, X_train, y_train, X_test, y_test):\n", 663 | " ''' Train and predict using a classifer based on F1 score. '''\n", 664 | " \n", 665 | " # Indicate the classifier and the training set size\n", 666 | " print \"Training a {} using a training set size of {}. . .\".format(clf.__class__.__name__, len(X_train))\n", 667 | " \n", 668 | " # Train the classifier\n", 669 | " train_classifier(clf, X_train, y_train)\n", 670 | " \n", 671 | " # Print the results of prediction for both training and testing\n", 672 | " print \"F1 score for training set: {:.4f}.\".format(predict_labels(clf, X_train, y_train))\n", 673 | " print \"F1 score for test set: {:.4f}.\".format(predict_labels(clf, X_test, y_test))\n", 674 | " print \"\\n\"" 675 | ] 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": {}, 680 | "source": [ 681 | "### Implementation: Model Performance Metrics\n", 682 | "With the predefined functions above, you will now import the three supervised learning models of your choice and run the `train_predict` function for each one. Remember that you will need to train and predict on each classifier for three different training set sizes: 100, 200, and 300. Hence, you should expect to have 9 different outputs below — 3 for each model using the varying training set sizes. In the following code cell, you will need to implement the following:\n", 683 | "- Import the three supervised learning models you've discussed in the previous section.\n", 684 | "- Initialize the three models and store them in `clf_A`, `clf_B`, and `clf_C`.\n", 685 | " - Use a `random_state` for each model you use, if provided.\n", 686 | " - **Note:** Use the default settings for each model — you will tune one specific model in a later section.\n", 687 | "- Create the different training set sizes to be used to train each model.\n", 688 | " - *Do not reshuffle and resplit the data! The new training points should be drawn from `X_train` and `y_train`.*\n", 689 | "- Fit each model with each training set size and make predictions on the test set (9 in total). \n", 690 | "**Note:** Three tables are provided after the following code cell which can be used to store your results." 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": 84, 696 | "metadata": { 697 | "collapsed": false 698 | }, 699 | "outputs": [ 700 | { 701 | "name": "stdout", 702 | "output_type": "stream", 703 | "text": [ 704 | "Training a DecisionTreeClassifier using a training set size of 100. . .\n", 705 | "Trained model in 0.0050 seconds\n", 706 | "Made predictions in 0.0010 seconds.\n", 707 | "F1 score for training set: 1.0000.\n", 708 | "Made predictions in 0.0010 seconds.\n", 709 | "F1 score for test set: 0.6942.\n", 710 | "\n", 711 | "\n", 712 | "Training a DecisionTreeClassifier using a training set size of 200. . 
.\n", 713 | "Trained model in 0.0050 seconds\n", 714 | "Made predictions in 0.0010 seconds.\n", 715 | "F1 score for training set: 1.0000.\n", 716 | "Made predictions in 0.0000 seconds.\n", 717 | "F1 score for test set: 0.7132.\n", 718 | "\n", 719 | "\n", 720 | "Training a DecisionTreeClassifier using a training set size of 300. . .\n", 721 | "Trained model in 0.0000 seconds\n", 722 | "Made predictions in 0.0000 seconds.\n", 723 | "F1 score for training set: 1.0000.\n", 724 | "Made predictions in 0.0000 seconds.\n", 725 | "F1 score for test set: 0.7167.\n", 726 | "\n", 727 | "\n", 728 | "\n", 729 | "\n", 730 | "\n", 731 | "\n", 732 | "Training a SVC using a training set size of 100. . .\n", 733 | "Trained model in 0.0000 seconds\n", 734 | "Made predictions in 0.0000 seconds.\n", 735 | "F1 score for training set: 0.8591.\n", 736 | "Made predictions in 0.0000 seconds.\n", 737 | "F1 score for test set: 0.7838.\n", 738 | "\n", 739 | "\n", 740 | "Training a SVC using a training set size of 200. . .\n", 741 | "Trained model in 0.0180 seconds\n", 742 | "Made predictions in 0.0000 seconds.\n", 743 | "F1 score for training set: 0.8693.\n", 744 | "Made predictions in 0.0150 seconds.\n", 745 | "F1 score for test set: 0.7755.\n", 746 | "\n", 747 | "\n", 748 | "Training a SVC using a training set size of 300. . .\n", 749 | "Trained model in 0.0310 seconds\n", 750 | "Made predictions in 0.0160 seconds.\n", 751 | "F1 score for training set: 0.8692.\n", 752 | "Made predictions in 0.0000 seconds.\n", 753 | "F1 score for test set: 0.7586.\n", 754 | "\n", 755 | "\n", 756 | "\n", 757 | "\n", 758 | "\n", 759 | "\n", 760 | "Training a KNeighborsClassifier using a training set size of 100. . .\n", 761 | "Trained model in 0.0000 seconds\n", 762 | "Made predictions in 0.0000 seconds.\n", 763 | "F1 score for training set: 0.7972.\n", 764 | "Made predictions in 0.0000 seconds.\n", 765 | "F1 score for test set: 0.7068.\n", 766 | "\n", 767 | "\n", 768 | "Training a KNeighborsClassifier using a training set size of 200. . .\n", 769 | "Trained model in 0.0000 seconds\n", 770 | "Made predictions in 0.0220 seconds.\n", 771 | "F1 score for training set: 0.8571.\n", 772 | "Made predictions in 0.0070 seconds.\n", 773 | "F1 score for test set: 0.7121.\n", 774 | "\n", 775 | "\n", 776 | "Training a KNeighborsClassifier using a training set size of 300. . 
.\n", 777 | "Trained model in 0.0030 seconds\n", 778 | "Made predictions in 0.0250 seconds.\n", 779 | "F1 score for training set: 0.8722.\n", 780 | "Made predictions in 0.0000 seconds.\n", 781 | "F1 score for test set: 0.7482.\n", 782 | "\n", 783 | "\n" 784 | ] 785 | } 786 | ], 787 | "source": [ 788 | "# TODO: Import the three supervised learning models from sklearn\n", 789 | "from sklearn.tree import DecisionTreeClassifier\n", 790 | "from sklearn.svm import SVC\n", 791 | "from sklearn.neighbors import KNeighborsClassifier \n", 792 | "\n", 793 | "# TODO: Initialize the three models\n", 794 | "clf_A = DecisionTreeClassifier(random_state =0)\n", 795 | "clf_B = SVC(random_state = 0)\n", 796 | "clf_C = KNeighborsClassifier()\n", 797 | "\n", 798 | "\n", 799 | "training_sizes = [100,200,300]\n", 800 | "\n", 801 | "# TODO: Execute the 'train_predict' function for each classifier and each training set size\n", 802 | "\n", 803 | "# Decision Tree\n", 804 | "for size in training_sizes:\n", 805 | " train_predict(clf_A, X_train[:size], y_train[:size], X_test, y_test)\n", 806 | " \n", 807 | "print \"\\n\\n\\n\"\n", 808 | " \n", 809 | "# Support Vector Machine \n", 810 | "for size in training_sizes:\n", 811 | " train_predict(clf_B, X_train[:size], y_train[:size], X_test, y_test)\n", 812 | " \n", 813 | "print \"\\n\\n\\n\"\n", 814 | " \n", 815 | "# K Neareset Neighbor Classifier\n", 816 | "for size in training_sizes:\n", 817 | " train_predict(clf_C, X_train[:size], y_train[:size], X_test, y_test)" 818 | ] 819 | }, 820 | { 821 | "cell_type": "markdown", 822 | "metadata": {}, 823 | "source": [ 824 | "### Tabular Results\n", 825 | "Edit the cell below to see how a table can be designed in [Markdown](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet#tables). You can record your results from above in the tables provided." 826 | ] 827 | }, 828 | { 829 | "cell_type": "markdown", 830 | "metadata": {}, 831 | "source": [ 832 | "** Classifer 1 - DecisionTreeClassifier?** \n", 833 | "\n", 834 | "| Training Set Size | Training Time | Prediction Time (test) | F1 Score (train) | F1 Score (test) |\n", 835 | "| :---------------: | :---------------------: | :--------------------: | :--------------: | :-------------: |\n", 836 | "| 100 |0.0050 seconds |0.0010 seconds. 
|1.0000 |0.6942 |\n", 837 | "| 200 |0.0050 seconds |0.0000 seconds |1.0000 |0.7132 |\n", 838 | "| 300 |0.0000 seconds |0.0000 seconds |1.0000 |0.7167 |\n", 839 | "\n", 840 | "\n", 841 | "\n", 842 | "\n", 843 | "** Classifer 2 - SVM?** \n", 844 | "\n", 845 | "| Training Set Size | Training Time | Prediction Time (test) | F1 Score (train) | F1 Score (test) |\n", 846 | "| :---------------: | :---------------------: | :--------------------: | :--------------: | :-------------: |\n", 847 | "| 100 |0.0000 seconds |0.0000 seconds |0.8591 |0.7838 |\n", 848 | "| 200 |0.0180 seconds |0.0150 seconds |0.8693 | 0.7755 |\n", 849 | "| 300 |0.0310 seconds |0.0000 seconds |0.8692 |0.7586 |\n", 850 | "\n", 851 | "\n", 852 | "** Classifer 3 - K-Nearest Neighbor ** \n", 853 | "\n", 854 | "| Training Set Size | Training Time | Prediction Time (test) | F1 Score (train) | F1 Score (test) |\n", 855 | "| :---------------: | :---------------------: | :--------------------: | :--------------: | :-------------: |\n", 856 | "| 100 |0.0000 seconds |0.0000 seconds |0.7972 |0.7068 |\n", 857 | "| 200 |0.0000 seconds |0.0070 seconds |0.8571 |0.7121 |\n", 858 | "| 300 | 0.0030 seconds |0.0000 seconds |0.8722 |0.7482 |\n", 859 | "\n", 860 | "\n" 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": 92, 866 | "metadata": { 867 | "collapsed": false 868 | }, 869 | "outputs": [ 870 | { 871 | "name": "stdout", 872 | "output_type": "stream", 873 | "text": [ 874 | "yes 206\n", 875 | "no 94\n", 876 | "Name: passed, dtype: int64\n", 877 | "yes 59\n", 878 | "no 36\n", 879 | "Name: passed, dtype: int64\n" 880 | ] 881 | } 882 | ], 883 | "source": [ 884 | "print y_train.value_counts()\n", 885 | "print y_test.value_counts()" 886 | ] 887 | }, 888 | { 889 | "cell_type": "code", 890 | "execution_count": 94, 891 | "metadata": { 892 | "collapsed": false 893 | }, 894 | "outputs": [ 895 | { 896 | "name": "stdout", 897 | "output_type": "stream", 898 | "text": [ 899 | "0.708033333333\n", 900 | "0.772633333333\n", 901 | "0.722366666667\n" 902 | ] 903 | } 904 | ], 905 | "source": [ 906 | "decision_tree_f1_average = (0.6942+0.7132+0.7167)/3.0\n", 907 | "svm_f1_average = (0.7838 + 0.7755 + 0.7586)/3.0\n", 908 | "k_nearest_f1_average = (0.7068+0.7121+0.7482)/3.0 \n", 909 | "\n", 910 | "print decision_tree_f1_average\n", 911 | "print svm_f1_average\n", 912 | "print k_nearest_f1_average" 913 | ] 914 | }, 915 | { 916 | "cell_type": "markdown", 917 | "metadata": {}, 918 | "source": [ 919 | "## Choosing the Best Model\n", 920 | "In this final section, you will choose from the three supervised learning models the *best* model to use on the student data. You will then perform a grid search optimization for the model over the entire training set (`X_train` and `y_train`) by tuning at least one parameter to improve upon the untuned model's F1 score. " 921 | ] 922 | }, 923 | { 924 | "cell_type": "markdown", 925 | "metadata": {}, 926 | "source": [ 927 | "### Question 3 - Chosing the Best Model\n", 928 | "*Based on the experiments you performed earlier, in one to two paragraphs, explain to the board of supervisors what single model you chose as the best model. Which model is generally the most appropriate based on the available data, limited resources, cost, and performance?*" 929 | ] 930 | }, 931 | { 932 | "cell_type": "markdown", 933 | "metadata": {}, 934 | "source": [ 935 | "**Answer: **\n", 936 | "\n", 937 | "The model I would choose as the best model is SVM.\n", 938 | "\n", 939 | "Reasons : \n", 940 | " 1. 
A DecisionTreeClassifier shows clear signs of overfitting. It fits the training data perfectly with an F1-score of 1, but performs worse on the testing data compared to both SVM and k-nearest neighbor. So a decision tree would clearly not be an appropriate model for this data set.\n", 941 | " 2. K-Nearest Neighbor actually shows quite stable performance over the training and testing data sets, and its scores steadily improved with more training data (possibly because it found more similar students for the query instances once it had more training instances). However, K-nearest's performance on the test data set is still poor compared to SVM.\n", 942 | " 3. SVM's average test score is 0.7726, beating both the decision tree (average F1 on the test set = 0.7080) and k-nearest neighbor (average F1 score 0.7223), so based on the scores SVM is the best choice. It's true that there are subtle differences in computation time for the training and testing phases, but for a small data set like this the differences are not that important." 943 | ] 944 | }, 945 | { 946 | "cell_type": "markdown", 947 | "metadata": {}, 948 | "source": [ 949 | "### Question 4 - Model in Layman's Terms\n", 950 | "*In one to two paragraphs, explain to the board of directors in layman's terms how the final model chosen is supposed to work. For example if you've chosen to use a decision tree or a support vector machine, how does the model go about making a prediction?*" 951 | ] 952 | }, 953 | { 954 | "cell_type": "markdown", 955 | "metadata": {}, 956 | "source": [ 957 | "**Answer: **\n", 958 | "\n", 959 | "The model that was chosen is called a Support Vector Machine, which is a linear separator. Intuitively, in the simplest case, we can imagine a 2D plane where we plot the data on the x and y axes along with the labels, and we want to separate the labels using a line. Assuming the labels are not overlapping, we can choose many lines for this task; however, a support vector machine will choose the \"maximum margin\" line, the line that has the biggest distance from the nearest points of both classes, i.e. the line which is actually in the 'middle'. We choose this line so that the model generalizes to test data and avoids overfitting, since a line too close to either of the classes can misclassify quickly.\n", 960 | "\n", 961 | "\n", 962 | "\n", 963 | "For higher dimensional data sets, instead of a line we map the data points to higher dimensions (with the 'kernel trick') and find the maximum margin hyperplane that separates the classes with as much gap as possible. For example, in the image below a line could not have separated the circular data in 2D, so the data has been mapped to 3D space where a clear separating hyperplane was found, and then the labels were used to classify the instances.\n", 964 | "\n", 965 | "\n", 966 | "\n", 967 | "\n", 968 | "\n", 969 | "For practical purposes, choosing a decision tree would have been more interpretable, but in this case it would have led to overfitting (as we have seen in the table), and intervening with a student who is actually doing well because of a bad model would have had negative consequences in that student's life. Choosing something like K-Nearest Neighbor would perhaps have been stable, but not as interpretable as decision trees. 
However, if we scale to millions of students, decision trees will also grow very large, and K-Nearest Neighbors would have to iterate over all the millions of students to find similar ones.\n", 970 | "\n", 971 | "On the other hand, support vector machines clearly showed the best performance so far, and SVM is a widely used algorithm in industry too, so SVM was chosen. Visualizing an SVM is not as easy as visualizing decision trees, but it has better performance." 972 | ] 973 | }, 974 | { 975 | "cell_type": "markdown", 976 | "metadata": {}, 977 | "source": [ 978 | "### Implementation: Model Tuning\n", 979 | "Fine tune the chosen model. Use grid search (`GridSearchCV`) with at least one important parameter tuned with at least 3 different values. You will need to use the entire training set for this. In the code cell below, you will need to implement the following:\n", 980 | "- Import [`sklearn.grid_search.GridSearchCV`](http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html) and [`sklearn.metrics.make_scorer`](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html).\n", 981 | "- Create a dictionary of parameters you wish to tune for the chosen model.\n", 982 | " - Example: `parameters = {'parameter' : [list of values]}`.\n", 983 | "- Initialize the classifier you've chosen and store it in `clf`.\n", 984 | "- Create the F1 scoring function using `make_scorer` and store it in `f1_scorer`.\n", 985 | " - Set the `pos_label` parameter to the correct value!\n", 986 | "- Perform grid search on the classifier `clf` using `f1_scorer` as the scoring method, and store it in `grid_obj`.\n", 987 | "- Fit the grid search object to the training data (`X_train`, `y_train`), and store it in `grid_obj`." 988 | ] 989 | }, 990 | { 991 | "cell_type": "code", 992 | "execution_count": 85, 993 | "metadata": { 994 | "collapsed": false 995 | }, 996 | "outputs": [ 997 | { 998 | "name": "stdout", 999 | "output_type": "stream", 1000 | "text": [ 1001 | "{'kernel': 'rbf', 'C': 1, 'verbose': False, 'probability': False, 'degree': 3, 'shrinking': True, 'max_iter': -1, 'decision_function_shape': None, 'random_state': None, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 'auto', 'class_weight': None}\n", 1002 | "Made predictions in 0.0250 seconds.\n", 1003 | "Tuned model has a training F1 score of 0.8692.\n", 1004 | "Made predictions in 0.0100 seconds.\n", 1005 | "Tuned model has a testing F1 score of 0.7586.\n" 1006 | ] 1007 | } 1008 | ], 1009 | "source": [ 1010 | "# TODO: Import 'GridSearchCV' and 'make_scorer'\n", 1011 | "\n", 1012 | "from sklearn.grid_search import GridSearchCV\n", 1013 | "from sklearn.metrics import make_scorer\n", 1014 | "\n", 1015 | "# TODO: Create the parameters list you wish to tune\n", 1016 | "parameters = {'kernel':('linear', 'poly','rbf'), 'C':[0.25,0.5,1, 10,50]}\n", 1017 | "\n", 1018 | "# TODO: Initialize the classifier\n", 1019 | "clf = SVC()\n", 1020 | "\n", 1021 | "# TODO: Make an f1 scoring function using 'make_scorer' \n", 1022 | "f1_scorer = make_scorer(f1_score,pos_label = \"yes\")\n", 1023 | "\n", 1024 | "# TODO: Perform grid search on the classifier using the f1_scorer as the scoring method\n", 1025 | "grid_obj = GridSearchCV(clf,param_grid = parameters,scoring = f1_scorer)\n", 1026 | "\n", 1027 | "# TODO: Fit the grid search object to the training data and find the optimal parameters\n", 1028 | "grid_obj.fit(X_train,y_train)\n", 1029 | "\n", 1030 | "# Get the estimator\n", 1031 | "clf = grid_obj.best_estimator_\n", 1032 | "print 
clf.get_params()\n", 1033 | "# Report the final F1 score for training and testing after parameter tuning\n", 1034 | "print \"Tuned model has a training F1 score of {:.4f}.\".format(predict_labels(clf, X_train, y_train))\n", 1035 | "print \"Tuned model has a testing F1 score of {:.4f}.\".format(predict_labels(clf, X_test, y_test))" 1036 | ] 1037 | }, 1038 | { 1039 | "cell_type": "markdown", 1040 | "metadata": {}, 1041 | "source": [ 1042 | "### Question 5 - Final F1 Score\n", 1043 | "*What is the final model's F1 score for training and testing? How does that score compare to the untuned model?*" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "markdown", 1048 | "metadata": {}, 1049 | "source": [ 1050 | "**Answer: **\n", 1051 | "\n", 1052 | "Final model's F1 score for training: 0.8692\n", 1053 | "Final model's F1 score for testing: 0.7586\n", 1054 | "\n", 1055 | "It shows no difference from the 300-training-point model above, but it does not perform worse either. The most probable reason is that grid search ended up choosing the default parameters despite being given more options. I tried to reduce the C parameter, but it chose the value 1 again, which is the default; despite being given more options for the kernel, it again chose the default \"rbf\" kernel, which suits non-linear datasets. The number of training points also does not vary, since the full training set of 300 points is used in both cases. \n", 1056 | "\n", 1057 | "So the grid search model gives similar performance to the untuned one. This data set is also not balanced: there are more students who passed than students who didn't, and perhaps that led to more noise in the data, which is why the SVM reaches only a 0.7586 F1-score on the test set, noticeably lower than the training score." 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "markdown", 1062 | "metadata": {}, 1063 | "source": [ 1064 | "> **Note**: Once you have completed all of the code implementations and successfully answered each question above, you may finalize your work by exporting the iPython Notebook as an HTML document. You can do this by using the menu above and navigating to \n", 1065 | "**File -> Download as -> HTML (.html)**. Include the finished document along with this notebook as your submission." 
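Not part of the original notebook, but a natural follow-up to the class imbalance mentioned in the answer above: a minimal sketch (assuming the notebook's `X_train`/`y_train` and the same legacy `sklearn.cross_validation`/`sklearn.grid_search` modules used earlier) of re-running the grid search with stratified cross-validation folds and an optional `class_weight` setting, so that every fold keeps the roughly 2:1 passed/failed ratio.

```python
# Follow-up sketch (not in the original submission): cross-validate with stratified
# splits and let the SVC optionally reweight the minority class.
# Assumes the notebook's X_train / y_train and its legacy scikit-learn modules.
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import make_scorer, f1_score
from sklearn.svm import SVC

f1_scorer = make_scorer(f1_score, pos_label="yes")

# 10 stratified splits of the 300 training points; each keeps the "yes"/"no" ratio
cv_strategy = StratifiedShuffleSplit(y_train, n_iter=10, test_size=0.25, random_state=0)

parameters = {'kernel': ['linear', 'rbf'],
              'C': [0.25, 0.5, 1, 10, 50],
              'class_weight': [None, 'balanced']}  # 'balanced' needs sklearn >= 0.17; older versions use 'auto'

grid_obj = GridSearchCV(SVC(random_state=0), param_grid=parameters,
                        scoring=f1_scorer, cv=cv_strategy)
grid_obj.fit(X_train, y_train)

print grid_obj.best_params_
print "Best stratified CV F1 score: {:.4f}".format(grid_obj.best_score_)
```

Whether this actually lifts the test F1 above 0.7586 would need to be verified by re-running the notebook; the sketch only changes how the folds are drawn and which class weights are searched.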
1066 | ] 1067 | } 1068 | ], 1069 | "metadata": { 1070 | "kernelspec": { 1071 | "display_name": "Python 2", 1072 | "language": "python", 1073 | "name": "python2" 1074 | }, 1075 | "language_info": { 1076 | "codemirror_mode": { 1077 | "name": "ipython", 1078 | "version": 2 1079 | }, 1080 | "file_extension": ".py", 1081 | "mimetype": "text/x-python", 1082 | "name": "python", 1083 | "nbconvert_exporter": "python", 1084 | "pygments_lexer": "ipython2", 1085 | "version": "2.7.11" 1086 | } 1087 | }, 1088 | "nbformat": 4, 1089 | "nbformat_minor": 0 1090 | } 1091 | -------------------------------------------------------------------------------- /projects/student_intervention/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/table.png -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/README.md: -------------------------------------------------------------------------------- 1 | # Project 0: Introduction and Fundamentals 2 | ## Titanic Survival Exploration 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** and the following Python libraries installed: 7 | 8 | - [NumPy](http://www.numpy.org/) 9 | - [Pandas](http://pandas.pydata.org) 10 | - [matplotlib](http://matplotlib.org/) 11 | - [scikit-learn](http://scikit-learn.org/stable/) 12 | 13 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 14 | 15 | ### Overview 16 | 17 | This is an optional exploratory project to see which variables are more important for predicting titanic survival. It's more of a 'hello world' project for machine learning. 18 | 19 | ![](gender.png) 20 | 21 | ### Code 22 | 23 | Template code is provided in the notebook `titanic_survival_exploration.ipynb` notebook file. Additional supporting code can be found in `titanic_visualizations.py`. 24 | 25 | ### Run 26 | 27 | In a terminal or command window, navigate to the top-level project directory `titanic_survival_exploration/` (that contains this README) and run one of the following commands: 28 | 29 | ```ipython notebook titanic_survival_exploration.ipynb``` 30 | ```jupyter notebook titanic_survival_exploration.ipynb``` 31 | 32 | This will open the iPython Notebook software and project file in your browser. 33 | 34 | ## Data 35 | 36 | The dataset used in this project is included as `titanic_data.csv`. This dataset is provided by Udacity and contains the following attributes: 37 | 38 | - `survival` ? Survival (0 = No; 1 = Yes) 39 | - `pclass` ? Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd) 40 | - `name` ? Name 41 | - `sex` ? Sex 42 | - `age` ? Age 43 | - `sibsp` ? Number of Siblings/Spouses Aboard 44 | - `parch` ? Number of Parents/Children Aboard 45 | - `ticket` ? Ticket Number 46 | - `fare` ? Passenger Fare 47 | - `cabin` ? Cabin 48 | - `embarked` ? 
Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton) -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/debug.log: -------------------------------------------------------------------------------- 1 | [0308/013815.059:ERROR:crash_report_database_win.cc(426)] unexpected header 2 | -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/gender.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/titanic_survival_exploration/gender.png -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/titanic_visualizations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | def filter_data(data, condition): 6 | """ 7 | Remove elements that do not match the condition provided. 8 | Takes a data list as input and returns a filtered list. 9 | Conditions should be a list of strings of the following format: 10 | ' ' 11 | where the following operations are valid: >, <, >=, <=, ==, != 12 | 13 | Example: ["Sex == 'male'", 'Age < 18'] 14 | """ 15 | 16 | field, op, value = condition.split(" ") 17 | 18 | # convert value into number or strip excess quotes if string 19 | try: 20 | value = float(value) 21 | except: 22 | value = value.strip("\'\"") 23 | 24 | # get booleans for filtering 25 | if op == ">": 26 | matches = data[field] > value 27 | elif op == "<": 28 | matches = data[field] < value 29 | elif op == ">=": 30 | matches = data[field] >= value 31 | elif op == "<=": 32 | matches = data[field] <= value 33 | elif op == "==": 34 | matches = data[field] == value 35 | elif op == "!=": 36 | matches = data[field] != value 37 | else: # catch invalid operation codes 38 | raise Exception("Invalid comparison operator. Only >, <, >=, <=, ==, != allowed.") 39 | 40 | # filter data and outcomes 41 | data = data[matches].reset_index(drop = True) 42 | return data 43 | 44 | def survival_stats(data, outcomes, key, filters = []): 45 | """ 46 | Print out selected statistics regarding survival, given a feature of 47 | interest and any number of filters (including no filters) 48 | """ 49 | 50 | # Check that the key exists 51 | if key not in data.columns.values : 52 | print "'{}' is not a feature of the Titanic data. Did you spell something wrong?".format(key) 53 | return False 54 | 55 | # Return the function before visualizing if 'Cabin' or 'Ticket' 56 | # is selected: too many unique categories to display 57 | if(key == 'Cabin' or key == 'PassengerId' or key == 'Ticket'): 58 | print "'{}' has too many unique categories to display! 
Try a different feature.".format(key) 59 | return False 60 | 61 | # Merge data and outcomes into single dataframe 62 | all_data = pd.concat([data, outcomes], axis = 1) 63 | 64 | # Apply filters to data 65 | for condition in filters: 66 | all_data = filter_data(all_data, condition) 67 | 68 | # Create outcomes DataFrame 69 | all_data = all_data[[key, 'Survived']] 70 | 71 | # Create plotting figure 72 | plt.figure(figsize=(8,6)) 73 | 74 | # 'Numerical' features 75 | if(key == 'Age' or key == 'Fare'): 76 | 77 | # Remove NaN values from Age data 78 | all_data = all_data[~np.isnan(all_data[key])] 79 | 80 | # Divide the range of data into bins and count survival rates 81 | min_value = all_data[key].min() 82 | max_value = all_data[key].max() 83 | value_range = max_value - min_value 84 | 85 | # 'Fares' has larger range of values than 'Age' so create more bins 86 | if(key == 'Fare'): 87 | bins = np.arange(0, all_data['Fare'].max() + 20, 20) 88 | if(key == 'Age'): 89 | bins = np.arange(0, all_data['Age'].max() + 10, 10) 90 | 91 | # Overlay each bin's survival rates 92 | nonsurv_vals = all_data[all_data['Survived'] == 0][key].reset_index(drop = True) 93 | surv_vals = all_data[all_data['Survived'] == 1][key].reset_index(drop = True) 94 | plt.hist(nonsurv_vals, bins = bins, alpha = 0.6, 95 | color = 'red', label = 'Did not survive') 96 | plt.hist(surv_vals, bins = bins, alpha = 0.6, 97 | color = 'green', label = 'Survived') 98 | 99 | # Add legend to plot 100 | plt.xlim(0, bins.max()) 101 | plt.legend(framealpha = 0.8) 102 | 103 | # 'Categorical' features 104 | else: 105 | 106 | # Set the various categories 107 | if(key == 'Pclass'): 108 | values = np.arange(1,4) 109 | if(key == 'Parch' or key == 'SibSp'): 110 | values = np.arange(0,np.max(data[key]) + 1) 111 | if(key == 'Embarked'): 112 | values = ['C', 'Q', 'S'] 113 | if(key == 'Sex'): 114 | values = ['male', 'female'] 115 | 116 | # Create DataFrame containing categories and count of each 117 | frame = pd.DataFrame(index = np.arange(len(values)), columns=(key,'Survived','NSurvived')) 118 | for i, value in enumerate(values): 119 | frame.loc[i] = [value, \ 120 | len(all_data[(all_data['Survived'] == 1) & (all_data[key] == value)]), \ 121 | len(all_data[(all_data['Survived'] == 0) & (all_data[key] == value)])] 122 | 123 | # Set the width of each bar 124 | bar_width = 0.4 125 | 126 | # Display each category's survival rates 127 | for i in np.arange(len(frame)): 128 | nonsurv_bar = plt.bar(i-bar_width, frame.loc[i]['NSurvived'], width = bar_width, color = 'r') 129 | surv_bar = plt.bar(i, frame.loc[i]['Survived'], width = bar_width, color = 'g') 130 | 131 | plt.xticks(np.arange(len(frame)), values) 132 | plt.legend((nonsurv_bar[0], surv_bar[0]),('Did not survive', 'Survived'), framealpha = 0.8) 133 | 134 | # Common attributes for plot formatting 135 | plt.xlabel(key) 136 | plt.ylabel('Number of Passengers') 137 | plt.title('Passenger Survival Statistics With \'%s\' Feature'%(key)) 138 | plt.show() 139 | 140 | # Report number of passengers with missing values 141 | if sum(pd.isnull(all_data[key])): 142 | nan_outcomes = all_data[pd.isnull(all_data[key])]['Survived'] 143 | print "Passengers with missing '{}' values: {} ({} survived, {} did not survive)".format( \ 144 | key, len(nan_outcomes), sum(nan_outcomes == 1), sum(nan_outcomes == 0)) 145 | 146 | -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/titanic_visualizations.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/titanic_survival_exploration/titanic_visualizations.pyc -------------------------------------------------------------------------------- /verified certificate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/verified certificate.png --------------------------------------------------------------------------------