├── LICENSE ├── README.md ├── projects ├── boston_housing │ ├── .ipynb_checkpoints │ │ └── boston_housing-checkpoint.ipynb │ ├── README.md │ ├── boston_housing.html │ ├── boston_housing.ipynb │ ├── decision tree regressor.png │ ├── housing.csv │ ├── visuals.py │ └── visuals.pyc ├── creating_customer_segments │ ├── .ipynb_checkpoints │ │ └── customer_segments-checkpoint.ipynb │ ├── README.md │ ├── customer_segments.html │ ├── customer_segments.ipynb │ ├── customers.csv │ ├── debug.log │ ├── pca.png │ ├── renders.py │ └── renders.pyc ├── smartcab │ ├── Project 4 final report.pdf │ ├── Q_learning_new_state_space.png │ ├── README.md │ ├── images │ │ ├── car-black.png │ │ ├── car-blue.png │ │ ├── car-cyan.png │ │ ├── car-green.png │ │ ├── car-magenta.png │ │ ├── car-orange.png │ │ ├── car-red.png │ │ ├── car-white.png │ │ └── car-yellow.png │ ├── smartcab │ │ ├── .ipynb_checkpoints │ │ │ └── Test-checkpoint.ipynb │ │ ├── Q-learning_over50.png │ │ ├── Q-learning_over_100.1.png │ │ ├── Q-learning_over_100.2.png │ │ ├── Q_learning_new_state_space.png │ │ ├── Q_learning_with_epsilon_decay_final_parameter.png │ │ ├── Test.ipynb │ │ ├── __init__.py │ │ ├── agent.py │ │ ├── analysis.py │ │ ├── analysis.pyc │ │ ├── environment.py │ │ ├── environment.pyc │ │ ├── planner.py │ │ ├── planner.pyc │ │ ├── random_trial_50.png │ │ ├── simulator.py │ │ └── simulator.pyc │ └── smartcabpic.jpg ├── student_intervention │ ├── .ipynb_checkpoints │ │ └── student_intervention-checkpoint.ipynb │ ├── README.md │ ├── data2d.png │ ├── data_2d_to_3d_hyperplane.png │ ├── data_in_R3.png │ ├── debug.log │ ├── student-data.csv │ ├── student_intervention.html │ ├── student_intervention.ipynb │ └── table.png └── titanic_survival_exploration │ ├── .ipynb_checkpoints │ └── Titanic_Survival_Exploration-checkpoint.ipynb │ ├── README.md │ ├── Titanic_Survival_Exploration.html │ ├── Titanic_Survival_Exploration.ipynb │ ├── debug.log │ ├── gender.png │ ├── titanic_data.csv │ ├── titanic_visualizations.py │ └── titanic_visualizations.pyc └── verified certificate.png /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Tahsin Mayeesha 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Udacity-Machine-Learning-Nanodegree 2 | 3 | ![](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/blob/master/verified%20certificate.png) 4 | 5 | ## About 6 | 7 | This repo contains the coursework from Udacity's Machine Learning Nanodegree, completed from June 2016 to February 2017. 8 | 9 | 10 | 11 | ## Projects 12 | 13 | * P0 - [Exploring Titanic Survival (Optional)](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/titanic_survival_exploration) 14 | * P1 - [Predicting Boston Housing Prices](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/boston_housing) 15 | * P2 - [Building a Student Intervention System for Struggling Students](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/student_intervention) 16 | * P3 - [Creating Customer Segments from Wholesale Distributor Data](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/creating_customer_segments) 17 | * P4 - [Training a Smartcab to drive with Q-Learning](https://github.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree/tree/master/projects/smartcab) 18 | 19 | ## Coursework 20 | 21 | * [Intro to Data Science](https://www.udacity.com/course/intro-to-data-science--ud359) 22 | * [Intro to Machine Learning](https://www.udacity.com/course/intro-to-machine-learning--ud120) 23 | * [Georgia Tech: Machine Learning](https://www.udacity.com/course/machine-learning--ud262) 24 | * [Reinforcement Learning](https://www.udacity.com/course/reinforcement-learning--ud600) 25 | 26 | -------------------------------------------------------------------------------- /projects/boston_housing/README.md: -------------------------------------------------------------------------------- 1 | # Project 1: Model Evaluation & Validation 2 | ## Predicting Boston Housing Prices 3 | 4 | This project uses supervised learning techniques to predict the prices of houses in the Boston area from the provided features. It's a classic dataset, available from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Housing) and included in many Python libraries. I used a DecisionTreeRegressor model with varying depth to compare model performance and ultimately compared it against a k-nearest neighbors model after tuning the max_depth of the decision tree and the n_neighbors of the k-nearest neighbors model. The best performing model is a decision tree with a max_depth of 4, which yields a score of about 0.8. The evaluation metric for this project is R^2.
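For reference, a minimal sketch of the tuning step described above might look like the following. This is an illustration rather than the notebook's exact code: the parameter grid, the 10-fold cross-validation, and the GridSearchCV import handling are assumptions.

```python
# Hypothetical sketch (not the project notebook's code): grid-search the
# max_depth of a DecisionTreeRegressor using the R^2 metric on housing.csv.
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer, r2_score
try:
    from sklearn.model_selection import GridSearchCV   # newer scikit-learn releases
except ImportError:
    from sklearn.grid_search import GridSearchCV       # older scikit-learn releases

data = pd.read_csv("housing.csv")
X, y = data.drop("MDEV", axis=1), data["MDEV"]

# Assumed search space and 10-fold cross-validation
params = {"max_depth": list(range(1, 11))}
grid = GridSearchCV(DecisionTreeRegressor(random_state=0), params,
                    scoring=make_scorer(r2_score), cv=10)
grid.fit(X, y)
print(grid.best_params_)   # the report above found max_depth = 4 to perform best
print(grid.best_score_)    # cross-validated R^2 of the best tree
```

The same pattern applies to the k-nearest neighbors comparison, with `n_neighbors` as the tuned parameter.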
5 | 6 | ![](decision tree regressor.png) 7 | 8 | 9 | 10 | ### Install 11 | 12 | This project requires **Python 2.7** and the following Python libraries installed: 13 | 14 | - [NumPy](http://www.numpy.org/) 15 | - [matplotlib](http://matplotlib.org/) 16 | - [scikit-learn](http://scikit-learn.org/stable/) 17 | 18 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 19 | 20 | ### Run 21 | 22 | In a terminal or command window, navigate to the top-level project directory `boston_housing/` (that contains this README) and run one of the following commands: 23 | 24 | ```ipython notebook boston_housing.ipynb``` 25 | ```jupyter notebook boston_housing.ipynb``` 26 | 27 | This will open the iPython Notebook software and project file in your browser. 28 | -------------------------------------------------------------------------------- /projects/boston_housing/decision tree regressor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/boston_housing/decision tree regressor.png -------------------------------------------------------------------------------- /projects/boston_housing/housing.csv: -------------------------------------------------------------------------------- 1 | RM,LSTAT,PTRATIO,MDEV 2 | 6.575,4.98,15.3,504000.0 3 | 6.421,9.14,17.8,453600.0 4 | 7.185,4.03,17.8,728700.0 5 | 6.998,2.94,18.7,701400.0 6 | 7.147,5.33,18.7,760200.0 7 | 6.43,5.21,18.7,602700.0 8 | 6.012,12.43,15.2,480900.0 9 | 6.172,19.15,15.2,569100.0 10 | 5.631,29.93,15.2,346500.0 11 | 6.004,17.1,15.2,396900.0 12 | 6.377,20.45,15.2,315000.0 13 | 6.009,13.27,15.2,396900.0 14 | 5.889,15.71,15.2,455700.0 15 | 5.949,8.26,21.0,428400.0 16 | 6.096,10.26,21.0,382200.0 17 | 5.834,8.47,21.0,417900.0 18 | 5.935,6.58,21.0,485100.0 19 | 5.99,14.67,21.0,367500.0 20 | 5.456,11.69,21.0,424200.0 21 | 5.727,11.28,21.0,382200.0 22 | 5.57,21.02,21.0,285600.0 23 | 5.965,13.83,21.0,411600.0 24 | 6.142,18.72,21.0,319200.0 25 | 5.813,19.88,21.0,304500.0 26 | 5.924,16.3,21.0,327600.0 27 | 5.599,16.51,21.0,291900.0 28 | 5.813,14.81,21.0,348600.0 29 | 6.047,17.28,21.0,310800.0 30 | 6.495,12.8,21.0,386400.0 31 | 6.674,11.98,21.0,441000.0 32 | 5.713,22.6,21.0,266700.0 33 | 6.072,13.04,21.0,304500.0 34 | 5.95,27.71,21.0,277200.0 35 | 5.701,18.35,21.0,275100.0 36 | 6.096,20.34,21.0,283500.0 37 | 5.933,9.68,19.2,396900.0 38 | 5.841,11.41,19.2,420000.0 39 | 5.85,8.77,19.2,441000.0 40 | 5.966,10.13,19.2,518700.0 41 | 6.595,4.32,18.3,646800.0 42 | 7.024,1.98,18.3,732900.0 43 | 6.77,4.84,17.9,558600.0 44 | 6.169,5.81,17.9,531300.0 45 | 6.211,7.44,17.9,518700.0 46 | 6.069,9.55,17.9,445200.0 47 | 5.682,10.21,17.9,405300.0 48 | 5.786,14.15,17.9,420000.0 49 | 6.03,18.8,17.9,348600.0 50 | 5.399,30.81,17.9,302400.0 51 | 5.602,16.2,17.9,407400.0 52 | 5.963,13.45,16.8,413700.0 53 | 6.115,9.43,16.8,430500.0 54 | 6.511,5.28,16.8,525000.0 55 | 5.998,8.43,16.8,491400.0 56 | 5.888,14.8,21.1,396900.0 57 | 7.249,4.81,17.9,743400.0 58 | 6.383,5.77,17.3,518700.0 59 | 6.816,3.95,15.1,663600.0 60 | 6.145,6.86,19.7,489300.0 61 | 5.927,9.22,19.7,411600.0 62 | 5.741,13.15,19.7,392700.0 63 | 5.966,14.44,19.7,336000.0 64 | 6.456,6.73,19.7,466200.0 65 | 6.762,9.5,19.7,525000.0 66 | 7.104,8.05,18.6,693000.0 67 | 6.29,4.67,16.1,493500.0 68 | 5.787,10.24,16.1,407400.0 69 | 5.878,8.1,18.9,462000.0 70 | 5.594,13.09,18.9,365400.0 71 | 
5.885,8.79,18.9,438900.0 72 | 6.417,6.72,19.2,508200.0 73 | 5.961,9.88,19.2,455700.0 74 | 6.065,5.52,19.2,478800.0 75 | 6.245,7.54,19.2,491400.0 76 | 6.273,6.78,18.7,506100.0 77 | 6.286,8.94,18.7,449400.0 78 | 6.279,11.97,18.7,420000.0 79 | 6.14,10.27,18.7,436800.0 80 | 6.232,12.34,18.7,445200.0 81 | 5.874,9.1,18.7,426300.0 82 | 6.727,5.29,19.0,588000.0 83 | 6.619,7.22,19.0,501900.0 84 | 6.302,6.72,19.0,520800.0 85 | 6.167,7.51,19.0,480900.0 86 | 6.389,9.62,18.5,501900.0 87 | 6.63,6.53,18.5,558600.0 88 | 6.015,12.86,18.5,472500.0 89 | 6.121,8.44,18.5,466200.0 90 | 7.007,5.5,17.8,495600.0 91 | 7.079,5.7,17.8,602700.0 92 | 6.417,8.81,17.8,474600.0 93 | 6.405,8.2,17.8,462000.0 94 | 6.442,8.16,18.2,480900.0 95 | 6.211,6.21,18.2,525000.0 96 | 6.249,10.59,18.2,432600.0 97 | 6.625,6.65,18.0,596400.0 98 | 6.163,11.34,18.0,449400.0 99 | 8.069,4.21,18.0,812700.0 100 | 7.82,3.57,18.0,919800.0 101 | 7.416,6.19,18.0,697200.0 102 | 6.727,9.42,20.9,577500.0 103 | 6.781,7.67,20.9,556500.0 104 | 6.405,10.63,20.9,390600.0 105 | 6.137,13.44,20.9,405300.0 106 | 6.167,12.33,20.9,422100.0 107 | 5.851,16.47,20.9,409500.0 108 | 5.836,18.66,20.9,409500.0 109 | 6.127,14.09,20.9,428400.0 110 | 6.474,12.27,20.9,415800.0 111 | 6.229,15.55,20.9,407400.0 112 | 6.195,13.0,20.9,455700.0 113 | 6.715,10.16,17.8,478800.0 114 | 5.913,16.21,17.8,394800.0 115 | 6.092,17.09,17.8,392700.0 116 | 6.254,10.45,17.8,388500.0 117 | 5.928,15.76,17.8,384300.0 118 | 6.176,12.04,17.8,445200.0 119 | 6.021,10.3,17.8,403200.0 120 | 5.872,15.37,17.8,428400.0 121 | 5.731,13.61,17.8,405300.0 122 | 5.87,14.37,19.1,462000.0 123 | 6.004,14.27,19.1,426300.0 124 | 5.961,17.93,19.1,430500.0 125 | 5.856,25.41,19.1,363300.0 126 | 5.879,17.58,19.1,394800.0 127 | 5.986,14.81,19.1,449400.0 128 | 5.613,27.26,19.1,329700.0 129 | 5.693,17.19,21.2,340200.0 130 | 6.431,15.39,21.2,378000.0 131 | 5.637,18.34,21.2,300300.0 132 | 6.458,12.6,21.2,403200.0 133 | 6.326,12.26,21.2,411600.0 134 | 6.372,11.12,21.2,483000.0 135 | 5.822,15.03,21.2,386400.0 136 | 5.757,17.31,21.2,327600.0 137 | 6.335,16.96,21.2,380100.0 138 | 5.942,16.9,21.2,365400.0 139 | 6.454,14.59,21.2,359100.0 140 | 5.857,21.32,21.2,279300.0 141 | 6.151,18.46,21.2,373800.0 142 | 6.174,24.16,21.2,294000.0 143 | 5.019,34.41,21.2,302400.0 144 | 5.403,26.82,14.7,281400.0 145 | 5.468,26.42,14.7,327600.0 146 | 4.903,29.29,14.7,247800.0 147 | 6.13,27.8,14.7,289800.0 148 | 5.628,16.65,14.7,327600.0 149 | 4.926,29.53,14.7,306600.0 150 | 5.186,28.32,14.7,373800.0 151 | 5.597,21.45,14.7,323400.0 152 | 6.122,14.1,14.7,451500.0 153 | 5.404,13.28,14.7,411600.0 154 | 5.012,12.12,14.7,321300.0 155 | 5.709,15.79,14.7,407400.0 156 | 6.129,15.12,14.7,357000.0 157 | 6.152,15.02,14.7,327600.0 158 | 5.272,16.14,14.7,275100.0 159 | 6.943,4.59,14.7,867300.0 160 | 6.066,6.43,14.7,510300.0 161 | 6.51,7.39,14.7,489300.0 162 | 6.25,5.5,14.7,567000.0 163 | 5.854,11.64,14.7,476700.0 164 | 6.101,9.81,14.7,525000.0 165 | 5.877,12.14,14.7,499800.0 166 | 6.319,11.1,14.7,499800.0 167 | 6.402,11.32,14.7,468300.0 168 | 5.875,14.43,14.7,365400.0 169 | 5.88,12.03,14.7,401100.0 170 | 5.572,14.69,16.6,485100.0 171 | 6.416,9.04,16.6,495600.0 172 | 5.859,9.64,16.6,474600.0 173 | 6.546,5.33,16.6,617400.0 174 | 6.02,10.11,16.6,487200.0 175 | 6.315,6.29,16.6,516600.0 176 | 6.86,6.92,16.6,627900.0 177 | 6.98,5.04,17.8,781200.0 178 | 7.765,7.56,17.8,835800.0 179 | 6.144,9.45,17.8,760200.0 180 | 7.155,4.82,17.8,795900.0 181 | 6.563,5.68,17.8,682500.0 182 | 5.604,13.98,17.8,554400.0 183 | 6.153,13.15,17.8,621600.0 184 | 6.782,6.68,15.2,672000.0 185 | 
6.556,4.56,15.2,625800.0 186 | 7.185,5.39,15.2,732900.0 187 | 6.951,5.1,15.2,777000.0 188 | 6.739,4.69,15.2,640500.0 189 | 7.178,2.87,15.2,764400.0 190 | 6.8,5.03,15.6,653100.0 191 | 6.604,4.38,15.6,611100.0 192 | 7.287,4.08,12.6,699300.0 193 | 7.107,8.61,12.6,636300.0 194 | 7.274,6.62,12.6,726600.0 195 | 6.975,4.56,17.0,732900.0 196 | 7.135,4.45,17.0,690900.0 197 | 6.162,7.43,14.7,506100.0 198 | 7.61,3.11,14.7,888300.0 199 | 7.853,3.81,14.7,1018500.0 200 | 5.891,10.87,18.6,474600.0 201 | 6.326,10.97,18.6,512400.0 202 | 5.783,18.06,18.6,472500.0 203 | 6.064,14.66,18.6,512400.0 204 | 5.344,23.09,18.6,420000.0 205 | 5.96,17.27,18.6,455700.0 206 | 5.404,23.98,18.6,405300.0 207 | 5.807,16.03,18.6,470400.0 208 | 6.375,9.38,18.6,590100.0 209 | 5.412,29.55,18.6,497700.0 210 | 6.182,9.47,18.6,525000.0 211 | 5.888,13.51,16.4,489300.0 212 | 6.642,9.69,16.4,602700.0 213 | 5.951,17.92,16.4,451500.0 214 | 6.373,10.5,16.4,483000.0 215 | 6.951,9.71,17.4,560700.0 216 | 6.164,21.46,17.4,455700.0 217 | 6.879,9.93,17.4,577500.0 218 | 6.618,7.6,17.4,632100.0 219 | 8.266,4.14,17.4,940800.0 220 | 8.04,3.13,17.4,789600.0 221 | 7.163,6.36,17.4,663600.0 222 | 7.686,3.92,17.4,980700.0 223 | 6.552,3.76,17.4,661500.0 224 | 5.981,11.65,17.4,510300.0 225 | 7.412,5.25,17.4,665700.0 226 | 8.337,2.47,17.4,875700.0 227 | 8.247,3.95,17.4,1014300.0 228 | 6.726,8.05,17.4,609000.0 229 | 6.086,10.88,17.4,504000.0 230 | 6.631,9.54,17.4,527100.0 231 | 7.358,4.73,17.4,661500.0 232 | 6.481,6.36,16.6,497700.0 233 | 6.606,7.37,16.6,489300.0 234 | 6.897,11.38,16.6,462000.0 235 | 6.095,12.4,16.6,422100.0 236 | 6.358,11.22,16.6,466200.0 237 | 6.393,5.19,16.6,497700.0 238 | 5.593,12.5,19.1,369600.0 239 | 5.605,18.46,19.1,388500.0 240 | 6.108,9.16,19.1,510300.0 241 | 6.226,10.15,19.1,430500.0 242 | 6.433,9.52,19.1,514500.0 243 | 6.718,6.56,19.1,550200.0 244 | 6.487,5.9,19.1,512400.0 245 | 6.438,3.59,19.1,520800.0 246 | 6.957,3.53,19.1,621600.0 247 | 8.259,3.54,19.1,898800.0 248 | 6.108,6.57,16.4,459900.0 249 | 5.876,9.25,16.4,438900.0 250 | 7.454,3.11,15.9,924000.0 251 | 7.333,7.79,13.0,756000.0 252 | 6.842,6.9,13.0,632100.0 253 | 7.203,9.59,13.0,709800.0 254 | 7.52,7.26,13.0,905100.0 255 | 8.398,5.91,13.0,1024800.0 256 | 7.327,11.25,13.0,651000.0 257 | 7.206,8.1,13.0,766500.0 258 | 5.56,10.45,13.0,478800.0 259 | 7.014,14.79,13.0,644700.0 260 | 7.47,3.16,13.0,913500.0 261 | 5.92,13.65,18.6,434700.0 262 | 5.856,13.0,18.6,443100.0 263 | 6.24,6.59,18.6,529200.0 264 | 6.538,7.73,18.6,512400.0 265 | 7.691,6.58,18.6,739200.0 266 | 6.758,3.53,17.6,680400.0 267 | 6.854,2.98,17.6,672000.0 268 | 7.267,6.05,17.6,697200.0 269 | 6.826,4.16,17.6,695100.0 270 | 6.482,7.19,17.6,611100.0 271 | 6.812,4.85,14.9,737100.0 272 | 7.82,3.76,14.9,953400.0 273 | 6.968,4.59,14.9,743400.0 274 | 7.645,3.01,14.9,966000.0 275 | 7.088,7.85,15.3,676200.0 276 | 6.453,8.23,15.3,462000.0 277 | 6.23,12.93,18.2,422100.0 278 | 6.209,7.14,16.6,487200.0 279 | 6.315,7.6,16.6,468300.0 280 | 6.565,9.51,16.6,520800.0 281 | 6.861,3.33,19.2,598500.0 282 | 7.148,3.56,19.2,783300.0 283 | 6.63,4.7,19.2,585900.0 284 | 6.127,8.58,16.0,501900.0 285 | 6.009,10.4,16.0,455700.0 286 | 6.678,6.27,16.0,600600.0 287 | 6.549,7.39,16.0,569100.0 288 | 5.79,15.84,16.0,426300.0 289 | 6.345,4.97,14.8,472500.0 290 | 7.041,4.74,14.8,609000.0 291 | 6.871,6.07,14.8,520800.0 292 | 6.59,9.5,16.1,462000.0 293 | 6.495,8.67,16.1,554400.0 294 | 6.982,4.86,16.1,695100.0 295 | 7.236,6.93,18.4,758100.0 296 | 6.616,8.93,18.4,596400.0 297 | 7.42,6.47,18.4,701400.0 298 | 6.849,7.53,18.4,592200.0 299 | 
6.635,4.54,18.4,478800.0 300 | 5.972,9.97,18.4,426300.0 301 | 4.973,12.64,18.4,338100.0 302 | 6.122,5.98,18.4,464100.0 303 | 6.023,11.72,18.4,407400.0 304 | 6.266,7.9,18.4,453600.0 305 | 6.567,9.28,18.4,499800.0 306 | 5.705,11.5,18.4,340200.0 307 | 5.914,18.33,18.4,373800.0 308 | 5.782,15.94,18.4,415800.0 309 | 6.382,10.36,18.4,485100.0 310 | 6.113,12.73,18.4,441000.0 311 | 6.426,7.2,19.6,499800.0 312 | 6.376,6.87,19.6,485100.0 313 | 6.041,7.7,19.6,428400.0 314 | 5.708,11.74,19.6,388500.0 315 | 6.415,6.12,19.6,525000.0 316 | 6.431,5.08,19.6,516600.0 317 | 6.312,6.15,19.6,483000.0 318 | 6.083,12.79,19.6,466200.0 319 | 5.868,9.97,16.9,405300.0 320 | 6.333,7.34,16.9,474600.0 321 | 6.144,9.09,16.9,415800.0 322 | 5.706,12.43,16.9,359100.0 323 | 6.031,7.83,16.9,407400.0 324 | 6.316,5.68,20.2,466200.0 325 | 6.31,6.75,20.2,434700.0 326 | 6.037,8.01,20.2,443100.0 327 | 5.869,9.8,20.2,409500.0 328 | 5.895,10.56,20.2,388500.0 329 | 6.059,8.51,20.2,432600.0 330 | 5.985,9.74,20.2,399000.0 331 | 5.968,9.29,20.2,392700.0 332 | 7.241,5.49,15.5,686700.0 333 | 6.54,8.65,15.9,346500.0 334 | 6.696,7.18,17.6,501900.0 335 | 6.874,4.61,17.6,655200.0 336 | 6.014,10.53,18.8,367500.0 337 | 5.898,12.67,18.8,361200.0 338 | 6.516,6.36,17.9,485100.0 339 | 6.635,5.99,17.0,514500.0 340 | 6.939,5.89,19.7,558600.0 341 | 6.49,5.98,19.7,480900.0 342 | 6.579,5.49,18.3,506100.0 343 | 5.884,7.79,18.3,390600.0 344 | 6.728,4.5,17.0,632100.0 345 | 5.663,8.05,22.0,382200.0 346 | 5.936,5.57,22.0,432600.0 347 | 6.212,17.6,20.2,373800.0 348 | 6.395,13.27,20.2,455700.0 349 | 6.127,11.48,20.2,476700.0 350 | 6.112,12.67,20.2,474600.0 351 | 6.398,7.79,20.2,525000.0 352 | 6.251,14.19,20.2,417900.0 353 | 5.362,10.19,20.2,436800.0 354 | 5.803,14.64,20.2,352800.0 355 | 3.561,7.12,20.2,577500.0 356 | 4.963,14.0,20.2,459900.0 357 | 3.863,13.33,20.2,485100.0 358 | 4.906,34.77,20.2,289800.0 359 | 4.138,37.97,20.2,289800.0 360 | 7.313,13.44,20.2,315000.0 361 | 6.649,23.24,20.2,291900.0 362 | 6.794,21.24,20.2,279300.0 363 | 6.38,23.69,20.2,275100.0 364 | 6.223,21.78,20.2,214200.0 365 | 6.968,17.21,20.2,218400.0 366 | 6.545,21.08,20.2,228900.0 367 | 5.536,23.6,20.2,237300.0 368 | 5.52,24.56,20.2,258300.0 369 | 4.368,30.63,20.2,184800.0 370 | 5.277,30.81,20.2,151200.0 371 | 4.652,28.28,20.2,220500.0 372 | 5.0,31.99,20.2,155400.0 373 | 4.88,30.62,20.2,214200.0 374 | 5.39,20.85,20.2,241500.0 375 | 5.713,17.11,20.2,317100.0 376 | 6.051,18.76,20.2,487200.0 377 | 5.036,25.68,20.2,203700.0 378 | 6.193,15.17,20.2,289800.0 379 | 5.887,16.35,20.2,266700.0 380 | 6.471,17.12,20.2,275100.0 381 | 6.405,19.37,20.2,262500.0 382 | 5.747,19.92,20.2,178500.0 383 | 5.453,30.59,20.2,105000.0 384 | 5.852,29.97,20.2,132300.0 385 | 5.987,26.77,20.2,117600.0 386 | 6.343,20.32,20.2,151200.0 387 | 6.404,20.31,20.2,254100.0 388 | 5.349,19.77,20.2,174300.0 389 | 5.531,27.38,20.2,178500.0 390 | 5.683,22.98,20.2,105000.0 391 | 4.138,23.34,20.2,249900.0 392 | 5.608,12.13,20.2,585900.0 393 | 5.617,26.4,20.2,361200.0 394 | 6.852,19.78,20.2,577500.0 395 | 5.757,10.11,20.2,315000.0 396 | 6.657,21.22,20.2,361200.0 397 | 4.628,34.37,20.2,375900.0 398 | 5.155,20.08,20.2,342300.0 399 | 4.519,36.98,20.2,147000.0 400 | 6.434,29.05,20.2,151200.0 401 | 6.782,25.79,20.2,157500.0 402 | 5.304,26.64,20.2,218400.0 403 | 5.957,20.62,20.2,184800.0 404 | 6.824,22.74,20.2,176400.0 405 | 6.411,15.02,20.2,350700.0 406 | 6.006,15.7,20.2,298200.0 407 | 5.648,14.1,20.2,436800.0 408 | 6.103,23.29,20.2,281400.0 409 | 5.565,17.16,20.2,245700.0 410 | 5.896,24.39,20.2,174300.0 411 | 5.837,15.69,20.2,214200.0 
412 | 6.202,14.52,20.2,228900.0 413 | 6.193,21.52,20.2,231000.0 414 | 6.38,24.08,20.2,199500.0 415 | 6.348,17.64,20.2,304500.0 416 | 6.833,19.69,20.2,296100.0 417 | 6.425,12.03,20.2,338100.0 418 | 6.436,16.22,20.2,300300.0 419 | 6.208,15.17,20.2,245700.0 420 | 6.629,23.27,20.2,281400.0 421 | 6.461,18.05,20.2,201600.0 422 | 6.152,26.45,20.2,182700.0 423 | 5.935,34.02,20.2,176400.0 424 | 5.627,22.88,20.2,268800.0 425 | 5.818,22.11,20.2,220500.0 426 | 6.406,19.52,20.2,359100.0 427 | 6.219,16.59,20.2,386400.0 428 | 6.485,18.85,20.2,323400.0 429 | 5.854,23.79,20.2,226800.0 430 | 6.459,23.98,20.2,247800.0 431 | 6.341,17.79,20.2,312900.0 432 | 6.251,16.44,20.2,264600.0 433 | 6.185,18.13,20.2,296100.0 434 | 6.417,19.31,20.2,273000.0 435 | 6.749,17.44,20.2,281400.0 436 | 6.655,17.73,20.2,319200.0 437 | 6.297,17.27,20.2,338100.0 438 | 7.393,16.74,20.2,373800.0 439 | 6.728,18.71,20.2,312900.0 440 | 6.525,18.13,20.2,296100.0 441 | 5.976,19.01,20.2,266700.0 442 | 5.936,16.94,20.2,283500.0 443 | 6.301,16.23,20.2,312900.0 444 | 6.081,14.7,20.2,420000.0 445 | 6.701,16.42,20.2,344400.0 446 | 6.376,14.65,20.2,371700.0 447 | 6.317,13.99,20.2,409500.0 448 | 6.513,10.29,20.2,424200.0 449 | 6.209,13.22,20.2,449400.0 450 | 5.759,14.13,20.2,417900.0 451 | 5.952,17.15,20.2,399000.0 452 | 6.003,21.32,20.2,401100.0 453 | 5.926,18.13,20.2,401100.0 454 | 5.713,14.76,20.2,422100.0 455 | 6.167,16.29,20.2,417900.0 456 | 6.229,12.87,20.2,411600.0 457 | 6.437,14.36,20.2,487200.0 458 | 6.98,11.66,20.2,625800.0 459 | 5.427,18.14,20.2,289800.0 460 | 6.162,24.1,20.2,279300.0 461 | 6.484,18.68,20.2,350700.0 462 | 5.304,24.91,20.2,252000.0 463 | 6.185,18.03,20.2,306600.0 464 | 6.229,13.11,20.2,449400.0 465 | 6.242,10.74,20.2,483000.0 466 | 6.75,7.74,20.2,497700.0 467 | 7.061,7.01,20.2,525000.0 468 | 5.762,10.42,20.2,457800.0 469 | 5.871,13.34,20.2,432600.0 470 | 6.312,10.58,20.2,445200.0 471 | 6.114,14.98,20.2,401100.0 472 | 5.905,11.45,20.2,432600.0 473 | 5.454,18.06,20.1,319200.0 474 | 5.414,23.97,20.1,147000.0 475 | 5.093,29.68,20.1,170100.0 476 | 5.983,18.07,20.1,285600.0 477 | 5.983,13.35,20.1,422100.0 478 | 5.707,12.01,19.2,457800.0 479 | 5.926,13.59,19.2,514500.0 480 | 5.67,17.6,19.2,485100.0 481 | 5.39,21.14,19.2,413700.0 482 | 5.794,14.1,19.2,384300.0 483 | 6.019,12.92,19.2,445200.0 484 | 5.569,15.1,19.2,367500.0 485 | 6.027,14.33,19.2,352800.0 486 | 6.593,9.67,21.0,470400.0 487 | 6.12,9.08,21.0,432600.0 488 | 6.976,5.64,21.0,501900.0 489 | 6.794,6.48,21.0,462000.0 490 | 6.03,7.88,21.0,249900.0 491 | -------------------------------------------------------------------------------- /projects/boston_housing/visuals.py: -------------------------------------------------------------------------------- 1 | ########################################### 2 | # Suppress matplotlib user warnings 3 | # Necessary for newer version of matplotlib 4 | import warnings 5 | warnings.filterwarnings("ignore", category = UserWarning, module = "matplotlib") 6 | ########################################### 7 | 8 | import matplotlib.pyplot as pl 9 | import numpy as np 10 | import sklearn.learning_curve as curves 11 | from sklearn.tree import DecisionTreeRegressor 12 | from sklearn.cross_validation import ShuffleSplit, train_test_split 13 | 14 | def ModelLearning(X, y): 15 | """ Calculates the performance of several models with varying sizes of training data. 16 | The learning and testing scores for each model are then plotted. 
""" 17 | 18 | # Create 10 cross-validation sets for training and testing 19 | cv = ShuffleSplit(X.shape[0], n_iter = 10, test_size = 0.2, random_state = 0) 20 | 21 | # Generate the training set sizes increasing by 50 22 | train_sizes = np.rint(np.linspace(1, X.shape[0]*0.8 - 1, 9)).astype(int) 23 | 24 | # Create the figure window 25 | fig = pl.figure(figsize=(10,7)) 26 | 27 | # Create three different models based on max_depth 28 | for k, depth in enumerate([1,3,6,10]): 29 | 30 | # Create a Decision tree regressor at max_depth = depth 31 | regressor = DecisionTreeRegressor(max_depth = depth) 32 | 33 | # Calculate the training and testing scores 34 | sizes, train_scores, test_scores = curves.learning_curve(regressor, X, y, \ 35 | cv = cv, train_sizes = train_sizes, scoring = 'r2') 36 | 37 | # Find the mean and standard deviation for smoothing 38 | train_std = np.std(train_scores, axis = 1) 39 | train_mean = np.mean(train_scores, axis = 1) 40 | test_std = np.std(test_scores, axis = 1) 41 | test_mean = np.mean(test_scores, axis = 1) 42 | 43 | # Subplot the learning curve 44 | ax = fig.add_subplot(2, 2, k+1) 45 | ax.plot(sizes, train_mean, 'o-', color = 'r', label = 'Training Score') 46 | ax.plot(sizes, test_mean, 'o-', color = 'g', label = 'Testing Score') 47 | ax.fill_between(sizes, train_mean - train_std, \ 48 | train_mean + train_std, alpha = 0.15, color = 'r') 49 | ax.fill_between(sizes, test_mean - test_std, \ 50 | test_mean + test_std, alpha = 0.15, color = 'g') 51 | 52 | # Labels 53 | ax.set_title('max_depth = %s'%(depth)) 54 | ax.set_xlabel('Number of Training Points') 55 | ax.set_ylabel('Score') 56 | ax.set_xlim([0, X.shape[0]*0.8]) 57 | ax.set_ylim([-0.05, 1.05]) 58 | 59 | # Visual aesthetics 60 | ax.legend(bbox_to_anchor=(1.05, 2.05), loc='lower left', borderaxespad = 0.) 61 | fig.suptitle('Decision Tree Regressor Learning Performances', fontsize = 16, y = 1.03) 62 | fig.tight_layout() 63 | fig.show() 64 | 65 | 66 | def ModelComplexity(X, y): 67 | """ Calculates the performance of the model as model complexity increases. 68 | The learning and testing errors rates are then plotted. 
""" 69 | 70 | # Create 10 cross-validation sets for training and testing 71 | cv = ShuffleSplit(X.shape[0], n_iter = 10, test_size = 0.2, random_state = 0) 72 | 73 | # Vary the max_depth parameter from 1 to 10 74 | max_depth = np.arange(1,11) 75 | 76 | # Calculate the training and testing scores 77 | train_scores, test_scores = curves.validation_curve(DecisionTreeRegressor(), X, y, \ 78 | param_name = "max_depth", param_range = max_depth, cv = cv, scoring = 'r2') 79 | 80 | # Find the mean and standard deviation for smoothing 81 | train_mean = np.mean(train_scores, axis=1) 82 | train_std = np.std(train_scores, axis=1) 83 | test_mean = np.mean(test_scores, axis=1) 84 | test_std = np.std(test_scores, axis=1) 85 | 86 | # Plot the validation curve 87 | pl.figure(figsize=(7, 5)) 88 | pl.title('Decision Tree Regressor Complexity Performance') 89 | pl.plot(max_depth, train_mean, 'o-', color = 'r', label = 'Training Score') 90 | pl.plot(max_depth, test_mean, 'o-', color = 'g', label = 'Validation Score') 91 | pl.fill_between(max_depth, train_mean - train_std, \ 92 | train_mean + train_std, alpha = 0.15, color = 'r') 93 | pl.fill_between(max_depth, test_mean - test_std, \ 94 | test_mean + test_std, alpha = 0.15, color = 'g') 95 | 96 | # Visual aesthetics 97 | pl.legend(loc = 'lower right') 98 | pl.xlabel('Maximum Depth') 99 | pl.ylabel('Score') 100 | pl.ylim([-0.05,1.05]) 101 | pl.show() 102 | 103 | 104 | def PredictTrials(X, y, fitter, data): 105 | """ Performs trials of fitting and predicting data. """ 106 | 107 | # Store the predicted prices 108 | prices = [] 109 | 110 | for k in range(10): 111 | # Split the data 112 | X_train, X_test, y_train, y_test = train_test_split(X, y, \ 113 | test_size = 0.2, random_state = k) 114 | 115 | # Fit the data 116 | reg = fitter(X_train, y_train) 117 | 118 | # Make a prediction 119 | pred = reg.predict([data[0]])[0] 120 | prices.append(pred) 121 | 122 | # Result 123 | print "Trial {}: ${:,.2f}".format(k+1, pred) 124 | 125 | # Display price range 126 | print "\nRange in prices: ${:,.2f}".format(max(prices) - min(prices)) -------------------------------------------------------------------------------- /projects/boston_housing/visuals.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/boston_housing/visuals.pyc -------------------------------------------------------------------------------- /projects/creating_customer_segments/README.md: -------------------------------------------------------------------------------- 1 | # Project 3: Unsupervised Learning 2 | ## Creating Customer Segments 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** and the following Python libraries installed: 7 | 8 | - [NumPy](http://www.numpy.org/) 9 | - [Pandas](http://pandas.pydata.org) 10 | - [matplotlib](http://matplotlib.org/) 11 | - [scikit-learn](http://scikit-learn.org/stable/) 12 | 13 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 14 | 15 | ### Overview 16 | 17 | This project uses unsupervised machine learning techniques to segment the customers by applying PCA for dimensionality reduction and using the PCA components to cluster the customers into different segments. 18 | 19 | ![](pca.png) 20 | 21 | 22 | 23 | ### Code 24 | 25 | Template code is provided in the notebook `customer_segments.ipynb` notebook file. 
Additional supporting code can be found in `renders.py`. While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. 26 | 27 | ### Run 28 | 29 | In a terminal or command window, navigate to the top-level project directory `creating_customer_segments/` (that contains this README) and run one of the following commands: 30 | 31 | ```ipython notebook customer_segments.ipynb``` 32 | ```jupyter notebook customer_segments.ipynb``` 33 | 34 | This will open the iPython Notebook software and project file in your browser. 35 | 36 | ## Data 37 | 38 | The dataset used in this project is included as `customers.csv`. You can find more information on this dataset on the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Wholesale+customers) page. 39 | -------------------------------------------------------------------------------- /projects/creating_customer_segments/customers.csv: -------------------------------------------------------------------------------- 1 | Channel,Region,Fresh,Milk,Grocery,Frozen,Detergents_Paper,Delicatessen 2 | 2,3,12669,9656,7561,214,2674,1338 3 | 2,3,7057,9810,9568,1762,3293,1776 4 | 2,3,6353,8808,7684,2405,3516,7844 5 | 1,3,13265,1196,4221,6404,507,1788 6 | 2,3,22615,5410,7198,3915,1777,5185 7 | 2,3,9413,8259,5126,666,1795,1451 8 | 2,3,12126,3199,6975,480,3140,545 9 | 2,3,7579,4956,9426,1669,3321,2566 10 | 1,3,5963,3648,6192,425,1716,750 11 | 2,3,6006,11093,18881,1159,7425,2098 12 | 2,3,3366,5403,12974,4400,5977,1744 13 | 2,3,13146,1124,4523,1420,549,497 14 | 2,3,31714,12319,11757,287,3881,2931 15 | 2,3,21217,6208,14982,3095,6707,602 16 | 2,3,24653,9465,12091,294,5058,2168 17 | 1,3,10253,1114,3821,397,964,412 18 | 2,3,1020,8816,12121,134,4508,1080 19 | 1,3,5876,6157,2933,839,370,4478 20 | 2,3,18601,6327,10099,2205,2767,3181 21 | 1,3,7780,2495,9464,669,2518,501 22 | 2,3,17546,4519,4602,1066,2259,2124 23 | 1,3,5567,871,2010,3383,375,569 24 | 1,3,31276,1917,4469,9408,2381,4334 25 | 2,3,26373,36423,22019,5154,4337,16523 26 | 2,3,22647,9776,13792,2915,4482,5778 27 | 2,3,16165,4230,7595,201,4003,57 28 | 1,3,9898,961,2861,3151,242,833 29 | 1,3,14276,803,3045,485,100,518 30 | 2,3,4113,20484,25957,1158,8604,5206 31 | 1,3,43088,2100,2609,1200,1107,823 32 | 1,3,18815,3610,11107,1148,2134,2963 33 | 1,3,2612,4339,3133,2088,820,985 34 | 1,3,21632,1318,2886,266,918,405 35 | 1,3,29729,4786,7326,6130,361,1083 36 | 1,3,1502,1979,2262,425,483,395 37 | 2,3,688,5491,11091,833,4239,436 38 | 1,3,29955,4362,5428,1729,862,4626 39 | 2,3,15168,10556,12477,1920,6506,714 40 | 2,3,4591,15729,16709,33,6956,433 41 | 1,3,56159,555,902,10002,212,2916 42 | 1,3,24025,4332,4757,9510,1145,5864 43 | 1,3,19176,3065,5956,2033,2575,2802 44 | 2,3,10850,7555,14961,188,6899,46 45 | 2,3,630,11095,23998,787,9529,72 46 | 2,3,9670,7027,10471,541,4618,65 47 | 2,3,5181,22044,21531,1740,7353,4985 48 | 2,3,3103,14069,21955,1668,6792,1452 49 | 2,3,44466,54259,55571,7782,24171,6465 50 | 2,3,11519,6152,10868,584,5121,1476 51 | 2,3,4967,21412,28921,1798,13583,1163 52 | 1,3,6269,1095,1980,3860,609,2162 53 | 1,3,3347,4051,6996,239,1538,301 54 | 2,3,40721,3916,5876,532,2587,1278 55 | 2,3,491,10473,11532,744,5611,224 56 | 1,3,27329,1449,1947,2436,204,1333 57 | 1,3,5264,3683,5005,1057,2024,1130 58 | 2,3,4098,29892,26866,2616,17740,1340 59 | 2,3,5417,9933,10487,38,7572,1282 60 | 1,3,13779,1970,1648,596,227,436 61 | 1,3,6137,5360,8040,129,3084,1603 62 | 2,3,8590,3045,7854,96,4095,225 63 | 
2,3,35942,38369,59598,3254,26701,2017 64 | 2,3,7823,6245,6544,4154,4074,964 65 | 2,3,9396,11601,15775,2896,7677,1295 66 | 1,3,4760,1227,3250,3724,1247,1145 67 | 2,3,85,20959,45828,36,24231,1423 68 | 1,3,9,1534,7417,175,3468,27 69 | 2,3,19913,6759,13462,1256,5141,834 70 | 1,3,2446,7260,3993,5870,788,3095 71 | 1,3,8352,2820,1293,779,656,144 72 | 1,3,16705,2037,3202,10643,116,1365 73 | 1,3,18291,1266,21042,5373,4173,14472 74 | 1,3,4420,5139,2661,8872,1321,181 75 | 2,3,19899,5332,8713,8132,764,648 76 | 2,3,8190,6343,9794,1285,1901,1780 77 | 1,3,20398,1137,3,4407,3,975 78 | 1,3,717,3587,6532,7530,529,894 79 | 2,3,12205,12697,28540,869,12034,1009 80 | 1,3,10766,1175,2067,2096,301,167 81 | 1,3,1640,3259,3655,868,1202,1653 82 | 1,3,7005,829,3009,430,610,529 83 | 2,3,219,9540,14403,283,7818,156 84 | 2,3,10362,9232,11009,737,3537,2342 85 | 1,3,20874,1563,1783,2320,550,772 86 | 2,3,11867,3327,4814,1178,3837,120 87 | 2,3,16117,46197,92780,1026,40827,2944 88 | 2,3,22925,73498,32114,987,20070,903 89 | 1,3,43265,5025,8117,6312,1579,14351 90 | 1,3,7864,542,4042,9735,165,46 91 | 1,3,24904,3836,5330,3443,454,3178 92 | 1,3,11405,596,1638,3347,69,360 93 | 1,3,12754,2762,2530,8693,627,1117 94 | 2,3,9198,27472,32034,3232,18906,5130 95 | 1,3,11314,3090,2062,35009,71,2698 96 | 2,3,5626,12220,11323,206,5038,244 97 | 1,3,3,2920,6252,440,223,709 98 | 2,3,23,2616,8118,145,3874,217 99 | 1,3,403,254,610,774,54,63 100 | 1,3,503,112,778,895,56,132 101 | 1,3,9658,2182,1909,5639,215,323 102 | 2,3,11594,7779,12144,3252,8035,3029 103 | 2,3,1420,10810,16267,1593,6766,1838 104 | 2,3,2932,6459,7677,2561,4573,1386 105 | 1,3,56082,3504,8906,18028,1480,2498 106 | 1,3,14100,2132,3445,1336,1491,548 107 | 1,3,15587,1014,3970,910,139,1378 108 | 2,3,1454,6337,10704,133,6830,1831 109 | 2,3,8797,10646,14886,2471,8969,1438 110 | 2,3,1531,8397,6981,247,2505,1236 111 | 2,3,1406,16729,28986,673,836,3 112 | 1,3,11818,1648,1694,2276,169,1647 113 | 2,3,12579,11114,17569,805,6457,1519 114 | 1,3,19046,2770,2469,8853,483,2708 115 | 1,3,14438,2295,1733,3220,585,1561 116 | 1,3,18044,1080,2000,2555,118,1266 117 | 1,3,11134,793,2988,2715,276,610 118 | 1,3,11173,2521,3355,1517,310,222 119 | 1,3,6990,3880,5380,1647,319,1160 120 | 1,3,20049,1891,2362,5343,411,933 121 | 1,3,8258,2344,2147,3896,266,635 122 | 1,3,17160,1200,3412,2417,174,1136 123 | 1,3,4020,3234,1498,2395,264,255 124 | 1,3,12212,201,245,1991,25,860 125 | 2,3,11170,10769,8814,2194,1976,143 126 | 1,3,36050,1642,2961,4787,500,1621 127 | 1,3,76237,3473,7102,16538,778,918 128 | 1,3,19219,1840,1658,8195,349,483 129 | 2,3,21465,7243,10685,880,2386,2749 130 | 1,3,140,8847,3823,142,1062,3 131 | 1,3,42312,926,1510,1718,410,1819 132 | 1,3,7149,2428,699,6316,395,911 133 | 1,3,2101,589,314,346,70,310 134 | 1,3,14903,2032,2479,576,955,328 135 | 1,3,9434,1042,1235,436,256,396 136 | 1,3,7388,1882,2174,720,47,537 137 | 1,3,6300,1289,2591,1170,199,326 138 | 1,3,4625,8579,7030,4575,2447,1542 139 | 1,3,3087,8080,8282,661,721,36 140 | 1,3,13537,4257,5034,155,249,3271 141 | 1,3,5387,4979,3343,825,637,929 142 | 1,3,17623,4280,7305,2279,960,2616 143 | 1,3,30379,13252,5189,321,51,1450 144 | 1,3,37036,7152,8253,2995,20,3 145 | 1,3,10405,1596,1096,8425,399,318 146 | 1,3,18827,3677,1988,118,516,201 147 | 2,3,22039,8384,34792,42,12591,4430 148 | 1,3,7769,1936,2177,926,73,520 149 | 1,3,9203,3373,2707,1286,1082,526 150 | 1,3,5924,584,542,4052,283,434 151 | 1,3,31812,1433,1651,800,113,1440 152 | 1,3,16225,1825,1765,853,170,1067 153 | 1,3,1289,3328,2022,531,255,1774 154 | 1,3,18840,1371,3135,3001,352,184 155 | 
1,3,3463,9250,2368,779,302,1627 156 | 1,3,622,55,137,75,7,8 157 | 2,3,1989,10690,19460,233,11577,2153 158 | 2,3,3830,5291,14855,317,6694,3182 159 | 1,3,17773,1366,2474,3378,811,418 160 | 2,3,2861,6570,9618,930,4004,1682 161 | 2,3,355,7704,14682,398,8077,303 162 | 2,3,1725,3651,12822,824,4424,2157 163 | 1,3,12434,540,283,1092,3,2233 164 | 1,3,15177,2024,3810,2665,232,610 165 | 2,3,5531,15726,26870,2367,13726,446 166 | 2,3,5224,7603,8584,2540,3674,238 167 | 2,3,15615,12653,19858,4425,7108,2379 168 | 2,3,4822,6721,9170,993,4973,3637 169 | 1,3,2926,3195,3268,405,1680,693 170 | 1,3,5809,735,803,1393,79,429 171 | 1,3,5414,717,2155,2399,69,750 172 | 2,3,260,8675,13430,1116,7015,323 173 | 2,3,200,25862,19816,651,8773,6250 174 | 1,3,955,5479,6536,333,2840,707 175 | 2,3,514,7677,19805,937,9836,716 176 | 1,3,286,1208,5241,2515,153,1442 177 | 2,3,2343,7845,11874,52,4196,1697 178 | 1,3,45640,6958,6536,7368,1532,230 179 | 1,3,12759,7330,4533,1752,20,2631 180 | 1,3,11002,7075,4945,1152,120,395 181 | 1,3,3157,4888,2500,4477,273,2165 182 | 1,3,12356,6036,8887,402,1382,2794 183 | 1,3,112151,29627,18148,16745,4948,8550 184 | 1,3,694,8533,10518,443,6907,156 185 | 1,3,36847,43950,20170,36534,239,47943 186 | 1,3,327,918,4710,74,334,11 187 | 1,3,8170,6448,1139,2181,58,247 188 | 1,3,3009,521,854,3470,949,727 189 | 1,3,2438,8002,9819,6269,3459,3 190 | 2,3,8040,7639,11687,2758,6839,404 191 | 2,3,834,11577,11522,275,4027,1856 192 | 1,3,16936,6250,1981,7332,118,64 193 | 1,3,13624,295,1381,890,43,84 194 | 1,3,5509,1461,2251,547,187,409 195 | 2,3,180,3485,20292,959,5618,666 196 | 1,3,7107,1012,2974,806,355,1142 197 | 1,3,17023,5139,5230,7888,330,1755 198 | 1,1,30624,7209,4897,18711,763,2876 199 | 2,1,2427,7097,10391,1127,4314,1468 200 | 1,1,11686,2154,6824,3527,592,697 201 | 1,1,9670,2280,2112,520,402,347 202 | 2,1,3067,13240,23127,3941,9959,731 203 | 2,1,4484,14399,24708,3549,14235,1681 204 | 1,1,25203,11487,9490,5065,284,6854 205 | 1,1,583,685,2216,469,954,18 206 | 1,1,1956,891,5226,1383,5,1328 207 | 2,1,1107,11711,23596,955,9265,710 208 | 1,1,6373,780,950,878,288,285 209 | 2,1,2541,4737,6089,2946,5316,120 210 | 1,1,1537,3748,5838,1859,3381,806 211 | 2,1,5550,12729,16767,864,12420,797 212 | 1,1,18567,1895,1393,1801,244,2100 213 | 2,1,12119,28326,39694,4736,19410,2870 214 | 1,1,7291,1012,2062,1291,240,1775 215 | 1,1,3317,6602,6861,1329,3961,1215 216 | 2,1,2362,6551,11364,913,5957,791 217 | 1,1,2806,10765,15538,1374,5828,2388 218 | 2,1,2532,16599,36486,179,13308,674 219 | 1,1,18044,1475,2046,2532,130,1158 220 | 2,1,18,7504,15205,1285,4797,6372 221 | 1,1,4155,367,1390,2306,86,130 222 | 1,1,14755,899,1382,1765,56,749 223 | 1,1,5396,7503,10646,91,4167,239 224 | 1,1,5041,1115,2856,7496,256,375 225 | 2,1,2790,2527,5265,5612,788,1360 226 | 1,1,7274,659,1499,784,70,659 227 | 1,1,12680,3243,4157,660,761,786 228 | 2,1,20782,5921,9212,1759,2568,1553 229 | 1,1,4042,2204,1563,2286,263,689 230 | 1,1,1869,577,572,950,4762,203 231 | 1,1,8656,2746,2501,6845,694,980 232 | 2,1,11072,5989,5615,8321,955,2137 233 | 1,1,2344,10678,3828,1439,1566,490 234 | 1,1,25962,1780,3838,638,284,834 235 | 1,1,964,4984,3316,937,409,7 236 | 1,1,15603,2703,3833,4260,325,2563 237 | 1,1,1838,6380,2824,1218,1216,295 238 | 1,1,8635,820,3047,2312,415,225 239 | 1,1,18692,3838,593,4634,28,1215 240 | 1,1,7363,475,585,1112,72,216 241 | 1,1,47493,2567,3779,5243,828,2253 242 | 1,1,22096,3575,7041,11422,343,2564 243 | 1,1,24929,1801,2475,2216,412,1047 244 | 1,1,18226,659,2914,3752,586,578 245 | 1,1,11210,3576,5119,561,1682,2398 246 | 1,1,6202,7775,10817,1183,3143,1970 
247 | 2,1,3062,6154,13916,230,8933,2784 248 | 1,1,8885,2428,1777,1777,430,610 249 | 1,1,13569,346,489,2077,44,659 250 | 1,1,15671,5279,2406,559,562,572 251 | 1,1,8040,3795,2070,6340,918,291 252 | 1,1,3191,1993,1799,1730,234,710 253 | 2,1,6134,23133,33586,6746,18594,5121 254 | 1,1,6623,1860,4740,7683,205,1693 255 | 1,1,29526,7961,16966,432,363,1391 256 | 1,1,10379,17972,4748,4686,1547,3265 257 | 1,1,31614,489,1495,3242,111,615 258 | 1,1,11092,5008,5249,453,392,373 259 | 1,1,8475,1931,1883,5004,3593,987 260 | 1,1,56083,4563,2124,6422,730,3321 261 | 1,1,53205,4959,7336,3012,967,818 262 | 1,1,9193,4885,2157,327,780,548 263 | 1,1,7858,1110,1094,6818,49,287 264 | 1,1,23257,1372,1677,982,429,655 265 | 1,1,2153,1115,6684,4324,2894,411 266 | 2,1,1073,9679,15445,61,5980,1265 267 | 1,1,5909,23527,13699,10155,830,3636 268 | 2,1,572,9763,22182,2221,4882,2563 269 | 1,1,20893,1222,2576,3975,737,3628 270 | 2,1,11908,8053,19847,1069,6374,698 271 | 1,1,15218,258,1138,2516,333,204 272 | 1,1,4720,1032,975,5500,197,56 273 | 1,1,2083,5007,1563,1120,147,1550 274 | 1,1,514,8323,6869,529,93,1040 275 | 1,3,36817,3045,1493,4802,210,1824 276 | 1,3,894,1703,1841,744,759,1153 277 | 1,3,680,1610,223,862,96,379 278 | 1,3,27901,3749,6964,4479,603,2503 279 | 1,3,9061,829,683,16919,621,139 280 | 1,3,11693,2317,2543,5845,274,1409 281 | 2,3,17360,6200,9694,1293,3620,1721 282 | 1,3,3366,2884,2431,977,167,1104 283 | 2,3,12238,7108,6235,1093,2328,2079 284 | 1,3,49063,3965,4252,5970,1041,1404 285 | 1,3,25767,3613,2013,10303,314,1384 286 | 1,3,68951,4411,12609,8692,751,2406 287 | 1,3,40254,640,3600,1042,436,18 288 | 1,3,7149,2247,1242,1619,1226,128 289 | 1,3,15354,2102,2828,8366,386,1027 290 | 1,3,16260,594,1296,848,445,258 291 | 1,3,42786,286,471,1388,32,22 292 | 1,3,2708,2160,2642,502,965,1522 293 | 1,3,6022,3354,3261,2507,212,686 294 | 1,3,2838,3086,4329,3838,825,1060 295 | 2,2,3996,11103,12469,902,5952,741 296 | 1,2,21273,2013,6550,909,811,1854 297 | 2,2,7588,1897,5234,417,2208,254 298 | 1,2,19087,1304,3643,3045,710,898 299 | 2,2,8090,3199,6986,1455,3712,531 300 | 2,2,6758,4560,9965,934,4538,1037 301 | 1,2,444,879,2060,264,290,259 302 | 2,2,16448,6243,6360,824,2662,2005 303 | 2,2,5283,13316,20399,1809,8752,172 304 | 2,2,2886,5302,9785,364,6236,555 305 | 2,2,2599,3688,13829,492,10069,59 306 | 2,2,161,7460,24773,617,11783,2410 307 | 2,2,243,12939,8852,799,3909,211 308 | 2,2,6468,12867,21570,1840,7558,1543 309 | 1,2,17327,2374,2842,1149,351,925 310 | 1,2,6987,1020,3007,416,257,656 311 | 2,2,918,20655,13567,1465,6846,806 312 | 1,2,7034,1492,2405,12569,299,1117 313 | 1,2,29635,2335,8280,3046,371,117 314 | 2,2,2137,3737,19172,1274,17120,142 315 | 1,2,9784,925,2405,4447,183,297 316 | 1,2,10617,1795,7647,1483,857,1233 317 | 2,2,1479,14982,11924,662,3891,3508 318 | 1,2,7127,1375,2201,2679,83,1059 319 | 1,2,1182,3088,6114,978,821,1637 320 | 1,2,11800,2713,3558,2121,706,51 321 | 2,2,9759,25071,17645,1128,12408,1625 322 | 1,2,1774,3696,2280,514,275,834 323 | 1,2,9155,1897,5167,2714,228,1113 324 | 1,2,15881,713,3315,3703,1470,229 325 | 1,2,13360,944,11593,915,1679,573 326 | 1,2,25977,3587,2464,2369,140,1092 327 | 1,2,32717,16784,13626,60869,1272,5609 328 | 1,2,4414,1610,1431,3498,387,834 329 | 1,2,542,899,1664,414,88,522 330 | 1,2,16933,2209,3389,7849,210,1534 331 | 1,2,5113,1486,4583,5127,492,739 332 | 1,2,9790,1786,5109,3570,182,1043 333 | 2,2,11223,14881,26839,1234,9606,1102 334 | 1,2,22321,3216,1447,2208,178,2602 335 | 2,2,8565,4980,67298,131,38102,1215 336 | 2,2,16823,928,2743,11559,332,3486 337 | 2,2,27082,6817,10790,1365,4111,2139 
338 | 1,2,13970,1511,1330,650,146,778 339 | 1,2,9351,1347,2611,8170,442,868 340 | 1,2,3,333,7021,15601,15,550 341 | 1,2,2617,1188,5332,9584,573,1942 342 | 2,3,381,4025,9670,388,7271,1371 343 | 2,3,2320,5763,11238,767,5162,2158 344 | 1,3,255,5758,5923,349,4595,1328 345 | 2,3,1689,6964,26316,1456,15469,37 346 | 1,3,3043,1172,1763,2234,217,379 347 | 1,3,1198,2602,8335,402,3843,303 348 | 2,3,2771,6939,15541,2693,6600,1115 349 | 2,3,27380,7184,12311,2809,4621,1022 350 | 1,3,3428,2380,2028,1341,1184,665 351 | 2,3,5981,14641,20521,2005,12218,445 352 | 1,3,3521,1099,1997,1796,173,995 353 | 2,3,1210,10044,22294,1741,12638,3137 354 | 1,3,608,1106,1533,830,90,195 355 | 2,3,117,6264,21203,228,8682,1111 356 | 1,3,14039,7393,2548,6386,1333,2341 357 | 1,3,190,727,2012,245,184,127 358 | 1,3,22686,134,218,3157,9,548 359 | 2,3,37,1275,22272,137,6747,110 360 | 1,3,759,18664,1660,6114,536,4100 361 | 1,3,796,5878,2109,340,232,776 362 | 1,3,19746,2872,2006,2601,468,503 363 | 1,3,4734,607,864,1206,159,405 364 | 1,3,2121,1601,2453,560,179,712 365 | 1,3,4627,997,4438,191,1335,314 366 | 1,3,2615,873,1524,1103,514,468 367 | 2,3,4692,6128,8025,1619,4515,3105 368 | 1,3,9561,2217,1664,1173,222,447 369 | 1,3,3477,894,534,1457,252,342 370 | 1,3,22335,1196,2406,2046,101,558 371 | 1,3,6211,337,683,1089,41,296 372 | 2,3,39679,3944,4955,1364,523,2235 373 | 1,3,20105,1887,1939,8164,716,790 374 | 1,3,3884,3801,1641,876,397,4829 375 | 2,3,15076,6257,7398,1504,1916,3113 376 | 1,3,6338,2256,1668,1492,311,686 377 | 1,3,5841,1450,1162,597,476,70 378 | 2,3,3136,8630,13586,5641,4666,1426 379 | 1,3,38793,3154,2648,1034,96,1242 380 | 1,3,3225,3294,1902,282,68,1114 381 | 2,3,4048,5164,10391,130,813,179 382 | 1,3,28257,944,2146,3881,600,270 383 | 1,3,17770,4591,1617,9927,246,532 384 | 1,3,34454,7435,8469,2540,1711,2893 385 | 1,3,1821,1364,3450,4006,397,361 386 | 1,3,10683,21858,15400,3635,282,5120 387 | 1,3,11635,922,1614,2583,192,1068 388 | 1,3,1206,3620,2857,1945,353,967 389 | 1,3,20918,1916,1573,1960,231,961 390 | 1,3,9785,848,1172,1677,200,406 391 | 1,3,9385,1530,1422,3019,227,684 392 | 1,3,3352,1181,1328,5502,311,1000 393 | 1,3,2647,2761,2313,907,95,1827 394 | 1,3,518,4180,3600,659,122,654 395 | 1,3,23632,6730,3842,8620,385,819 396 | 1,3,12377,865,3204,1398,149,452 397 | 1,3,9602,1316,1263,2921,841,290 398 | 2,3,4515,11991,9345,2644,3378,2213 399 | 1,3,11535,1666,1428,6838,64,743 400 | 1,3,11442,1032,582,5390,74,247 401 | 1,3,9612,577,935,1601,469,375 402 | 1,3,4446,906,1238,3576,153,1014 403 | 1,3,27167,2801,2128,13223,92,1902 404 | 1,3,26539,4753,5091,220,10,340 405 | 1,3,25606,11006,4604,127,632,288 406 | 1,3,18073,4613,3444,4324,914,715 407 | 1,3,6884,1046,1167,2069,593,378 408 | 1,3,25066,5010,5026,9806,1092,960 409 | 2,3,7362,12844,18683,2854,7883,553 410 | 2,3,8257,3880,6407,1646,2730,344 411 | 1,3,8708,3634,6100,2349,2123,5137 412 | 1,3,6633,2096,4563,1389,1860,1892 413 | 1,3,2126,3289,3281,1535,235,4365 414 | 1,3,97,3605,12400,98,2970,62 415 | 1,3,4983,4859,6633,17866,912,2435 416 | 1,3,5969,1990,3417,5679,1135,290 417 | 2,3,7842,6046,8552,1691,3540,1874 418 | 2,3,4389,10940,10908,848,6728,993 419 | 1,3,5065,5499,11055,364,3485,1063 420 | 2,3,660,8494,18622,133,6740,776 421 | 1,3,8861,3783,2223,633,1580,1521 422 | 1,3,4456,5266,13227,25,6818,1393 423 | 2,3,17063,4847,9053,1031,3415,1784 424 | 1,3,26400,1377,4172,830,948,1218 425 | 2,3,17565,3686,4657,1059,1803,668 426 | 2,3,16980,2884,12232,874,3213,249 427 | 1,3,11243,2408,2593,15348,108,1886 428 | 1,3,13134,9347,14316,3141,5079,1894 429 | 
1,3,31012,16687,5429,15082,439,1163 430 | 1,3,3047,5970,4910,2198,850,317 431 | 1,3,8607,1750,3580,47,84,2501 432 | 1,3,3097,4230,16483,575,241,2080 433 | 1,3,8533,5506,5160,13486,1377,1498 434 | 1,3,21117,1162,4754,269,1328,395 435 | 1,3,1982,3218,1493,1541,356,1449 436 | 1,3,16731,3922,7994,688,2371,838 437 | 1,3,29703,12051,16027,13135,182,2204 438 | 1,3,39228,1431,764,4510,93,2346 439 | 2,3,14531,15488,30243,437,14841,1867 440 | 1,3,10290,1981,2232,1038,168,2125 441 | 1,3,2787,1698,2510,65,477,52 442 | -------------------------------------------------------------------------------- /projects/creating_customer_segments/debug.log: -------------------------------------------------------------------------------- 1 | [0308/005334.831:ERROR:crash_report_database_win.cc(426)] unexpected header 2 | -------------------------------------------------------------------------------- /projects/creating_customer_segments/pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/creating_customer_segments/pca.png -------------------------------------------------------------------------------- /projects/creating_customer_segments/renders.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.cm as cm 3 | import pandas as pd 4 | import numpy as np 5 | from sklearn.decomposition import pca 6 | 7 | def pca_results(good_data, pca): 8 | ''' 9 | Create a DataFrame of the PCA results 10 | Includes dimension feature weights and explained variance 11 | Visualizes the PCA results 12 | ''' 13 | 14 | # Dimension indexing 15 | dimensions = dimensions = ['Dimension {}'.format(i) for i in range(1,len(pca.components_)+1)] 16 | 17 | # PCA components 18 | components = pd.DataFrame(np.round(pca.components_, 4), columns = good_data.keys()) 19 | components.index = dimensions 20 | 21 | # PCA explained variance 22 | ratios = pca.explained_variance_ratio_.reshape(len(pca.components_), 1) 23 | variance_ratios = pd.DataFrame(np.round(ratios, 4), columns = ['Explained Variance']) 24 | variance_ratios.index = dimensions 25 | 26 | # Create a bar plot visualization 27 | fig, ax = plt.subplots(figsize = (14,8)) 28 | 29 | # Plot the feature weights as a function of the components 30 | components.plot(ax = ax, kind = 'bar'); 31 | ax.set_ylabel("Feature Weights") 32 | ax.set_xticklabels(dimensions, rotation=0) 33 | 34 | 35 | # Display the explained variance ratios 36 | for i, ev in enumerate(pca.explained_variance_ratio_): 37 | ax.text(i-0.40, ax.get_ylim()[1] + 0.05, "Explained Variance\n %.4f"%(ev)) 38 | 39 | # Return a concatenated DataFrame 40 | return pd.concat([variance_ratios, components], axis = 1) 41 | 42 | def cluster_results(reduced_data, preds, centers, pca_samples): 43 | ''' 44 | Visualizes the PCA-reduced cluster data in two dimensions 45 | Adds cues for cluster centers and student-selected sample data 46 | ''' 47 | 48 | predictions = pd.DataFrame(preds, columns = ['Cluster']) 49 | plot_data = pd.concat([predictions, reduced_data], axis = 1) 50 | 51 | # Generate the cluster plot 52 | fig, ax = plt.subplots(figsize = (14,8)) 53 | 54 | # Color map 55 | cmap = cm.get_cmap('gist_rainbow') 56 | 57 | # Color the points based on assigned cluster 58 | for i, cluster in plot_data.groupby('Cluster'): 59 | cluster.plot(ax = ax, kind = 'scatter', x = 'Dimension 1', y = 
'Dimension 2', \ 60 | color = cmap((i)*1.0/(len(centers)-1)), label = 'Cluster %i'%(i), s=30); 61 | 62 | # Plot centers with indicators 63 | for i, c in enumerate(centers): 64 | ax.scatter(x = c[0], y = c[1], color = 'white', edgecolors = 'black', \ 65 | alpha = 1, linewidth = 2, marker = 'o', s=200); 66 | ax.scatter(x = c[0], y = c[1], marker='$%d$'%(i), alpha = 1, s=100); 67 | 68 | # Plot transformed sample points 69 | ax.scatter(x = pca_samples[:,0], y = pca_samples[:,1], \ 70 | s = 150, linewidth = 4, color = 'black', marker = 'x'); 71 | 72 | # Set plot title 73 | ax.set_title("Cluster Learning on PCA-Reduced Data - Centroids Marked by Number\nTransformed Sample Data Marked by Black Cross"); 74 | 75 | 76 | def channel_results(reduced_data, outliers, pca_samples): 77 | ''' 78 | Visualizes the PCA-reduced cluster data in two dimensions using the full dataset 79 | Data is labeled by "Channel" and cues added for student-selected sample data 80 | ''' 81 | 82 | # Check that the dataset is loadable 83 | try: 84 | full_data = pd.read_csv("customers.csv") 85 | except: 86 | print "Dataset could not be loaded. Is the file missing?" 87 | return False 88 | 89 | # Create the Channel DataFrame 90 | channel = pd.DataFrame(full_data['Channel'], columns = ['Channel']) 91 | channel = channel.drop(channel.index[outliers]).reset_index(drop = True) 92 | labeled = pd.concat([reduced_data, channel], axis = 1) 93 | 94 | # Generate the cluster plot 95 | fig, ax = plt.subplots(figsize = (14,8)) 96 | 97 | # Color map 98 | cmap = cm.get_cmap('gist_rainbow') 99 | 100 | # Color the points based on assigned Channel 101 | labels = ['Hotel/Restaurant/Cafe', 'Retailer'] 102 | grouped = labeled.groupby('Channel') 103 | for i, channel in grouped: 104 | channel.plot(ax = ax, kind = 'scatter', x = 'Dimension 1', y = 'Dimension 2', \ 105 | color = cmap((i-1)*1.0/2), label = labels[i-1], s=30); 106 | 107 | # Plot transformed sample points 108 | for i, sample in enumerate(pca_samples): 109 | ax.scatter(x = sample[0], y = sample[1], \ 110 | s = 200, linewidth = 3, color = 'black', marker = 'o', facecolors = 'none'); 111 | ax.scatter(x = sample[0]+0.25, y = sample[1]+0.3, marker='$%d$'%(i), alpha = 1, s=125); 112 | 113 | # Set plot title 114 | ax.set_title("PCA-Reduced Data Labeled by 'Channel'\nTransformed Sample Data Circled"); -------------------------------------------------------------------------------- /projects/creating_customer_segments/renders.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/creating_customer_segments/renders.pyc -------------------------------------------------------------------------------- /projects/smartcab/Project 4 final report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/Project 4 final report.pdf -------------------------------------------------------------------------------- /projects/smartcab/Q_learning_new_state_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/Q_learning_new_state_space.png 
-------------------------------------------------------------------------------- /projects/smartcab/README.md: -------------------------------------------------------------------------------- 1 | # Project 4: Reinforcement Learning 2 | ## Train a Smartcab How to Drive 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** with the [pygame](https://www.pygame.org/wiki/GettingStarted) library installed. 7 | 8 | ### Overview 9 | 10 | ![](smartcabpic.jpg) 11 | 12 | 13 | 14 | The smartcab operates in a grid-like city with other agents. In each trial the smartcab is assigned a goal end point and moves toward it from a given random start point. I've implemented the Q-learning algorithm to teach the smartcab how to reach the goal state while avoiding other agents and penalties. A penalty is given when the smartcab violates a traffic law or collides with another agent. The final Q-learning model with tuned hyper-parameters achieves 99% accuracy. 15 | 16 | ![](Q_learning_new_state_space.png) 17 | 18 | ### Code 19 | 20 | Template code is provided in the `smartcab/agent.py` Python file. Additional supporting Python code can be found in `smartcab/environment.py`, `smartcab/planner.py`, and `smartcab/simulator.py`. Supporting images for the graphical user interface can be found in the `images` folder. 21 | 22 | ### Run 23 | 24 | In a terminal or command window, navigate to the top-level project directory `smartcab/` (that contains this README) and run one of the following commands: 25 | 26 | ```python smartcab/agent.py``` 27 | ```python -m smartcab.agent``` 28 | 29 | This will run the `agent.py` file and execute your agent code. 30 | -------------------------------------------------------------------------------- /projects/smartcab/images/car-black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-black.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-blue.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-cyan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-cyan.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-green.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-magenta.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-magenta.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-orange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-orange.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-red.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-red.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-white.png -------------------------------------------------------------------------------- /projects/smartcab/images/car-yellow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/images/car-yellow.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/.ipynb_checkpoints/Test-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import random" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "actions = ['left','right','forward',None]\n", 23 | "light = ['red','green']\n", 24 | "oncoming = ['left','right','forward',None]\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "Q = {}" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 44, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "random_key = (actions[0],light[0],oncoming[1])" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 45, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "if random_key not in Q:\n", 58 | " Q[random_key] = {action : random.randint(20,26) for action in actions}" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 46, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "{('left', 'green', 'forward'): {None: 20,\n", 72 | " 'forward': 20,\n", 73 | " 'left': 20,\n", 74 | " 'right': 20},\n", 75 | " ('left', 'red', 'forward'): {None: 20,\n", 76 | " 'forward': 23,\n", 77 | " 'left': 21,\n", 78 | " 'right': 24},\n", 79 | " ('left', 'red', 
'right'): {None: 26, 'forward': 21, 'left': 21, 'right': 26}}" 80 | ] 81 | }, 82 | "execution_count": 46, 83 | "metadata": {}, 84 | "output_type": "execute_result" 85 | } 86 | ], 87 | "source": [ 88 | "Q" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 47, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "action = {a:v for a,v in Q[random_key].items() if v==max(Q[random_key].values())}" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 48, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "{None: 26, 'right': 26}" 113 | ] 114 | }, 115 | "execution_count": 48, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "action" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 2", 137 | "language": "python", 138 | "name": "python2" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 2 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython2", 150 | "version": "2.7.12" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 0 155 | } 156 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q-learning_over50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q-learning_over50.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q-learning_over_100.1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q-learning_over_100.1.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q-learning_over_100.2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q-learning_over_100.2.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q_learning_new_state_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q_learning_new_state_space.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Q_learning_with_epsilon_decay_final_parameter.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/Q_learning_with_epsilon_decay_final_parameter.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/Test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import random" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 3, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "actions = ['left','right','forward',None]\n", 23 | "light = ['red','green']\n", 24 | "oncoming = ['left','right','forward',None]\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "Q = {}" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 5, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "random_key = (actions[0],light[0],oncoming[1])" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "if random_key not in Q:\n", 58 | " Q[random_key] = {action : random.randint(20,26) for action in actions}" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 7, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [ 68 | { 69 | "data": { 70 | "text/plain": [ 71 | "{('left', 'red', 'right'): {None: 24, 'forward': 26, 'left': 22, 'right': 21}}" 72 | ] 73 | }, 74 | "execution_count": 7, 75 | "metadata": {}, 76 | "output_type": "execute_result" 77 | } 78 | ], 79 | "source": [ 80 | "Q" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 8, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "action = {a:v for a,v in Q[random_key].items() if v==max(Q[random_key].values())}" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 9, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "{'forward': 26}" 105 | ] 106 | }, 107 | "execution_count": 9, 108 | "metadata": {}, 109 | "output_type": "execute_result" 110 | } 111 | ], 112 | "source": [ 113 | "action" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 10, 119 | "metadata": { 120 | "collapsed": false 121 | }, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "'forward'" 127 | ] 128 | }, 129 | "execution_count": 10, 130 | "metadata": {}, 131 | "output_type": "execute_result" 132 | } 133 | ], 134 | "source": [ 135 | "random.choice(action.keys())" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [ 145 | { 146 | "name": "stdout", 147 | "output_type": "stream", 148 | "text": [ 149 | "Reporter.__init__(): Initialized with metrics: ['reward', 'flubber']\n", 150 | "Summary (2 metrics):-\n", 151 | "Name: reward, samples: 100, type: float64\n", 152 | "Mean: 0.465168053464, s.d.: 0.283774983872\n", 153 | "Name: flubber, samples: 10, type: float64\n", 154 | "Mean: 2.03614942375, s.d.: 
0.579376033361\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "%run analysis.py" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 12, 165 | "metadata": { 166 | "collapsed": false 167 | }, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "26" 173 | ] 174 | }, 175 | "execution_count": 12, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "max(Q[('left', 'red', 'right')].values())" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": null, 187 | "metadata": { 188 | "collapsed": true 189 | }, 190 | "outputs": [], 191 | "source": [] 192 | } 193 | ], 194 | "metadata": { 195 | "kernelspec": { 196 | "display_name": "Python 2", 197 | "language": "python", 198 | "name": "python2" 199 | }, 200 | "language_info": { 201 | "codemirror_mode": { 202 | "name": "ipython", 203 | "version": 2 204 | }, 205 | "file_extension": ".py", 206 | "mimetype": "text/x-python", 207 | "name": "python", 208 | "nbconvert_exporter": "python", 209 | "pygments_lexer": "ipython2", 210 | "version": "2.7.12" 211 | } 212 | }, 213 | "nbformat": 4, 214 | "nbformat_minor": 0 215 | } 216 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/__init__.py -------------------------------------------------------------------------------- /projects/smartcab/smartcab/agent.py: -------------------------------------------------------------------------------- 1 | import random 2 | from environment import Agent, Environment 3 | from planner import RoutePlanner 4 | from simulator import Simulator 5 | 6 | class LearningAgent(Agent): 7 | """An agent that learns to drive in the smartcab world.""" 8 | 9 | def __init__(self, env): 10 | super(LearningAgent, self).__init__(env) # sets self.env = env, state = None, next_waypoint = None, and a default color 11 | self.color = 'red' # override color 12 | self.planner = RoutePlanner(self.env, self) # simple route planner to get next_waypoint 13 | 14 | # TODO: Initialize any additional variables here 15 | self.correct_actions = ["forward","left","right",None] 16 | 17 | # add the previous state, action and reward variables for updating Q-values 18 | 19 | self.previous_state = None 20 | self.previous_action = None 21 | self.previous_reward = None 22 | 23 | # Add parameters for the Q-table 24 | 25 | self.gamma = 0.3 26 | self.alpha = 0.5 27 | self.epsilon = 0.1 28 | self.Q = {} 29 | self.default_Q_val = 0 30 | self.step_number = 0 31 | 32 | 33 | 34 | def reset(self, destination=None): 35 | self.planner.route_to(destination) 36 | # TODO: Prepare for a new trip; reset any variables here, if required 37 | 38 | self.previous_state = None 39 | self.previous_action = None 40 | self.previous_reward = None 41 | self.step_number+=1 42 | 43 | def update(self, t): 44 | # Gather inputs 45 | self.next_waypoint = self.planner.next_waypoint() # from route planner, also displayed by simulator 46 | inputs = self.env.sense(self) 47 | deadline = self.env.get_deadline(self) 48 | 49 | # TODO: Update state 50 | 51 | 52 | 53 | # self.state = (("light",inputs["light"]),("oncoming",inputs["oncoming"]),("waypoint",self.next_waypoint)) 54 | 55 | self.state = 
(("light",inputs["light"]),("oncoming",inputs["oncoming"]),("waypoint",self.next_waypoint),("left",inputs["left"]),("right",inputs["right"])) 56 | 57 | #1st Q. Implement a basic agent that chooses action randomly 58 | # action = random.choice(self.correct_actions) 59 | # what if the next_waypoint was chosen as the action? 60 | #action = self.next_waypoint 61 | 62 | 63 | # TO DO: Choose action on basis of Q-learning 64 | 65 | if self.state in self.Q: # if we have been into this state before 66 | if random.random() > self.epsilon/self.step_number: # epsilon should be a small number so that we use the learned values of Q-table most of the time 67 | # choose the action that has the max Q-value, can be greater than one if most of the actions has just been initialized 68 | potential_actions = {a:v for a,v in self.Q[self.state].items() if v==max(self.Q[self.state].values())} 69 | action = random.choice(potential_actions.keys()) 70 | else: 71 | action = random.choice(self.correct_actions) 72 | 73 | 74 | else: 75 | # if we have not been into this state before then initialize the Q-values for each valid action 76 | self.Q[self.state] = {action:self.default_Q_val for action in self.correct_actions} 77 | # then choose a random action from the set of valid action 78 | action = random.choice(self.correct_actions) 79 | 80 | 81 | 82 | 83 | # Execute action and get reward 84 | reward = self.env.act(self, action) 85 | 86 | # TODO: Learn policy based on state, action, reward/ Updating Q-table values 87 | 88 | 89 | 90 | if self.previous_state != None: 91 | # Complete the equation after rewatching the videos 92 | self.Q[self.previous_state][self.previous_action] = (1-self.alpha)*self.Q[self.previous_state][self.previous_action] + \ 93 | self.alpha*(self.previous_reward + self.gamma*max(self.Q[self.state].values())) 94 | 95 | 96 | 97 | 98 | self.previous_state = self.state 99 | self.previous_action = action 100 | self.previous_reward = reward 101 | 102 | 103 | 104 | 105 | print "LearningAgent.update(): deadline = {}, inputs = {}, action = {}, reward = {}".format(deadline, inputs, action, reward) #[debug] 106 | print "State = {}".format(self.state) 107 | 108 | def run(): 109 | """Run the agent for a finite number of trials.""" 110 | 111 | # Set up environment and agent 112 | e = Environment() # create environment (also adds some dummy traffic) 113 | a = e.create_agent(LearningAgent) # create agent 114 | e.set_primary_agent(a, enforce_deadline=True) # specify agent to track 115 | # NOTE: You can set enforce_deadline=False while debugging to allow longer trials 116 | 117 | # Now simulate it 118 | sim = Simulator(e, update_delay=0.1, display=False,live_plot = True) # create simulator (uses pygame when display=True, if available) 119 | # NOTE: To speed up simulation, reduce update_delay and/or set display=False 120 | 121 | sim.run(n_trials=100) # run for a specified number of trials 122 | # NOTE: To quit midway, press Esc or close pygame window, or hit Ctrl+C on the command-line 123 | 124 | 125 | if __name__ == '__main__': 126 | run() 127 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/analysis.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | 8 | class Metric(object): 9 | """Named sequence of x and y values, with optional plotting helpers.""" 10 | 11 | def __init__(self, 
name): 12 | self.name = name 13 | self.reset() 14 | 15 | def collect(self, x, y): 16 | self.xdata.append(x) 17 | self.ydata.append(y) 18 | 19 | def plot(self, ax): 20 | self.plot_obj, = ax.plot(self.xdata, self.ydata, 'o-', label=self.name) 21 | 22 | def refresh(self): 23 | self.plot_obj.set_data(self.xdata, self.ydata) 24 | 25 | def reset(self): 26 | self.xdata = [] 27 | self.ydata = [] 28 | 29 | 30 | class Reporter(object): 31 | """Collect metrics, analyze and report summary statistics.""" 32 | 33 | def __init__(self, metrics=[], live_plot=False): 34 | self.metrics = OrderedDict() 35 | self.live_plot = live_plot 36 | 37 | for name in metrics: 38 | self.metrics[name] = Metric(name) 39 | 40 | if self.live_plot: 41 | if not plt.isinteractive(): 42 | plt.ion() 43 | self.plot() 44 | 45 | print "Reporter.__init__(): Initialized with metrics: {}".format(metrics) # [debug] 46 | 47 | def collect(self, name, x, y): 48 | if not name in self.metrics: 49 | self.metrics[name] = Metric(name) 50 | if self.live_plot: 51 | self.metrics[name].plot(self.ax) 52 | self.ax.legend() # add new metric to legend 53 | print "Reporter.collect(): New metric added: {}".format(name) # [debug] 54 | self.metrics[name].collect(x, y) 55 | if self.live_plot: 56 | self.metrics[name].refresh() 57 | 58 | def plot(self): 59 | if not hasattr(self, 'fig') or not hasattr(self, 'ax'): 60 | self.fig, self.ax = plt.subplots() 61 | for name in self.metrics: 62 | self.metrics[name].plot(self.ax) 63 | #self.ax.set_autoscalex_on(True) 64 | #self.ax.set_autoscaley_on(True) 65 | self.ax.grid() 66 | self.ax.legend() 67 | else: 68 | for name in self.metrics: 69 | self.metrics[name].refresh() 70 | self.refresh_plot() 71 | 72 | def refresh_plot(self): 73 | self.ax.relim() 74 | self.ax.autoscale_view() 75 | self.fig.canvas.draw() 76 | self.fig.canvas.flush_events() 77 | plt.draw() 78 | 79 | def summary(self): 80 | return [pd.Series(metric.ydata, index=metric.xdata, name=name) for name, metric in self.metrics.iteritems()] 81 | 82 | 83 | def show_plot(self): 84 | if plt.isinteractive(): 85 | plt.ioff() 86 | self.plot() 87 | summary1 = self.summary() 88 | print "Summary ({} metrics):-".format(len(summary1)) 89 | for metric in summary1: 90 | print "Name: {}, samples: {}, type: {}".format(metric.name, len(metric), metric.dtype) 91 | if metric.name == "success": 92 | print "Total : {} success out of {} samples".format(sum(metric),len(metric)) 93 | else: 94 | print "Mean: {}, s.d.: {}".format(metric.mean(), metric.std()) 95 | #print metric[:5] # [debug] 96 | 97 | 98 | plt.show() 99 | 100 | 101 | def reset(self): 102 | for name in self.metrics: 103 | self.metrics[name].reset() 104 | if self.live_plot: 105 | self.metrics[name].refresh() 106 | 107 | 108 | def test_reporter(): 109 | plt.ion() 110 | rep = Reporter(metrics=['reward', 'flubber'], live_plot=True) 111 | for i in xrange(100): 112 | rep.collect('reward', i, np.random.random()) 113 | if i % 10 == 1: 114 | rep.collect('flubber', i, np.random.random() * 2 + 1) 115 | rep.refresh_plot() 116 | time.sleep(0.01) 117 | rep.plot() 118 | summary = rep.summary() 119 | print "Summary ({} metrics):-".format(len(summary)) 120 | for metric in summary: 121 | print "Name: {}, samples: {}, type: {}".format(metric.name, len(metric), metric.dtype) 122 | print "Mean: {}, s.d.: {}".format(metric.mean(), metric.std()) 123 | #print metric[:5] # [debug] 124 | plt.ioff() 125 | plt.show() 126 | 127 | 128 | if __name__ == '__main__': 129 | test_reporter() 130 | 
-------------------------------------------------------------------------------- /projects/smartcab/smartcab/analysis.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/analysis.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcab/environment.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | from collections import OrderedDict 4 | 5 | from simulator import Simulator 6 | 7 | class TrafficLight(object): 8 | """A traffic light that switches periodically.""" 9 | 10 | valid_states = [True, False] # True = NS open, False = EW open 11 | 12 | def __init__(self, state=None, period=None): 13 | self.state = state if state is not None else random.choice(self.valid_states) 14 | self.period = period if period is not None else random.choice([3, 4, 5]) 15 | self.last_updated = 0 16 | 17 | def reset(self): 18 | self.last_updated = 0 19 | 20 | def update(self, t): 21 | if t - self.last_updated >= self.period: 22 | self.state = not self.state # assuming state is boolean 23 | self.last_updated = t 24 | 25 | 26 | class Environment(object): 27 | """Environment within which all agents operate.""" 28 | 29 | valid_actions = [None, 'forward', 'left', 'right'] 30 | valid_inputs = {'light': TrafficLight.valid_states, 'oncoming': valid_actions, 'left': valid_actions, 'right': valid_actions} 31 | valid_headings = [(1, 0), (0, -1), (-1, 0), (0, 1)] # ENWS 32 | hard_time_limit = -100 # even if enforce_deadline is False, end trial when deadline reaches this value (to avoid deadlocks) 33 | 34 | def __init__(self, num_dummies=3): 35 | self.num_dummies = num_dummies # no. 
of dummy agents 36 | 37 | # Initialize simulation variables 38 | self.done = False 39 | self.t = 0 40 | self.agent_states = OrderedDict() 41 | self.status_text = "" 42 | 43 | 44 | 45 | 46 | # Road network 47 | self.grid_size = (8, 6) # (cols, rows) 48 | self.bounds = (1, 1, self.grid_size[0], self.grid_size[1]) 49 | self.block_size = 100 50 | self.intersections = OrderedDict() 51 | self.roads = [] 52 | for x in xrange(self.bounds[0], self.bounds[2] + 1): 53 | for y in xrange(self.bounds[1], self.bounds[3] + 1): 54 | self.intersections[(x, y)] = TrafficLight() # a traffic light at each intersection 55 | 56 | for a in self.intersections: 57 | for b in self.intersections: 58 | if a == b: 59 | continue 60 | if (abs(a[0] - b[0]) + abs(a[1] - b[1])) == 1: # L1 distance = 1 61 | self.roads.append((a, b)) 62 | 63 | # Dummy agents 64 | for i in xrange(self.num_dummies): 65 | self.create_agent(DummyAgent) 66 | 67 | # Primary agent and associated parameters 68 | self.primary_agent = None # to be set explicitly 69 | self.enforce_deadline = False 70 | 71 | # Step data (updated after each environment step) 72 | self.step_data = { 73 | 't': 0, 74 | 'deadline': 0, 75 | 'waypoint': None, 76 | 'inputs': None, 77 | 'action': None, 78 | 'reward': 0.0 79 | } 80 | 81 | # Trial data (updated at the end of each trial) 82 | self.trial_data = { 83 | 'net_reward': 0.0, # total reward earned in current trial 84 | 'final_deadline': None, # deadline value (time remaining) 85 | 'success': 0 # whether the agent reached the destination in time 86 | } 87 | 88 | def create_agent(self, agent_class, *args, **kwargs): 89 | agent = agent_class(self, *args, **kwargs) 90 | self.agent_states[agent] = {'location': random.choice(self.intersections.keys()), 'heading': (0, 1)} 91 | return agent 92 | 93 | def set_primary_agent(self, agent, enforce_deadline=False): 94 | self.primary_agent = agent 95 | self.enforce_deadline = enforce_deadline 96 | 97 | def reset(self): 98 | self.done = False 99 | self.t = 0 100 | self.success = 0 101 | 102 | # Reset traffic lights 103 | for traffic_light in self.intersections.itervalues(): 104 | traffic_light.reset() 105 | 106 | # Pick a start and a destination 107 | start = random.choice(self.intersections.keys()) 108 | destination = random.choice(self.intersections.keys()) 109 | 110 | # Ensure starting location and destination are not too close 111 | while self.compute_dist(start, destination) < 4: 112 | start = random.choice(self.intersections.keys()) 113 | destination = random.choice(self.intersections.keys()) 114 | 115 | start_heading = random.choice(self.valid_headings) 116 | deadline = self.compute_dist(start, destination) * 5 117 | print "Environment.reset(): Trial set up with start = {}, destination = {}, deadline = {}".format(start, destination, deadline) 118 | 119 | # Initialize agent(s) 120 | for agent in self.agent_states.iterkeys(): 121 | self.agent_states[agent] = { 122 | 'location': start if agent is self.primary_agent else random.choice(self.intersections.keys()), 123 | 'heading': start_heading if agent is self.primary_agent else random.choice(self.valid_headings), 124 | 'destination': destination if agent is self.primary_agent else None, 125 | 'deadline': deadline if agent is self.primary_agent else None} 126 | agent.reset(destination=(destination if agent is self.primary_agent else None)) 127 | if agent is self.primary_agent: 128 | # Reset metrics for this trial (step data will be set during the step) 129 | self.trial_data['net_reward'] = 0.0 130 | self.trial_data['final_deadline'] = 
deadline 131 | self.trial_data['success'] = 0 132 | 133 | def step(self): 134 | #print "Environment.step(): t = {}".format(self.t) # [debug] 135 | 136 | # Update traffic lights 137 | for intersection, traffic_light in self.intersections.iteritems(): 138 | traffic_light.update(self.t) 139 | 140 | # Update agents 141 | for agent in self.agent_states.iterkeys(): 142 | agent.update(self.t) 143 | 144 | if self.done: 145 | return # primary agent might have reached destination 146 | 147 | if self.primary_agent is not None: 148 | agent_deadline = self.agent_states[self.primary_agent]['deadline'] 149 | if agent_deadline <= self.hard_time_limit: 150 | self.done = True 151 | print "Environment.step(): Primary agent hit hard time limit ({})! Trial aborted.".format(self.hard_time_limit) 152 | elif self.enforce_deadline and agent_deadline <= 0: 153 | self.done = True 154 | print "Environment.step(): Primary agent ran out of time! Trial aborted." 155 | self.agent_states[self.primary_agent]['deadline'] = agent_deadline - 1 156 | 157 | self.t += 1 158 | 159 | def sense(self, agent): 160 | assert agent in self.agent_states, "Unknown agent!" 161 | 162 | state = self.agent_states[agent] 163 | location = state['location'] 164 | heading = state['heading'] 165 | light = 'green' if (self.intersections[location].state and heading[1] != 0) or ((not self.intersections[location].state) and heading[0] != 0) else 'red' 166 | 167 | # Populate oncoming, left, right 168 | oncoming = None 169 | left = None 170 | right = None 171 | for other_agent, other_state in self.agent_states.iteritems(): 172 | if agent == other_agent or location != other_state['location'] or (heading[0] == other_state['heading'][0] and heading[1] == other_state['heading'][1]): 173 | continue 174 | other_heading = other_agent.get_next_waypoint() 175 | if (heading[0] * other_state['heading'][0] + heading[1] * other_state['heading'][1]) == -1: 176 | if oncoming != 'left': # we don't want to override oncoming == 'left' 177 | oncoming = other_heading 178 | elif (heading[1] == other_state['heading'][0] and -heading[0] == other_state['heading'][1]): 179 | if right != 'forward' and right != 'left': # we don't want to override right == 'forward or 'left' 180 | right = other_heading 181 | else: 182 | if left != 'forward': # we don't want to override left == 'forward' 183 | left = other_heading 184 | 185 | return {'light': light, 'oncoming': oncoming, 'left': left, 'right': right} 186 | 187 | def get_deadline(self, agent): 188 | return self.agent_states[agent]['deadline'] if agent is self.primary_agent else None 189 | 190 | def act(self, agent, action): 191 | assert agent in self.agent_states, "Unknown agent!" 192 | assert action in self.valid_actions, "Invalid action!" 
193 | 194 | state = self.agent_states[agent] 195 | location = state['location'] 196 | heading = state['heading'] 197 | light = 'green' if (self.intersections[location].state and heading[1] != 0) or ((not self.intersections[location].state) and heading[0] != 0) else 'red' 198 | inputs = self.sense(agent) 199 | 200 | # Move agent if within bounds and obeys traffic rules 201 | reward = 0 # reward/penalty 202 | move_okay = True 203 | if action == 'forward': 204 | if light != 'green': 205 | move_okay = False 206 | elif action == 'left': 207 | if light == 'green' and (inputs['oncoming'] == None or inputs['oncoming'] == 'left'): 208 | heading = (heading[1], -heading[0]) 209 | else: 210 | move_okay = False 211 | elif action == 'right': 212 | if light == 'green' or (inputs['oncoming'] != 'left' and inputs['left'] != 'forward'): 213 | heading = (-heading[1], heading[0]) 214 | else: 215 | move_okay = False 216 | 217 | if move_okay: 218 | # Valid move (could be null) 219 | if action is not None: 220 | # Valid non-null move 221 | location = ((location[0] + heading[0] - self.bounds[0]) % (self.bounds[2] - self.bounds[0] + 1) + self.bounds[0], 222 | (location[1] + heading[1] - self.bounds[1]) % (self.bounds[3] - self.bounds[1] + 1) + self.bounds[1]) # wrap-around 223 | #if self.bounds[0] <= location[0] <= self.bounds[2] and self.bounds[1] <= location[1] <= self.bounds[3]: # bounded 224 | state['location'] = location 225 | state['heading'] = heading 226 | reward = 2.0 if action == agent.get_next_waypoint() else -0.5 # valid, but is it correct? (as per waypoint) 227 | else: 228 | # Valid null move 229 | reward = 0.0 230 | else: 231 | # Invalid move 232 | reward = -1.0 233 | 234 | if agent is self.primary_agent: 235 | if state['location'] == state['destination']: 236 | if state['deadline'] >= 0: 237 | reward += 10 # bonus 238 | self.trial_data['success'] = 1 239 | self.done = True 240 | print "Environment.act(): Primary agent has reached destination!" 
# [debug] 241 | 242 | self.status_text = "state: {}\naction: {}\nreward: {}".format(agent.get_state(), action, reward) 243 | #print "Environment.act() [POST]: location: {}, heading: {}, action: {}, reward: {}".format(location, heading, action, reward) # [debug] 244 | 245 | # Update metrics 246 | self.step_data['t'] = self.t 247 | self.trial_data['final_deadline'] = self.step_data['deadline'] = state['deadline'] 248 | self.step_data['waypoint'] = agent.get_next_waypoint() 249 | self.step_data['inputs'] = inputs 250 | self.step_data['action'] = action 251 | self.step_data['reward'] = reward 252 | self.trial_data['net_reward'] += reward 253 | print "Environment.act(): Step data: {}".format(self.step_data) # [debug] 254 | 255 | return reward 256 | 257 | def compute_dist(self, a, b): 258 | """L1 distance between two points.""" 259 | return abs(b[0] - a[0]) + abs(b[1] - a[1]) 260 | 261 | def stats_trial(): 262 | return self.success 263 | 264 | 265 | class Agent(object): 266 | """Base class for all agents.""" 267 | 268 | def __init__(self, env): 269 | self.env = env 270 | self.state = None 271 | self.next_waypoint = None 272 | self.color = 'cyan' 273 | 274 | def reset(self, destination=None): 275 | pass 276 | 277 | def update(self, t): 278 | pass 279 | 280 | def get_state(self): 281 | return self.state 282 | 283 | def get_next_waypoint(self): 284 | return self.next_waypoint 285 | 286 | 287 | class DummyAgent(Agent): 288 | color_choices = ['blue', 'cyan', 'magenta', 'orange'] 289 | 290 | def __init__(self, env): 291 | super(DummyAgent, self).__init__(env) # sets self.env = env, state = None, next_waypoint = None, and a default color 292 | self.next_waypoint = random.choice(Environment.valid_actions[1:]) 293 | self.color = random.choice(self.color_choices) 294 | 295 | def update(self, t): 296 | inputs = self.env.sense(self) 297 | 298 | action_okay = True 299 | if self.next_waypoint == 'right': 300 | if inputs['light'] == 'red' and inputs['left'] == 'forward': 301 | action_okay = False 302 | elif self.next_waypoint == 'forward': 303 | if inputs['light'] == 'red': 304 | action_okay = False 305 | elif self.next_waypoint == 'left': 306 | if inputs['light'] == 'red' or (inputs['oncoming'] == 'forward' or inputs['oncoming'] == 'right'): 307 | action_okay = False 308 | 309 | action = None 310 | if action_okay: 311 | action = self.next_waypoint 312 | self.next_waypoint = random.choice(Environment.valid_actions[1:]) 313 | reward = self.env.act(self, action) 314 | #print "DummyAgent.update(): t = {}, inputs = {}, action = {}, reward = {}".format(t, inputs, action, reward) # [debug] 315 | #print "DummyAgent.update(): next_waypoint = {}".format(self.next_waypoint) # [debug] 316 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/environment.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/environment.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcab/planner.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class RoutePlanner(object): 4 | """Silly route planner that is meant for a perpendicular grid network.""" 5 | 6 | def __init__(self, env, agent): 7 | self.env = env 8 | self.agent = agent 9 | self.destination = None 10 | 11 | def 
route_to(self, destination=None): 12 | self.destination = destination if destination is not None else random.choice(self.env.intersections.keys()) 13 | print "RoutePlanner.route_to(): destination = {}".format(destination) # [debug] 14 | 15 | def next_waypoint(self): 16 | location = self.env.agent_states[self.agent]['location'] 17 | heading = self.env.agent_states[self.agent]['heading'] 18 | delta = (self.destination[0] - location[0], self.destination[1] - location[1]) 19 | if delta[0] == 0 and delta[1] == 0: 20 | return None 21 | elif delta[0] != 0: # EW difference 22 | if delta[0] * heading[0] > 0: # facing correct EW direction 23 | return 'forward' 24 | elif delta[0] * heading[0] < 0: # facing opposite EW direction 25 | return 'right' # long U-turn 26 | elif delta[0] * heading[1] > 0: 27 | return 'left' 28 | else: 29 | return 'right' 30 | elif delta[1] != 0: # NS difference (turn logic is slightly different) 31 | if delta[1] * heading[1] > 0: # facing correct NS direction 32 | return 'forward' 33 | elif delta[1] * heading[1] < 0: # facing opposite NS direction 34 | return 'right' # long U-turn 35 | elif delta[1] * heading[0] > 0: 36 | return 'right' 37 | else: 38 | return 'left' 39 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/planner.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/planner.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcab/random_trial_50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/random_trial_50.png -------------------------------------------------------------------------------- /projects/smartcab/smartcab/simulator.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import random 4 | import importlib 5 | 6 | import numpy as np 7 | 8 | from analysis import Reporter 9 | 10 | class Simulator(object): 11 | """Simulates agents in a dynamic smartcab environment. 12 | 13 | Uses PyGame to display GUI, if available. 
14 | """ 15 | 16 | colors = { 17 | 'black' : ( 0, 0, 0), 18 | 'white' : (255, 255, 255), 19 | 'red' : (255, 0, 0), 20 | 'green' : ( 0, 255, 0), 21 | 'blue' : ( 0, 0, 255), 22 | 'cyan' : ( 0, 200, 200), 23 | 'magenta' : (200, 0, 200), 24 | 'yellow' : (255, 255, 0), 25 | 'orange' : (255, 128, 0) 26 | } 27 | 28 | def __init__(self, env, size=None, update_delay=1.0, display=True, live_plot=False): 29 | self.env = env 30 | self.size = size if size is not None else ((self.env.grid_size[0] + 1) * self.env.block_size, (self.env.grid_size[1] + 1) * self.env.block_size) 31 | self.width, self.height = self.size 32 | 33 | self.bg_color = self.colors['white'] 34 | self.road_width = 5 35 | self.road_color = self.colors['black'] 36 | 37 | self.quit = False 38 | self.start_time = None 39 | self.current_time = 0.0 40 | self.last_updated = 0.0 41 | self.update_delay = update_delay # duration between each step (in secs) 42 | 43 | self.display = display 44 | if self.display: 45 | try: 46 | self.pygame = importlib.import_module('pygame') 47 | self.pygame.init() 48 | self.screen = self.pygame.display.set_mode(self.size) 49 | 50 | self.frame_delay = max(1, int(self.update_delay * 1000)) # delay between GUI frames in ms (min: 1) 51 | self.agent_sprite_size = (32, 32) 52 | self.agent_circle_radius = 10 # radius of circle, when using simple representation 53 | for agent in self.env.agent_states: 54 | agent._sprite = self.pygame.transform.smoothscale(self.pygame.image.load(os.path.join("../images", "car-{}.png".format(agent.color))), self.agent_sprite_size) 55 | agent._sprite_size = (agent._sprite.get_width(), agent._sprite.get_height()) 56 | 57 | self.font = self.pygame.font.Font(None, 28) 58 | self.paused = False 59 | except ImportError as e: 60 | self.display = False 61 | print "Simulator.__init__(): Unable to import pygame; display disabled.\n{}: {}".format(e.__class__.__name__, e) 62 | except Exception as e: 63 | self.display = False 64 | print "Simulator.__init__(): Error initializing GUI objects; display disabled.\n{}: {}".format(e.__class__.__name__, e) 65 | 66 | # Setup metrics to report 67 | self.live_plot = live_plot 68 | self.rep = Reporter(metrics=['net_reward', 'avg_net_reward', 'final_deadline', 'success'], live_plot=self.live_plot) 69 | self.avg_net_reward_window = 10 70 | 71 | 72 | def run(self, n_trials=1): 73 | self.quit = False 74 | self.rep.reset() 75 | for trial in xrange(n_trials): 76 | print "Simulator.run(): Trial {}".format(trial) # [debug] 77 | self.env.reset() 78 | self.current_time = 0.0 79 | self.last_updated = 0.0 80 | self.start_time = time.time() 81 | while True: 82 | try: 83 | # Update current time 84 | self.current_time = time.time() - self.start_time 85 | #print "Simulator.run(): current_time = {:.3f}".format(self.current_time) 86 | 87 | # Handle GUI events 88 | if self.display: 89 | for event in self.pygame.event.get(): 90 | if event.type == self.pygame.QUIT: 91 | self.quit = True 92 | elif event.type == self.pygame.KEYDOWN: 93 | if event.key == 27: # Esc 94 | self.quit = True 95 | elif event.unicode == u' ': 96 | self.paused = True 97 | 98 | if self.paused: 99 | self.pause() 100 | 101 | # Update environment 102 | if self.current_time - self.last_updated >= self.update_delay: 103 | self.env.step() 104 | # TODO: Log step data 105 | self.last_updated = self.current_time 106 | 107 | # Render GUI and sleep 108 | if self.display: 109 | self.render() 110 | self.pygame.time.wait(self.frame_delay) 111 | except KeyboardInterrupt: 112 | self.quit = True 113 | finally: 114 | if self.quit or 
self.env.done: 115 | break 116 | 117 | if self.quit: 118 | break 119 | 120 | # Collect/update metrics 121 | self.rep.collect('net_reward', trial, self.env.trial_data['net_reward']) # total reward obtained in this trial 122 | self.rep.collect('avg_net_reward', trial, np.mean(self.rep.metrics['net_reward'].ydata[-self.avg_net_reward_window:])) # rolling mean of reward 123 | self.rep.collect('final_deadline', trial, self.env.trial_data['final_deadline']) # final deadline value (time remaining) 124 | self.rep.collect('success', trial, self.env.trial_data['success']) 125 | if self.live_plot: 126 | self.rep.refresh_plot() # autoscales axes, draws stuff and flushes events 127 | 128 | 129 | 130 | 131 | # Report final metrics 132 | if self.display: 133 | self.pygame.display.quit() # need to shutdown pygame before showing metrics plot 134 | # TODO: Figure out why having both game and plot displays makes things crash! 135 | 136 | if self.live_plot: 137 | self.rep.show_plot() # holds till user closes plot window 138 | 139 | 140 | 141 | def render(self): 142 | # Clear screen 143 | self.screen.fill(self.bg_color) 144 | 145 | # Draw elements 146 | # * Static elements 147 | for road in self.env.roads: 148 | self.pygame.draw.line(self.screen, self.road_color, (road[0][0] * self.env.block_size, road[0][1] * self.env.block_size), (road[1][0] * self.env.block_size, road[1][1] * self.env.block_size), self.road_width) 149 | 150 | for intersection, traffic_light in self.env.intersections.iteritems(): 151 | self.pygame.draw.circle(self.screen, self.road_color, (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size), 10) 152 | if traffic_light.state: # North-South is open 153 | self.pygame.draw.line(self.screen, self.colors['green'], 154 | (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size - 15), 155 | (intersection[0] * self.env.block_size, intersection[1] * self.env.block_size + 15), self.road_width) 156 | else: # East-West is open 157 | self.pygame.draw.line(self.screen, self.colors['green'], 158 | (intersection[0] * self.env.block_size - 15, intersection[1] * self.env.block_size), 159 | (intersection[0] * self.env.block_size + 15, intersection[1] * self.env.block_size), self.road_width) 160 | 161 | # * Dynamic elements 162 | for agent, state in self.env.agent_states.iteritems(): 163 | # Compute precise agent location here (back from the intersection some) 164 | agent_offset = (2 * state['heading'][0] * self.agent_circle_radius, 2 * state['heading'][1] * self.agent_circle_radius) 165 | agent_pos = (state['location'][0] * self.env.block_size - agent_offset[0], state['location'][1] * self.env.block_size - agent_offset[1]) 166 | agent_color = self.colors[agent.color] 167 | if hasattr(agent, '_sprite') and agent._sprite is not None: 168 | # Draw agent sprite (image), properly rotated 169 | rotated_sprite = agent._sprite if state['heading'] == (1, 0) else self.pygame.transform.rotate(agent._sprite, 180 if state['heading'][0] == -1 else state['heading'][1] * -90) 170 | self.screen.blit(rotated_sprite, 171 | self.pygame.rect.Rect(agent_pos[0] - agent._sprite_size[0] / 2, agent_pos[1] - agent._sprite_size[1] / 2, 172 | agent._sprite_size[0], agent._sprite_size[1])) 173 | else: 174 | # Draw simple agent (circle with a short line segment poking out to indicate heading) 175 | self.pygame.draw.circle(self.screen, agent_color, agent_pos, self.agent_circle_radius) 176 | self.pygame.draw.line(self.screen, agent_color, agent_pos, state['location'], self.road_width) 177 | if 
agent.get_next_waypoint() is not None: 178 | self.screen.blit(self.font.render(agent.get_next_waypoint(), True, agent_color, self.bg_color), (agent_pos[0] + 10, agent_pos[1] + 10)) 179 | if state['destination'] is not None: 180 | self.pygame.draw.circle(self.screen, agent_color, (state['destination'][0] * self.env.block_size, state['destination'][1] * self.env.block_size), 6) 181 | self.pygame.draw.circle(self.screen, agent_color, (state['destination'][0] * self.env.block_size, state['destination'][1] * self.env.block_size), 15, 2) 182 | 183 | # * Overlays 184 | text_y = 10 185 | for text in self.env.status_text.split('\n'): 186 | self.screen.blit(self.font.render(text, True, self.colors['red'], self.bg_color), (100, text_y)) 187 | text_y += 20 188 | 189 | # Flip buffers 190 | self.pygame.display.flip() 191 | 192 | def pause(self): 193 | abs_pause_time = time.time() 194 | pause_text = "[PAUSED] Press any key to continue..." 195 | self.screen.blit(self.font.render(pause_text, True, self.colors['cyan'], self.bg_color), (100, self.height - 40)) 196 | self.pygame.display.flip() 197 | print pause_text # [debug] 198 | while self.paused: 199 | for event in self.pygame.event.get(): 200 | if event.type == self.pygame.KEYDOWN: 201 | self.paused = False 202 | self.pygame.time.wait(self.frame_delay) 203 | self.screen.blit(self.font.render(pause_text, True, self.bg_color, self.bg_color), (100, self.height - 40)) 204 | self.start_time += (time.time() - abs_pause_time) 205 | -------------------------------------------------------------------------------- /projects/smartcab/smartcab/simulator.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcab/simulator.pyc -------------------------------------------------------------------------------- /projects/smartcab/smartcabpic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/smartcab/smartcabpic.jpg -------------------------------------------------------------------------------- /projects/student_intervention/README.md: -------------------------------------------------------------------------------- 1 | # Project 2: Supervised Learning 2 | ## Building a Student Intervention System 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** and the following Python libraries installed: 7 | 8 | - [NumPy](http://www.numpy.org/) 9 | - [Pandas](http://pandas.pydata.org) 10 | - [scikit-learn](http://scikit-learn.org/stable/) 11 | 12 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 13 | 14 | Udacity recommends our students install [Anaconda](https://www.continuum.io/downloads), a pre-packaged Python distribution that contains all of the necessary libraries and software for this project. 15 | 16 | ### Overview 17 | 18 | This project uses supervised machine learning techniques to predict which students should be flagged for intervention as high risk students and compares different classifiers. 19 | 20 | ![](table.png) 21 | 22 | ### Code 23 | 24 | Template code is provided in the notebook `student_intervention.ipynb` notebook file. 
While some code has already been implemented to get you started, you will need to implement additional functionality when requested to successfully complete the project. 25 | 26 | ### Run 27 | 28 | In a terminal or command window, navigate to the top-level project directory `student_intervention/` (that contains this README) and run one of the following commands: 29 | 30 | ```ipython notebook student_intervention.ipynb``` 31 | ```jupyter notebook student_intervention.ipynb``` 32 | 33 | This will open the iPython Notebook software and project file in your browser. 34 | 35 | ## Data 36 | 37 | The dataset used in this project is included as `student-data.csv`. This dataset has the following attributes: 38 | 39 | - `school` : student's school (binary: "GP" or "MS") 40 | - `sex` : student's sex (binary: "F" - female or "M" - male) 41 | - `age` : student's age (numeric: from 15 to 22) 42 | - `address` : student's home address type (binary: "U" - urban or "R" - rural) 43 | - `famsize` : family size (binary: "LE3" - less or equal to 3 or "GT3" - greater than 3) 44 | - `Pstatus` : parent's cohabitation status (binary: "T" - living together or "A" - apart) 45 | - `Medu` : mother's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education) 46 | - `Fedu` : father's education (numeric: 0 - none, 1 - primary education (4th grade), 2 - 5th to 9th grade, 3 - secondary education or 4 - higher education) 47 | - `Mjob` : mother's job (nominal: "teacher", "health" care related, civil "services" (e.g. administrative or police), "at_home" or "other") 48 | - `Fjob` : father's job (nominal: "teacher", "health" care related, civil "services" (e.g. administrative or police), "at_home" or "other") 49 | - `reason` : reason to choose this school (nominal: close to "home", school "reputation", "course" preference or "other") 50 | - `guardian` : student's guardian (nominal: "mother", "father" or "other") 51 | - `traveltime` : home to school travel time (numeric: 1 - <15 min., 2 - 15 to 30 min., 3 - 30 min. 
to 1 hour, or 4 - >1 hour) 52 | - `studytime` : weekly study time (numeric: 1 - <2 hours, 2 - 2 to 5 hours, 3 - 5 to 10 hours, or 4 - >10 hours) 53 | - `failures` : number of past class failures (numeric: n if 1<=n<3, else 4) 54 | - `schoolsup` : extra educational support (binary: yes or no) 55 | - `famsup` : family educational support (binary: yes or no) 56 | - `paid` : extra paid classes within the course subject (Math or Portuguese) (binary: yes or no) 57 | - `activities` : extra-curricular activities (binary: yes or no) 58 | - `nursery` : attended nursery school (binary: yes or no) 59 | - `higher` : wants to take higher education (binary: yes or no) 60 | - `internet` : Internet access at home (binary: yes or no) 61 | - `romantic` : with a romantic relationship (binary: yes or no) 62 | - `famrel` : quality of family relationships (numeric: from 1 - very bad to 5 - excellent) 63 | - `freetime` : free time after school (numeric: from 1 - very low to 5 - very high) 64 | - `goout` : going out with friends (numeric: from 1 - very low to 5 - very high) 65 | - `Dalc` : workday alcohol consumption (numeric: from 1 - very low to 5 - very high) 66 | - `Walc` : weekend alcohol consumption (numeric: from 1 - very low to 5 - very high) 67 | - `health` : current health status (numeric: from 1 - very bad to 5 - very good) 68 | - `absences` : number of school absences (numeric: from 0 to 93) 69 | - `passed` : did the student pass the final exam (binary: yes or no) 70 | -------------------------------------------------------------------------------- /projects/student_intervention/data2d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/data2d.png -------------------------------------------------------------------------------- /projects/student_intervention/data_2d_to_3d_hyperplane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/data_2d_to_3d_hyperplane.png -------------------------------------------------------------------------------- /projects/student_intervention/data_in_R3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/data_in_R3.png -------------------------------------------------------------------------------- /projects/student_intervention/debug.log: -------------------------------------------------------------------------------- 1 | [0308/005733.265:ERROR:crash_report_database_win.cc(426)] unexpected header 2 | -------------------------------------------------------------------------------- /projects/student_intervention/student-data.csv: -------------------------------------------------------------------------------- 1 | school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,passed 2 | GP,F,18,U,GT3,A,4,4,at_home,teacher,course,mother,2,2,0,yes,no,no,no,yes,yes,no,no,4,3,4,1,1,3,6,no 3 | 
GP,F,17,U,GT3,T,1,1,at_home,other,course,father,1,2,0,no,yes,no,no,no,yes,yes,no,5,3,3,1,1,3,4,no 4 | GP,F,15,U,LE3,T,1,1,at_home,other,other,mother,1,2,3,yes,no,yes,no,yes,yes,yes,no,4,3,2,2,3,3,10,yes 5 | GP,F,15,U,GT3,T,4,2,health,services,home,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,3,2,2,1,1,5,2,yes 6 | GP,F,16,U,GT3,T,3,3,other,other,home,father,1,2,0,no,yes,yes,no,yes,yes,no,no,4,3,2,1,2,5,4,yes 7 | GP,M,16,U,LE3,T,4,3,services,other,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,4,2,1,2,5,10,yes 8 | GP,M,16,U,LE3,T,2,2,other,other,home,mother,1,2,0,no,no,no,no,yes,yes,yes,no,4,4,4,1,1,3,0,yes 9 | GP,F,17,U,GT3,A,4,4,other,teacher,home,mother,2,2,0,yes,yes,no,no,yes,yes,no,no,4,1,4,1,1,1,6,no 10 | GP,M,15,U,LE3,A,3,2,services,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,2,2,1,1,1,0,yes 11 | GP,M,15,U,GT3,T,3,4,other,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,5,1,1,1,5,0,yes 12 | GP,F,15,U,GT3,T,4,4,teacher,health,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,3,3,3,1,2,2,0,no 13 | GP,F,15,U,GT3,T,2,1,services,other,reputation,father,3,3,0,no,yes,no,yes,yes,yes,yes,no,5,2,2,1,1,4,4,yes 14 | GP,M,15,U,LE3,T,4,4,health,services,course,father,1,1,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,3,5,2,yes 15 | GP,M,15,U,GT3,T,4,3,teacher,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,5,4,3,1,2,3,2,yes 16 | GP,M,15,U,GT3,A,2,2,other,other,home,other,1,3,0,no,yes,no,no,yes,yes,yes,yes,4,5,2,1,1,3,0,yes 17 | GP,F,16,U,GT3,T,4,4,health,other,home,mother,1,1,0,no,yes,no,no,yes,yes,yes,no,4,4,4,1,2,2,4,yes 18 | GP,F,16,U,GT3,T,4,4,services,services,reputation,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,3,2,3,1,2,2,6,yes 19 | GP,F,16,U,GT3,T,3,3,other,other,reputation,mother,3,2,0,yes,yes,no,yes,yes,yes,no,no,5,3,2,1,1,4,4,yes 20 | GP,M,17,U,GT3,T,3,2,services,services,course,mother,1,1,3,no,yes,no,yes,yes,yes,yes,no,5,5,5,2,4,5,16,no 21 | GP,M,16,U,LE3,T,4,3,health,other,home,father,1,1,0,no,no,yes,yes,yes,yes,yes,no,3,1,3,1,3,5,4,yes 22 | GP,M,15,U,GT3,T,4,3,teacher,other,reputation,mother,1,2,0,no,no,no,no,yes,yes,yes,no,4,4,1,1,1,1,0,yes 23 | GP,M,15,U,GT3,T,4,4,health,health,other,father,1,1,0,no,yes,yes,no,yes,yes,yes,no,5,4,2,1,1,5,0,yes 24 | GP,M,16,U,LE3,T,4,2,teacher,other,course,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,4,5,1,1,3,5,2,yes 25 | GP,M,16,U,LE3,T,2,2,other,other,reputation,mother,2,2,0,no,yes,no,yes,yes,yes,yes,no,5,4,4,2,4,5,0,yes 26 | GP,F,15,R,GT3,T,2,4,services,health,course,mother,1,3,0,yes,yes,yes,yes,yes,yes,yes,no,4,3,2,1,1,5,2,no 27 | GP,F,16,U,GT3,T,2,2,services,services,home,mother,1,1,2,no,yes,yes,no,no,yes,yes,no,1,2,2,1,3,5,14,no 28 | GP,M,15,U,GT3,T,2,2,other,other,home,mother,1,1,0,no,yes,yes,no,yes,yes,yes,no,4,2,2,1,2,5,2,yes 29 | GP,M,15,U,GT3,T,4,2,health,services,other,mother,1,1,0,no,no,yes,no,yes,yes,yes,no,2,2,4,2,4,1,4,yes 30 | GP,M,16,U,LE3,A,3,4,services,other,home,mother,1,2,0,yes,yes,no,yes,yes,yes,yes,no,5,3,3,1,1,5,4,yes 31 | GP,M,16,U,GT3,T,4,4,teacher,teacher,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,5,5,5,5,16,yes 32 | GP,M,15,U,GT3,T,4,4,health,services,home,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,5,4,2,3,4,5,0,yes 33 | GP,M,15,U,GT3,T,4,4,services,services,reputation,mother,2,2,0,no,yes,no,yes,yes,yes,yes,no,4,3,1,1,1,5,0,yes 34 | GP,M,15,R,GT3,T,4,3,teacher,at_home,course,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,4,5,2,1,1,5,0,yes 35 | GP,M,15,U,LE3,T,3,3,other,other,course,mother,1,2,0,no,no,no,yes,no,yes,yes,no,5,3,2,1,1,2,0,yes 36 | 
GP,M,16,U,GT3,T,3,2,other,other,home,mother,1,1,0,no,yes,yes,no,no,yes,yes,no,5,4,3,1,1,5,0,yes 37 | GP,F,15,U,GT3,T,2,3,other,other,other,father,2,1,0,no,yes,no,yes,yes,yes,no,no,3,5,1,1,1,5,0,no 38 | GP,M,15,U,LE3,T,4,3,teacher,services,home,mother,1,3,0,no,yes,no,yes,yes,yes,yes,no,5,4,3,1,1,4,2,yes 39 | GP,M,16,R,GT3,A,4,4,other,teacher,reputation,mother,2,3,0,no,yes,no,yes,yes,yes,yes,yes,2,4,3,1,1,5,7,yes 40 | GP,F,15,R,GT3,T,3,4,services,health,course,mother,1,3,0,yes,yes,yes,yes,yes,yes,yes,no,4,3,2,1,1,5,2,yes 41 | GP,F,15,R,GT3,T,2,2,at_home,other,reputation,mother,1,1,0,yes,yes,yes,yes,yes,yes,no,no,4,3,1,1,1,2,8,yes 42 | GP,F,16,U,LE3,T,2,2,other,other,home,mother,2,2,1,no,yes,no,yes,no,yes,yes,yes,3,3,3,1,2,3,25,yes 43 | GP,M,15,U,LE3,T,4,4,teacher,other,home,other,1,1,0,no,yes,no,no,no,yes,yes,yes,5,4,3,2,4,5,8,yes 44 | GP,M,15,U,GT3,T,4,4,services,teacher,course,father,1,2,0,no,yes,no,yes,yes,yes,yes,no,4,3,3,1,1,5,2,yes 45 | GP,M,15,U,GT3,T,2,2,services,services,course,father,1,1,0,yes,yes,no,no,yes,yes,yes,no,5,4,1,1,1,1,0,yes 46 | GP,F,16,U,LE3,T,2,2,other,at_home,course,father,2,2,1,yes,no,no,yes,yes,yes,yes,no,4,3,3,2,2,5,14,no 47 | GP,F,15,U,LE3,A,4,3,other,other,course,mother,1,2,0,yes,yes,yes,yes,yes,yes,yes,yes,5,2,2,1,1,5,8,no 48 | GP,F,16,U,LE3,A,3,3,other,services,home,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,2,3,5,1,4,3,12,yes 49 | GP,M,16,U,GT3,T,4,3,health,services,reputation,mother,1,4,0,no,no,no,yes,yes,yes,yes,no,4,2,2,1,1,2,4,yes 50 | GP,M,15,U,GT3,T,4,2,teacher,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,no,no,4,3,3,2,2,5,2,yes 51 | GP,F,15,U,GT3,T,4,4,services,teacher,other,father,1,2,1,yes,yes,no,yes,no,yes,yes,no,4,4,4,1,1,3,2,no 52 | GP,F,16,U,LE3,T,2,2,services,services,course,mother,3,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,2,3,4,2,yes 53 | GP,F,15,U,LE3,T,4,2,health,other,other,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,5,2,yes 54 | GP,M,15,U,LE3,A,4,2,health,health,other,father,2,1,1,no,no,no,no,yes,yes,no,no,5,5,5,3,4,5,6,yes 55 | GP,F,15,U,GT3,T,4,4,services,services,course,mother,1,1,0,yes,yes,yes,no,yes,yes,yes,no,3,3,4,2,3,5,0,yes 56 | GP,F,15,U,LE3,A,3,3,other,other,other,mother,1,1,0,no,no,yes,no,yes,yes,yes,no,5,3,4,4,4,1,6,yes 57 | GP,F,16,U,GT3,A,2,1,other,other,other,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,5,3,4,1,1,2,8,yes 58 | GP,F,15,U,GT3,A,4,3,services,services,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,3,2,1,1,1,0,yes 59 | GP,M,15,U,GT3,T,4,4,teacher,health,reputation,mother,1,2,0,no,yes,no,yes,yes,yes,no,no,3,2,2,1,1,5,4,yes 60 | GP,M,15,U,LE3,T,1,2,other,at_home,home,father,1,2,0,yes,yes,no,yes,yes,yes,yes,no,4,3,2,1,1,5,2,no 61 | GP,F,16,U,GT3,T,4,2,services,other,course,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,4,2,3,1,1,5,2,yes 62 | GP,F,16,R,GT3,T,4,4,health,teacher,other,mother,1,2,0,no,yes,no,yes,yes,yes,no,no,2,4,4,2,3,4,6,yes 63 | GP,F,16,U,GT3,T,1,1,services,services,course,father,4,1,0,yes,yes,no,yes,no,yes,yes,yes,5,5,5,5,5,5,6,yes 64 | GP,F,16,U,LE3,T,1,2,other,services,reputation,father,1,2,0,yes,no,no,yes,yes,yes,yes,no,4,4,3,1,1,1,4,no 65 | GP,F,16,U,GT3,T,4,3,teacher,health,home,mother,1,3,0,yes,yes,yes,yes,yes,yes,yes,no,3,4,4,2,4,4,2,no 66 | GP,F,15,U,LE3,T,4,3,services,services,reputation,father,1,2,0,yes,no,no,yes,yes,yes,yes,yes,4,4,4,2,4,2,0,yes 67 | GP,F,16,U,LE3,T,4,3,teacher,services,course,mother,3,2,0,no,yes,no,yes,yes,yes,yes,no,5,4,3,1,2,1,2,yes 68 | GP,M,15,U,GT3,A,4,4,other,services,reputation,mother,1,4,0,no,yes,no,yes,no,yes,yes,yes,1,3,3,5,5,3,4,yes 69 | 
GP,F,16,U,GT3,T,3,1,services,other,course,mother,1,4,0,yes,yes,yes,no,yes,yes,yes,no,4,3,3,1,2,5,4,no 70 | GP,F,15,R,LE3,T,2,2,health,services,reputation,mother,2,2,0,yes,yes,yes,no,yes,yes,yes,no,4,1,3,1,3,4,2,no 71 | GP,F,15,R,LE3,T,3,1,other,other,reputation,father,2,4,0,no,yes,no,no,no,yes,yes,no,4,4,2,2,3,3,12,yes 72 | GP,M,16,U,GT3,T,3,1,other,other,reputation,father,2,4,0,no,yes,yes,no,yes,yes,yes,no,4,3,2,1,1,5,0,yes 73 | GP,M,15,U,GT3,T,4,2,other,other,course,mother,1,4,0,no,no,no,no,yes,yes,yes,no,3,3,3,1,1,3,0,yes 74 | GP,F,15,R,GT3,T,1,1,other,other,reputation,mother,1,2,2,yes,yes,no,no,no,yes,yes,yes,3,3,4,2,4,5,2,no 75 | GP,M,16,U,GT3,T,3,1,other,other,reputation,mother,1,1,0,no,no,no,yes,yes,yes,no,no,5,3,2,2,2,5,2,yes 76 | GP,F,16,U,GT3,T,3,3,other,services,home,mother,1,2,0,yes,yes,yes,yes,yes,yes,yes,no,4,3,3,2,4,5,54,yes 77 | GP,M,15,U,GT3,T,4,3,teacher,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,2,3,5,6,yes 78 | GP,M,15,U,GT3,T,4,0,teacher,other,course,mother,2,4,0,no,no,no,yes,yes,yes,yes,no,3,4,3,1,1,1,8,yes 79 | GP,F,16,U,GT3,T,2,2,other,other,reputation,mother,1,4,0,no,no,yes,no,yes,yes,yes,yes,5,2,3,1,3,3,0,yes 80 | GP,M,17,U,GT3,T,2,1,other,other,home,mother,2,1,3,yes,yes,no,yes,yes,no,yes,no,4,5,1,1,1,3,2,yes 81 | GP,F,16,U,GT3,T,3,4,at_home,other,course,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,2,4,3,1,2,3,12,no 82 | GP,M,15,U,GT3,T,2,3,other,services,course,father,1,1,0,yes,yes,yes,yes,no,yes,yes,yes,3,2,2,1,3,3,2,yes 83 | GP,M,15,U,GT3,T,2,3,other,other,home,mother,1,3,0,yes,no,yes,no,no,yes,yes,no,5,3,2,1,2,5,4,yes 84 | GP,F,15,U,LE3,T,3,2,services,other,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,4,4,1,1,5,10,no 85 | GP,M,15,U,LE3,T,2,2,services,services,home,mother,2,2,0,no,no,yes,yes,yes,yes,yes,no,5,3,3,1,3,4,4,yes 86 | GP,F,15,U,GT3,T,1,1,other,other,home,father,1,2,0,no,yes,no,yes,no,yes,yes,no,4,3,2,2,3,4,2,yes 87 | GP,F,15,U,GT3,T,4,4,services,services,reputation,father,2,2,2,no,no,yes,no,yes,yes,yes,yes,4,4,4,2,3,5,6,no 88 | GP,F,16,U,LE3,T,2,2,at_home,other,course,mother,1,2,0,no,yes,no,no,yes,yes,no,no,4,3,4,1,2,2,4,no 89 | GP,F,15,U,GT3,T,4,2,other,other,reputation,mother,1,3,0,no,yes,no,yes,yes,yes,yes,no,5,3,3,1,3,1,4,yes 90 | GP,M,16,U,GT3,T,2,2,services,other,reputation,father,2,2,1,no,no,yes,yes,no,yes,yes,no,4,4,2,1,1,3,12,yes 91 | GP,M,16,U,LE3,A,4,4,teacher,health,reputation,mother,1,2,0,no,yes,no,no,yes,yes,no,no,4,1,3,3,5,5,18,no 92 | GP,F,16,U,GT3,T,3,3,other,other,home,mother,1,3,0,no,yes,yes,no,yes,yes,yes,yes,4,3,3,1,3,4,0,no 93 | GP,F,15,U,GT3,T,4,3,services,other,reputation,mother,1,1,0,no,no,yes,yes,yes,yes,yes,no,4,5,5,1,3,1,4,yes 94 | GP,F,16,U,LE3,T,3,1,other,other,home,father,1,2,0,yes,yes,no,no,yes,yes,no,no,3,3,3,2,3,2,4,no 95 | GP,F,16,U,GT3,T,4,2,teacher,services,home,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,5,3,3,1,1,1,0,yes 96 | GP,M,15,U,LE3,T,2,2,services,health,reputation,mother,1,4,0,no,yes,no,yes,yes,yes,yes,no,4,3,4,1,1,4,6,yes 97 | GP,F,15,R,GT3,T,1,1,at_home,other,home,mother,2,4,1,yes,yes,yes,yes,yes,yes,yes,no,3,1,2,1,1,1,2,yes 98 | GP,M,16,R,GT3,T,4,3,services,other,reputation,mother,2,1,0,yes,yes,no,yes,no,yes,yes,no,3,3,3,1,1,4,2,yes 99 | GP,F,16,U,GT3,T,2,1,other,other,course,mother,1,2,0,no,yes,yes,no,yes,yes,no,yes,4,3,5,1,1,5,2,yes 100 | GP,F,16,U,GT3,T,4,4,other,other,reputation,mother,1,1,0,no,no,no,yes,no,yes,yes,no,5,3,4,1,2,1,6,yes 101 | GP,F,16,U,GT3,T,4,3,other,at_home,course,mother,1,3,0,yes,yes,yes,no,yes,yes,yes,no,5,3,5,1,1,3,0,no 102 | 
GP,M,16,U,GT3,T,4,4,services,services,other,mother,1,1,0,yes,yes,yes,yes,yes,yes,yes,no,4,5,5,5,5,4,14,no 103 | GP,M,16,U,GT3,T,4,4,services,teacher,other,father,1,3,0,no,yes,no,yes,yes,yes,yes,yes,4,4,3,1,1,4,0,yes 104 | GP,M,15,U,GT3,T,4,4,services,other,course,mother,1,1,0,no,yes,no,yes,no,yes,yes,no,5,3,3,1,1,5,4,yes 105 | GP,F,15,U,GT3,T,3,2,services,other,home,mother,2,2,0,yes,yes,yes,no,yes,yes,yes,no,4,3,5,1,1,2,26,no 106 | GP,M,15,U,GT3,A,3,4,services,other,course,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,4,4,1,1,1,0,yes 107 | GP,F,15,U,GT3,A,3,3,other,health,reputation,father,1,4,0,yes,no,no,no,yes,yes,no,no,4,3,3,1,1,4,10,yes 108 | GP,F,15,U,GT3,T,2,2,other,other,course,mother,1,4,0,yes,yes,yes,no,yes,yes,yes,no,5,1,2,1,1,3,8,no 109 | GP,M,16,U,GT3,T,3,3,services,other,home,father,1,3,0,no,yes,no,yes,yes,yes,yes,no,5,3,3,1,1,5,2,yes 110 | GP,M,15,R,GT3,T,4,4,other,other,home,father,4,4,0,no,yes,yes,yes,yes,yes,yes,yes,1,3,5,3,5,1,6,yes 111 | GP,F,16,U,LE3,T,4,4,health,health,other,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,5,4,5,1,1,4,4,yes 112 | GP,M,15,U,LE3,A,4,4,teacher,teacher,course,mother,1,1,0,no,no,no,yes,yes,yes,yes,no,5,5,3,1,1,4,6,yes 113 | GP,F,16,R,GT3,T,3,3,services,other,reputation,father,1,3,1,yes,yes,no,yes,yes,yes,yes,no,4,1,2,1,1,2,0,yes 114 | GP,F,16,U,GT3,T,2,2,at_home,other,home,mother,1,2,1,yes,no,no,yes,yes,yes,yes,no,3,1,2,1,1,5,6,yes 115 | GP,M,15,U,LE3,T,4,2,teacher,other,course,mother,1,1,0,no,no,no,no,yes,yes,yes,no,3,5,2,1,1,3,10,yes 116 | GP,M,15,R,GT3,T,2,1,health,services,reputation,mother,1,2,0,no,no,no,yes,yes,yes,yes,yes,5,4,2,1,1,5,8,no 117 | GP,M,16,U,GT3,T,4,4,teacher,teacher,course,father,1,2,0,no,yes,no,yes,yes,yes,yes,no,5,4,4,1,2,5,2,yes 118 | GP,M,15,U,GT3,T,4,4,other,teacher,reputation,father,2,2,0,no,yes,no,yes,yes,yes,no,no,4,4,3,1,1,2,2,yes 119 | GP,M,16,U,GT3,T,3,3,other,services,home,father,2,1,0,no,no,no,yes,yes,yes,yes,no,5,4,2,1,1,5,0,yes 120 | GP,M,17,R,GT3,T,1,3,other,other,course,father,3,2,1,no,yes,no,yes,yes,yes,yes,no,5,2,4,1,4,5,20,no 121 | GP,M,15,U,GT3,T,3,4,other,other,reputation,father,1,1,0,no,no,no,no,yes,yes,yes,no,3,4,3,1,2,4,6,yes 122 | GP,F,15,U,GT3,T,1,2,at_home,services,course,mother,1,2,0,no,no,no,no,no,yes,yes,no,3,2,3,1,2,1,2,yes 123 | GP,M,15,U,GT3,T,2,2,services,services,home,father,1,4,0,no,yes,yes,yes,yes,yes,yes,no,5,5,4,1,2,5,6,yes 124 | GP,F,16,U,LE3,T,2,4,other,health,course,father,2,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,2,2,1,2,5,2,yes 125 | GP,M,16,U,GT3,T,4,4,health,other,course,mother,1,1,0,no,yes,no,yes,yes,yes,yes,no,3,4,4,1,4,5,18,yes 126 | GP,F,16,U,GT3,T,2,2,other,other,home,mother,1,2,0,no,no,yes,no,yes,yes,yes,yes,5,4,4,1,1,5,0,no 127 | GP,M,15,U,GT3,T,3,4,services,services,home,father,1,1,0,yes,no,no,no,yes,yes,yes,no,5,5,5,3,2,5,0,yes 128 | GP,F,15,U,LE3,A,3,4,other,other,home,mother,1,2,0,yes,no,no,yes,yes,yes,yes,yes,5,3,2,1,1,1,0,yes 129 | GP,F,19,U,GT3,T,0,1,at_home,other,course,other,1,2,3,no,yes,no,no,no,no,no,no,3,4,2,1,1,5,2,no 130 | GP,M,18,R,GT3,T,2,2,services,other,reputation,mother,1,1,2,no,yes,no,yes,yes,yes,yes,no,3,3,3,1,2,4,0,no 131 | GP,M,16,R,GT3,T,4,4,teacher,teacher,course,mother,1,1,0,no,no,yes,yes,yes,yes,yes,no,3,5,5,2,5,4,8,yes 132 | GP,F,15,R,GT3,T,3,4,services,teacher,course,father,2,3,2,no,yes,no,no,yes,yes,yes,yes,4,2,2,2,2,5,0,no 133 | GP,F,15,U,GT3,T,1,1,at_home,other,course,mother,3,1,0,no,yes,no,yes,no,yes,yes,yes,4,3,3,1,2,4,0,no 134 | GP,F,17,U,LE3,T,2,2,other,other,course,father,1,1,0,no,yes,no,no,yes,yes,yes,yes,3,4,4,1,3,5,12,yes 135 | 
GP,F,16,U,GT3,A,3,4,services,other,course,father,1,1,0,no,no,no,no,yes,yes,yes,no,3,2,1,1,4,5,16,yes 136 | GP,M,15,R,GT3,T,3,4,at_home,teacher,course,mother,4,2,0,no,yes,no,no,yes,yes,no,yes,5,3,3,1,1,5,0,no 137 | GP,F,15,U,GT3,T,4,4,services,at_home,course,mother,1,3,0,no,yes,no,yes,yes,yes,yes,yes,4,3,3,1,1,5,0,no 138 | GP,M,17,R,GT3,T,3,4,at_home,other,course,mother,3,2,0,no,no,no,no,yes,yes,no,no,5,4,5,2,4,5,0,no 139 | GP,F,16,U,GT3,A,3,3,other,other,course,other,2,1,2,no,yes,no,yes,no,yes,yes,yes,4,3,2,1,1,5,0,no 140 | GP,M,16,U,LE3,T,1,1,services,other,course,mother,1,2,1,no,no,no,no,yes,yes,no,yes,4,4,4,1,3,5,0,yes 141 | GP,F,15,U,GT3,T,4,4,teacher,teacher,course,mother,2,1,0,no,no,no,yes,yes,yes,yes,no,4,3,2,1,1,5,0,yes 142 | GP,M,15,U,GT3,T,4,3,teacher,services,course,father,2,4,0,yes,yes,no,no,yes,yes,yes,no,2,2,2,1,1,3,0,no 143 | GP,M,16,U,LE3,T,2,2,services,services,reputation,father,2,1,2,no,yes,no,yes,yes,yes,yes,no,2,3,3,2,2,2,8,no 144 | GP,F,15,U,GT3,T,4,4,teacher,services,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,4,2,2,1,1,5,2,yes 145 | GP,F,16,U,LE3,T,1,1,at_home,at_home,course,mother,1,1,0,no,no,no,no,yes,yes,yes,no,3,4,4,3,3,1,2,yes 146 | GP,M,17,U,GT3,T,2,1,other,other,home,mother,1,1,3,no,yes,no,no,yes,yes,yes,no,5,4,5,1,2,5,0,no 147 | GP,F,15,U,GT3,T,1,1,other,services,course,father,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,4,2,1,2,5,0,yes 148 | GP,F,15,U,GT3,T,3,2,health,services,home,father,1,2,3,no,yes,no,no,yes,yes,yes,no,3,3,2,1,1,3,0,no 149 | GP,F,15,U,GT3,T,1,2,at_home,other,course,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,4,3,2,1,1,5,2,yes 150 | GP,M,16,U,GT3,T,4,4,teacher,teacher,course,mother,1,1,0,no,yes,no,no,yes,no,yes,yes,3,3,2,2,1,5,0,no 151 | GP,M,15,U,LE3,A,2,1,services,other,course,mother,4,1,3,no,no,no,no,yes,yes,yes,no,4,5,5,2,5,5,0,yes 152 | GP,M,18,U,LE3,T,1,1,other,other,course,mother,1,1,3,no,no,no,no,yes,no,yes,yes,2,3,5,2,5,4,0,no 153 | GP,M,16,U,LE3,T,2,1,at_home,other,course,mother,1,1,1,no,no,no,yes,yes,yes,no,yes,4,4,4,3,5,5,6,yes 154 | GP,F,15,R,GT3,T,3,3,services,services,reputation,other,2,3,2,no,yes,yes,yes,yes,yes,yes,yes,4,2,1,2,3,3,8,yes 155 | GP,M,19,U,GT3,T,3,2,services,at_home,home,mother,1,1,3,no,yes,no,no,yes,no,yes,yes,4,5,4,1,1,4,0,no 156 | GP,F,17,U,GT3,T,4,4,other,teacher,course,mother,1,1,0,yes,yes,no,no,yes,yes,no,yes,4,2,1,1,1,4,0,yes 157 | GP,M,15,R,GT3,T,2,3,at_home,services,course,mother,1,2,0,yes,no,yes,yes,yes,yes,no,no,4,4,4,1,1,1,2,no 158 | GP,M,17,R,LE3,T,1,2,other,other,reputation,mother,1,1,0,no,no,no,no,yes,yes,no,no,2,2,2,3,3,5,8,yes 159 | GP,F,18,R,GT3,T,1,1,at_home,other,course,mother,3,1,3,no,yes,no,yes,no,yes,no,no,5,2,5,1,5,4,6,yes 160 | GP,M,16,R,GT3,T,2,2,at_home,other,course,mother,3,1,0,no,no,no,no,no,yes,no,no,4,2,2,1,2,3,2,yes 161 | GP,M,16,U,GT3,T,3,3,other,services,course,father,1,2,1,no,yes,yes,no,yes,yes,yes,yes,4,5,5,4,4,5,4,yes 162 | GP,M,17,R,LE3,T,2,1,at_home,other,course,mother,2,1,2,no,no,no,yes,yes,no,yes,yes,3,3,2,2,2,5,0,no 163 | GP,M,15,R,GT3,T,3,2,other,other,course,mother,2,2,2,yes,yes,no,no,yes,yes,yes,yes,4,4,4,1,4,3,6,no 164 | GP,M,16,U,LE3,T,1,2,other,other,course,mother,2,1,1,no,no,no,yes,yes,yes,no,no,4,4,4,2,4,5,0,no 165 | GP,M,17,U,GT3,T,1,3,at_home,services,course,father,1,1,0,no,no,no,no,yes,no,yes,no,5,3,3,1,4,2,2,yes 166 | GP,M,17,R,LE3,T,1,1,other,services,course,mother,4,2,3,no,no,no,yes,yes,no,no,yes,5,3,5,1,5,5,0,no 167 | GP,M,16,U,GT3,T,3,2,services,services,course,mother,2,1,1,no,yes,no,yes,no,no,no,no,4,5,2,1,1,2,16,yes 168 | 
GP,M,16,U,GT3,T,2,2,other,other,course,father,1,2,0,no,no,no,no,yes,no,yes,no,4,3,5,2,4,4,4,yes 169 | GP,F,16,U,GT3,T,4,2,health,services,home,father,1,2,0,no,no,yes,no,yes,yes,yes,yes,4,2,3,1,1,3,0,yes 170 | GP,F,16,U,GT3,T,2,2,other,other,home,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,5,1,5,1,1,4,0,no 171 | GP,F,16,U,GT3,T,4,4,health,health,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,4,4,2,1,1,3,0,yes 172 | GP,M,16,U,GT3,T,3,4,other,other,course,father,3,1,2,no,yes,no,yes,no,yes,yes,no,3,4,5,2,4,2,0,no 173 | GP,M,16,U,GT3,T,1,0,other,other,reputation,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,2,1,1,3,2,yes 174 | GP,M,17,U,LE3,T,4,4,teacher,other,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,4,4,1,3,5,0,yes 175 | GP,F,16,U,GT3,T,1,3,at_home,services,home,mother,1,2,3,no,no,no,yes,no,yes,yes,yes,4,3,5,1,1,3,0,no 176 | GP,F,16,U,LE3,T,3,3,other,other,reputation,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,4,5,1,1,4,4,no 177 | GP,M,17,U,LE3,T,4,3,teacher,other,course,mother,2,2,0,no,no,yes,yes,yes,yes,yes,no,4,4,4,4,4,4,4,no 178 | GP,F,16,U,GT3,T,2,2,services,other,reputation,mother,2,2,0,no,no,yes,yes,no,yes,yes,no,3,4,4,1,4,5,2,yes 179 | GP,M,17,U,GT3,T,3,3,other,other,reputation,father,1,2,0,no,no,no,yes,no,yes,yes,no,4,3,4,1,4,4,4,no 180 | GP,M,16,R,GT3,T,4,2,teacher,services,other,mother,1,1,0,no,yes,no,yes,yes,yes,yes,yes,4,3,3,3,4,3,10,no 181 | GP,M,17,U,GT3,T,4,3,other,other,course,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,5,2,3,1,1,2,4,yes 182 | GP,M,16,U,GT3,T,4,3,teacher,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,3,4,3,2,3,3,10,no 183 | GP,M,16,U,GT3,T,3,3,services,other,home,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,4,2,3,1,2,3,2,yes 184 | GP,F,17,U,GT3,T,2,4,services,services,reputation,father,1,2,0,no,yes,no,yes,yes,yes,no,no,5,4,2,2,3,5,0,yes 185 | GP,F,17,U,LE3,T,3,3,other,other,reputation,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,5,3,3,2,3,1,56,no 186 | GP,F,16,U,GT3,T,3,2,other,other,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,1,2,2,1,2,1,14,yes 187 | GP,M,17,U,GT3,T,3,3,services,services,other,mother,1,2,0,no,yes,no,yes,yes,yes,yes,yes,4,3,4,2,3,4,12,yes 188 | GP,M,16,U,GT3,T,1,2,services,services,other,mother,1,1,0,no,yes,yes,yes,yes,yes,yes,yes,3,3,3,1,2,3,2,yes 189 | GP,M,16,U,LE3,T,2,1,other,other,course,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,4,2,3,1,2,5,0,yes 190 | GP,F,17,U,GT3,A,3,3,health,other,reputation,mother,1,2,0,no,yes,no,no,no,yes,yes,yes,3,3,3,1,3,3,6,no 191 | GP,M,17,R,GT3,T,1,2,at_home,other,home,mother,1,2,0,no,no,no,no,yes,yes,no,no,3,1,3,1,5,3,4,yes 192 | GP,F,16,U,GT3,T,2,3,services,services,course,mother,1,2,0,no,no,no,no,yes,yes,yes,no,4,3,3,1,1,2,10,yes 193 | GP,F,17,U,GT3,T,1,1,at_home,services,course,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,5,3,3,1,1,3,0,no 194 | GP,M,17,U,GT3,T,1,2,at_home,services,other,other,2,2,0,no,no,yes,yes,no,yes,yes,no,4,4,4,4,5,5,12,no 195 | GP,M,16,R,GT3,T,3,3,services,services,reputation,mother,1,1,0,no,yes,no,yes,yes,yes,yes,no,4,3,2,3,4,5,8,yes 196 | GP,M,16,U,GT3,T,2,3,other,other,home,father,2,1,0,no,no,no,no,yes,yes,yes,no,5,3,3,1,1,3,0,yes 197 | GP,F,17,U,LE3,T,2,4,services,services,course,father,1,2,0,no,no,no,yes,yes,yes,yes,yes,4,3,2,1,1,5,0,yes 198 | GP,M,17,U,GT3,T,4,4,services,teacher,home,mother,1,1,0,no,no,no,no,yes,yes,yes,no,5,2,3,1,2,5,4,yes 199 | GP,M,16,R,LE3,T,3,3,teacher,other,home,father,3,1,0,no,yes,yes,yes,yes,yes,yes,no,3,3,4,3,5,3,8,yes 200 | GP,F,17,U,GT3,T,4,4,services,teacher,home,mother,2,1,1,no,yes,no,no,yes,yes,yes,no,4,2,4,2,3,2,24,yes 
201 | GP,F,16,U,LE3,T,4,4,teacher,teacher,reputation,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,5,2,1,2,3,0,yes 202 | GP,F,16,U,GT3,T,4,3,health,other,home,mother,1,2,0,no,yes,no,yes,yes,yes,yes,no,4,3,5,1,5,2,2,yes 203 | GP,F,16,U,GT3,T,2,3,other,other,reputation,mother,1,2,0,yes,yes,yes,yes,yes,yes,no,no,4,4,3,1,3,4,6,yes 204 | GP,F,17,U,GT3,T,1,1,other,other,course,mother,1,2,0,no,yes,yes,no,no,yes,no,no,4,4,4,1,3,1,4,yes 205 | GP,F,17,R,GT3,T,2,2,other,other,reputation,mother,1,1,0,no,yes,no,no,yes,yes,yes,no,5,3,2,1,2,3,18,no 206 | GP,F,16,R,GT3,T,2,2,services,services,reputation,mother,2,4,0,no,yes,yes,yes,no,yes,yes,no,5,3,5,1,1,5,6,yes 207 | GP,F,17,U,GT3,T,3,4,at_home,services,home,mother,1,3,1,no,yes,yes,no,yes,yes,yes,yes,4,4,3,3,4,5,28,no 208 | GP,F,16,U,GT3,A,3,1,services,other,course,mother,1,2,3,no,yes,yes,no,yes,yes,yes,no,2,3,3,2,2,4,5,no 209 | GP,F,16,U,GT3,T,4,3,teacher,other,other,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,1,3,2,1,1,1,10,yes 210 | GP,F,16,U,GT3,T,1,1,at_home,other,home,mother,2,1,0,no,yes,yes,no,yes,yes,no,no,4,3,2,1,4,5,6,yes 211 | GP,F,17,R,GT3,T,4,3,teacher,other,reputation,mother,2,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,2,1,1,4,6,no 212 | GP,F,19,U,GT3,T,3,3,other,other,reputation,other,1,4,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,2,3,10,no 213 | GP,M,17,U,LE3,T,4,4,services,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,5,3,5,4,5,3,13,yes 214 | GP,F,16,U,GT3,A,2,2,other,other,reputation,mother,1,2,0,yes,yes,yes,no,yes,yes,yes,no,3,3,4,1,1,4,0,yes 215 | GP,M,18,U,GT3,T,2,2,services,other,home,mother,1,2,1,no,yes,yes,yes,yes,yes,yes,no,4,4,4,2,4,5,15,no 216 | GP,F,17,R,LE3,T,4,4,services,other,other,mother,1,1,0,no,yes,yes,no,yes,yes,no,no,5,2,1,1,2,3,12,yes 217 | GP,F,17,U,LE3,T,3,2,other,other,reputation,mother,2,2,0,no,no,yes,no,yes,yes,yes,no,4,4,4,1,3,1,2,yes 218 | GP,F,17,U,GT3,T,4,3,other,other,reputation,mother,1,2,2,no,no,yes,no,yes,yes,yes,yes,3,4,5,2,4,1,22,no 219 | GP,M,18,U,LE3,T,3,3,services,health,home,father,1,2,1,no,yes,yes,no,yes,yes,yes,no,3,2,4,2,4,4,13,no 220 | GP,F,17,U,GT3,T,2,3,at_home,other,home,father,2,1,0,no,yes,yes,no,yes,yes,no,no,3,3,3,1,4,3,3,no 221 | GP,F,17,U,GT3,T,2,2,at_home,at_home,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,1,4,4,yes 222 | GP,F,17,R,GT3,T,2,1,at_home,services,reputation,mother,2,2,0,no,yes,no,yes,yes,yes,yes,no,4,2,5,1,2,5,2,no 223 | GP,F,17,U,GT3,T,1,1,at_home,other,reputation,mother,1,3,1,no,yes,no,yes,yes,yes,no,yes,4,3,4,1,1,5,0,no 224 | GP,F,16,U,GT3,T,2,3,services,teacher,other,mother,1,2,0,yes,no,no,no,yes,yes,yes,no,2,3,1,1,1,3,2,yes 225 | GP,M,18,U,GT3,T,2,2,other,other,home,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,3,3,3,5,5,4,0,yes 226 | GP,F,16,U,GT3,T,4,4,teacher,services,home,mother,1,3,0,no,yes,no,yes,no,yes,yes,no,5,3,2,1,1,5,0,yes 227 | GP,F,18,R,GT3,T,3,1,other,other,reputation,mother,1,2,1,no,no,no,yes,yes,yes,yes,yes,5,3,3,1,1,4,16,no 228 | GP,F,17,U,GT3,T,3,2,other,other,course,mother,1,2,0,no,no,no,yes,no,yes,yes,no,5,3,4,1,3,3,10,yes 229 | GP,M,17,U,LE3,T,2,3,services,services,reputation,father,1,2,0,no,yes,yes,no,no,yes,yes,no,5,3,3,1,3,3,2,yes 230 | GP,M,18,U,LE3,T,2,1,at_home,other,course,mother,4,2,0,yes,yes,yes,yes,yes,yes,yes,yes,4,3,2,4,5,3,14,no 231 | GP,F,17,U,GT3,A,2,1,other,other,course,mother,2,3,0,no,no,no,yes,yes,yes,yes,yes,3,2,3,1,2,3,10,yes 232 | GP,F,17,U,LE3,T,4,3,health,other,reputation,father,1,2,0,no,no,no,yes,yes,yes,yes,yes,3,2,3,1,2,3,14,yes 233 | 
GP,M,17,R,GT3,T,2,2,other,other,course,father,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,5,2,1,1,1,4,yes 234 | GP,M,17,U,GT3,T,4,4,teacher,teacher,reputation,mother,1,2,0,yes,yes,no,yes,yes,yes,yes,yes,4,5,5,1,3,2,14,no 235 | GP,M,16,U,GT3,T,4,4,health,other,reputation,father,1,2,0,no,yes,yes,yes,yes,yes,yes,no,4,2,4,2,4,1,2,yes 236 | GP,M,16,U,LE3,T,1,1,other,other,home,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,3,4,2,1,1,5,18,no 237 | GP,M,16,U,GT3,T,3,2,at_home,other,reputation,mother,2,3,0,no,no,no,yes,yes,yes,yes,yes,5,3,3,1,3,2,10,yes 238 | GP,M,17,U,LE3,T,2,2,other,other,home,father,1,2,0,no,no,yes,yes,no,yes,yes,yes,4,4,2,5,5,4,4,yes 239 | GP,F,16,U,GT3,T,2,1,other,other,home,mother,1,1,0,no,no,no,no,yes,yes,yes,yes,4,5,2,1,1,5,20,yes 240 | GP,F,17,R,GT3,T,2,1,at_home,services,course,mother,3,2,0,no,no,no,yes,yes,yes,no,no,2,1,1,1,1,3,2,yes 241 | GP,M,18,U,GT3,T,2,2,other,services,reputation,father,1,2,1,no,no,no,no,yes,no,yes,no,5,5,4,3,5,2,0,no 242 | GP,M,17,U,LE3,T,4,3,health,other,course,mother,2,2,0,no,no,no,yes,yes,yes,yes,yes,2,5,5,1,4,5,14,yes 243 | GP,M,17,R,LE3,A,4,4,teacher,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,3,3,3,2,3,4,2,yes 244 | GP,M,16,U,LE3,T,4,3,teacher,other,course,mother,1,1,0,no,no,no,yes,no,yes,yes,no,5,4,5,1,1,3,0,no 245 | GP,M,16,U,GT3,T,4,4,services,services,course,mother,1,1,0,no,no,no,yes,yes,yes,yes,no,5,3,2,1,2,5,0,yes 246 | GP,F,18,U,GT3,T,2,1,other,other,course,other,2,3,0,no,yes,yes,no,no,yes,yes,yes,4,4,4,1,1,3,0,no 247 | GP,M,16,U,GT3,T,2,1,other,other,course,mother,3,1,0,no,no,no,no,yes,yes,yes,no,4,3,3,1,1,4,6,yes 248 | GP,M,17,U,GT3,T,2,3,other,other,course,father,2,1,0,no,no,no,no,yes,yes,yes,no,5,2,2,1,1,2,4,yes 249 | GP,M,22,U,GT3,T,3,1,services,services,other,mother,1,1,3,no,no,no,no,no,no,yes,yes,5,4,5,5,5,1,16,no 250 | GP,M,18,R,LE3,T,3,3,other,services,course,mother,1,2,1,no,yes,no,no,yes,yes,yes,yes,4,3,3,1,3,5,8,no 251 | GP,M,16,U,GT3,T,0,2,other,other,other,mother,1,1,0,no,no,yes,no,no,yes,yes,no,4,3,2,2,4,5,0,yes 252 | GP,M,18,U,GT3,T,3,2,services,other,course,mother,2,1,1,no,no,no,no,yes,no,yes,no,4,4,5,2,4,5,0,no 253 | GP,M,16,U,GT3,T,3,3,at_home,other,reputation,other,3,2,0,yes,yes,no,no,no,yes,yes,no,5,3,3,1,3,2,6,yes 254 | GP,M,18,U,GT3,T,2,1,services,services,other,mother,1,1,1,no,no,no,no,no,no,yes,no,3,2,5,2,5,5,4,no 255 | GP,M,16,R,GT3,T,2,1,other,other,course,mother,2,1,0,no,no,no,yes,no,yes,no,no,3,3,2,1,3,3,0,no 256 | GP,M,17,R,GT3,T,2,1,other,other,course,mother,1,1,0,no,no,no,no,no,yes,yes,no,4,4,2,2,4,5,0,yes 257 | GP,M,17,U,LE3,T,1,1,health,other,course,mother,2,1,1,no,yes,no,yes,yes,yes,yes,no,4,4,4,1,2,5,2,no 258 | GP,F,17,U,LE3,T,4,2,teacher,services,reputation,mother,1,4,0,no,yes,yes,yes,yes,yes,yes,no,4,2,3,1,1,4,6,yes 259 | GP,M,19,U,LE3,A,4,3,services,at_home,reputation,mother,1,2,0,no,yes,no,no,yes,yes,yes,no,4,3,1,1,1,1,12,yes 260 | GP,M,18,U,GT3,T,2,1,other,other,home,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,5,2,4,1,2,4,8,yes 261 | GP,F,17,U,LE3,T,2,2,services,services,course,father,1,4,0,no,no,yes,yes,yes,yes,yes,yes,3,4,1,1,1,2,0,no 262 | GP,F,18,U,GT3,T,4,3,services,other,home,father,1,2,0,no,yes,yes,no,yes,yes,yes,yes,3,1,2,1,3,2,21,yes 263 | GP,M,18,U,GT3,T,4,3,teacher,other,course,mother,1,2,0,no,yes,yes,no,no,yes,yes,no,4,3,2,1,1,3,2,no 264 | GP,M,18,R,GT3,T,3,2,other,other,course,mother,1,3,0,no,no,no,yes,no,yes,no,no,5,3,2,1,1,3,1,yes 265 | GP,F,17,U,GT3,T,3,3,other,other,home,mother,1,3,0,no,no,no,yes,no,yes,no,no,3,2,3,1,1,4,4,no 266 | 
GP,F,18,U,GT3,T,2,2,at_home,services,home,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,3,1,1,3,0,no 267 | GP,M,18,R,LE3,A,3,4,other,other,reputation,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,2,5,3,4,1,13,yes 268 | GP,M,17,U,GT3,T,3,1,services,other,other,mother,1,2,0,no,no,yes,yes,yes,yes,yes,yes,5,4,4,3,4,5,2,yes 269 | GP,F,18,R,GT3,T,4,4,teacher,other,reputation,mother,2,2,0,no,no,yes,yes,yes,yes,yes,no,4,3,4,2,2,4,8,yes 270 | GP,M,18,U,GT3,T,4,2,health,other,reputation,father,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,5,4,5,1,3,5,10,yes 271 | GP,F,18,R,GT3,T,2,1,other,other,reputation,mother,2,2,0,no,yes,no,no,yes,no,yes,yes,4,3,5,1,2,3,0,no 272 | GP,F,19,U,GT3,T,3,3,other,services,home,other,1,2,2,no,yes,yes,yes,yes,yes,yes,no,4,3,5,3,3,5,15,no 273 | GP,F,18,U,GT3,T,2,3,other,services,reputation,father,1,4,0,no,yes,yes,yes,yes,yes,yes,yes,4,5,5,1,3,2,4,yes 274 | GP,F,18,U,LE3,T,1,1,other,other,home,mother,2,2,0,no,yes,yes,no,no,yes,no,no,4,4,3,1,1,3,2,yes 275 | GP,M,17,R,GT3,T,1,2,at_home,at_home,home,mother,1,2,0,no,yes,yes,yes,no,yes,no,yes,3,5,2,2,2,1,2,yes 276 | GP,F,17,U,GT3,T,2,4,at_home,health,reputation,mother,2,2,0,no,yes,yes,no,yes,yes,yes,yes,4,3,3,1,1,1,2,yes 277 | GP,F,17,U,LE3,T,2,2,services,other,course,mother,2,2,0,yes,yes,yes,no,yes,yes,yes,yes,4,4,4,2,3,5,6,yes 278 | GP,F,18,R,GT3,A,3,2,other,services,home,mother,2,2,0,no,no,no,no,no,no,yes,yes,4,1,1,1,1,5,75,no 279 | GP,M,18,U,GT3,T,4,4,teacher,services,home,mother,2,1,0,no,no,yes,yes,yes,yes,yes,no,3,2,4,1,4,3,22,no 280 | GP,F,18,U,GT3,T,4,4,health,health,reputation,father,1,2,1,yes,yes,no,yes,yes,yes,yes,yes,2,4,4,1,1,4,15,no 281 | GP,M,18,U,LE3,T,4,3,teacher,services,course,mother,2,1,0,no,no,yes,yes,yes,yes,yes,no,4,2,3,1,2,1,8,yes 282 | GP,M,17,U,LE3,A,4,1,services,other,home,mother,2,1,0,no,no,yes,yes,yes,yes,yes,yes,4,5,4,2,4,5,30,no 283 | GP,M,17,U,LE3,A,3,2,teacher,services,home,mother,1,1,1,no,no,no,no,yes,yes,yes,no,4,4,4,3,4,3,19,yes 284 | GP,F,18,R,LE3,T,1,1,at_home,other,reputation,mother,2,4,0,no,yes,yes,yes,yes,yes,no,no,5,2,2,1,1,3,1,yes 285 | GP,F,18,U,GT3,T,1,1,other,other,home,mother,2,2,0,yes,no,no,yes,yes,yes,yes,no,5,4,4,1,1,4,4,yes 286 | GP,F,17,U,GT3,T,2,2,other,other,course,mother,1,2,0,no,yes,no,no,no,yes,yes,no,5,4,5,1,2,5,4,yes 287 | GP,M,17,U,GT3,T,1,1,other,other,reputation,father,1,2,0,no,no,yes,no,no,yes,yes,no,4,3,3,1,2,4,2,yes 288 | GP,F,18,U,GT3,T,2,2,at_home,at_home,other,mother,1,3,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,2,2,5,yes 289 | GP,F,17,U,GT3,T,1,1,services,teacher,reputation,mother,1,3,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,3,6,yes 290 | GP,M,18,U,GT3,T,2,1,services,services,reputation,mother,1,3,0,no,no,yes,yes,yes,yes,yes,no,4,2,4,1,3,2,6,yes 291 | GP,M,18,U,LE3,A,4,4,teacher,teacher,reputation,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,no,5,4,3,1,1,2,9,yes 292 | GP,M,18,U,GT3,T,4,2,teacher,other,home,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,2,1,4,5,11,yes 293 | GP,F,17,U,GT3,T,4,3,health,services,reputation,mother,1,3,0,no,yes,yes,no,yes,yes,yes,no,4,2,2,1,2,3,0,yes 294 | GP,F,18,U,LE3,T,2,1,services,at_home,reputation,mother,1,2,1,no,no,no,no,yes,yes,yes,yes,5,4,3,1,1,5,12,yes 295 | GP,F,17,R,LE3,T,3,1,services,other,reputation,mother,2,4,0,no,yes,yes,no,yes,yes,no,no,3,1,2,1,1,3,6,yes 296 | GP,M,18,R,LE3,T,3,2,services,other,reputation,mother,2,3,0,no,yes,yes,yes,yes,yes,yes,no,5,4,2,1,1,4,8,yes 297 | GP,M,17,U,GT3,T,3,3,health,other,home,mother,1,1,0,no,yes,yes,no,yes,yes,yes,no,4,4,3,1,3,5,4,yes 298 | 
GP,F,19,U,GT3,T,4,4,health,other,reputation,other,2,2,0,no,yes,yes,yes,yes,yes,yes,no,2,3,4,2,3,2,0,no 299 | GP,F,18,U,LE3,T,4,3,other,other,home,other,2,2,0,no,yes,yes,no,yes,yes,yes,yes,4,4,5,1,2,2,10,no 300 | GP,F,18,U,GT3,T,4,3,other,other,reputation,father,1,4,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,3,0,yes 301 | GP,M,18,U,LE3,T,4,4,teacher,teacher,home,mother,1,1,0,no,yes,yes,no,yes,yes,yes,yes,1,4,2,2,2,1,5,yes 302 | GP,F,18,U,LE3,A,4,4,health,other,home,mother,1,2,0,no,yes,no,no,yes,yes,yes,yes,4,2,4,1,1,4,14,yes 303 | GP,M,17,U,LE3,T,4,4,other,teacher,home,father,2,1,0,no,no,yes,no,yes,yes,yes,no,4,1,1,2,2,5,0,yes 304 | GP,F,17,U,GT3,T,4,2,other,other,reputation,mother,2,3,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,1,1,3,0,yes 305 | GP,F,17,U,GT3,T,3,2,health,health,reputation,father,1,4,0,no,yes,yes,yes,no,yes,yes,no,5,2,2,1,2,5,0,yes 306 | GP,M,19,U,GT3,T,3,3,other,other,home,other,1,2,1,no,yes,no,yes,yes,yes,yes,yes,4,4,4,1,1,3,20,yes 307 | GP,F,18,U,GT3,T,2,4,services,at_home,reputation,other,1,2,1,no,yes,yes,yes,yes,yes,yes,no,4,4,3,1,1,3,8,yes 308 | GP,M,20,U,GT3,A,3,2,services,other,course,other,1,1,0,no,no,no,yes,yes,yes,no,no,5,5,3,1,1,5,0,yes 309 | GP,M,19,U,GT3,T,4,4,teacher,services,reputation,other,2,1,1,no,yes,yes,no,yes,yes,yes,yes,4,3,4,1,1,4,38,no 310 | GP,M,19,R,GT3,T,3,3,other,services,reputation,father,1,2,1,no,no,no,yes,yes,yes,no,yes,4,5,3,1,2,5,0,yes 311 | GP,F,19,U,LE3,T,1,1,at_home,other,reputation,other,1,2,1,yes,yes,no,yes,no,yes,yes,no,4,4,3,1,3,3,18,yes 312 | GP,F,19,U,LE3,T,1,2,services,services,home,other,1,2,1,no,no,no,yes,no,yes,no,yes,4,2,4,2,2,3,0,no 313 | GP,F,19,U,GT3,T,2,1,at_home,other,other,other,3,2,0,no,yes,no,no,yes,no,yes,yes,3,4,1,1,1,2,20,yes 314 | GP,M,19,U,GT3,T,1,2,other,services,course,other,1,2,1,no,no,no,no,no,yes,yes,no,4,5,2,2,2,4,3,yes 315 | GP,F,19,U,LE3,T,3,2,services,other,reputation,other,2,2,1,no,yes,yes,no,no,yes,yes,yes,4,2,2,1,2,1,22,yes 316 | GP,F,19,U,GT3,T,1,1,at_home,health,home,other,1,3,2,no,no,no,no,no,yes,yes,yes,4,1,2,1,1,3,14,yes 317 | GP,F,19,R,GT3,T,2,3,other,other,reputation,other,1,3,1,no,no,no,no,yes,yes,yes,yes,4,1,2,1,1,3,40,yes 318 | GP,F,18,U,GT3,T,2,1,services,other,course,mother,2,2,0,no,yes,yes,yes,yes,yes,yes,no,5,3,3,1,2,1,0,no 319 | GP,F,18,U,GT3,T,4,3,other,other,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,3,4,1,1,5,9,no 320 | GP,F,17,R,GT3,T,3,4,at_home,services,course,father,1,3,0,no,yes,yes,yes,no,yes,yes,no,4,3,4,2,5,5,0,yes 321 | GP,F,18,U,GT3,T,4,4,teacher,other,course,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,4,4,3,3,5,2,yes 322 | GP,F,17,U,GT3,A,4,3,services,services,course,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,5,2,2,1,2,5,23,yes 323 | GP,F,17,U,GT3,T,2,2,other,other,course,mother,1,2,0,no,yes,no,no,yes,yes,no,yes,4,2,2,1,1,3,12,no 324 | GP,F,17,R,LE3,T,2,2,services,services,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,3,3,2,2,2,3,3,yes 325 | GP,F,17,U,GT3,T,3,1,services,services,course,father,1,3,0,no,yes,no,no,no,yes,yes,no,3,4,3,2,3,5,1,yes 326 | GP,F,17,U,LE3,T,0,2,at_home,at_home,home,father,2,3,0,no,no,no,no,yes,yes,yes,no,3,3,3,2,3,2,0,yes 327 | GP,M,18,U,GT3,T,4,4,other,other,course,mother,1,3,0,no,no,no,yes,yes,yes,yes,no,4,3,3,2,2,3,3,yes 328 | GP,M,17,U,GT3,T,3,3,other,services,reputation,mother,1,1,0,no,no,no,yes,no,yes,yes,no,4,3,5,3,5,5,3,yes 329 | GP,M,17,R,GT3,T,2,2,services,other,course,mother,4,1,0,no,yes,no,no,yes,yes,yes,no,4,4,5,5,5,4,8,yes 330 | GP,F,17,U,GT3,T,4,4,teacher,services,course,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,no,5,4,4,1,3,4,7,no 
331 | GP,F,17,U,GT3,T,4,4,teacher,teacher,course,mother,2,3,0,no,yes,yes,no,no,yes,yes,yes,4,3,3,1,2,4,4,yes 332 | GP,M,18,U,LE3,T,2,2,other,other,course,mother,1,4,0,no,yes,no,yes,yes,yes,yes,no,4,5,5,2,4,5,2,no 333 | GP,F,17,R,GT3,T,2,4,at_home,other,course,father,1,3,0,no,yes,no,no,yes,yes,yes,yes,4,4,3,1,1,5,7,yes 334 | GP,F,18,U,GT3,T,3,3,services,services,home,mother,1,2,0,no,no,no,yes,yes,yes,yes,no,5,3,4,1,1,4,0,no 335 | GP,F,18,U,LE3,T,2,2,other,other,home,other,1,2,0,no,no,no,yes,no,yes,yes,yes,4,3,3,1,1,2,0,no 336 | GP,F,18,R,GT3,T,2,2,at_home,other,course,mother,2,4,0,no,no,no,yes,yes,yes,no,no,4,4,4,1,1,4,0,no 337 | GP,F,17,U,GT3,T,3,4,services,other,course,mother,1,3,0,no,no,no,no,yes,yes,yes,no,4,4,5,1,3,5,16,yes 338 | GP,F,19,R,GT3,A,3,1,services,at_home,home,other,1,3,1,no,no,yes,no,yes,yes,no,no,5,4,3,1,2,5,12,yes 339 | GP,F,17,U,GT3,T,3,2,other,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,yes,4,3,2,2,3,2,0,no 340 | GP,F,18,U,LE3,T,3,3,services,services,home,mother,1,4,0,no,yes,no,no,yes,yes,yes,no,5,3,3,1,1,1,7,yes 341 | GP,F,17,R,GT3,A,3,2,other,other,home,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,3,2,3,2,4,yes 342 | GP,F,19,U,GT3,T,2,1,services,services,home,other,1,3,1,no,no,yes,yes,yes,yes,yes,yes,4,3,4,1,3,3,4,yes 343 | GP,M,18,U,GT3,T,4,4,teacher,services,home,father,1,2,1,no,yes,no,yes,yes,yes,yes,no,4,3,3,2,2,2,0,no 344 | GP,M,18,U,LE3,T,3,4,services,other,home,mother,1,2,0,no,no,no,yes,yes,yes,yes,yes,4,3,3,1,3,5,11,yes 345 | GP,F,17,U,GT3,A,2,2,at_home,at_home,home,father,1,2,1,no,yes,no,no,yes,yes,yes,yes,3,3,1,1,2,4,0,no 346 | GP,F,18,U,GT3,T,2,3,at_home,other,course,mother,1,3,0,no,yes,no,no,yes,yes,yes,no,4,3,3,1,2,3,4,yes 347 | GP,F,18,U,GT3,T,3,2,other,services,other,mother,1,3,0,no,no,no,no,yes,yes,yes,yes,5,4,3,2,3,1,7,yes 348 | GP,M,18,R,GT3,T,4,3,teacher,services,course,mother,1,3,0,no,no,no,no,yes,yes,yes,yes,5,3,2,1,2,4,9,yes 349 | GP,M,18,U,GT3,T,4,3,teacher,other,course,mother,1,3,0,no,yes,yes,no,yes,yes,yes,yes,5,4,5,2,3,5,0,no 350 | GP,F,17,U,GT3,T,4,3,health,other,reputation,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,3,1,3,4,0,yes 351 | MS,M,18,R,GT3,T,3,2,other,other,course,mother,2,1,1,no,yes,no,no,no,yes,yes,no,2,5,5,5,5,5,10,yes 352 | MS,M,19,R,GT3,T,1,1,other,services,home,other,3,2,3,no,no,no,no,yes,yes,yes,no,5,4,4,3,3,2,8,no 353 | MS,M,17,U,GT3,T,3,3,health,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,yes,no,4,5,4,2,3,3,2,yes 354 | MS,M,18,U,LE3,T,1,3,at_home,services,course,mother,1,1,1,no,no,no,no,yes,no,yes,yes,4,3,3,2,3,3,7,no 355 | MS,M,19,R,GT3,T,1,1,other,other,home,other,3,1,1,no,yes,no,no,yes,yes,yes,no,4,4,4,3,3,5,4,no 356 | MS,M,17,R,GT3,T,4,3,services,other,home,mother,2,2,0,no,yes,yes,yes,no,yes,yes,yes,4,5,5,1,3,2,4,yes 357 | MS,F,18,U,GT3,T,3,3,services,services,course,father,1,2,0,no,yes,no,no,yes,yes,no,yes,5,3,4,1,1,5,0,no 358 | MS,F,17,R,GT3,T,4,4,teacher,services,other,father,2,2,0,no,yes,yes,yes,yes,yes,yes,no,4,3,3,1,2,5,4,yes 359 | MS,F,17,U,LE3,A,3,2,services,other,reputation,mother,2,2,0,no,no,no,no,yes,yes,no,yes,1,2,3,1,2,5,2,yes 360 | MS,M,18,U,LE3,T,1,1,other,services,home,father,2,1,0,no,no,no,no,no,yes,yes,yes,3,3,2,1,2,3,4,yes 361 | MS,F,18,U,LE3,T,1,1,at_home,services,course,father,2,3,0,no,no,no,no,yes,yes,yes,no,5,3,2,1,1,4,0,yes 362 | MS,F,18,R,LE3,A,1,4,at_home,other,course,mother,3,2,0,no,no,no,no,yes,yes,no,yes,4,3,4,1,4,5,0,yes 363 | MS,M,18,R,LE3,T,1,1,at_home,other,other,mother,2,2,1,no,no,no,yes,no,no,no,no,4,4,3,2,3,5,2,yes 364 | 
MS,F,18,U,GT3,T,3,3,services,services,other,mother,2,2,0,no,yes,no,no,yes,yes,yes,yes,4,3,2,1,3,3,0,yes 365 | MS,F,17,U,LE3,T,4,4,at_home,at_home,course,mother,1,2,0,no,yes,yes,yes,yes,yes,yes,yes,2,3,4,1,1,1,0,yes 366 | MS,F,17,R,GT3,T,1,2,other,services,course,father,2,2,0,no,no,no,no,no,yes,no,no,3,2,2,1,2,3,0,yes 367 | MS,M,18,R,GT3,T,1,3,at_home,other,course,mother,2,2,0,no,yes,yes,no,yes,yes,no,no,3,3,4,2,4,3,4,yes 368 | MS,M,18,U,LE3,T,4,4,teacher,services,other,mother,2,3,0,no,no,yes,no,yes,yes,yes,yes,4,2,2,2,2,5,0,yes 369 | MS,F,17,R,GT3,T,1,1,other,services,reputation,mother,3,1,1,no,yes,yes,no,yes,yes,yes,yes,5,2,1,1,2,1,0,no 370 | MS,F,18,U,GT3,T,2,3,at_home,services,course,father,2,1,0,no,yes,yes,no,yes,yes,yes,yes,5,2,3,1,2,4,0,yes 371 | MS,F,18,R,GT3,T,4,4,other,teacher,other,father,3,2,0,no,yes,yes,no,no,yes,yes,yes,3,2,2,4,2,5,10,yes 372 | MS,F,19,U,LE3,T,3,2,services,services,home,other,2,2,2,no,no,no,yes,yes,yes,no,yes,3,2,2,1,1,3,4,no 373 | MS,M,18,R,LE3,T,1,2,at_home,services,other,father,3,1,0,no,yes,yes,yes,yes,no,yes,yes,4,3,3,2,3,3,3,yes 374 | MS,F,17,U,GT3,T,2,2,other,at_home,home,mother,1,3,0,no,no,no,yes,yes,yes,no,yes,3,4,3,1,1,3,8,yes 375 | MS,F,17,R,GT3,T,1,2,other,other,course,mother,1,1,0,no,no,no,yes,yes,yes,yes,no,3,5,5,1,3,1,14,no 376 | MS,F,18,R,LE3,T,4,4,other,other,reputation,mother,2,3,0,no,no,no,no,yes,yes,yes,no,5,4,4,1,1,1,0,yes 377 | MS,F,18,R,GT3,T,1,1,other,other,home,mother,4,3,0,no,no,no,no,yes,yes,yes,no,4,3,2,1,2,4,2,yes 378 | MS,F,20,U,GT3,T,4,2,health,other,course,other,2,3,2,no,yes,yes,no,no,yes,yes,yes,5,4,3,1,1,3,4,yes 379 | MS,F,18,R,LE3,T,4,4,teacher,services,course,mother,1,2,0,no,no,yes,yes,yes,yes,yes,no,5,4,3,3,4,2,4,yes 380 | MS,F,18,U,GT3,T,3,3,other,other,home,mother,1,2,0,no,no,yes,no,yes,yes,yes,yes,4,1,3,1,2,1,0,yes 381 | MS,F,17,R,GT3,T,3,1,at_home,other,reputation,mother,1,2,0,no,yes,yes,yes,no,yes,yes,no,4,5,4,2,3,1,17,yes 382 | MS,M,18,U,GT3,T,4,4,teacher,teacher,home,father,1,2,0,no,no,yes,yes,no,yes,yes,no,3,2,4,1,4,2,4,yes 383 | MS,M,18,R,GT3,T,2,1,other,other,other,mother,2,1,0,no,no,no,yes,no,yes,yes,yes,4,4,3,1,3,5,5,no 384 | MS,M,17,U,GT3,T,2,3,other,services,home,father,2,2,0,no,no,no,yes,yes,yes,yes,no,4,4,3,1,1,3,2,yes 385 | MS,M,19,R,GT3,T,1,1,other,services,other,mother,2,1,1,no,no,no,no,yes,yes,no,no,4,3,2,1,3,5,0,no 386 | MS,M,18,R,GT3,T,4,2,other,other,home,father,2,1,1,no,no,yes,no,yes,yes,no,no,5,4,3,4,3,3,14,no 387 | MS,F,18,R,GT3,T,2,2,at_home,other,other,mother,2,3,0,no,no,yes,no,yes,yes,no,no,5,3,3,1,3,4,2,yes 388 | MS,F,18,R,GT3,T,4,4,teacher,at_home,reputation,mother,3,1,0,no,yes,yes,yes,yes,yes,yes,yes,4,4,3,2,2,5,7,no 389 | MS,F,19,R,GT3,T,2,3,services,other,course,mother,1,3,1,no,no,no,yes,no,yes,yes,no,5,4,2,1,2,5,0,no 390 | MS,F,18,U,LE3,T,3,1,teacher,services,course,mother,1,2,0,no,yes,yes,no,yes,yes,yes,no,4,3,4,1,1,1,0,no 391 | MS,F,18,U,GT3,T,1,1,other,other,course,mother,2,2,1,no,no,no,yes,yes,yes,no,no,1,1,1,1,1,5,0,no 392 | MS,M,20,U,LE3,A,2,2,services,services,course,other,1,2,2,no,yes,yes,no,yes,yes,no,no,5,5,4,4,5,4,11,no 393 | MS,M,17,U,LE3,T,3,1,services,services,course,mother,2,1,0,no,no,no,no,no,yes,yes,no,2,4,5,3,4,2,3,yes 394 | MS,M,21,R,GT3,T,1,1,other,other,course,other,1,1,3,no,no,no,no,no,yes,no,no,5,5,3,3,3,3,3,no 395 | MS,M,18,R,LE3,T,3,2,services,other,course,mother,3,1,0,no,no,no,no,no,yes,yes,no,4,4,1,3,4,5,0,yes 396 | MS,M,19,U,LE3,T,1,1,other,at_home,course,father,1,1,0,no,no,no,no,yes,yes,yes,no,3,2,3,3,3,5,5,no 397 | 
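The CSV above is the raw dataset described by the attribute list in the project README. As a minimal, illustrative sketch (not part of the original repository), the snippet below shows one way the file could be loaded and its class balance and categorical columns inspected with pandas; it assumes `student-data.csv` sits in the working directory and mirrors the yes/no and dummy-variable encoding used later in the notebook.

```python
import pandas as pd

# Load the student data (assumes student-data.csv is in the working directory)
student_data = pd.read_csv("student-data.csv")
print(student_data.shape)                     # (395, 31): 30 feature columns plus 'passed'

# Class balance of the target label (265 'yes' vs 130 'no' per the notebook output)
print(student_data["passed"].value_counts())

# Separate features from the target and encode non-numeric columns:
# pure yes/no columns are mapped to 1/0, and multi-valued categorical columns
# such as 'Mjob' or 'reason' are expanded into dummy (one-hot) columns.
X_all = student_data.drop("passed", axis=1)
y_all = student_data["passed"]

binary_cols = [col for col in X_all.columns
               if X_all[col].dtype == object and set(X_all[col].unique()) <= {"yes", "no"}]
X_all[binary_cols] = (X_all[binary_cols] == "yes").astype(int)
X_all = pd.get_dummies(X_all)
print(X_all.shape)                            # 48 encoded feature columns, as in the notebook
```

The 48-column figure matches the processed feature count reported by the notebook's own `preprocess_features` routine further below.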
-------------------------------------------------------------------------------- /projects/student_intervention/student_intervention.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Machine Learning Engineer Nanodegree\n", 8 | "## Supervised Learning\n", 9 | "## Project 2: Building a Student Intervention System" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Welcome to the second project of the Machine Learning Engineer Nanodegree! In this notebook, some template code has already been provided for you, and it will be your job to implement the additional functionality necessary to successfully complete this project. Sections that begin with **'Implementation'** in the header indicate that the following block of code will require additional functionality which you must provide. Instructions will be provided for each section and the specifics of the implementation are marked in the code block with a `'TODO'` statement. Please be sure to read the instructions carefully!\n", 17 | "\n", 18 | "In addition to implementing code, there will be questions that you must answer which relate to the project and your implementation. Each section where you will answer a question is preceded by a **'Question X'** header. Carefully read each question and provide thorough answers in the following text boxes that begin with **'Answer:'**. Your project submission will be evaluated based on your answers to each of the questions and the implementation you provide. \n", 19 | "\n", 20 | ">**Note:** Code and Markdown cells can be executed using the **Shift + Enter** keyboard shortcut. In addition, Markdown cells can typically be edited by double-clicking the cell to enter edit mode." 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "### Question 1 - Classification vs. Regression\n", 28 | "*Your goal for this project is to identify students who might need early intervention before they fail to graduate. Which type of supervised learning problem is this, classification or regression? Why?*" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "**Answer: **\n", 36 | "\n", 37 | "We want to identify students who might need early intervention before they fail to graduate, so we have to separate them into two classes based on whether they are likely to pass or fail. This is a classification problem because we are predicting discrete labels instead of a continuous output." 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Exploring the Data\n", 45 | "Run the code cell below to load necessary Python libraries and load the student data. Note that the last column from this dataset, `'passed'`, will be our target label (whether the student graduated or didn't graduate). All other columns are features about each student."
46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 1, 51 | "metadata": { 52 | "collapsed": false 53 | }, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "Student data read successfully!\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "# Import libraries\n", 65 | "import numpy as np\n", 66 | "import pandas as pd\n", 67 | "from time import time\n", 68 | "from sklearn.metrics import f1_score\n", 69 | "\n", 70 | "# Read student data\n", 71 | "student_data = pd.read_csv(\"student-data.csv\")\n", 72 | "print \"Student data read successfully!\"" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 2, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/html": [ 85 | "
\n", 86 | "\n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | "
schoolsexageaddressfamsizePstatusMeduFeduMjobFjob...internetromanticfamrelfreetimegooutDalcWalchealthabsencespassed
0GPF18UGT3A44at_hometeacher...nono4341136no
1GPF17UGT3T11at_homeother...yesno5331134no
2GPF15ULE3T11at_homeother...yesno43223310yes
3GPF15UGT3T42healthservices...yesyes3221152yes
4GPF16UGT3T33otherother...nono4321254yes
\n", 236 | "

5 rows × 31 columns

\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " school sex age address famsize Pstatus Medu Fedu Mjob Fjob \\\n", 241 | "0 GP F 18 U GT3 A 4 4 at_home teacher \n", 242 | "1 GP F 17 U GT3 T 1 1 at_home other \n", 243 | "2 GP F 15 U LE3 T 1 1 at_home other \n", 244 | "3 GP F 15 U GT3 T 4 2 health services \n", 245 | "4 GP F 16 U GT3 T 3 3 other other \n", 246 | "\n", 247 | " ... internet romantic famrel freetime goout Dalc Walc health absences \\\n", 248 | "0 ... no no 4 3 4 1 1 3 6 \n", 249 | "1 ... yes no 5 3 3 1 1 3 4 \n", 250 | "2 ... yes no 4 3 2 2 3 3 10 \n", 251 | "3 ... yes yes 3 2 2 1 1 5 2 \n", 252 | "4 ... no no 4 3 2 1 2 5 4 \n", 253 | "\n", 254 | " passed \n", 255 | "0 no \n", 256 | "1 no \n", 257 | "2 yes \n", 258 | "3 yes \n", 259 | "4 yes \n", 260 | "\n", 261 | "[5 rows x 31 columns]" 262 | ] 263 | }, 264 | "execution_count": 2, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "student_data.head()\n", 271 | "\n" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 51, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/plain": [ 284 | "yes 265\n", 285 | "no 130\n", 286 | "Name: passed, dtype: int64" 287 | ] 288 | }, 289 | "execution_count": 51, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "student_data[\"passed\"].value_counts()" 296 | ] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "metadata": {}, 301 | "source": [ 302 | "### Implementation: Data Exploration\n", 303 | "Let's begin by investigating the dataset to determine how many students we have information on, and learn about the graduation rate among these students. In the code cell below, you will need to compute the following:\n", 304 | "- The total number of students, `n_students`.\n", 305 | "- The total number of features for each student, `n_features`.\n", 306 | "- The number of those students who passed, `n_passed`.\n", 307 | "- The number of those students who failed, `n_failed`.\n", 308 | "- The graduation rate of the class, `grad_rate`, in percent (%).\n" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 3, 314 | "metadata": { 315 | "collapsed": false 316 | }, 317 | "outputs": [ 318 | { 319 | "name": "stdout", 320 | "output_type": "stream", 321 | "text": [ 322 | "Total number of students: 395\n", 323 | "Number of features: 30\n", 324 | "Number of students who passed: 265\n", 325 | "Number of students who failed: 130\n", 326 | "Graduation rate of the class: 67.09%\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "# TODO: Calculate number of students\n", 332 | "n_students = student_data.shape[0]\n", 333 | "\n", 334 | "# TODO: Calculate number of features\n", 335 | "n_features = student_data.shape[1] - 1\n", 336 | "\n", 337 | "# TODO: Calculate passing students\n", 338 | "n_passed = student_data[\"passed\"].value_counts()[\"yes\"]\n", 339 | "\n", 340 | "# TODO: Calculate failing students\n", 341 | "n_failed = student_data[\"passed\"].value_counts()[\"no\"]\n", 342 | "\n", 343 | "# TODO: Calculate graduation rate\n", 344 | "grad_rate = (265/395.0)*100\n", 345 | "\n", 346 | "# Print the results\n", 347 | "print \"Total number of students: {}\".format(n_students)\n", 348 | "print \"Number of features: {}\".format(n_features)\n", 349 | "print \"Number of students who passed: {}\".format(n_passed)\n", 350 | "print \"Number of students who failed: {}\".format(n_failed)\n", 351 | "print \"Graduation rate 
of the class: {:.2f}%\".format(grad_rate)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "## Preparing the Data\n", 359 | "In this section, we will prepare the data for modeling, training and testing.\n", 360 | "\n", 361 | "### Identify feature and target columns\n", 362 | "It is often the case that the data you obtain contains non-numeric features. This can be a problem, as most machine learning algorithms expect numeric data to perform computations with.\n", 363 | "\n", 364 | "Run the code cell below to separate the student data into feature and target columns to see if any features are non-numeric." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 53, 370 | "metadata": { 371 | "collapsed": false 372 | }, 373 | "outputs": [ 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "Feature columns:\n", 379 | "['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu', 'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences']\n", 380 | "\n", 381 | "Target column: passed\n", 382 | "\n", 383 | "Feature values:\n", 384 | " school sex age address famsize Pstatus Medu Fedu Mjob Fjob \\\n", 385 | "0 GP F 18 U GT3 A 4 4 at_home teacher \n", 386 | "1 GP F 17 U GT3 T 1 1 at_home other \n", 387 | "2 GP F 15 U LE3 T 1 1 at_home other \n", 388 | "3 GP F 15 U GT3 T 4 2 health services \n", 389 | "4 GP F 16 U GT3 T 3 3 other other \n", 390 | "\n", 391 | " ... higher internet romantic famrel freetime goout Dalc Walc health \\\n", 392 | "0 ... yes no no 4 3 4 1 1 3 \n", 393 | "1 ... yes yes no 5 3 3 1 1 3 \n", 394 | "2 ... yes yes no 4 3 2 2 3 3 \n", 395 | "3 ... yes yes yes 3 2 2 1 1 5 \n", 396 | "4 ... yes no no 4 3 2 1 2 5 \n", 397 | "\n", 398 | " absences \n", 399 | "0 6 \n", 400 | "1 4 \n", 401 | "2 10 \n", 402 | "3 2 \n", 403 | "4 4 \n", 404 | "\n", 405 | "[5 rows x 30 columns]\n" 406 | ] 407 | } 408 | ], 409 | "source": [ 410 | "# Extract feature columns\n", 411 | "feature_cols = list(student_data.columns[:-1])\n", 412 | "\n", 413 | "# Extract target column 'passed'\n", 414 | "target_col = student_data.columns[-1] \n", 415 | "\n", 416 | "# Show the list of columns\n", 417 | "print \"Feature columns:\\n{}\".format(feature_cols)\n", 418 | "print \"\\nTarget column: {}\".format(target_col)\n", 419 | "\n", 420 | "# Separate the data into feature data and target data (X_all and y_all, respectively)\n", 421 | "X_all = student_data[feature_cols]\n", 422 | "y_all = student_data[target_col]\n", 423 | "\n", 424 | "# Show the feature information by printing the first five rows\n", 425 | "print \"\\nFeature values:\"\n", 426 | "print X_all.head()" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "### Preprocess Feature Columns\n", 434 | "\n", 435 | "As you can see, there are several non-numeric columns that need to be converted! Many of them are simply `yes`/`no`, e.g. `internet`. These can be reasonably converted into `1`/`0` (binary) values.\n", 436 | "\n", 437 | "Other columns, like `Mjob` and `Fjob`, have more than two values, and are known as _categorical variables_. The recommended way to handle such a column is to create as many columns as possible values (e.g. 
`Fjob_teacher`, `Fjob_other`, `Fjob_services`, etc.), and assign a `1` to one of them and `0` to all others.\n", 438 | "\n", 439 | "These generated columns are sometimes called _dummy variables_, and we will use the [`pandas.get_dummies()`](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html?highlight=get_dummies#pandas.get_dummies) function to perform this transformation. Run the code cell below to perform the preprocessing routine discussed in this section." 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 54, 445 | "metadata": { 446 | "collapsed": false 447 | }, 448 | "outputs": [ 449 | { 450 | "name": "stdout", 451 | "output_type": "stream", 452 | "text": [ 453 | "Processed feature columns (48 total features):\n", 454 | "['school_GP', 'school_MS', 'sex_F', 'sex_M', 'age', 'address_R', 'address_U', 'famsize_GT3', 'famsize_LE3', 'Pstatus_A', 'Pstatus_T', 'Medu', 'Fedu', 'Mjob_at_home', 'Mjob_health', 'Mjob_other', 'Mjob_services', 'Mjob_teacher', 'Fjob_at_home', 'Fjob_health', 'Fjob_other', 'Fjob_services', 'Fjob_teacher', 'reason_course', 'reason_home', 'reason_other', 'reason_reputation', 'guardian_father', 'guardian_mother', 'guardian_other', 'traveltime', 'studytime', 'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences']\n" 455 | ] 456 | } 457 | ], 458 | "source": [ 459 | "def preprocess_features(X):\n", 460 | " ''' Preprocesses the student data and converts non-numeric binary variables into\n", 461 | " binary (0/1) variables. Converts categorical variables into dummy variables. '''\n", 462 | " \n", 463 | " # Initialize new output DataFrame\n", 464 | " output = pd.DataFrame(index = X.index)\n", 465 | "\n", 466 | " # Investigate each feature column for the data\n", 467 | " for col, col_data in X.iteritems():\n", 468 | " \n", 469 | " # If data type is non-numeric, replace all yes/no values with 1/0\n", 470 | " if col_data.dtype == object:\n", 471 | " col_data = col_data.replace(['yes', 'no'], [1, 0])\n", 472 | "\n", 473 | " # If data type is categorical, convert to dummy variables\n", 474 | " if col_data.dtype == object:\n", 475 | " # Example: 'school' => 'school_GP' and 'school_MS'\n", 476 | " col_data = pd.get_dummies(col_data, prefix = col) \n", 477 | " \n", 478 | " # Collect the revised columns\n", 479 | " output = output.join(col_data)\n", 480 | " \n", 481 | " return output\n", 482 | "\n", 483 | "X_all = preprocess_features(X_all)\n", 484 | "print \"Processed feature columns ({} total features):\\n{}\".format(len(X_all.columns), list(X_all.columns))" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "### Implementation: Training and Testing Data Split\n", 492 | "So far, we have converted all _categorical_ features into numeric values. For the next step, we split the data (both features and corresponding labels) into training and test sets. In the following code cell below, you will need to implement the following:\n", 493 | "- Randomly shuffle and split the data (`X_all`, `y_all`) into training and testing subsets.\n", 494 | " - Use 300 training points (approximately 75%) and 95 testing points (approximately 25%).\n", 495 | " - Set a `random_state` for the function(s) you use, if provided.\n", 496 | " - Store the results in `X_train`, `X_test`, `y_train`, and `y_test`." 
497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 68, 502 | "metadata": { 503 | "collapsed": false 504 | }, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "Training set has 300 samples.\n", 511 | "Testing set has 95 samples.\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "# TODO: Import any additional functionality you may need here\n", 517 | "\n", 518 | "from sklearn.cross_validation import train_test_split\n", 519 | "\n", 520 | "# TODO: Set the number of training points\n", 521 | "num_train = 300\n", 522 | "\n", 523 | "# Set the number of testing points\n", 524 | "num_test = X_all.shape[0] - num_train\n", 525 | "\n", 526 | "# TODO: Shuffle and split the dataset into the number of training and testing points above\n", 527 | "X_train,X_test,y_train, y_test = train_test_split(X_all,y_all,test_size = num_test, random_state = 0)\n", 528 | "\n", 529 | "# Show the results of the split\n", 530 | "print \"Training set has {} samples.\".format(X_train.shape[0])\n", 531 | "print \"Testing set has {} samples.\".format(X_test.shape[0])" 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "metadata": {}, 537 | "source": [ 538 | "## Training and Evaluating Models\n", 539 | "In this section, you will choose 3 supervised learning models that are appropriate for this problem and available in `scikit-learn`. You will first discuss the reasoning behind choosing these three models by considering what you know about the data and each model's strengths and weaknesses. You will then fit the model to varying sizes of training data (100 data points, 200 data points, and 300 data points) and measure the F1 score. You will need to produce three tables (one for each model) that show the training set size, training time, prediction time, F1 score on the training set, and F1 score on the testing set." 540 | ] 541 | }, 542 | { 543 | "cell_type": "markdown", 544 | "metadata": {}, 545 | "source": [ 546 | "### Question 2 - Model Application\n", 547 | "*List three supervised learning models that are appropriate for this problem. What are the general applications of each model? What are their strengths and weaknesses? Given what you know about the data, why did you choose these models to be applied?*" 548 | ] 549 | }, 550 | { 551 | "cell_type": "markdown", 552 | "metadata": {}, 553 | "source": [ 554 | "**Answer: **\n", 555 | "\n", 556 | "The three supervised learning models that I've chosen are:\n", 557 | "1. Decision Trees\n", 558 | "2. Support Vector Machines\n", 559 | "3. K-Nearest Neighbors\n", 560 | "\n", 561 | "**1. Decision Trees:**\n", 562 | "\n", 563 | "Decision Trees are widely used for both classification and regression across several industries, including medicine (e.g. classifying diseases from patient features), biomedical research, financial analysis (fraud detection, credit default prediction), astronomy, and social media (predicting engagement and ad clicks), largely because of their interpretability and versatility.\n", 564 | "\n", 565 | "* Strengths:\n", 566 | " 1. Decision Trees are interpretable and easy to visualize.\n", 567 | " 2. Decision Trees can handle both categorical and numerical data.\n", 568 | " 3. 
All else being equal, decision trees prefer shorter trees to longer trees by splitting on the \"best features\" (using information gain or the gini impurity index), so it's easy to understand which features in a dataset are the most important\n", 569 | "\n", 570 | "\n", 571 | "* Weaknesses :\n", 572 | "\n", 573 | " 1. Decision trees can grow very large as the number of instances and features increases\n", 574 | " 2. Decision trees overfit very easily as they pick up subtle variances in the data set. However, over-fitting can be minimized by tuning the maximum depth of the tree and the minimum number of samples to split per node, and by pruning the tree after it is created. Random forests, another model based on decision trees, are incredibly popular as they reduce error by ensembling over many decision trees.\n", 575 | "\n", 576 | "* Reasons for choosing this model : \n", 577 | " 1. This dataset has many features, and a problem like predicting which students need intervention is unlikely to be a completely linear relationship, as many details influence a student's learning.\n", 578 | " 2. Most features in this data set are binary, which is easy for a decision tree to handle with conditionals, and the resulting tree will be easier to interpret and thus to turn into a course of action that improves a student's learning.\n", 579 | " \n", 580 | "\n", 581 | "** 2. Support Vector Machine :** \n", 582 | "\n", 583 | "Support vector machines classify data by finding the maximum margin hyperplane that separates the class labels. Like the other two models (decision trees and K-nearest neighbors) it is very popular and is used in industry for classification and regression tasks. Support Vector Machines have been successfully used on high dimensional data such as genetic data (protein structure prediction), music (song genre classification, music retrieval), image classification (histogram based), image retrieval, etc.\n", 584 | "\n", 585 | "* Strengths :\n", 586 | " 1. As a Support Vector Machine tries to find the separating hyperplane that has the maximum distance between the classes, it's not prone to overfitting.\n", 587 | " 2. A linear SVM produces a line as the decision boundary, but SVM is also effective with high dimensional data by using the kernel trick (mapping the data points to higher dimensional spaces to find the appropriate class labels) \n", 588 | " \n", 589 | "* Weaknesses :\n", 590 | " 1. Performance depends on the choice of kernel. Large data sets may take a lot of time to train.\n", 591 | " 2. SVM works really well for datasets that have a clear margin of separation, but performs poorly on noisy datasets.\n", 592 | " \n", 593 | "* Reasons for choosing this model :\n", 594 | " 1. As there are many features in this dataset, if this dataset has a good margin of separation SVM would be able to pick it up. \n", 595 | " 2. SVM also works well with high dimensional data via the kernel trick, and this dataset has many features.\n", 596 | "\n", 597 | "\n", 598 | "**3. K Nearest Neighbor** :\n", 599 | "\n", 600 | "K nearest neighbor is a method of 'instance based learning'/lazy learning, as the computation begins only when we start predicting; it's also a non-parametric method. K nearest neighbor tries to find similar instances for each query and predicts based on their majority vote (for classification) or average (for regression). It can be used in many different cases, including content retrieval for photos, videos and text, recommending products, etc. 
It's one of the most popular methods in data mining.\n", 601 | "\n", 602 | "* Strengths :\n", 603 | " 1. It does not make any assumptions about the distribution of the data. Rather, it simply tries to find the most similar k neighbors for each query based on some distance metric/similarity measurement, and uses the whole data set for each query.\n", 604 | " 2. After choosing the number of neighbors k and the similarity metric d, the algorithm is simple to implement in production.\n", 605 | " 3. It's possible to weight the contribution of the neighbors when predicting labels (weighting the nearest neighbors highest and the distant ones lower) for higher accuracy. For datasets that don't follow a general pattern, K nearest neighbor is often a really good choice.\n", 606 | " \n", 607 | "* Weaknesses: \n", 608 | " 1. K-nearest neighbor requires the entire dataset to be preserved in memory. Unlike a parametric model like linear regression, where we just have to train once to find the parameters and can then throw away the data set, we cannot discard the data, and this can make the space requirement incredibly high.\n", 609 | " 2. It's important to use domain knowledge and grid search techniques to find a good similarity measure and a good k; in practice there can be many variations of distance metrics which can yield different performances.\n", 610 | " 3. It's less interpretable than models like decision trees, where we can understand which features are the strongest.\n", 611 | " \n", 612 | "* Reasons for choosing this model:\n", 613 | " 1. Students who are failing may have similar patterns, such as a similar amount of time invested in recreation over studying, a similar number of family members and similar income, or a similar geographic region, which K nearest neighbor can deal with easily.\n", 614 | " " 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "### Setup\n", 622 | "Run the code cell below to initialize three helper functions which you can use for training and testing the three supervised learning models you've chosen above. The functions are as follows:\n", 623 | "- `train_classifier` - takes as input a classifier and training data and fits the classifier to the data.\n", 624 | "- `predict_labels` - takes as input a fit classifier, features, and a target labeling and makes predictions using the F1 score.\n", 625 | "- `train_predict` - takes as input a classifier, and the training and testing data, and performs `train_classifier` and `predict_labels`.\n", 626 | " - This function will report the F1 score for both the training and testing data separately." 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 83, 632 | "metadata": { 633 | "collapsed": false 634 | }, 635 | "outputs": [], 636 | "source": [ 637 | "def train_classifier(clf, X_train, y_train):\n", 638 | " ''' Fits a classifier to the training data. '''\n", 639 | " \n", 640 | " # Start the clock, train the classifier, then stop the clock\n", 641 | " start = time()\n", 642 | " clf.fit(X_train, y_train)\n", 643 | " end = time()\n", 644 | " \n", 645 | " # Print the results\n", 646 | " print \"Trained model in {:.4f} seconds\".format(end - start)\n", 647 | "\n", 648 | " \n", 649 | "def predict_labels(clf, features, target):\n", 650 | " ''' Makes predictions using a fit classifier based on F1 score. 
'''\n", 651 | " \n", 652 | " # Start the clock, make predictions, then stop the clock\n", 653 | " start = time()\n", 654 | " y_pred = clf.predict(features)\n", 655 | " end = time()\n", 656 | " \n", 657 | " # Print and return results\n", 658 | " print \"Made predictions in {:.4f} seconds.\".format(end - start)\n", 659 | " return f1_score(target.values, y_pred, pos_label='yes')\n", 660 | "\n", 661 | "\n", 662 | "def train_predict(clf, X_train, y_train, X_test, y_test):\n", 663 | " ''' Train and predict using a classifer based on F1 score. '''\n", 664 | " \n", 665 | " # Indicate the classifier and the training set size\n", 666 | " print \"Training a {} using a training set size of {}. . .\".format(clf.__class__.__name__, len(X_train))\n", 667 | " \n", 668 | " # Train the classifier\n", 669 | " train_classifier(clf, X_train, y_train)\n", 670 | " \n", 671 | " # Print the results of prediction for both training and testing\n", 672 | " print \"F1 score for training set: {:.4f}.\".format(predict_labels(clf, X_train, y_train))\n", 673 | " print \"F1 score for test set: {:.4f}.\".format(predict_labels(clf, X_test, y_test))\n", 674 | " print \"\\n\"" 675 | ] 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": {}, 680 | "source": [ 681 | "### Implementation: Model Performance Metrics\n", 682 | "With the predefined functions above, you will now import the three supervised learning models of your choice and run the `train_predict` function for each one. Remember that you will need to train and predict on each classifier for three different training set sizes: 100, 200, and 300. Hence, you should expect to have 9 different outputs below — 3 for each model using the varying training set sizes. In the following code cell, you will need to implement the following:\n", 683 | "- Import the three supervised learning models you've discussed in the previous section.\n", 684 | "- Initialize the three models and store them in `clf_A`, `clf_B`, and `clf_C`.\n", 685 | " - Use a `random_state` for each model you use, if provided.\n", 686 | " - **Note:** Use the default settings for each model — you will tune one specific model in a later section.\n", 687 | "- Create the different training set sizes to be used to train each model.\n", 688 | " - *Do not reshuffle and resplit the data! The new training points should be drawn from `X_train` and `y_train`.*\n", 689 | "- Fit each model with each training set size and make predictions on the test set (9 in total). \n", 690 | "**Note:** Three tables are provided after the following code cell which can be used to store your results." 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": 84, 696 | "metadata": { 697 | "collapsed": false 698 | }, 699 | "outputs": [ 700 | { 701 | "name": "stdout", 702 | "output_type": "stream", 703 | "text": [ 704 | "Training a DecisionTreeClassifier using a training set size of 100. . .\n", 705 | "Trained model in 0.0050 seconds\n", 706 | "Made predictions in 0.0010 seconds.\n", 707 | "F1 score for training set: 1.0000.\n", 708 | "Made predictions in 0.0010 seconds.\n", 709 | "F1 score for test set: 0.6942.\n", 710 | "\n", 711 | "\n", 712 | "Training a DecisionTreeClassifier using a training set size of 200. . 
.\n", 713 | "Trained model in 0.0050 seconds\n", 714 | "Made predictions in 0.0010 seconds.\n", 715 | "F1 score for training set: 1.0000.\n", 716 | "Made predictions in 0.0000 seconds.\n", 717 | "F1 score for test set: 0.7132.\n", 718 | "\n", 719 | "\n", 720 | "Training a DecisionTreeClassifier using a training set size of 300. . .\n", 721 | "Trained model in 0.0000 seconds\n", 722 | "Made predictions in 0.0000 seconds.\n", 723 | "F1 score for training set: 1.0000.\n", 724 | "Made predictions in 0.0000 seconds.\n", 725 | "F1 score for test set: 0.7167.\n", 726 | "\n", 727 | "\n", 728 | "\n", 729 | "\n", 730 | "\n", 731 | "\n", 732 | "Training a SVC using a training set size of 100. . .\n", 733 | "Trained model in 0.0000 seconds\n", 734 | "Made predictions in 0.0000 seconds.\n", 735 | "F1 score for training set: 0.8591.\n", 736 | "Made predictions in 0.0000 seconds.\n", 737 | "F1 score for test set: 0.7838.\n", 738 | "\n", 739 | "\n", 740 | "Training a SVC using a training set size of 200. . .\n", 741 | "Trained model in 0.0180 seconds\n", 742 | "Made predictions in 0.0000 seconds.\n", 743 | "F1 score for training set: 0.8693.\n", 744 | "Made predictions in 0.0150 seconds.\n", 745 | "F1 score for test set: 0.7755.\n", 746 | "\n", 747 | "\n", 748 | "Training a SVC using a training set size of 300. . .\n", 749 | "Trained model in 0.0310 seconds\n", 750 | "Made predictions in 0.0160 seconds.\n", 751 | "F1 score for training set: 0.8692.\n", 752 | "Made predictions in 0.0000 seconds.\n", 753 | "F1 score for test set: 0.7586.\n", 754 | "\n", 755 | "\n", 756 | "\n", 757 | "\n", 758 | "\n", 759 | "\n", 760 | "Training a KNeighborsClassifier using a training set size of 100. . .\n", 761 | "Trained model in 0.0000 seconds\n", 762 | "Made predictions in 0.0000 seconds.\n", 763 | "F1 score for training set: 0.7972.\n", 764 | "Made predictions in 0.0000 seconds.\n", 765 | "F1 score for test set: 0.7068.\n", 766 | "\n", 767 | "\n", 768 | "Training a KNeighborsClassifier using a training set size of 200. . .\n", 769 | "Trained model in 0.0000 seconds\n", 770 | "Made predictions in 0.0220 seconds.\n", 771 | "F1 score for training set: 0.8571.\n", 772 | "Made predictions in 0.0070 seconds.\n", 773 | "F1 score for test set: 0.7121.\n", 774 | "\n", 775 | "\n", 776 | "Training a KNeighborsClassifier using a training set size of 300. . 
.\n", 777 | "Trained model in 0.0030 seconds\n", 778 | "Made predictions in 0.0250 seconds.\n", 779 | "F1 score for training set: 0.8722.\n", 780 | "Made predictions in 0.0000 seconds.\n", 781 | "F1 score for test set: 0.7482.\n", 782 | "\n", 783 | "\n" 784 | ] 785 | } 786 | ], 787 | "source": [ 788 | "# TODO: Import the three supervised learning models from sklearn\n", 789 | "from sklearn.tree import DecisionTreeClassifier\n", 790 | "from sklearn.svm import SVC\n", 791 | "from sklearn.neighbors import KNeighborsClassifier \n", 792 | "\n", 793 | "# TODO: Initialize the three models\n", 794 | "clf_A = DecisionTreeClassifier(random_state =0)\n", 795 | "clf_B = SVC(random_state = 0)\n", 796 | "clf_C = KNeighborsClassifier()\n", 797 | "\n", 798 | "\n", 799 | "training_sizes = [100,200,300]\n", 800 | "\n", 801 | "# TODO: Execute the 'train_predict' function for each classifier and each training set size\n", 802 | "\n", 803 | "# Decision Tree\n", 804 | "for size in training_sizes:\n", 805 | " train_predict(clf_A, X_train[:size], y_train[:size], X_test, y_test)\n", 806 | " \n", 807 | "print \"\\n\\n\\n\"\n", 808 | " \n", 809 | "# Support Vector Machine \n", 810 | "for size in training_sizes:\n", 811 | " train_predict(clf_B, X_train[:size], y_train[:size], X_test, y_test)\n", 812 | " \n", 813 | "print \"\\n\\n\\n\"\n", 814 | " \n", 815 | "# K Neareset Neighbor Classifier\n", 816 | "for size in training_sizes:\n", 817 | " train_predict(clf_C, X_train[:size], y_train[:size], X_test, y_test)" 818 | ] 819 | }, 820 | { 821 | "cell_type": "markdown", 822 | "metadata": {}, 823 | "source": [ 824 | "### Tabular Results\n", 825 | "Edit the cell below to see how a table can be designed in [Markdown](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet#tables). You can record your results from above in the tables provided." 826 | ] 827 | }, 828 | { 829 | "cell_type": "markdown", 830 | "metadata": {}, 831 | "source": [ 832 | "** Classifer 1 - DecisionTreeClassifier?** \n", 833 | "\n", 834 | "| Training Set Size | Training Time | Prediction Time (test) | F1 Score (train) | F1 Score (test) |\n", 835 | "| :---------------: | :---------------------: | :--------------------: | :--------------: | :-------------: |\n", 836 | "| 100 |0.0050 seconds |0.0010 seconds. 
|1.0000 |0.6942 |\n", 837 | "| 200 |0.0050 seconds |0.0000 seconds |1.0000 |0.7132 |\n", 838 | "| 300 |0.0000 seconds |0.0000 seconds |1.0000 |0.7167 |\n", 839 | "\n", 840 | "\n", 841 | "\n", 842 | "\n", 843 | "** Classifer 2 - SVM?** \n", 844 | "\n", 845 | "| Training Set Size | Training Time | Prediction Time (test) | F1 Score (train) | F1 Score (test) |\n", 846 | "| :---------------: | :---------------------: | :--------------------: | :--------------: | :-------------: |\n", 847 | "| 100 |0.0000 seconds |0.0000 seconds |0.8591 |0.7838 |\n", 848 | "| 200 |0.0180 seconds |0.0150 seconds |0.8693 | 0.7755 |\n", 849 | "| 300 |0.0310 seconds |0.0000 seconds |0.8692 |0.7586 |\n", 850 | "\n", 851 | "\n", 852 | "** Classifer 3 - K-Nearest Neighbor ** \n", 853 | "\n", 854 | "| Training Set Size | Training Time | Prediction Time (test) | F1 Score (train) | F1 Score (test) |\n", 855 | "| :---------------: | :---------------------: | :--------------------: | :--------------: | :-------------: |\n", 856 | "| 100 |0.0000 seconds |0.0000 seconds |0.7972 |0.7068 |\n", 857 | "| 200 |0.0000 seconds |0.0070 seconds |0.8571 |0.7121 |\n", 858 | "| 300 | 0.0030 seconds |0.0000 seconds |0.8722 |0.7482 |\n", 859 | "\n", 860 | "\n" 861 | ] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "execution_count": 92, 866 | "metadata": { 867 | "collapsed": false 868 | }, 869 | "outputs": [ 870 | { 871 | "name": "stdout", 872 | "output_type": "stream", 873 | "text": [ 874 | "yes 206\n", 875 | "no 94\n", 876 | "Name: passed, dtype: int64\n", 877 | "yes 59\n", 878 | "no 36\n", 879 | "Name: passed, dtype: int64\n" 880 | ] 881 | } 882 | ], 883 | "source": [ 884 | "print y_train.value_counts()\n", 885 | "print y_test.value_counts()" 886 | ] 887 | }, 888 | { 889 | "cell_type": "code", 890 | "execution_count": 94, 891 | "metadata": { 892 | "collapsed": false 893 | }, 894 | "outputs": [ 895 | { 896 | "name": "stdout", 897 | "output_type": "stream", 898 | "text": [ 899 | "0.708033333333\n", 900 | "0.772633333333\n", 901 | "0.722366666667\n" 902 | ] 903 | } 904 | ], 905 | "source": [ 906 | "decision_tree_f1_average = (0.6942+0.7132+0.7167)/3.0\n", 907 | "svm_f1_average = (0.7838 + 0.7755 + 0.7586)/3.0\n", 908 | "k_nearest_f1_average = (0.7068+0.7121+0.7482)/3.0 \n", 909 | "\n", 910 | "print decision_tree_f1_average\n", 911 | "print svm_f1_average\n", 912 | "print k_nearest_f1_average" 913 | ] 914 | }, 915 | { 916 | "cell_type": "markdown", 917 | "metadata": {}, 918 | "source": [ 919 | "## Choosing the Best Model\n", 920 | "In this final section, you will choose from the three supervised learning models the *best* model to use on the student data. You will then perform a grid search optimization for the model over the entire training set (`X_train` and `y_train`) by tuning at least one parameter to improve upon the untuned model's F1 score. " 921 | ] 922 | }, 923 | { 924 | "cell_type": "markdown", 925 | "metadata": {}, 926 | "source": [ 927 | "### Question 3 - Chosing the Best Model\n", 928 | "*Based on the experiments you performed earlier, in one to two paragraphs, explain to the board of supervisors what single model you chose as the best model. Which model is generally the most appropriate based on the available data, limited resources, cost, and performance?*" 929 | ] 930 | }, 931 | { 932 | "cell_type": "markdown", 933 | "metadata": {}, 934 | "source": [ 935 | "**Answer: **\n", 936 | "\n", 937 | "The model I would choose as the best model is SVM.\n", 938 | "\n", 939 | "Reasons : \n", 940 | " 1. 
A DecisionTreeClassifier shows clear signs of overfitting. It fits the training data perfectly with an F1-score of 1, but performs worse on the testing data compared to both SVM and k-nearest neighbor. So a decision tree would clearly not be an appropriate model for this data set.\n", 941 | " 2. K-Nearest Neighbor actually shows quite stable performance over the training and testing data sets, and its scores steadily improved with more training data (possibly because it found more similar students for the query instances once it had more training instances). However, K-nearest's performance on the test data set is still poor compared to SVM.\n", 942 | " 3. SVM's average test score is 0.7726, beating both the decision tree (average F1 on the test set = 0.7080) and k-nearest neighbor (average F1 score 0.7223), so based on the scores SVM is the best choice. It's true that there are subtle differences in computation time for the training and testing phases, but for a small data set like this the differences are not that important." 943 | ] 944 | }, 945 | { 946 | "cell_type": "markdown", 947 | "metadata": {}, 948 | "source": [ 949 | "### Question 4 - Model in Layman's Terms\n", 950 | "*In one to two paragraphs, explain to the board of directors in layman's terms how the final model chosen is supposed to work. For example if you've chosen to use a decision tree or a support vector machine, how does the model go about making a prediction?*" 951 | ] 952 | }, 953 | { 954 | "cell_type": "markdown", 955 | "metadata": {}, 956 | "source": [ 957 | "**Answer: **\n", 958 | "\n", 959 | "The model that was chosen is called a Support Vector Machine, which is a linear separator. Intuitively, in the simplest case, we can imagine a 2D plane where we plot the data on the x and y axes along with the labels, and we want to separate the labels using a line. Assuming the labels are not overlapping, we can choose many lines for this task; however, a support vector machine will choose the \"maximum margin\" line, the line that has the biggest distance from the nearest points of both classes, i.e. the line which is actually in the 'middle'. We choose this line so that the model generalizes to test data and avoids overfitting, since a line too close to either of the classes can misclassify quickly.\n", 960 | "\n", 961 | "\n", 962 | "\n", 963 | "For higher dimensional data sets, instead of a line we map the data points to higher dimensions (with the 'kernel trick') and find the maximum margin hyperplane that separates the classes with as much gap as possible. For example, in the image below a line could not have separated the circular data in 2D, so the data has been mapped to 3D space where a clear separating hyperplane was found, and then the labels were used to classify the instances.\n", 964 | "\n", 965 | "\n", 966 | "\n", 967 | "\n", 968 | "\n", 969 | "For practical purposes, choosing a decision tree would have been more interpretable, but in this case it would have led to overfitting (as we have seen in the table), and intervening with a student who is actually doing well because of a bad model would have had negative consequences in that student's life. Choosing something like K-Nearest Neighbor would perhaps have been stable, but not as interpretable as decision trees. 
However, if we scale to millions of students, decision trees will also grow very large, and K-Nearest Neighbors would have to iterate over all the millions of students to find similar ones.\n", 970 | "\n", 971 | "On the other hand, support vector machines clearly showed the best performance so far, and SVM is a widely used algorithm in industry too, so SVM was chosen. Visualizing an SVM is not as easy as visualizing decision trees, but it has better performance." 972 | ] 973 | }, 974 | { 975 | "cell_type": "markdown", 976 | "metadata": {}, 977 | "source": [ 978 | "### Implementation: Model Tuning\n", 979 | "Fine tune the chosen model. Use grid search (`GridSearchCV`) with at least one important parameter tuned with at least 3 different values. You will need to use the entire training set for this. In the code cell below, you will need to implement the following:\n", 980 | "- Import [`sklearn.grid_search.GridSearchCV`](http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html) and [`sklearn.metrics.make_scorer`](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html).\n", 981 | "- Create a dictionary of parameters you wish to tune for the chosen model.\n", 982 | " - Example: `parameters = {'parameter' : [list of values]}`.\n", 983 | "- Initialize the classifier you've chosen and store it in `clf`.\n", 984 | "- Create the F1 scoring function using `make_scorer` and store it in `f1_scorer`.\n", 985 | " - Set the `pos_label` parameter to the correct value!\n", 986 | "- Perform grid search on the classifier `clf` using `f1_scorer` as the scoring method, and store it in `grid_obj`.\n", 987 | "- Fit the grid search object to the training data (`X_train`, `y_train`), and store it in `grid_obj`." 988 | ] 989 | }, 990 | { 991 | "cell_type": "code", 992 | "execution_count": 85, 993 | "metadata": { 994 | "collapsed": false 995 | }, 996 | "outputs": [ 997 | { 998 | "name": "stdout", 999 | "output_type": "stream", 1000 | "text": [ 1001 | "{'kernel': 'rbf', 'C': 1, 'verbose': False, 'probability': False, 'degree': 3, 'shrinking': True, 'max_iter': -1, 'decision_function_shape': None, 'random_state': None, 'tol': 0.001, 'cache_size': 200, 'coef0': 0.0, 'gamma': 'auto', 'class_weight': None}\n", 1002 | "Made predictions in 0.0250 seconds.\n", 1003 | "Tuned model has a training F1 score of 0.8692.\n", 1004 | "Made predictions in 0.0100 seconds.\n", 1005 | "Tuned model has a testing F1 score of 0.7586.\n" 1006 | ] 1007 | } 1008 | ], 1009 | "source": [ 1010 | "# TODO: Import 'GridSearchCV' and 'make_scorer'\n", 1011 | "\n", 1012 | "from sklearn.grid_search import GridSearchCV\n", 1013 | "from sklearn.metrics import make_scorer\n", 1014 | "\n", 1015 | "# TODO: Create the parameters list you wish to tune\n", 1016 | "parameters = {'kernel':('linear', 'poly','rbf'), 'C':[0.25,0.5,1, 10,50]}\n", 1017 | "\n", 1018 | "# TODO: Initialize the classifier\n", 1019 | "clf = SVC()\n", 1020 | "\n", 1021 | "# TODO: Make an f1 scoring function using 'make_scorer' \n", 1022 | "f1_scorer = make_scorer(f1_score,pos_label = \"yes\")\n", 1023 | "\n", 1024 | "# TODO: Perform grid search on the classifier using the f1_scorer as the scoring method\n", 1025 | "grid_obj = GridSearchCV(clf,param_grid = parameters,scoring = f1_scorer)\n", 1026 | "\n", 1027 | "# TODO: Fit the grid search object to the training data and find the optimal parameters\n", 1028 | "grid_obj.fit(X_train,y_train)\n", 1029 | "\n", 1030 | "# Get the estimator\n", 1031 | "clf = grid_obj.best_estimator_\n", 1032 | "print 
clf.get_params()\n", 1033 | "# Report the final F1 score for training and testing after parameter tuning\n", 1034 | "print \"Tuned model has a training F1 score of {:.4f}.\".format(predict_labels(clf, X_train, y_train))\n", 1035 | "print \"Tuned model has a testing F1 score of {:.4f}.\".format(predict_labels(clf, X_test, y_test))" 1036 | ] 1037 | }, 1038 | { 1039 | "cell_type": "markdown", 1040 | "metadata": {}, 1041 | "source": [ 1042 | "### Question 5 - Final F1 Score\n", 1043 | "*What is the final model's F1 score for training and testing? How does that score compare to the untuned model?*" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "markdown", 1048 | "metadata": {}, 1049 | "source": [ 1050 | "**Answer: **\n", 1051 | "\n", 1052 | "Final model's F1 score for training: 0.8692\n", 1053 | "Final model's F1 score for testing: 0.7586\n", 1054 | "\n", 1055 | "It shows no difference from the 300-training-point model above, but it does not perform worse either. The most probable reason is that grid search ended up choosing the default parameters despite being given more options. I tried to reduce the C parameter, but it chose the value 1 again, which is the default; despite being given more options for the kernel, it again chose the default \"rbf\" kernel, which suits non-linear datasets. The number of training points also does not vary, since the full training set of 300 points is used in both cases. \n", 1056 | "\n", 1057 | "So the grid search model gives similar performance to the untuned one. This data set is also not balanced: there are more students who passed than students who didn't, and perhaps that led to more noise in the data, which is why the SVM reaches only a 0.7586 F1-score on the test set, noticeably lower than the training score." 1058 | ] 1059 | }, 1060 | { 1061 | "cell_type": "markdown", 1062 | "metadata": {}, 1063 | "source": [ 1064 | "> **Note**: Once you have completed all of the code implementations and successfully answered each question above, you may finalize your work by exporting the iPython Notebook as an HTML document. You can do this by using the menu above and navigating to \n", 1065 | "**File -> Download as -> HTML (.html)**. Include the finished document along with this notebook as your submission." 
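Not part of the original notebook, but a natural follow-up to the class imbalance mentioned in the answer above: a minimal sketch (assuming the notebook's `X_train`/`y_train` and the same legacy `sklearn.cross_validation`/`sklearn.grid_search` modules used earlier) of re-running the grid search with stratified cross-validation folds and an optional `class_weight` setting, so that every fold keeps the roughly 2:1 passed/failed ratio.

```python
# Follow-up sketch (not in the original submission): cross-validate with stratified
# splits and let the SVC optionally reweight the minority class.
# Assumes the notebook's X_train / y_train and its legacy scikit-learn modules.
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import make_scorer, f1_score
from sklearn.svm import SVC

f1_scorer = make_scorer(f1_score, pos_label="yes")

# 10 stratified splits of the 300 training points; each keeps the "yes"/"no" ratio
cv_strategy = StratifiedShuffleSplit(y_train, n_iter=10, test_size=0.25, random_state=0)

parameters = {'kernel': ['linear', 'rbf'],
              'C': [0.25, 0.5, 1, 10, 50],
              'class_weight': [None, 'balanced']}  # 'balanced' needs sklearn >= 0.17; older versions use 'auto'

grid_obj = GridSearchCV(SVC(random_state=0), param_grid=parameters,
                        scoring=f1_scorer, cv=cv_strategy)
grid_obj.fit(X_train, y_train)

print grid_obj.best_params_
print "Best stratified CV F1 score: {:.4f}".format(grid_obj.best_score_)
```

Whether this actually lifts the test F1 above 0.7586 would need to be verified by re-running the notebook; the sketch only changes how the folds are drawn and which class weights are searched.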
1066 | ] 1067 | } 1068 | ], 1069 | "metadata": { 1070 | "kernelspec": { 1071 | "display_name": "Python 2", 1072 | "language": "python", 1073 | "name": "python2" 1074 | }, 1075 | "language_info": { 1076 | "codemirror_mode": { 1077 | "name": "ipython", 1078 | "version": 2 1079 | }, 1080 | "file_extension": ".py", 1081 | "mimetype": "text/x-python", 1082 | "name": "python", 1083 | "nbconvert_exporter": "python", 1084 | "pygments_lexer": "ipython2", 1085 | "version": "2.7.11" 1086 | } 1087 | }, 1088 | "nbformat": 4, 1089 | "nbformat_minor": 0 1090 | } 1091 | -------------------------------------------------------------------------------- /projects/student_intervention/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/student_intervention/table.png -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/README.md: -------------------------------------------------------------------------------- 1 | # Project 0: Introduction and Fundamentals 2 | ## Titanic Survival Exploration 3 | 4 | ### Install 5 | 6 | This project requires **Python 2.7** and the following Python libraries installed: 7 | 8 | - [NumPy](http://www.numpy.org/) 9 | - [Pandas](http://pandas.pydata.org) 10 | - [matplotlib](http://matplotlib.org/) 11 | - [scikit-learn](http://scikit-learn.org/stable/) 12 | 13 | You will also need to have software installed to run and execute an [iPython Notebook](http://ipython.org/notebook.html) 14 | 15 | ### Overview 16 | 17 | This is an optional exploratory project to see which variables are more important for predicting titanic survival. It's more of a 'hello world' project for machine learning. 18 | 19 | ![](gender.png) 20 | 21 | ### Code 22 | 23 | Template code is provided in the notebook `titanic_survival_exploration.ipynb` notebook file. Additional supporting code can be found in `titanic_visualizations.py`. 24 | 25 | ### Run 26 | 27 | In a terminal or command window, navigate to the top-level project directory `titanic_survival_exploration/` (that contains this README) and run one of the following commands: 28 | 29 | ```ipython notebook titanic_survival_exploration.ipynb``` 30 | ```jupyter notebook titanic_survival_exploration.ipynb``` 31 | 32 | This will open the iPython Notebook software and project file in your browser. 33 | 34 | ## Data 35 | 36 | The dataset used in this project is included as `titanic_data.csv`. This dataset is provided by Udacity and contains the following attributes: 37 | 38 | - `survival` ? Survival (0 = No; 1 = Yes) 39 | - `pclass` ? Passenger Class (1 = 1st; 2 = 2nd; 3 = 3rd) 40 | - `name` ? Name 41 | - `sex` ? Sex 42 | - `age` ? Age 43 | - `sibsp` ? Number of Siblings/Spouses Aboard 44 | - `parch` ? Number of Parents/Children Aboard 45 | - `ticket` ? Ticket Number 46 | - `fare` ? Passenger Fare 47 | - `cabin` ? Cabin 48 | - `embarked` ? 
Port of Embarkation (C = Cherbourg; Q = Queenstown; S = Southampton) -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/debug.log: -------------------------------------------------------------------------------- 1 | [0308/013815.059:ERROR:crash_report_database_win.cc(426)] unexpected header 2 | -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/gender.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/titanic_survival_exploration/gender.png -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/titanic_visualizations.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | 5 | def filter_data(data, condition): 6 | """ 7 | Remove elements that do not match the condition provided. 8 | Takes a data list as input and returns a filtered list. 9 | Conditions should be a list of strings of the following format: 10 | ' ' 11 | where the following operations are valid: >, <, >=, <=, ==, != 12 | 13 | Example: ["Sex == 'male'", 'Age < 18'] 14 | """ 15 | 16 | field, op, value = condition.split(" ") 17 | 18 | # convert value into number or strip excess quotes if string 19 | try: 20 | value = float(value) 21 | except: 22 | value = value.strip("\'\"") 23 | 24 | # get booleans for filtering 25 | if op == ">": 26 | matches = data[field] > value 27 | elif op == "<": 28 | matches = data[field] < value 29 | elif op == ">=": 30 | matches = data[field] >= value 31 | elif op == "<=": 32 | matches = data[field] <= value 33 | elif op == "==": 34 | matches = data[field] == value 35 | elif op == "!=": 36 | matches = data[field] != value 37 | else: # catch invalid operation codes 38 | raise Exception("Invalid comparison operator. Only >, <, >=, <=, ==, != allowed.") 39 | 40 | # filter data and outcomes 41 | data = data[matches].reset_index(drop = True) 42 | return data 43 | 44 | def survival_stats(data, outcomes, key, filters = []): 45 | """ 46 | Print out selected statistics regarding survival, given a feature of 47 | interest and any number of filters (including no filters) 48 | """ 49 | 50 | # Check that the key exists 51 | if key not in data.columns.values : 52 | print "'{}' is not a feature of the Titanic data. Did you spell something wrong?".format(key) 53 | return False 54 | 55 | # Return the function before visualizing if 'Cabin' or 'Ticket' 56 | # is selected: too many unique categories to display 57 | if(key == 'Cabin' or key == 'PassengerId' or key == 'Ticket'): 58 | print "'{}' has too many unique categories to display! 
Try a different feature.".format(key) 59 | return False 60 | 61 | # Merge data and outcomes into single dataframe 62 | all_data = pd.concat([data, outcomes], axis = 1) 63 | 64 | # Apply filters to data 65 | for condition in filters: 66 | all_data = filter_data(all_data, condition) 67 | 68 | # Create outcomes DataFrame 69 | all_data = all_data[[key, 'Survived']] 70 | 71 | # Create plotting figure 72 | plt.figure(figsize=(8,6)) 73 | 74 | # 'Numerical' features 75 | if(key == 'Age' or key == 'Fare'): 76 | 77 | # Remove NaN values from Age data 78 | all_data = all_data[~np.isnan(all_data[key])] 79 | 80 | # Divide the range of data into bins and count survival rates 81 | min_value = all_data[key].min() 82 | max_value = all_data[key].max() 83 | value_range = max_value - min_value 84 | 85 | # 'Fares' has larger range of values than 'Age' so create more bins 86 | if(key == 'Fare'): 87 | bins = np.arange(0, all_data['Fare'].max() + 20, 20) 88 | if(key == 'Age'): 89 | bins = np.arange(0, all_data['Age'].max() + 10, 10) 90 | 91 | # Overlay each bin's survival rates 92 | nonsurv_vals = all_data[all_data['Survived'] == 0][key].reset_index(drop = True) 93 | surv_vals = all_data[all_data['Survived'] == 1][key].reset_index(drop = True) 94 | plt.hist(nonsurv_vals, bins = bins, alpha = 0.6, 95 | color = 'red', label = 'Did not survive') 96 | plt.hist(surv_vals, bins = bins, alpha = 0.6, 97 | color = 'green', label = 'Survived') 98 | 99 | # Add legend to plot 100 | plt.xlim(0, bins.max()) 101 | plt.legend(framealpha = 0.8) 102 | 103 | # 'Categorical' features 104 | else: 105 | 106 | # Set the various categories 107 | if(key == 'Pclass'): 108 | values = np.arange(1,4) 109 | if(key == 'Parch' or key == 'SibSp'): 110 | values = np.arange(0,np.max(data[key]) + 1) 111 | if(key == 'Embarked'): 112 | values = ['C', 'Q', 'S'] 113 | if(key == 'Sex'): 114 | values = ['male', 'female'] 115 | 116 | # Create DataFrame containing categories and count of each 117 | frame = pd.DataFrame(index = np.arange(len(values)), columns=(key,'Survived','NSurvived')) 118 | for i, value in enumerate(values): 119 | frame.loc[i] = [value, \ 120 | len(all_data[(all_data['Survived'] == 1) & (all_data[key] == value)]), \ 121 | len(all_data[(all_data['Survived'] == 0) & (all_data[key] == value)])] 122 | 123 | # Set the width of each bar 124 | bar_width = 0.4 125 | 126 | # Display each category's survival rates 127 | for i in np.arange(len(frame)): 128 | nonsurv_bar = plt.bar(i-bar_width, frame.loc[i]['NSurvived'], width = bar_width, color = 'r') 129 | surv_bar = plt.bar(i, frame.loc[i]['Survived'], width = bar_width, color = 'g') 130 | 131 | plt.xticks(np.arange(len(frame)), values) 132 | plt.legend((nonsurv_bar[0], surv_bar[0]),('Did not survive', 'Survived'), framealpha = 0.8) 133 | 134 | # Common attributes for plot formatting 135 | plt.xlabel(key) 136 | plt.ylabel('Number of Passengers') 137 | plt.title('Passenger Survival Statistics With \'%s\' Feature'%(key)) 138 | plt.show() 139 | 140 | # Report number of passengers with missing values 141 | if sum(pd.isnull(all_data[key])): 142 | nan_outcomes = all_data[pd.isnull(all_data[key])]['Survived'] 143 | print "Passengers with missing '{}' values: {} ({} survived, {} did not survive)".format( \ 144 | key, len(nan_outcomes), sum(nan_outcomes == 1), sum(nan_outcomes == 0)) 145 | 146 | -------------------------------------------------------------------------------- /projects/titanic_survival_exploration/titanic_visualizations.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/projects/titanic_survival_exploration/titanic_visualizations.pyc -------------------------------------------------------------------------------- /verified certificate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Tahsin-Mayeesha/Udacity-Machine-Learning-Nanodegree-Coursework/ebd68ef68f85410c8f9a35e3c87068d77c41f209/verified certificate.png --------------------------------------------------------------------------------