├── project_10_logistic_regression ├── iphone_purchase_records.csv └── logistic_regression_dec10.py ├── project_11_k_nearest_neighbor ├── iphone_purchase_records.csv └── knn_dec12.py ├── project_12_svm ├── iphone_purchase_records.csv ├── svm └── svm_dec12.py ├── project_13_kernel_svm ├── iphone_purchase_records.csv └── kernel_svm_dec12.py ├── project_14_naive_bayes ├── iphone_purchase_records.csv └── naive_bayes.py ├── project_15_decision_tree_classifier ├── decision_tree_classification.py └── iphone_purchase_records.csv ├── project_16_random_forest_classifier ├── iphone_purchase_records.csv └── random_forest_classifier.py ├── project_17_compare_classification_algorithms ├── compare_classification_algos.py └── iphone_purchase_records.csv ├── project_1_simple_linear_regression ├── Salary_Data.csv ├── project_1 └── simple_linear_regression.py ├── project_2_multiple_linear_regression ├── 50_Startups.csv ├── multiple_linear_regression.py └── project_2 ├── project_3_polynomial_regression ├── Position_Salaries.csv ├── poly_regression.py └── project_3 ├── project_4_support_vector_regression ├── Position_Salaries.csv ├── project_4 └── svr_2019.py ├── project_5_decision_tree_regression ├── Position_Salaries.csv ├── decision_tree.py └── project_5 ├── project_6_random_forest_regression ├── Position_Salaries.csv ├── project_6 └── random_forest.py ├── project_7_compare_regression_models ├── Position_Salaries.csv ├── compare.py └── project_7 ├── project_8_predict_weight ├── Height_Weight_single_variable_data_101_series_1.0.csv ├── project_8 └── project_8_predict_weight.py └── project_9_predict_weight_sex ├── predict_weight.py └── weight-height.csv /project_10_logistic_regression/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | 
Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | 
Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | 
Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1
-------------------------------------------------------------------------------- /project_10_logistic_regression/logistic_regression_dec10.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Dec 11 17:31:56 2018
5 | 
6 | @author: omairaasim
7 | """
8 | 
9 | # Step 1 - Load Data
10 | import pandas as pd
11 | dataset = pd.read_csv("iphone_purchase_records.csv")
12 | X = dataset.iloc[:,:-1].values
13 | y = dataset.iloc[:, 3].values
14 | 
15 | # Step 2 - Convert Gender to number
16 | from sklearn.preprocessing import LabelEncoder
17 | labelEncoder_gender = LabelEncoder()
18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0])
19 | 
20 | # Optional - if you want to convert X to float data type (use the builtin float; np.float was removed from NumPy)
21 | import numpy as np
22 | X = np.vstack(X[:, :]).astype(float)
23 | 
24 | # Step 3 - Split Data into training and testing
25 | from sklearn.model_selection import train_test_split
26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
27 | 
28 | # Step 4 - Feature Scaling
29 | from sklearn.preprocessing import StandardScaler
30 | sc = StandardScaler()
31 | X_train = sc.fit_transform(X_train)
32 | X_test = sc.transform(X_test)
33 | 
34 | 
35 | # Step 5 - Logistic Regression Classifier
36 | from sklearn.linear_model import LogisticRegression
37 | classifier = LogisticRegression(random_state=0, solver="liblinear")
38 | classifier.fit(X_train, y_train)
39 | 
40 | 
41 | # Step 6 - Predict
42 | y_pred = classifier.predict(X_test)
43 | 
44 | # Step 7 - Confusion Matrix
45 | from sklearn import metrics
46 | cm = metrics.confusion_matrix(y_test, y_pred)
47 | print(cm)
48 | accuracy = metrics.accuracy_score(y_test, y_pred)
49 | print("Accuracy score:",accuracy)
50 | precision = metrics.precision_score(y_test, y_pred)
51 | print("Precision score:",precision)
52 | recall = metrics.recall_score(y_test, y_pred)
53 | print("Recall score:",recall)
54 | 
55 | # Step 8 - Make New Predictions
56 | x1 = sc.transform([[1,21,40000]])
57 | x2 = sc.transform([[1,21,80000]])
58 | x3 = sc.transform([[0,21,40000]])
59 | x4 = sc.transform([[0,21,80000]])
60 | x5 = sc.transform([[1,41,40000]])
61 | x6 = sc.transform([[1,41,80000]])
62 | x7 = sc.transform([[0,41,40000]])
63 | x8 = sc.transform([[0,41,80000]])
64 | 
65 | print("Male aged 21 making $40k will buy iPhone:", classifier.predict(x1))
66 | print("Male aged 21 making $80k will buy iPhone:", classifier.predict(x2))
67 | print("Female aged 21 making $40k will buy iPhone:", classifier.predict(x3))
68 | print("Female aged 21 making $80k will buy iPhone:", classifier.predict(x4))
69 | print("Male aged 41 making $40k will buy iPhone:", classifier.predict(x5))
70 | print("Male aged 41 making $80k will buy iPhone:", classifier.predict(x6))
71 | print("Female aged 41 making $40k will buy iPhone:", classifier.predict(x7))
72 | print("Female aged 41 making $80k will buy iPhone:", classifier.predict(x8))
73 | 
74 | 
75 | 
76 | 
-------------------------------------------------------------------------------- /project_11_k_nearest_neighbor/iphone_purchase_records.csv:
--------------------------------------------------------------------------------
[contents identical to /project_10_logistic_regression/iphone_purchase_records.csv above]
-------------------------------------------------------------------------------- /project_11_k_nearest_neighbor/knn_dec12.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Wed Dec 12 12:00:17 2018
5 | 
6 | @author: omairaasim
7 | """
8 | 
9 | # Step 1 - Load Data
10 | import pandas as pd
11 | dataset = pd.read_csv("iphone_purchase_records.csv")
12 | X = dataset.iloc[:,:-1].values
13 | y = dataset.iloc[:, 3].values
14 | 
15 | # Step 2 - Convert Gender to number
16 | from sklearn.preprocessing import LabelEncoder
17 | labelEncoder_gender = LabelEncoder()
18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0])
19 | 
20 | # Optional - if you want to convert X to float data type (use the builtin float; np.float was removed from NumPy)
21 | import numpy as np
22 | X = np.vstack(X[:, :]).astype(float)
23 | 
24 | 
25 | # Step 3 - Split into training and test data
26 | from sklearn.model_selection import train_test_split
27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
28 | 
29 | 
30 | # Step 4 - Feature Scaling
31 | from sklearn.preprocessing import StandardScaler
32 | sc_X = StandardScaler()
33 | X_train = sc_X.fit_transform(X_train)
34 | X_test = sc_X.transform(X_test)
35 | 
36 | # Step 5 - Fit KNN Classifier
37 | from sklearn.neighbors import KNeighborsClassifier
38 | # metric = minkowski and p=2 is Euclidean Distance
39 | # metric = minkowski and p=1 is Manhattan Distance
40 | classifier = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2)
41 | classifier.fit(X_train, y_train)
42 | 
43 | # Step 6 - Make Prediction
44 | y_pred = classifier.predict(X_test)
45 | 
46 | # Reference results noted for this train/test split:
47 | #cm = metrics.confusion_matrix(y_test, y_pred) ## 4,3 errors
48 | #accuracy = metrics.accuracy_score(y_test, y_pred) ## 0.93
49 | #precision = metrics.precision_score(y_test, y_pred) ## 0.87
50 | #recall = metrics.recall_score(y_test, y_pred) ## 0.90
51 | 
52 | # Step 7 - Confusion Matrix
53 | from sklearn import metrics
54 | cm = metrics.confusion_matrix(y_test, y_pred)
55 | print(cm)
56 | accuracy = metrics.accuracy_score(y_test, y_pred)
57 | print("Accuracy score:",accuracy)
58 | precision = metrics.precision_score(y_test, y_pred)
59 | print("Precision score:",precision)
60 | recall = metrics.recall_score(y_test, y_pred)
61 | print("Recall score:",recall)
62 | 
-------------------------------------------------------------------------------- /project_12_svm/iphone_purchase_records.csv: --------------------------------------------------------------------------------
[contents identical to /project_10_logistic_regression/iphone_purchase_records.csv above]
-------------------------------------------------------------------------------- /project_12_svm/svm:
--------------------------------------------------------------------------------
1 | Machine Learning Project 12 - Using Support Vector Classification
2 | 
3 | Below is the sample dataset - points in a two-dimensional space. Some observations are red and some are green, and these points have already been classified.
4 | 
5 | img 1
6 | 
7 | We will use an SVM to draw a line that separates these 2 categories. For example - let us assume the line below is drawn by the SVM algorithm to separate the 2 categories while also having the maximum margin. By margin we mean that there will never be any data point inside the margin.
8 | You can read more about SVMs and maximum margins in this great tutorial.
9 | 
10 | This line is drawn at an equal distance from the red and green points.
11 | 
12 | img 2
13 | 
14 | 
15 | These 2 points are called the support vectors.
16 | These 2 points support the algorithm - even if you get rid of the other points, nothing will change; the algorithm will be exactly the same. The other points do not contribute to the result of the algorithm. Only the 2 highlighted points contribute, and hence they are called the support vectors.
17 | You could call them support points in a two-dimensional space, but in reality they are vectors: in a multidimensional space, when you have more than 2 variables - maybe 10 or 50 - each point can no longer be visualized in two dimensions, so each of those points is actually a vector in that multidimensional space.
18 | 
19 | 
20 | 
21 | img 3
22 | 
23 | 
24 | The line in the middle is called the Maximum Margin Hyperplane in a multidimensional space, or the Maximum Margin Classifier in a two-dimensional space.
25 | The green dotted line is called the positive hyperplane.
26 | The red dotted line is called the negative hyperplane. It doesn't matter in which order you name them - it's just that one is positive and the other is negative.
27 | 
28 | 
29 | 
30 | What is special about SVMs?
31 | 
32 | Let's say you are building an algorithm to identify apples and oranges. What most machine learning algorithms would do is look at the most common-looking types of apples and the most common-looking types of oranges to train themselves. Based on that, they will identify new samples as either apple or orange.
33 | 
34 | But in the case of a Support Vector Machine - instead of looking at the most common types of apples and oranges - the SVM looks at apples that are very much like an orange and, similarly, at oranges that resemble an apple.
35 | 
36 | If you look at the image below - the SVM would pick the apple on the left that looks very similar to an orange, and the green orange on the right that looks very similar to a green apple. These 2 points would be the support vectors, and they are very close to the boundary. So the SVM is a very different type of algorithm: it picks the extreme cases close to the boundary and uses them to construct its analysis. That's why, in certain cases, the SVM performs better than other classification algorithms.
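37 | 
38 | To make the margin idea concrete, here is a small illustrative sketch (an addition for this write-up, not part of the original project code; the toy points are made up): a fitted linear SVC in scikit-learn exposes exactly these support vectors, and the margin width can be recovered from the weight vector w as 2/||w||.
39 | 
40 | import numpy as np
41 | from sklearn.svm import SVC
42 | 
43 | # two small hand-made clusters standing in for the red and green points
44 | X = np.array([[1.0, 2.0], [2.0, 3.0], [2.0, 1.0],
45 |               [6.0, 5.0], [7.0, 7.0], [8.0, 6.0]])
46 | y = np.array([0, 0, 0, 1, 1, 1])
47 | 
48 | classifier = SVC(kernel="linear")
49 | classifier.fit(X, y)
50 | 
51 | # the extreme points described above - the support vectors
52 | print(classifier.support_vectors_)
53 | 
54 | # w is normal to the maximum margin hyperplane; the margin width is 2/||w||
55 | w = classifier.coef_[0]
56 | print("Margin width:", 2 / np.linalg.norm(w))
57 | 
58 | Dropping any of the non-support points and refitting leaves support_vectors_ and the margin unchanged - which is exactly the "only the support vectors contribute" point made above.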
37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /project_12_svm/svm_dec12.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 12 20:21:32 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(np.float) 23 | 24 | 25 | # Step 3 - Split into Train and Test set 26 | from sklearn.model_selection import train_test_split 27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 28 | 29 | # Step 4 - Feature Scaling 30 | from sklearn.preprocessing import StandardScaler 31 | ss_X = StandardScaler() 32 | X_train = ss_X.fit_transform(X_train) 33 | X_test = ss_X.transform(X_test) 34 | 35 | # Step 5 - Fit SVC Classifier 36 | from sklearn.svm import SVC 37 | classifier = SVC(kernel = "linear", random_state=0) 38 | classifier.fit(X_train, y_train) 39 | 40 | # Step 6 - Predict 41 | y_pred = classifier.predict(X_test) 42 | 43 | 44 | # Step 7 - Confusion Matrix 45 | from sklearn import metrics 46 | cm = metrics.confusion_matrix(y_test, y_pred) 47 | print(cm) 48 | accuracy = metrics.accuracy_score(y_test, y_pred) 49 | print("Accuracy score:",accuracy) 50 | precision = metrics.precision_score(y_test, y_pred) 51 | print("Precision score:",precision) 52 | recall = metrics.recall_score(y_test, y_pred) 53 | print("Recall score:",recall) 54 | -------------------------------------------------------------------------------- /project_13_kernel_svm/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | 
Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | 
Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | 
Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_13_kernel_svm/kernel_svm_dec12.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 12 20:53:27 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | import pandas as pd 10 | dataset = pd.read_csv("iphone_purchase_records.csv") 11 | X = dataset.iloc[:,:-1].values 12 | y = dataset.iloc[:, 3].values 13 | 14 | # Step 2 - Convert Gender to number 15 | from sklearn.preprocessing import LabelEncoder 16 | labelEncoder_gender = LabelEncoder() 17 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 18 | 19 | # Optional - if you want to convert X to float data type 20 | import numpy as np 21 | X = np.vstack(X[:, :]).astype(np.float) 22 | 23 | # Step 2 - Split into training and Test 24 | from sklearn.model_selection import train_test_split 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 26 | 27 | # Step 3 - Feature Scaling 28 | from sklearn.preprocessing import StandardScaler 29 | ss_X = StandardScaler() 30 | X_train = ss_X.fit_transform(X_train) 31 | X_test = ss_X.transform(X_test) 32 | 33 | 34 | # Step 4 - Fit SCV Classifier 35 | from sklearn.svm import SVC 36 | classifier = SVC( kernel="rbf", random_state=0) 37 | classifier.fit(X_train, y_train) 38 | 39 | # Step 5 - Predict 40 | y_pred = classifier.predict(X_test) 41 | 42 | from sklearn import metrics 43 | cm = metrics.confusion_matrix(y_test, y_pred) 44 | print(cm) 45 | accuracy = metrics.accuracy_score(y_test, y_pred) 46 | print("Accuracy score:",accuracy) 47 | precision = metrics.precision_score(y_test, y_pred) 48 | print("Precision score:",precision) 49 | recall = metrics.recall_score(y_test, y_pred) 50 | print("Recall score:",recall) -------------------------------------------------------------------------------- /project_14_naive_bayes/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | 
Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | 
Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 
| Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_14_naive_bayes/naive_bayes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 18 12:34:51 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(float) # np.float was removed in recent NumPy - use the builtin float 23 | 24 | # Step 3 - Splitting the data into Train and Test 25 | from sklearn.model_selection import train_test_split 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 27 | 28 | # Step 4 - Feature Scaling 29 | from sklearn.preprocessing import StandardScaler 30 | ss_X = StandardScaler() 31 | X_train = ss_X.fit_transform(X_train) 32 | X_test = ss_X.transform(X_test) 33 | 34 | # Step 5 - Fit the classifier 35 | from sklearn.naive_bayes import GaussianNB 36 | classifier = GaussianNB() 37 | classifier.fit(X_train, y_train) 38 | 39 | # Step 6 - Predict 40 | y_pred = classifier.predict(X_test) 41 | 42 | # Step 7 - Confusion Matrix 43 | from sklearn import metrics 44 | cm = metrics.confusion_matrix(y_test, y_pred) 45 | print(cm) 46 | accuracy = metrics.accuracy_score(y_test, y_pred) 47 | print("Accuracy score:",accuracy) 48 | precision = metrics.precision_score(y_test, y_pred) 49 | print("Precision score:",precision) 50 | recall = metrics.recall_score(y_test, y_pred) 51 | print("Recall score:",recall)
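52 | 53 | # Optional (extra sketch, not in the original script) - if you want a confidence 54 | # for each prediction, GaussianNB also exposes predict_proba, which returns one 55 | # column per class: P(no purchase), P(purchase) 56 | y_prob = classifier.predict_proba(X_test) 57 | print("First 5 probability pairs:", y_prob[:5])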
-*- 3 | """ 4 | Created on Wed Dec 19 17:30:09 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(np.float) 23 | 24 | # Step 3 - Split Data 25 | from sklearn.model_selection import train_test_split 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 27 | 28 | # Step 4 - Fit the classifier 29 | from sklearn.tree import DecisionTreeClassifier 30 | classifier = DecisionTreeClassifier(criterion = "entropy", random_state=0) 31 | classifier.fit(X_train, y_train) 32 | 33 | # Step 5 - Predict 34 | y_pred = classifier.predict(X_test) 35 | 36 | 37 | # Step 6 - Evaluate the model performance 38 | from sklearn import metrics 39 | cm = metrics.confusion_matrix(y_test, y_pred) 40 | print(cm) 41 | accuracy = metrics.accuracy_score(y_test, y_pred) 42 | print("Accuracy score:",accuracy) 43 | precision = metrics.precision_score(y_test, y_pred) 44 | print("Precision score:",precision) 45 | recall = metrics.recall_score(y_test, y_pred) 46 | print("Recall score:",recall) -------------------------------------------------------------------------------- /project_15_decision_tree_classifier/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 
| Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | 
Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | 
Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_16_random_forest_classifier/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | 
Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | 
Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_16_random_forest_classifier/random_forest_classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
6 | """ 7 | 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(np.float) 23 | 24 | # Step 3 - Split Data 25 | from sklearn.model_selection import train_test_split 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 27 | 28 | 29 | # Step 4 - Fit Classifier 30 | from sklearn.ensemble import RandomForestClassifier 31 | classifier = RandomForestClassifier(n_estimators=100, criterion="entropy", random_state=0) 32 | classifier.fit(X_train, y_train) 33 | 34 | # Step 5 - Predict 35 | y_pred = classifier.predict(X_test) 36 | 37 | # Step 6 - Metrics 38 | #from sklearn import metrics 39 | #cm = metrics.confusion_matrix(y_test, y_pred) ## 5,3 errors 40 | #accuracy = metrics.accuracy_score(y_test, y_pred) ## 0.92 41 | #precision = metrics.precision_score(y_test, y_pred) ## 0.85 42 | #recall = metrics.recall_score(y_test, y_pred) ## 0.90 43 | 44 | # Step 6 - Evaluate the model performance 45 | from sklearn import metrics 46 | cm = metrics.confusion_matrix(y_test, y_pred) 47 | print(cm) 48 | accuracy = metrics.accuracy_score(y_test, y_pred) 49 | print("Accuracy score:",accuracy) 50 | precision = metrics.precision_score(y_test, y_pred) 51 | print("Precision score:",precision) 52 | recall = metrics.recall_score(y_test, y_pred) 53 | print("Recall score:",recall) 54 | 55 | -------------------------------------------------------------------------------- /project_17_compare_classification_algorithms/compare_classification_algos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 19 17:30:09 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | 21 | # Step 3 - Feature Scaling 22 | from sklearn.preprocessing import StandardScaler 23 | sc = StandardScaler() 24 | X = sc.fit_transform(X) 25 | 26 | # Step 4 - Compare Classification Algorithms 27 | from sklearn.model_selection import KFold 28 | from sklearn.model_selection import cross_val_score 29 | from sklearn.linear_model import LogisticRegression 30 | from sklearn.tree import DecisionTreeClassifier 31 | from sklearn.neighbors import KNeighborsClassifier 32 | from sklearn.ensemble import RandomForestClassifier 33 | from sklearn.naive_bayes import GaussianNB 34 | from sklearn.svm import SVC 35 | 36 | classification_models = [] 37 | classification_models.append(('Logistic Regression', LogisticRegression(solver="liblinear"))) 38 | classification_models.append(('K Nearest Neighbor', KNeighborsClassifier(n_neighbors=5, metric="minkowski",p=2))) 39 | classification_models.append(('Kernel SVM', SVC(kernel = 'rbf',gamma='scale'))) 40 | classification_models.append(('Naive Bayes', GaussianNB())) 41 
-------------------------------------------------------------------------------- /project_17_compare_classification_algorithms/compare_classification_algos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 19 17:30:09 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | 21 | # Step 3 - Feature Scaling 22 | from sklearn.preprocessing import StandardScaler 23 | sc = StandardScaler() 24 | X = sc.fit_transform(X) 25 | 26 | # Step 4 - Compare Classification Algorithms 27 | from sklearn.model_selection import KFold 28 | from sklearn.model_selection import cross_val_score 29 | from sklearn.linear_model import LogisticRegression 30 | from sklearn.tree import DecisionTreeClassifier 31 | from sklearn.neighbors import KNeighborsClassifier 32 | from sklearn.ensemble import RandomForestClassifier 33 | from sklearn.naive_bayes import GaussianNB 34 | from sklearn.svm import SVC 35 | 36 | classification_models = [] 37 | classification_models.append(('Logistic Regression', LogisticRegression(solver="liblinear"))) 38 | classification_models.append(('K Nearest Neighbor', KNeighborsClassifier(n_neighbors=5, metric="minkowski",p=2))) 39 | classification_models.append(('Kernel SVM', SVC(kernel = 'rbf',gamma='scale'))) 40 | classification_models.append(('Naive Bayes', GaussianNB())) 41 | classification_models.append(('Decision Tree', DecisionTreeClassifier(criterion = "entropy"))) 42 | classification_models.append(('Random Forest', RandomForestClassifier(n_estimators=100, criterion="entropy"))) 43 | 44 | for name, model in classification_models: 45 |     kfold = KFold(n_splits=10, shuffle=True, random_state=7) # shuffle=True is required when a random_state is set 46 |     result = cross_val_score(model, X, y, cv=kfold, scoring='accuracy') 47 |     print("%s: Mean Accuracy = %.2f%% - SD Accuracy = %.2f%%" % (name, result.mean()*100, result.std()*100)) -------------------------------------------------------------------------------- /project_17_compare_classification_algorithms/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | 
Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | 
Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_1_simple_linear_regression/Salary_Data.csv: -------------------------------------------------------------------------------- 1 | YearsExperience,Salary 2 | 1.1,39343.00 3 | 1.3,46205.00 4 | 1.5,37731.00 5 | 2.0,43525.00 6 | 2.2,39891.00 7 | 2.9,56642.00 8 | 3.0,60150.00 9 | 3.2,54445.00 10 | 3.2,64445.00 11 | 3.7,57189.00 12 | 3.9,63218.00 13 | 4.0,55794.00 14 | 4.0,56957.00 15 | 4.1,57081.00 16 | 4.5,61111.00 17 | 4.9,67938.00 18 | 5.1,66029.00 19 | 5.3,83088.00 20 | 5.9,81363.00 21 | 6.0,93940.00 22 | 6.8,91738.00 23 | 7.1,98273.00 24 | 7.9,101302.00 25 | 8.2,113812.00 26 | 8.7,109431.00 27 | 
9.0,105582.00 28 | 9.5,116969.00 29 | 9.6,112635.00 30 | 10.3,122391.00 31 | 10.5,121872.00 32 | -------------------------------------------------------------------------------- /project_1_simple_linear_regression/project_1: -------------------------------------------------------------------------------- 1 | I'm basically writing this blog for myself because I've been wanting to learn Machine Learning for a while now but have never really gotten around to it. So this blog is more like a journal for me to write about my daily progress - (hopefully I will be making some progress every day). 2 | 3 | #100DaysOfMLCode #100ProjectsInML 4 | 5 | The best approach for me to learn anything is by working on sample projects. No matter how simple the project is, it helps me better understand the concepts. So I will be working through some small mini projects as part of this learning journey. 6 | 7 | There are hundreds of excellent resources out there to help you get started. I stumbled upon the A-Z Machine Learning course on Udemy and I'll be walking through those examples in the first few weeks. 8 | 9 | 10 | 11 | Today I'll be going through "Simple Linear Regression" 12 | 13 | Dataset 14 | First, let's look at the dataset. It is Salary_Data.csv and can be found here 15 | It has 2 columns - "Years of Experience" and "Salary" - for 30 employees in a company. So in this example, we will train a Simple Linear Regression model to learn the correlation between the number of years of experience of each employee and their respective salary. Once the model is trained, we will be able to do some sample predictions. 16 | 17 | Below is a sample screenshot of the dataset. 18 | 19 | 20 | So let's get started. 21 | 22 | Step 1: Load the Dataset 23 | 24 | Below is the code snippet for loading the dataset. 25 | We will be using a pandas dataframe. 26 | Here X is the independent variable, which is "Years of Experience", 27 | and y is the dependent variable, which is "Salary" 28 | 29 | So for X, we specify dataset.iloc[:, :-1].values 30 | which simply means take all rows and all columns except the last one 31 | 32 | And for y, we specify dataset.iloc[:, 1].values 33 | which simply means take all rows and only the column with index 1 - in Python indexes begin at 0 - so index 1 here is the second column, which is Salary 34 | 35 | # Step 1 Load Data 36 | import pandas as pd 37 | dataset = pd.read_csv('Salary_Data.csv') 38 | X = dataset.iloc[:, :-1].values 39 | y = dataset.iloc[:,1].values 40 | 41 | Below is a sample screenshot of X and y 42 | 43 | Step 2: Split dataset into training set and test set 44 | 45 | Next we have to split the dataset into training and testing. We will use the training dataset for training the model and then check the performance of the model on the test dataset. 46 | 47 | For this we will use the train_test_split method from the model_selection library 48 | We are providing a test_size of 1/3, which means the test set will contain 10 observations and the training set will contain 20 observations 49 | The random_state=0 is required only if you want to compare your results with mine. 50 | 51 | # Step 2: Split data into training and testing 52 | from sklearn.model_selection import train_test_split 53 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0) 54 | 55 | Below is a sample screenshot of X_train, y_train, X_test and y_test 56 | 57 | Step 3: Fit Simple Linear Regression model to training set 58 | 59 | This is a very simple step. 
We will be using the LinearRegression class from the sklearn.linear_model library. First we create an object of the LinearRegression class and call the fit method, passing the X_train and y_train. 60 | 61 | # Step 3: Fit Simple Linear Regression to Training Data 62 | from sklearn.linear_model import LinearRegression 63 | regressor = LinearRegression() 64 | regressor.fit(X_train, y_train) 65 | 66 | 67 | Step 4: Predict the test set 68 | Using the regressor we trained in the previous step, we will now use it to predict the results of the test set and compare the predicted values with the actual values 69 | 70 | # Step 4: Make Prediction 71 | y_pred = regressor.predict(X_test) 72 | 73 | Now we have y_pred, which are the predicted values from our model, and y_test, which are the actual values. 74 | Let us compare and see how well our model did. As you can see from the screenshot below - our basic model did pretty well. 75 | 76 | If we take the first employee - the actual salary is 37731 and our model predicted 40835.1 - which is not too bad. There are some predictions that are off but some are pretty close. 77 | 
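If you want to print this comparison yourself rather than rely on the screenshot, two lines of pandas will do it (a small extra snippet, not part of the original script - pd is the pandas import from Step 1):

comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)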
78 | Step 5 - Visualizing the training set 79 | 80 | Let's visualize the results. 81 | First we'll plot the actual data points of the training set - X_train and y_train 82 | plt.scatter(X_train, y_train, color = 'red') 83 | 84 | Next we'll plot the regression line - which is the predicted values for X_train 85 | plt.plot(X_train, regressor.predict(X_train), color='blue') 86 | 87 | # Step 5 - Visualize training set results 88 | import matplotlib.pyplot as plt 89 | # plot the actual data points of training set 90 | plt.scatter(X_train, y_train, color = 'red') 91 | # plot the regression line 92 | plt.plot(X_train, regressor.predict(X_train), color='blue') 93 | plt.title('Salary vs Experience (Training set)') 94 | plt.xlabel('Years of Experience') 95 | plt.ylabel('Salary') 96 | plt.show() 97 | 98 | 99 | Step 6 - Visualizing the test set 100 | 101 | Let's visualize the results. 102 | First we'll plot the actual data points of the test set - X_test and y_test 103 | plt.scatter(X_test, y_test, color = 'red') 104 | 105 | Next we'll plot the regression line - which is the same line as above 106 | plt.plot(X_train, regressor.predict(X_train), color='blue') 107 | 108 | # Step 6 - Visualize test set results 109 | import matplotlib.pyplot as plt 110 | # plot the actual data points of test set 111 | plt.scatter(X_test, y_test, color = 'red') 112 | # plot the regression line 113 | plt.plot(X_train, regressor.predict(X_train), color='blue') 114 | plt.title('Salary vs Experience (Test set)') 115 | plt.xlabel('Years of Experience') 116 | plt.ylabel('Salary') 117 | plt.show() 118 | 119 | Step 7 - Make new predictions 120 | We can also make brand new predictions for data points that do not exist in the dataset. 121 | For example, for a person with 15 years of experience: 122 | 123 | new_salary_pred = regressor.predict([[15]]) 124 | 125 | # Step 7 - Make new prediction 126 | new_salary_pred = regressor.predict([[15]]) 127 | 128 | Here is the full source code 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /project_1_simple_linear_regression/simple_linear_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Sep 1 19:14:35 2019 5 | @author: omairaasim 6 | """ 7 | 8 | # Step 1 Load Data 9 | import pandas as pd 10 | dataset = pd.read_csv('Salary_Data.csv') 11 | X = dataset.iloc[:, :-1].values 12 | y = dataset.iloc[:,1].values 13 | 14 | # Step 2: Split data into training and testing 15 | from sklearn.model_selection import train_test_split 16 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0) 17 | 18 | # Step 3: Fit Simple Linear Regression to Training Data 19 | from sklearn.linear_model import LinearRegression 20 | regressor = LinearRegression() 21 | regressor.fit(X_train, y_train) 22 | 23 | # Step 4: Make Prediction 24 | y_pred = regressor.predict(X_test) 25 | 26 | # Step 5 - Visualize training set results 27 | import matplotlib.pyplot as plt 28 | # plot the actual data points of training set 29 | plt.scatter(X_train, y_train, color = 'red') 30 | # plot the regression line 31 | plt.plot(X_train, regressor.predict(X_train), color='blue') 32 | plt.title('Salary vs Experience (Training set)') 33 | plt.xlabel('Years of Experience') 34 | plt.ylabel('Salary') 35 | plt.show() 36 | 37 | # Step 6 - Visualize test set results 38 | import matplotlib.pyplot as plt 39 | # plot the actual data points of test set 40 | plt.scatter(X_test, y_test, color = 'red') 41 | # plot the regression line (same as above) 42 | plt.plot(X_train, regressor.predict(X_train), color='blue') 43 | plt.title('Salary vs Experience (Test set)') 44 | plt.xlabel('Years of Experience') 45 | plt.ylabel('Salary') 46 | plt.show() 47 | 48 | # Step 7 - Make new prediction 49 | new_salary_pred = regressor.predict([[15]]) 50 | print('The predicted salary of a person with 15 years experience is ',new_salary_pred) 51 | -------------------------------------------------------------------------------- /project_2_multiple_linear_regression/50_Startups.csv: -------------------------------------------------------------------------------- 1 | R&D Spend,Administration,Marketing Spend,State,Profit 2 | 165349.2,136897.8,471784.1,New York,192261.83 3 | 162597.7,151377.59,443898.53,California,191792.06 4 | 153441.51,101145.55,407934.54,Florida,191050.39 5 | 144372.41,118671.85,383199.62,New York,182901.99 6 | 142107.34,91391.77,366168.42,Florida,166187.94 7 | 131876.9,99814.71,362861.36,New York,156991.12 8 | 134615.46,147198.87,127716.82,California,156122.51 9 | 130298.13,145530.06,323876.68,Florida,155752.6 10 | 120542.52,148718.95,311613.29,New York,152211.77 11 | 123334.88,108679.17,304981.62,California,149759.96 12 | 101913.08,110594.11,229160.95,Florida,146121.95 13 | 100671.96,91790.61,249744.55,California,144259.4 14 | 93863.75,127320.38,249839.44,Florida,141585.52 15 | 91992.39,135495.07,252664.93,California,134307.35 16 | 119943.24,156547.42,256512.92,Florida,132602.65 17 | 114523.61,122616.84,261776.23,New York,129917.04 18 | 78013.11,121597.55,264346.06,California,126992.93 19 | 94657.16,145077.58,282574.31,New York,125370.37 20 | 
91749.16,114175.79,294919.57,Florida,124266.9 21 | 86419.7,153514.11,0,New York,122776.86 22 | 76253.86,113867.3,298664.47,California,118474.03 23 | 78389.47,153773.43,299737.29,New York,111313.02 24 | 73994.56,122782.75,303319.26,Florida,110352.25 25 | 67532.53,105751.03,304768.73,Florida,108733.99 26 | 77044.01,99281.34,140574.81,New York,108552.04 27 | 64664.71,139553.16,137962.62,California,107404.34 28 | 75328.87,144135.98,134050.07,Florida,105733.54 29 | 72107.6,127864.55,353183.81,New York,105008.31 30 | 66051.52,182645.56,118148.2,Florida,103282.38 31 | 65605.48,153032.06,107138.38,New York,101004.64 32 | 61994.48,115641.28,91131.24,Florida,99937.59 33 | 61136.38,152701.92,88218.23,New York,97483.56 34 | 63408.86,129219.61,46085.25,California,97427.84 35 | 55493.95,103057.49,214634.81,Florida,96778.92 36 | 46426.07,157693.92,210797.67,California,96712.8 37 | 46014.02,85047.44,205517.64,New York,96479.51 38 | 28663.76,127056.21,201126.82,Florida,90708.19 39 | 44069.95,51283.14,197029.42,California,89949.14 40 | 20229.59,65947.93,185265.1,New York,81229.06 41 | 38558.51,82982.09,174999.3,California,81005.76 42 | 28754.33,118546.05,172795.67,California,78239.91 43 | 27892.92,84710.77,164470.71,Florida,77798.83 44 | 23640.93,96189.63,148001.11,California,71498.49 45 | 15505.73,127382.3,35534.17,New York,69758.98 46 | 22177.74,154806.14,28334.72,California,65200.33 47 | 1000.23,124153.04,1903.93,New York,64926.08 48 | 1315.46,115816.21,297114.46,Florida,49490.75 49 | 0,135426.92,0,California,42559.73 50 | 542.05,51743.15,0,New York,35673.41 51 | 0,116983.8,45173.06,California,14681.4 -------------------------------------------------------------------------------- /project_2_multiple_linear_regression/multiple_linear_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Nov 30 19:45:38 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("50_Startups.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:,4].values 14 | 15 | # Step 2 - Encode Categorical Data 16 | from sklearn.preprocessing import OneHotEncoder 17 | from sklearn.compose import ColumnTransformer 18 | import numpy as np 19 | ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[3])], remainder='passthrough') 20 | X = np.array(ct.fit_transform(X)) 21 | 22 | # Step 3 - Dummy Trap 23 | X = X[:,1:] 24 | 25 | # Step 4 - Split Data 26 | from sklearn.model_selection import train_test_split 27 | X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0) 28 | 29 | # Step 5 - Fit Regressor 30 | from sklearn.linear_model import LinearRegression 31 | regressor = LinearRegression() 32 | regressor.fit(X_train, y_train) 33 | 34 | # Step 6 - Predict 35 | y_pred = regressor.predict(X_test) 36 | -------------------------------------------------------------------------------- /project_2_multiple_linear_regression/project_2: -------------------------------------------------------------------------------- 1 | In project 2 of Machine Learning, I'm going to be looking at Multiple Linear Regression. Unlike Simple Linear Regression where there is one independent variable and one dependent variable - in Multiple Linear Regression there are several independent variables that could have an effect on determining the dependent variable. 
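In equation form, that means fitting y = b0 + b1 * x1 + b2 * x2 + ... + bn * xn, with one coefficient b for every independent variable x - the same equation we will come back to in the backward elimination section below.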
2 | 3 | I'll be using the example from the A-Z Machine Learning course from Udemy. 4 | 5 | Let's dive right in. 6 | 7 | Dataset 8 | The dataset we will be using for this project can be found here**. 9 | It contains data about 50 startups. 10 | It has 5 columns - "R&D Spend", "Administration", "Marketing Spend", "State", "Profit" 11 | The first 3 columns indicate how much each startup spends on Research and Development, how much they spend on Administration and how much they spend on Marketing. 12 | The State column indicates which state the startup is based in. And the last column states the profit made by the startup. 13 | 14 | Project Objective 15 | We want our model to predict the profit based on the independent variables described above. So Profit is the dependent variable and the other 4 are independent variables. 16 | 17 | Step 1: Load the Dataset 18 | 19 | Below is the code snippet for loading the dataset. 20 | We will be using a pandas dataframe. 21 | Here X contains all the independent variables, which are "R&D Spend", "Administration", "Marketing Spend" and "State" 22 | and y is the dependent variable, which is the "Profit" 23 | 24 | So for X, we specify dataset.iloc[:, :-1].values 25 | which simply means take all rows and all columns except the last one 26 | 27 | And for y, we specify dataset.iloc[:, 4].values 28 | which simply means take all rows and only the column with index 4 - in Python indexes begin at 0 - so index 4 here is the fifth column, which is "Profit" 29 | 30 | # Step 1 - Load Data 31 | import pandas as pd 32 | dataset = pd.read_csv("50_Startups.csv") 33 | X = dataset.iloc[:,:-1].values 34 | y = dataset.iloc[:,4].values 35 | 36 | *************************************************** 37 | 38 | Step 2: Convert text variable to numbers 39 | We can see that in our dataset we have a categorical variable - "State" - which we have to encode, because the regression can only work with numbers. 40 | Here the "State" variable is at index 3 41 | Older tutorials did this with a LabelEncoder followed by OneHotEncoder(categorical_features=[3]), but the categorical_features argument has since been removed from scikit-learn. 42 | 43 | There is also a conceptual problem with stopping at a plain label encoding: 44 | if New York is assigned the value 2 and California the value 0, 45 | the model might assume New York is "higher" than California, which is not right for categories. 46 | 47 | **************************************************** 48 | 49 | Step 3: Use OneHotEncoder to introduce Dummy variables 50 | To avoid this we introduce dummy variables - one 0/1 column per state. The current way to do it is a ColumnTransformer wrapping a OneHotEncoder, exactly as in the accompanying multiple_linear_regression.py: 51 | 52 | # Step 2 - Encode Categorical Data 53 | from sklearn.preprocessing import OneHotEncoder 54 | from sklearn.compose import ColumnTransformer 55 | import numpy as np 56 | ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[3])], remainder='passthrough') 57 | X = np.array(ct.fit_transform(X)) 58 | 59 | After running the above code snippet - let's examine the dataset. 60 | We can see that 3 dummy variables have been added as we had 3 different States. 61 | They appear as the first three columns, in alphabetical order of the category names: California, Florida, New York. 62 | 63 | 64 | Let's compare the X dataset with the original dataset. 
65 | - Let's look at the first entry, at index 0 - in the original dataset the state was "New York" - and after encoding, the 3rd dummy variable has the value 1, which means the 3rd dummy variable represents the state New York 66 | - Let's look at the second entry, at index 1 - in the original dataset the state was "California" - and after encoding, the 1st dummy variable has the value 1, which means the 1st dummy variable represents the state California 67 | - Let's look at the third entry, at index 2 - in the original dataset the state was "Florida" - and after encoding, the 2nd dummy variable has the value 1, which means the 2nd dummy variable represents the state Florida 68 | 69 | 70 | Step 4: Dummy Variable Trap 71 | We have to remove one of the dummy variables - keeping all three would make the columns linearly dependent, which is known as the dummy variable trap. 72 | In the below code snippet - we are removing the first column. 73 | 74 | # Step 4 - Dummy Trap 75 | X = X[:,1:] 76 | 77 | 78 | Step 5: Split dataset into training set and test set 79 | 80 | Next we have to split the dataset into training and testing. We will use the training dataset for training the model and then check the performance of the model on the test dataset. 81 | 82 | For this we will use the train_test_split method from the model_selection library 83 | We are providing a test_size of 0.2, which means the test set will contain 10 observations and the training set will contain 40 observations 84 | The random_state=0 is required only if you want to compare your results with mine. 85 | 86 | # Step 5 - Split Data 87 | from sklearn.model_selection import train_test_split 88 | X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0) 89 | 90 | Below is a sample screenshot of X_train, y_train, X_test and y_test 91 | 92 | Step 6: Fit Multiple Linear Regression model to training set 93 | This is a very simple step. We will be using the same LinearRegression class from the sklearn.linear_model library - it handles multiple features out of the box. First we create an object of the LinearRegression class and call the fit method, passing the X_train and y_train. 94 | 95 | # Step 6 - Fit Regressor 96 | from sklearn.linear_model import LinearRegression 97 | regressor = LinearRegression() 98 | regressor.fit(X_train, y_train) 99 | 100 | Step 7: Predict the test set 101 | Using the regressor we trained in the previous step, we will now use it to predict the results of the test set and compare the predicted values with the actual values 102 | 103 | # Step 7 - Predict 104 | y_pred = regressor.predict(X_test) 105 | 106 | Now we have y_pred, which are the predicted values from our model, and y_test, which are the actual values. 107 | Let us compare and see how well our model did. As you can see from the screenshot below - our basic model did pretty well. 108 | 109 | If we take the first startup - the actual profit is 103282 and our model predicted 103015 - which is almost perfect. There are some predictions that are off, like the second startup - the actual profit is 144259 and our model predicted 132582. 110 | 111 | Step 8: Backward Elimination 112 | In the model that we just built, we used all the independent variables, but it's possible that some independent variables are more significant than others and have a greater impact on the profit, while some are not significant at all - meaning that if we remove them from the model, we may get better predictions. 113 | 114 | So we are going to use the backward elimination process to see which independent variables we must include in the model and which to exclude. 
Step 8: Backward Elimination
In the model that we just built, we used all the independent variables, but it's possible that some independent variables are more significant than others and have a greater impact on the profit, while some are not significant - meaning if we remove them from the model, we may get better predictions.

So we are going to use the backward elimination process to see which independent variables we must include in the model and which to exclude.

The first step is to add a column of 1's to our X dataset as the first column. We add this column of ones to account for the y-intercept.
This column corresponds to x0 = 1, associated with the constant b0 in the multiple linear regression equation:
y = b0 + b1*x1 + b2*x2 + ... + bn*xn

# Add ones
import numpy as np
ones = np.ones(shape=(50,1), dtype=int)
X = np.append(arr=ones, values=X, axis=1)

Now we will start the backward elimination process. Since we will be creating a new optimal matrix of features, we will call it X_opt. This will contain only the independent features that are significant in predicting profit.

To begin with, we include all independent variables in X_opt:
X_opt = X[:,[0,1,2,3,4,5]]

Next we need to select a significance level (SL) - here we decide on a significance level of 0.05. If the p-value of an independent variable is greater than SL, we will remove that independent variable and repeat the process with the remaining independent variables.

Next we create a new regressor of the OLS class (Ordinary Least Squares) from the statsmodels library.
It takes 2 arguments:
- endog: the dependent variable
- exog: the matrix containing all independent variables

Now we need to fit the OLS algorithm as shown below:

regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()

Then we will look at the summary to see which independent variable has a p-value higher than SL (0.05):

regressor_OLS.summary()

Below all the steps are outlined.

# Backward Elimination
import statsmodels.api as sm  # older tutorials import this as statsmodels.formula.api
X_opt = X[:,[0,1,2,3,4,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output:
x1 and x2 are the 2 dummy variables we added for State
x3 is R&D spend
x4 is Admin spend
x5 is Marketing spend

We have to look for the highest p-value greater than 0.05, which in this case is 0.99 (99%) for x2.
So we have to remove x2 (2nd dummy variable for State), which has index 2.

X_opt = X[:,[0,1,3,4,5]]

Now let's repeat the process after removing the independent variable with the highest p-value.

X_opt = X[:,[0,1,3,4,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output. We have to look for the highest p-value greater than 0.05, which in this case is 0.94 (94%) for x1.
So we have to remove x1 (1st dummy variable for State), which has index 1.

X_opt = X[:,[0,3,4,5]]

Now let's repeat the process after removing the independent variable with the highest p-value.

X_opt = X[:,[0,3,4,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output. We again look for the highest p-value greater than 0.05, which in this case is 0.602 (60%) for x2.
So we have to remove x2 (Admin spend), which has index 4.

X_opt = X[:,[0,3,5]]

Now let's repeat the process after removing the independent variable with the highest p-value.

X_opt = X[:,[0,3,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output. We again look for the highest p-value greater than 0.05, which in this case is 0.06 (6%) for x2 - just barely above our SL.
So we have to remove x2 (Marketing spend), which has index 5 in X_opt.

X_opt = X[:,[0,3]]

Now let's repeat the process one more time.

X_opt = X[:,[0,3]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Finally we are left with only 1 independent variable, which is R&D spend.

So we can build our model again, this time using only R&D spend as the independent variable, make the prediction, and our results should be better than the first time.
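The manual passes above follow a fixed recipe - fit, inspect the highest p-value, drop, repeat - so they can be automated. Below is a sketch of that loop; the loop structure and column bookkeeping are my additions, not part of the original article.

# Automated backward elimination (sketch); assumes X already has the ones column
import numpy as np
import statsmodels.api as sm

SL = 0.05
cols = list(range(X.shape[1]))  # start with all columns, including the constant
while True:
    regressor_OLS = sm.OLS(endog=y, exog=X[:, cols].astype(float)).fit()
    pvalues = regressor_OLS.pvalues
    worst = int(np.argmax(pvalues))
    if pvalues[worst] <= SL:
        break                   # every remaining variable is significant
    del cols[worst]             # drop the least significant column
# In practice you may want to always keep column 0 (the constant).
print("Columns kept:", cols)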
--------------------------------------------------------------------------------
/project_3_polynomial_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_3_polynomial_regression/poly_regression.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""

@author: omairaasim
"""

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Step 2 - Fitting Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Step 3 - Visualize Linear Regression Results
import matplotlib.pyplot as plt

plt.scatter(X, y, color="red")
plt.plot(X, lin_reg.predict(X))
plt.title("Linear Regression")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 4 - Linear Regression prediction
lin_reg.predict([[6.5]])

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Step 7 - Visualize Poly Regression Results
plt.scatter(X, y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression - Degree 4")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 8 - Polynomial Regression prediction
new_salary_pred = lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))
print('The predicted salary of a person at 6.5 Level is ', new_salary_pred)
--------------------------------------------------------------------------------
/project_3_polynomial_regression/project_3:
--------------------------------------------------------------------------------
#100DaysOfMLCode #100ProjectsInML

Today I'll be looking at the Polynomial Regression example from the A-Z Machine Learning course on Udemy.

If you look at the image above, which lists the equations for all 3 types of regression, you will notice that in Polynomial Regression we have the same variable x1, but raised to different powers.

For example:
- instead of x2 - we have x1 raised to the power 2
- instead of x3 - we have x1 raised to the power 3

Let's explore the dataset.

Dataset
First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.

Project Objective
Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify dataset.iloc[:, 2].values
which simply means take all rows and only the column with index 2 - in Python indexes begin at 0 - so index 2 here is the third column, which is Salary.

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

*********************************

Step 2: Fit Linear Regression model to dataset

First we will build a simple linear regression model to see what prediction it makes and then compare it to the prediction made by Polynomial Regression, to see which is more accurate.

We will be using the LinearRegression class from sklearn.linear_model. We create an object of the LinearRegression class and call the fit method, passing X and y.

# Step 2 - Fitting Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X,y)

********************************

Step 3: Visualize Linear Regression Results

Let's plot the graph to look at the results for Linear Regression.

# Step 3 - Visualize Linear Regression Results
import matplotlib.pyplot as plt

plt.scatter(X,y, color="red")
plt.plot(X, lin_reg.predict(X))
plt.title("Linear Regression")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

If we look at the graph, we can see that the straight line suggests a salary of around $300k for a person at level 6.5. We will confirm this in the next step.

***********************************

Step 4: Predict Linear Regression Results

# Step 4 prediction
lin_reg.predict([[6.5]])

We can see that the prediction is way off, as it predicts $330k.

Now let's check the predictions by implementing Polynomial Regression.

***********************************

Step 5: Convert X to polynomial format

For Polynomial Regression, we need to transform our matrix X to X_poly, where X_poly contains X raised to powers up to the degree we choose. If we choose degree 2, then X_poly will contain X and X to the power 2. If we choose degree 3, then X_poly will contain X, X to the power 2 and X to the power 3.

We will be using the PolynomialFeatures class from sklearn.preprocessing for this purpose. When we create an object of this class, we have to pass the degree parameter. Let's begin by choosing degree 2. Then we call the fit_transform method to transform matrix X.

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X)

Let's look at X_poly.

If you see, the 2nd column is the actual levels from 1 to 10 present in X.
The 3rd column contains X raised to the power 2 (as we chose degree 2).
The first column contains just 1's - this is automatically added by the PolynomialFeatures class to include the constant b0.
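A quick way to see this column layout for yourself is to transform a single value - the example value 3 below is mine, purely for illustration:

# What PolynomialFeatures(degree=2) produces for a single level
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
print(poly.fit_transform([[3]]))  # [[1. 3. 9.]] -> constant, x, x^2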
*********************************

Step 6: Fitting Polynomial Regression

Now we will create a new linear regression object called lin_reg_2 and pass X_poly to it, instead of the X that we passed in Step 2.

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly,y)

*********************************

Step 7: Visualize Poly Regression Results

Let's plot the graph to look at the results for Polynomial Regression.

# Step 7 - Visualize Poly Regression Results
plt.scatter(X,y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression Degree 2")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

If we look at the graph, we can see that a person at level 6.5 should be offered a salary of around $190k. We will confirm this in the next step.

********************************

Step 8: Predict Polynomial Regression Results

We get a prediction of $189k.

# Step 8 prediction
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))
********************************

Step 9 - Change degree to 3 and run steps 5-8

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=3)
X_poly = poly_reg.fit_transform(X)

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly,y)

# Step 7 - Visualize Poly Regression Results
plt.scatter(X,y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression Degree 3")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 8 prediction
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))

We get a prediction of $133k.

*********************************

Step 10 - Change degree to 4 and run steps 5-8

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly,y)

# Step 7 - Visualize Poly Regression Results
plt.scatter(X,y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression Degree 4")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 8 prediction
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))

We get a prediction of $158k, which looks reasonable based on our dataset.

So in this case, by using Linear Regression we got a prediction of $330k, and by using Polynomial Regression we got a prediction of $158k.
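Since Steps 9 and 10 only change the degree, the whole comparison can also be written as a small loop. This is my sketch rather than the course code, assuming X and y from Step 1:

# Compare the Level 6.5 prediction across degrees
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

for degree in (2, 3, 4):
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X)
    model = LinearRegression().fit(X_poly, y)
    pred = model.predict(poly.transform([[6.5]]))
    print(f"degree {degree}: predicted salary {pred[0]:,.0f}")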
Here is the full source code.
--------------------------------------------------------------------------------
/project_4_support_vector_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_4_support_vector_regression/project_4:
--------------------------------------------------------------------------------
Today I'll be looking at the Support Vector Regression (SVR) example from the A-Z Machine Learning course on Udemy.

#100DaysOfMLCode #100ProjectsInML

We will be working on the same problem that we worked on in Project 3. Here, instead of using Polynomial Regression, we will use Support Vector Regression and see whether the prediction is better or worse compared to Polynomial Regression.

Let's explore the dataset.

Dataset
First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.

Project Objective

Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify

y = dataset.iloc[:, 2:].values

which simply means take all rows and only the column with index 2, which is Salary.

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[: ,1:2].values
y = dataset.iloc[:, 2:].values


Step 2 - Feature Scaling

Unlike LinearRegression, the SVR class does not scale features internally, so we standardize X and y ourselves - without scaling, the large salary values would dominate the RBF kernel.

# Step 2 - Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)


Step 3 - Fit SVR

We will be using the SVR class from sklearn.svm. First we create an object of the SVR class, passing the kernel parameter as "rbf" (Radial Basis Function), and then call the fit method, passing X and y.

# Step 3 - Fit SVR
from sklearn.svm import SVR
regressor = SVR(kernel = "rbf")
regressor.fit(X, y.ravel())  # ravel: fit expects a 1-D target


Step 4 - Visualization

# Step 4 - Visualization
import matplotlib.pyplot as plt
plt.scatter(X, y, color="red")
plt.plot(X, regressor.predict(X), color="blue")
plt.title("SVR")
plt.xlabel("Position")
plt.ylabel("Salary")
plt.show()

Step 5 - Make Predictions

Since we want to predict the salary for an employee at level 6.5, we first have to apply the same feature scaling to the value 6.5.
Then we do the prediction.
Finally, since the predicted value is still on the scaled axis, we do an inverse transformation to get the actual value.
These steps are outlined below.

# Step 5 - Predictions
import numpy as np
# First transform 6.5 with the scaler fitted on X
sc_X_val = sc_X.transform(np.array([[6.5]]))
# Second predict the (scaled) value
scaled_y_pred = regressor.predict(sc_X_val)
# Third - since this is scaled - we have to inverse transform
# (reshape to 2-D: newer scikit-learn versions require it here)
y_pred = sc_y.inverse_transform(scaled_y_pred.reshape(-1, 1))

We can see that the predicted value is $170k, whereas with Polynomial Regression we got $158k.
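As an aside: the manual scale / predict / inverse-transform dance can be wrapped up with standard scikit-learn utilities. The sketch below is my alternative, not the course's code - Pipeline handles the X scaling and TransformedTargetRegressor handles the y scaling and its inverse automatically.

# Hedged sketch: same SVR model, scaling handled automatically
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

svr = TransformedTargetRegressor(
    regressor=make_pipeline(StandardScaler(), SVR(kernel="rbf")),
    transformer=StandardScaler(),
)
svr.fit(X, y.ravel())         # X, y as loaded in Step 1 (unscaled)
print(svr.predict([[6.5]]))  # inverse-transformed back to salary units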
--------------------------------------------------------------------------------
/project_4_support_vector_regression/svr_2019.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 1 19:28:27 2018

@author: omairaasim
"""

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[: ,1:2].values
y = dataset.iloc[:, 2:].values

# Step 2 - Feature Scaling (SVR does not scale features itself)
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)

# Step 3 - Fit SVR
from sklearn.svm import SVR
regressor = SVR(kernel = "rbf")
regressor.fit(X, y.ravel())  # ravel: fit expects a 1-D target

# Step 4 - Visualization
import matplotlib.pyplot as plt
plt.scatter(X, y, color="red")
plt.plot(X, regressor.predict(X), color="blue")
plt.title("SVR")
plt.xlabel("Position")
plt.ylabel("Salary")
plt.show()

# Step 5 - Predict Results
import numpy as np
# First transform 6.5 with the scaler fitted on X
sc_X_val = sc_X.transform(np.array([[6.5]]))
# Second predict the (scaled) value
scaled_y_pred = regressor.predict(sc_X_val)
# Third - since this is scaled - we have to inverse transform
# (reshape to 2-D: newer scikit-learn versions require it here)
y_pred = sc_y.inverse_transform(scaled_y_pred.reshape(-1, 1))
print('The predicted salary of a person at 6.5 Level is ', y_pred)
--------------------------------------------------------------------------------
/project_5_decision_tree_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_5_decision_tree_regression/decision_tree.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 1 20:35:24 2018

@author: omairaasim
"""

# Step 1 - Load Dataset
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Step 2 - Fit Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(criterion="squared_error")  # called "mse" in scikit-learn < 1.0
regressor.fit(X, y)

# Step 3 - Visualize
# Plot on a fine grid of levels so the stepwise shape of the tree is visible
import matplotlib.pyplot as plt
import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Decision Tree Regressor") 30 | plt.xlabel("Position") 31 | plt.ylabel("Salary") 32 | plt.show() 33 | 34 | # Step 4 - Predict 35 | y_pred = regressor.predict([[6.5]]) 36 | print('The predicted salary of a person at 6.5 Level is ',y_pred) 37 | -------------------------------------------------------------------------------- /project_5_decision_tree_regression/project_5: -------------------------------------------------------------------------------- 1 | Today we will be looking at the one of the most popular regression models called Decision Tree. 2 | 3 | #100DaysOfMLCode #100ProjectsInML 4 | 5 | I will be solving the same problem about predicting salary of a new employee based on his position level. 6 | 7 | I have solved the same problem in project 3 using Polynomial Regression - You can check it out here. 8 | And the same problem has been solved in project 4 using Support Vector Regression - You can check that project here. 9 | 10 | Let's understand Decision Trees. 11 | 12 | Decision tree regression model is Non Linear and a Non continuous model. 13 | 14 | Below is a scatter plot which represents our dataset. It has 2 independent variables X1 and X2 and what we are trying to predict is a 3rd dependent variable y. 15 | 16 | Insert image 17 | 18 | Now once we run the decision tree algorithm, the scatter plot will be split up into segments. Each one of these splits is called a leaf. The way the splits are made is based on the principle of information entropy. It is a mathematical concept and is quite complex. If you want to learn more about that - you can read up on the concept of information entropy. 19 | 20 | 21 | Let's walk through an example scenario so we understand how decision tree's work. Let's say the algorithm makes the first split at X1 = 20 - so the scatter plot is divided into 2 segments - first segment is when X1 < 20 and second segment is when X1 > 20. 22 | 23 | Insert image 24 | 25 | Insert image 26 | 27 | 28 | Now let's say split 2 happens at X2 = 170 - but it only happens to points where X1 > 20 29 | 30 | Insert image 31 | 32 | Insert image 33 | 34 | Next, split 3 happens at X2 = 200 - but it happens to points X1 < 20 35 | 36 | Insert image 37 | 38 | Insert image 39 | 40 | 41 | Finally, split 4 happens at X1 = 40 - but it applies to points where X1 > 20 and X2 < 170 42 | 43 | Insert image 44 | 45 | So now our decision tree is done. 46 | 47 | Now how do we determine the value of a new data point. It's very simple - we take the average of each of our terminal leaves. The diagram below shows an example of average for each of the terminal leaves. 48 | 49 | Insert image 50 | 51 | Now lets say we have a new data point where X1 = 30 and X2 = 50, it falls in the leaf whose average is -64.1 - so the decision tree algorithm will predict the value of y as -64.1. From the below diagram, we can see how it arrives at that value. 52 | 53 | Insert image 54 | 55 | 56 | Dataset 57 | First lets look at the dataset. It is Position_Salaries.csv and can be found here 58 | It has 3 columns - "Position", "Level" and "Salary" and describes the approximate salary range for an employee based on what level he falls under. 59 | 60 | For example if an employee is a Manager - he falls in Level 4 and should get around $80,000. 61 | 62 | Below is the screenshot of the dataset. 63 | 64 | Project Objective 65 | 66 | Lets assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. 
Dataset
First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.

Project Objective

Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.


Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify

dataset.iloc[:, 2].values

which simply means take all rows and only the column with index 2 - in Python indexes begin at 0 - so index 2 here is the third column, which is Salary.

Step 2 - Fit Decision Tree Regressor

We will be using the DecisionTreeRegressor class from sklearn.tree. First we create an object of the DecisionTreeRegressor class, passing the criterion parameter as "squared_error" (Mean Squared Error - older scikit-learn versions called this "mse"), and then call the fit method, passing X and y.
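Unlike the earlier write-ups, this article does not repeat the snippet inline, so for reference here is the corresponding code from decision_tree.py above:

# Step 2 - Fit Decision Tree Regressor (from decision_tree.py)
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(criterion="squared_error")  # "mse" in scikit-learn < 1.0
regressor.fit(X, y)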
Step 3 - Visualize
Let's plot the graph to look at the results for Decision Tree Regression. For decision trees we have to plot over a fine grid of continuous points, otherwise the stepwise shape of the model will not show up.

Step 4: Predict Decision Tree Regression Results

We get a prediction of $150k.
--------------------------------------------------------------------------------
/project_6_random_forest_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_6_random_forest_regression/project_6:
--------------------------------------------------------------------------------
Today I will be writing about the Random Forest Regression model. Random Forest is a form of ensemble learning. Ensemble learning, in simple terms, is when you take the same algorithm multiple times and put the results together to make it more powerful than the original version. Unlike the Decision Tree model, where we built a single decision tree to predict the value for a new data point, in Random Forest we build many decision trees (a typical default is 500 trees).

So instead of getting 1 prediction, in Random Forest we get many predictions for y (say 500 trees give 500 predictions). We then take the average of all the predictions and assign that to y.

#100DaysOfMLCode #100ProjectsInML

I solved the same problem in project 3 using Polynomial Regression - you can check it out here.
We then solved it using Support Vector Regression - you can check that project here.
And in the last project, we used Decision Tree Regression - it's available here.

Today, we will use the Random Forest model and see how good our prediction is.

Dataset

First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.


Project Objective

Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify

dataset.iloc[:, 2].values

which simply means take all rows and only the column with index 2 - in Python indexes begin at 0 - so index 2 here is the third column, which is Salary.

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

Step 2 - Fit Random Forest Regressor

We will be using the RandomForestRegressor class from sklearn.ensemble. First we create an object of the RandomForestRegressor class.

When initializing the class, we need to specify the number of trees - the n_estimators parameter. The second parameter, random_state=0, is just so that our results match. We then call the fit method, passing X and y.

First let's run with n_estimators set to 10 trees.

# Step 2 - Fit Regressor
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X, y)
Step 3 - Visualize
Let's plot the graph to look at the results for Random Forest Regression. For Random Forest, too, we have to plot over continuous points.

# Step 3 - Visualize
import matplotlib.pyplot as plt
import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 10 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

Step 4: Predict Random Forest Regression Results

We get a prediction of $167k.

# Step 4 - Predict
regressor.predict([[6.5]])

Step 5: Increase number of trees to 100

regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(X, y)

import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))
plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 100 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

regressor.predict([[6.5]])

We get a prediction of $158k.

Step 6: Increase number of trees to 300

regressor = RandomForestRegressor(n_estimators=300, random_state=0)
regressor.fit(X, y)

import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))
plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 300 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

regressor.predict([[6.5]])

We get a prediction of $160k.

So, to compare our results with previous regression models:
Polynomial Regression gave a prediction of $158k
Support Vector Regression gave a prediction of $170k
Decision Tree Regression gave a prediction of $150k
Random Forest Regression with 300 trees gave a prediction of $160k
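Steps 2 through 6 differ only in n_estimators, so the whole comparison can also be reproduced with a small loop. This is my sketch, not the course code, assuming X and y from Step 1:

# Compare the Level 6.5 prediction across forest sizes
from sklearn.ensemble import RandomForestRegressor

for n in (10, 100, 300):
    forest = RandomForestRegressor(n_estimators=n, random_state=0).fit(X, y)
    print(n, "trees ->", forest.predict([[6.5]]))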
--------------------------------------------------------------------------------
/project_6_random_forest_regression/random_forest.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 2 11:54:30 2018

@author: omairaasim
"""
# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Step 2 - Fit Regressor
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(X, y)

# Step 3 - Visualize
import matplotlib.pyplot as plt
import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 100 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

# Step 4 - Predict
y_pred = regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level is ', y_pred)
--------------------------------------------------------------------------------
/project_7_compare_regression_models/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_7_compare_regression_models/compare.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 3 11:39:26 2018

@author: omairaasim
"""

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values


###########################
### Linear Regression   ###
###########################
from sklearn.linear_model import LinearRegression
linear_regressor = LinearRegression()
linear_regressor.fit(X, y)

# Predict
lin_pred = linear_regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level with Linear Regression is ', lin_pred)

################################
### Polynomial Regression   ###
################################

# ** NOTE - convert X to X_poly of the required degree
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=4)
X_poly = poly_features.fit_transform(X)

from sklearn.linear_model import LinearRegression
poly_regressor = LinearRegression()
poly_regressor.fit(X_poly, y)

# Predict - have to convert 6.5 to poly format
poly_pred = poly_regressor.predict(poly_features.fit_transform([[6.5]]))
print('The predicted salary of a person at 6.5 Level with Polynomial Regression is ', poly_pred)

################################
### SVR Regression          ###
################################

# ** NOTE - SVR does not apply feature scaling itself, so scale X and y manually
from sklearn.preprocessing import StandardScaler
ss_x = StandardScaler()
ss_y = StandardScaler()
X_scaled = ss_x.fit_transform(X)
y_scaled = ss_y.fit_transform(y.reshape(-1,1))


from sklearn.svm import SVR
svr_regressor = SVR(kernel="rbf")
svr_regressor.fit(X_scaled, y_scaled.ravel())  # ravel: fit expects a 1-D target

# Predict - since we did feature scaling -
# we have to scale/transform 6.5 also
position_val = ss_x.transform([[6.5]])
pred_val_scaled = svr_regressor.predict(position_val)
# The above statement returns a scaled predicted value,
# so convert it back using inverse transform
# (reshape to 2-D: newer scikit-learn versions require it here)
svr_pred = ss_y.inverse_transform(pred_val_scaled.reshape(-1, 1))
print('The predicted salary of a person at 6.5 Level with Support Vector Regression is ', svr_pred)

################################
### Decision Tree Regression ###
################################
from sklearn.tree import DecisionTreeRegressor
tree_regressor = DecisionTreeRegressor(criterion="squared_error")  # "mse" in scikit-learn < 1.0
tree_regressor.fit(X, y)

# Predict
tree_pred = tree_regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level with Decision Tree Regression is ', tree_pred)

################################
### Random Forest Regression ###
################################
from sklearn.ensemble import RandomForestRegressor
forest_regressor = RandomForestRegressor(n_estimators=300, random_state=0)
forest_regressor.fit(X, y)

# Predict
forest_pred = forest_regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level with Random Forest Regression is ', forest_pred)


################################
### Visualizations           ###
################################
import matplotlib.pyplot as plt
import numpy as np

X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, linear_regressor.predict(X_grid), color="blue")
plt.plot(X_grid, poly_regressor.predict(poly_features.fit_transform(X_grid)), color="green")
plt.plot(X_grid, ss_y.inverse_transform(svr_regressor.predict(ss_x.transform(X_grid)).reshape(-1, 1)), color="orange")
plt.plot(X_grid, tree_regressor.predict(X_grid), color="black")
plt.plot(X_grid, forest_regressor.predict(X_grid), color="purple")
plt.title("Regression")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()
--------------------------------------------------------------------------------
/project_7_compare_regression_models/project_7:
--------------------------------------------------------------------------------
In the last 6 articles, I've covered some of the most popular regression models:
- Simple Linear Regression
- Multiple Linear Regression
- Polynomial Regression
- Support Vector Regression
- Decision Tree Regression
- Random Forest Regression

#100DaysOfMLCode #100ProjectsInML

The articles are for absolute beginners and have been presented in the simplest form. In programming terms, they are like the "Hello World" examples of the different types of regression. The intent is to get developers acquainted with the high-level concept of regression and, in the process, get their hands dirty by implementing a very simple project.

In reality, building the model and making a prediction are the simplest steps. All it involves is just 3 lines of code:
- Create an object of the regression class.
- Call the fit method.
- Call the predict method.

But if it were this simple, every other person would be a data scientist. The bulk of the work involves exploring and understanding the data, cleaning it up and imputing missing values. Basically, understanding and preparing the data is most of the work.

In the upcoming projects on regression, I will be implementing some end-to-end projects which will include all the steps mentioned above.

For now, let's recap the different types of regression models that we built and see which model gives us the best prediction.

Dataset

First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.
Project Objective

A company "XYZ" uses the dataset above to determine what salary to offer a new employee. Let's say an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for the past 2 years. So based on the table above - he falls between level 6 and level 7.

The new employee says he is currently drawing a salary of $160,000.

We want to build a model to predict whether he is telling the truth.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls between Level 6 and 7 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

Step 2: Apply Linear Regression Model and make prediction

###########################
### Linear Regression   ###
###########################
from sklearn.linear_model import LinearRegression
linear_regressor = LinearRegression()
linear_regressor.fit(X, y)

# Predict
lin_pred = linear_regressor.predict([[6.5]])

Step 3: Apply Polynomial Regression Model and make prediction

################################
### Polynomial Regression   ###
################################

# ** NOTE - convert X to X_poly of the required degree
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=4)
X_poly = poly_features.fit_transform(X)

from sklearn.linear_model import LinearRegression
poly_regressor = LinearRegression()
poly_regressor.fit(X_poly, y)

# Predict - have to convert 6.5 to poly format
poly_pred = poly_regressor.predict(poly_features.fit_transform([[6.5]]))


Step 4: Apply Support Vector Regression Model and make prediction

################################
### SVR Regression          ###
################################

# ** NOTE - SVR does not apply feature scaling itself, so scale X and y manually
from sklearn.preprocessing import StandardScaler
ss_x = StandardScaler()
ss_y = StandardScaler()
X_scaled = ss_x.fit_transform(X)
y_scaled = ss_y.fit_transform(y.reshape(-1,1))

from sklearn.svm import SVR
svr_regressor = SVR(kernel="rbf")
svr_regressor.fit(X_scaled, y_scaled.ravel())  # ravel: fit expects a 1-D target

# Predict - since we did feature scaling - we have to scale/transform 6.5 also
position_val = ss_x.transform([[6.5]])

# Predict
pred_val_scaled = svr_regressor.predict(position_val)

# The above statement returns a scaled predicted value - convert it back using inverse transform
svr_pred = ss_y.inverse_transform(pred_val_scaled.reshape(-1, 1))

Step 5: Apply Decision Tree Regression Model and make prediction

################################
### Decision Tree Regression ###
################################
from sklearn.tree import DecisionTreeRegressor
tree_regressor = DecisionTreeRegressor(criterion="squared_error")  # "mse" in scikit-learn < 1.0
tree_regressor.fit(X, y)

# Predict
tree_pred = tree_regressor.predict([[6.5]])


Step 6: Apply Random Forest Regression Model and make prediction

################################
### Random Forest Regression ###
################################
from sklearn.ensemble import RandomForestRegressor
forest_regressor = RandomForestRegressor(n_estimators=300, random_state=0)
forest_regressor.fit(X, y)

# Predict
forest_pred = forest_regressor.predict([[6.5]])

Step 7: Compare Prediction Results

The table below shows the prediction results obtained from the different regression models we tried above.
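The original screenshot of the table is not reproduced here; the figures below are the rounded predictions reported in the earlier articles of this series:

Model                               Predicted salary at Level 6.5
Linear Regression                   ~$330k
Polynomial Regression (degree 4)    ~$158k
Support Vector Regression           ~$170k
Decision Tree Regression            ~$150k
Random Forest (300 trees)           ~$160k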
Step 8: Conclusion

According to the problem statement, we know that the new employee says he is currently making $160k.

Based on the models that we ran, we can see from the table above that Polynomial Regression and Random Forest made predictions very close to that figure. So we can conclude that the new employee is telling the truth.
--------------------------------------------------------------------------------
/project_8_predict_weight/Height_Weight_single_variable_data_101_series_1.0.csv:
--------------------------------------------------------------------------------
Height,Weight
121.92,28
124.46,28.1
127,28.2
129.54,28.3
132.08,28.4
134.62,28.5
137.16,35.2
139.7,37.6
142.24,40
144.78,42.6
147.32,44.9
149.86,47.6
152.4,49.9
154.94,52.6
157.48,54.9
160.02,57.6
162.56,59.9
165.1,62.6
167.64,64.8
170.18,67.6
172.72,69.8
175.26,72.6
177.8,74.8
180.34,77.5
182.88,79.8
185.42,82.5
187.96,84.8
190.5,87.5
193.04,89.8
195.58,92.5
198.12,94.8
200.66,97.5
203.2,99.8
205,102.5
208,104.8
--------------------------------------------------------------------------------
/project_8_predict_weight/project_8:
--------------------------------------------------------------------------------
I thought of working on a Kaggle dataset today, but at the same time I was looking for a very simple project. I came across this dataset that lists the heights and weights of people. You can check it out here.

We will go over some more complex and larger datasets in the upcoming projects. But in this project I will introduce two new topics - checking for null values and evaluating a regression model.

So in this project, we have to train our model on this data and then predict the weight of a person given his height.

#100DaysOfMLCode #100ProjectsInML

Let's get started.

Step 1 - Load Dataset
First we will load the dataset. The file is Height_Weight_single_variable_data_101_series_1.0.csv.

# Step 1 : Load Dataset
import pandas as pd
dataset = pd.read_csv("Height_Weight_single_variable_data_101_series_1.0.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values

Step 2 - Check for missing values
This is one of the most important steps in data preparation. We have to see if the dataset has any missing values and then figure out the best possible way to impute them.

# Step 2: Check for missing values
dataset.isnull().sum()

As we can see, it shows there are 0 missing values in the Height and Weight columns. So there is nothing to be done here.
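For completeness - our data has no missing values, but if it did, one simple strategy is to fill each gap with the column mean. A minimal sketch using scikit-learn's SimpleImputer (my addition, not part of the original article):

# Fill missing values with the column mean
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)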
Step 3 - Split dataset into training set and test set

There are 35 entries in the dataset. Let's take 20% of the data as test data.

# Step 3: Split dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

Step 4 - Fit Regression Model

We will first fit a Linear Regression model and see how well it fits the data. If the performance is not good, we can experiment with other regression models.

# Step 4: Fit Linear Regression Model
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

Step 5 - Predict test set values

Now that the Linear Regression model is trained, let's predict the X_test values.

# Step 5: Predict values for test data
lin_pred = lin_reg.predict(X_test)

Step 6 - Evaluate performance of the Model

We can see from the image below that the predicted values are very close to the real values.

IMAGE y_test, y_pred

After we fit the model, we have to evaluate how well it fits the data. For this purpose, we will look at "R squared" and the Mean Squared Error.

- "R squared": This value is between 0 and 1, i.e. between 0% and 100%. Generally speaking, the closer it is to 1 the better - 1 means perfect correlation.

Wikipedia defines it as:
R squared is the proportion of the variance in the dependent variable that is predictable from the independent variable(s).

So if it is 100%, the two variables are perfectly correlated.

- Mean Squared Error (MSE): This is the average of the squares of the errors. "Error" here means the difference between an actual value and the corresponding predicted value; we square each difference. So if the mean squared error is large, the errors are large. The lower the value, the better the model, and we can use the MSE value when selecting one model over another.

from sklearn import metrics
print('R square = ', metrics.r2_score(y_test, lin_pred))
print('Mean squared Error = ', metrics.mean_squared_error(y_test, lin_pred))
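One small addition of my own: taking the square root of the MSE gives the RMSE, which is in the same unit as the target (kilograms here), so it is often easier to interpret.

# RMSE - same unit as the Weight column
import numpy as np
print('Root mean squared error = ', np.sqrt(metrics.mean_squared_error(y_test, lin_pred)))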
--------------------------------------------------------------------------------
/project_8_predict_weight/project_8_predict_weight.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 8 08:49:06 2019

@author: omairaasim
"""

# Step 1 : Load Dataset
import pandas as pd
dataset = pd.read_csv("Height_Weight_single_variable_data_101_series_1.0.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values

# Step 2: Check for missing values
dataset.isnull().sum()

# Step 3: Split dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Step 4: Fit Linear Regression Model
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Step 5: Predict values for test data
lin_pred = lin_reg.predict(X_test)

# Step 6: Compare predictions with real results
from sklearn import metrics
print('R square = ', metrics.r2_score(y_test, lin_pred))
print('Mean squared Error = ', metrics.mean_squared_error(y_test, lin_pred))


# Step 7: Visualize Training set
import matplotlib.pyplot as plt
plt.scatter(X_train, y_train, color="red")
plt.plot(X_train, lin_reg.predict(X_train), color="blue")
plt.title("Height and Weight - Training Set")
plt.xlabel("Height")
plt.ylabel("Weight")
plt.show()

# Step 8: Visualize Test set
import matplotlib.pyplot as plt
plt.scatter(X_test, y_test, color="red")
plt.plot(X_train, lin_reg.predict(X_train), color="blue")
plt.title("Height and Weight - Test Set")
plt.xlabel("Height")
plt.ylabel("Weight")
plt.show()

# Step 9: Make new Prediction
lin_pred_new = lin_reg.predict([[166]])
print('If a person has height 166, the predicted weight is ', lin_pred_new)
--------------------------------------------------------------------------------
/project_9_predict_weight_sex/predict_weight.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 9 07:41:20 2019

@author: omairaasim
"""

# Step 1 - Load data
import pandas as pd
dataset = pd.read_csv("weight-height.csv")

# Step 2 - Analyze data
dataset.info()
dataset.describe()
dataset.isnull().sum()

# Step 3 - Convert Gender to number
# Option A: LabelEncoder (comment this section if using the other option)
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 2].values
from sklearn.preprocessing import LabelEncoder
labelEncoder_gender = LabelEncoder()
X[:,0] = labelEncoder_gender.fit_transform(X[:,0])

import numpy as np
X = np.vstack(X[:, :]).astype(float)  # np.float was removed in NumPy 1.24; plain float works

############ OR ##############

# Step 3 - Convert Gender to number
# Option B: Replace directly in the dataframe
# dataset['Gender'].replace('Female', 0, inplace=True)
# dataset['Gender'].replace('Male', 1, inplace=True)
# X = dataset.iloc[:, :-1].values
# y = dataset.iloc[:, 2].values

# Step 4 - Split data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Step 5 - Fit Regression Model
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Step 6 - Make Prediction using test data
lin_pred = lin_reg.predict(X_test)


# Step 7 - Model Accuracy
from sklearn import metrics
print('R square = ', metrics.r2_score(y_test, lin_pred))
print('Mean squared Error = ', metrics.mean_squared_error(y_test, lin_pred))
print('Mean absolute Error = ', metrics.mean_absolute_error(y_test, lin_pred))

# Step 8 - Predict my weight
my_weight_pred = lin_reg.predict([[0,74]])
print('My predicted weight = ', my_weight_pred)
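An optional illustration of my own, not part of the original script: with Gender encoded as 0 = Female and 1 = Male (as LabelEncoder produces here), the difference between these two predictions at the same height is exactly the model's learned Gender coefficient.

# Illustration: effect of the Gender feature at a fixed height of 74
female_pred = lin_reg.predict([[0, 74]])
male_pred = lin_reg.predict([[1, 74]])
print('Predicted weight difference (male - female) at height 74 = ', male_pred - female_pred)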
--------------------------------------------------------------------------------