├── project_10_logistic_regression ├── iphone_purchase_records.csv └── logistic_regression_dec10.py ├── project_11_k_nearest_neighbor ├── iphone_purchase_records.csv └── knn_dec12.py ├── project_12_svm ├── iphone_purchase_records.csv ├── svm └── svm_dec12.py ├── project_13_kernel_svm ├── iphone_purchase_records.csv └── kernel_svm_dec12.py ├── project_14_naive_bayes ├── iphone_purchase_records.csv └── naive_bayes.py ├── project_15_decision_tree_classifier ├── decision_tree_classification.py └── iphone_purchase_records.csv ├── project_16_random_forest_classifier ├── iphone_purchase_records.csv └── random_forest_classifier.py ├── project_17_compare_classification_algorithms ├── compare_classification_algos.py └── iphone_purchase_records.csv ├── project_1_simple_linear_regression ├── Salary_Data.csv ├── project_1 └── simple_linear_regression.py ├── project_2_multiple_linear_regression ├── 50_Startups.csv ├── multiple_linear_regression.py └── project_2 ├── project_3_polynomial_regression ├── Position_Salaries.csv ├── poly_regression.py └── project_3 ├── project_4_support_vector_regression ├── Position_Salaries.csv ├── project_4 └── svr_2019.py ├── project_5_decision_tree_regression ├── Position_Salaries.csv ├── decision_tree.py └── project_5 ├── project_6_random_forest_regression ├── Position_Salaries.csv ├── project_6 └── random_forest.py ├── project_7_compare_regression_models ├── Position_Salaries.csv ├── compare.py └── project_7 ├── project_8_predict_weight ├── Height_Weight_single_variable_data_101_series_1.0.csv ├── project_8 └── project_8_predict_weight.py └── project_9_predict_weight_sex ├── predict_weight.py └── weight-height.csv /project_10_logistic_regression/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | 
Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | 
Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | 
Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1
-------------------------------------------------------------------------------- /project_10_logistic_regression/logistic_regression_dec10.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Dec 11 17:31:56 2018
5 | 
6 | @author: omairaasim
7 | """
8 | 
9 | # Step 1 - Load Data
10 | import pandas as pd
11 | dataset = pd.read_csv("iphone_purchase_records.csv")
12 | X = dataset.iloc[:,:-1].values
13 | y = dataset.iloc[:, 3].values
14 | 
15 | # Step 2 - Convert Gender to number
16 | from sklearn.preprocessing import LabelEncoder
17 | labelEncoder_gender = LabelEncoder()
18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0])
19 | 
20 | # Optional - if you want to convert X to float data type (use the builtin float; np.float was removed from NumPy)
21 | import numpy as np
22 | X = np.vstack(X[:, :]).astype(float)
23 | 
24 | # Step 3 - Split Data into training and testing
25 | from sklearn.model_selection import train_test_split
26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
27 | 
28 | # Step 4 - Feature Scaling
29 | from sklearn.preprocessing import StandardScaler
30 | sc = StandardScaler()
31 | X_train = sc.fit_transform(X_train)
32 | X_test = sc.transform(X_test)
33 | 
34 | 
35 | # Step 5 - Logistic Regression Classifier
36 | from sklearn.linear_model import LogisticRegression
37 | classifier = LogisticRegression(random_state=0, solver="liblinear")
38 | classifier.fit(X_train, y_train)
39 | 
40 | 
41 | # Step 6 - Predict
42 | y_pred = classifier.predict(X_test)
43 | 
44 | # Step 7 - Confusion Matrix
45 | from sklearn import metrics
46 | cm = metrics.confusion_matrix(y_test, y_pred)
47 | print(cm)
48 | accuracy = metrics.accuracy_score(y_test, y_pred)
49 | print("Accuracy score:",accuracy)
50 | precision = metrics.precision_score(y_test, y_pred)
51 | print("Precision score:",precision)
52 | recall = metrics.recall_score(y_test, y_pred)
53 | print("Recall score:",recall)
54 | 
55 | # Step 8 - Make New Predictions
56 | x1 = sc.transform([[1,21,40000]])
57 | x2 = sc.transform([[1,21,80000]])
58 | x3 = sc.transform([[0,21,40000]])
59 | x4 = sc.transform([[0,21,80000]])
60 | x5 = sc.transform([[1,41,40000]])
61 | x6 = sc.transform([[1,41,80000]])
62 | x7 = sc.transform([[0,41,40000]])
63 | x8 = sc.transform([[0,41,80000]])
64 | 
65 | print("Male aged 21 making $40k will buy iPhone:", classifier.predict(x1))
66 | print("Male aged 21 making $80k will buy iPhone:", classifier.predict(x2))
67 | print("Female aged 21 making $40k will buy iPhone:", classifier.predict(x3))
68 | print("Female aged 21 making $80k will buy iPhone:", classifier.predict(x4))
69 | print("Male aged 41 making $40k will buy iPhone:", classifier.predict(x5))
70 | print("Male aged 41 making $80k will buy iPhone:", classifier.predict(x6))
71 | print("Female aged 41 making $40k will buy iPhone:", classifier.predict(x7))
72 | print("Female aged 41 making $80k will buy iPhone:", classifier.predict(x8))
73 | 
74 | 
75 | 
76 | 
-------------------------------------------------------------------------------- /project_11_k_nearest_neighbor/iphone_purchase_records.csv:
--------------------------------------------------------------------------------
[contents identical to /project_10_logistic_regression/iphone_purchase_records.csv above]
-------------------------------------------------------------------------------- /project_11_k_nearest_neighbor/knn_dec12.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Wed Dec 12 12:00:17 2018
5 | 
6 | @author: omairaasim
7 | """
8 | 
9 | # Step 1 - Load Data
10 | import pandas as pd
11 | dataset = pd.read_csv("iphone_purchase_records.csv")
12 | X = dataset.iloc[:,:-1].values
13 | y = dataset.iloc[:, 3].values
14 | 
15 | # Step 2 - Convert Gender to number
16 | from sklearn.preprocessing import LabelEncoder
17 | labelEncoder_gender = LabelEncoder()
18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0])
19 | 
20 | # Optional - if you want to convert X to float data type (use the builtin float; np.float was removed from NumPy)
21 | import numpy as np
22 | X = np.vstack(X[:, :]).astype(float)
23 | 
24 | 
25 | # Step 3 - Split into training and test data
26 | from sklearn.model_selection import train_test_split
27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
28 | 
29 | 
30 | # Step 4 - Feature Scaling
31 | from sklearn.preprocessing import StandardScaler
32 | sc_X = StandardScaler()
33 | X_train = sc_X.fit_transform(X_train)
34 | X_test = sc_X.transform(X_test)
35 | 
36 | # Step 5 - Fit KNN Classifier
37 | from sklearn.neighbors import KNeighborsClassifier
38 | # metric = minkowski and p=2 is Euclidean Distance
39 | # metric = minkowski and p=1 is Manhattan Distance
40 | classifier = KNeighborsClassifier(n_neighbors=5, metric="minkowski", p=2)
41 | classifier.fit(X_train, y_train)
42 | 
43 | # Step 6 - Make Prediction
44 | y_pred = classifier.predict(X_test)
45 | 
46 | # Reference results noted for this train/test split:
47 | #cm = metrics.confusion_matrix(y_test, y_pred) ## 4,3 errors
48 | #accuracy = metrics.accuracy_score(y_test, y_pred) ## 0.93
49 | #precision = metrics.precision_score(y_test, y_pred) ## 0.87
50 | #recall = metrics.recall_score(y_test, y_pred) ## 0.90
51 | 
52 | # Step 7 - Confusion Matrix
53 | from sklearn import metrics
54 | cm = metrics.confusion_matrix(y_test, y_pred)
55 | print(cm)
56 | accuracy = metrics.accuracy_score(y_test, y_pred)
57 | print("Accuracy score:",accuracy)
58 | precision = metrics.precision_score(y_test, y_pred)
59 | print("Precision score:",precision)
60 | recall = metrics.recall_score(y_test, y_pred)
61 | print("Recall score:",recall)
62 | 
-------------------------------------------------------------------------------- /project_12_svm/iphone_purchase_records.csv: --------------------------------------------------------------------------------
[contents identical to /project_10_logistic_regression/iphone_purchase_records.csv above]
-------------------------------------------------------------------------------- /project_12_svm/svm:
--------------------------------------------------------------------------------
1 | Machine Learning Project 12 - Using Support Vector Classification
2 | 
3 | Below is the sample dataset - points in a two-dimensional space. Some observations are red and some are green, and these points have already been classified.
4 | 
5 | img 1
6 | 
7 | We will use an SVM to draw a line that separates these 2 categories. For example - let us assume the line below is drawn by the SVM algorithm to separate the 2 categories while also having the maximum margin. By margin we mean that there will never be any data point inside the margin.
8 | You can read more about SVMs and maximum margins in this great tutorial.
9 | 
10 | This line is drawn at an equal distance from the red and green points.
11 | 
12 | img 2
13 | 
14 | 
15 | These 2 points are called the support vectors.
16 | These 2 points support the algorithm - even if you get rid of the other points, nothing will change; the algorithm will be exactly the same. The other points do not contribute to the result of the algorithm. Only the 2 highlighted points contribute, and hence they are called the support vectors.
17 | You could call them support points in a two-dimensional space, but in reality they are vectors: in a multidimensional space, when you have more than 2 variables - maybe 10 or 50 - each point can no longer be visualized in two dimensions, so each of those points is actually a vector in that multidimensional space.
18 | 
19 | 
20 | 
21 | img 3
22 | 
23 | 
24 | The line in the middle is called the Maximum Margin Hyperplane in a multidimensional space, or the Maximum Margin Classifier in a two-dimensional space.
25 | The green dotted line is called the positive hyperplane.
26 | The red dotted line is called the negative hyperplane. It doesn't matter in which order you name them - it's just that one is positive and the other is negative.
27 | 
28 | 
29 | 
30 | What is special about SVMs?
31 | 
32 | Let's say you are building an algorithm to identify apples and oranges. What most machine learning algorithms would do is look at the most common-looking types of apples and the most common-looking types of oranges to train themselves. Based on that, they will identify new samples as either apple or orange.
33 | 
34 | But in the case of a Support Vector Machine - instead of looking at the most common types of apples and oranges - the SVM looks at apples that are very much like an orange and, similarly, at oranges that resemble an apple.
35 | 
36 | If you look at the image below - the SVM would pick the apple on the left that looks very similar to an orange, and the green orange on the right that looks very similar to a green apple. These 2 points would be the support vectors, and they are very close to the boundary. So the SVM is a very different type of algorithm: it picks the extreme cases close to the boundary and uses them to construct its analysis. That's why, in certain cases, the SVM performs better than other classification algorithms.
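37 | 
38 | To make the margin idea concrete, here is a small illustrative sketch (an addition for this write-up, not part of the original project code; the toy points are made up): a fitted linear SVC in scikit-learn exposes exactly these support vectors, and the margin width can be recovered from the weight vector w as 2/||w||.
39 | 
40 | import numpy as np
41 | from sklearn.svm import SVC
42 | 
43 | # two small hand-made clusters standing in for the red and green points
44 | X = np.array([[1.0, 2.0], [2.0, 3.0], [2.0, 1.0],
45 |               [6.0, 5.0], [7.0, 7.0], [8.0, 6.0]])
46 | y = np.array([0, 0, 0, 1, 1, 1])
47 | 
48 | classifier = SVC(kernel="linear")
49 | classifier.fit(X, y)
50 | 
51 | # the extreme points described above - the support vectors
52 | print(classifier.support_vectors_)
53 | 
54 | # w is normal to the maximum margin hyperplane; the margin width is 2/||w||
55 | w = classifier.coef_[0]
56 | print("Margin width:", 2 / np.linalg.norm(w))
57 | 
58 | Dropping any of the non-support points and refitting leaves support_vectors_ and the margin unchanged - which is exactly the "only the support vectors contribute" point made above.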
37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /project_12_svm/svm_dec12.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 12 20:21:32 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(np.float) 23 | 24 | 25 | # Step 3 - Split into Train and Test set 26 | from sklearn.model_selection import train_test_split 27 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 28 | 29 | # Step 4 - Feature Scaling 30 | from sklearn.preprocessing import StandardScaler 31 | ss_X = StandardScaler() 32 | X_train = ss_X.fit_transform(X_train) 33 | X_test = ss_X.transform(X_test) 34 | 35 | # Step 5 - Fit SVC Classifier 36 | from sklearn.svm import SVC 37 | classifier = SVC(kernel = "linear", random_state=0) 38 | classifier.fit(X_train, y_train) 39 | 40 | # Step 6 - Predict 41 | y_pred = classifier.predict(X_test) 42 | 43 | 44 | # Step 7 - Confusion Matrix 45 | from sklearn import metrics 46 | cm = metrics.confusion_matrix(y_test, y_pred) 47 | print(cm) 48 | accuracy = metrics.accuracy_score(y_test, y_pred) 49 | print("Accuracy score:",accuracy) 50 | precision = metrics.precision_score(y_test, y_pred) 51 | print("Precision score:",precision) 52 | recall = metrics.recall_score(y_test, y_pred) 53 | print("Recall score:",recall) 54 | -------------------------------------------------------------------------------- /project_13_kernel_svm/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | 
Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | 
Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | 
Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_13_kernel_svm/kernel_svm_dec12.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 12 20:53:27 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | import pandas as pd 10 | dataset = pd.read_csv("iphone_purchase_records.csv") 11 | X = dataset.iloc[:,:-1].values 12 | y = dataset.iloc[:, 3].values 13 | 14 | # Step 2 - Convert Gender to number 15 | from sklearn.preprocessing import LabelEncoder 16 | labelEncoder_gender = LabelEncoder() 17 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 18 | 19 | # Optional - if you want to convert X to float data type 20 | import numpy as np 21 | X = np.vstack(X[:, :]).astype(np.float) 22 | 23 | # Step 2 - Split into training and Test 24 | from sklearn.model_selection import train_test_split 25 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 26 | 27 | # Step 3 - Feature Scaling 28 | from sklearn.preprocessing import StandardScaler 29 | ss_X = StandardScaler() 30 | X_train = ss_X.fit_transform(X_train) 31 | X_test = ss_X.transform(X_test) 32 | 33 | 34 | # Step 4 - Fit SCV Classifier 35 | from sklearn.svm import SVC 36 | classifier = SVC( kernel="rbf", random_state=0) 37 | classifier.fit(X_train, y_train) 38 | 39 | # Step 5 - Predict 40 | y_pred = classifier.predict(X_test) 41 | 42 | from sklearn import metrics 43 | cm = metrics.confusion_matrix(y_test, y_pred) 44 | print(cm) 45 | accuracy = metrics.accuracy_score(y_test, y_pred) 46 | print("Accuracy score:",accuracy) 47 | precision = metrics.precision_score(y_test, y_pred) 48 | print("Precision score:",precision) 49 | recall = metrics.recall_score(y_test, y_pred) 50 | print("Recall score:",recall) -------------------------------------------------------------------------------- /project_14_naive_bayes/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | 
Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | 
Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 
| Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_14_naive_bayes/naive_bayes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 18 12:34:51 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(float) # np.float was removed in recent NumPy - use the builtin float 23 | 24 | # Step 3 - Splitting the data into Train and Test 25 | from sklearn.model_selection import train_test_split 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 27 | 28 | # Step 4 - Feature Scaling 29 | from sklearn.preprocessing import StandardScaler 30 | ss_X = StandardScaler() 31 | X_train = ss_X.fit_transform(X_train) 32 | X_test = ss_X.transform(X_test) 33 | 34 | # Step 5 - Fit the classifier 35 | from sklearn.naive_bayes import GaussianNB 36 | classifier = GaussianNB() 37 | classifier.fit(X_train, y_train) 38 | 39 | # Step 6 - Predict 40 | y_pred = classifier.predict(X_test) 41 | 42 | # Step 7 - Confusion Matrix 43 | from sklearn import metrics 44 | cm = metrics.confusion_matrix(y_test, y_pred) 45 | print(cm) 46 | accuracy = metrics.accuracy_score(y_test, y_pred) 47 | print("Accuracy score:",accuracy) 48 | precision = metrics.precision_score(y_test, y_pred) 49 | print("Precision score:",precision) 50 | recall = metrics.recall_score(y_test, y_pred) 51 | print("Recall score:",recall)
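52 | 53 | # Optional (extra sketch, not in the original script) - if you want a confidence 54 | # for each prediction, GaussianNB also exposes predict_proba, which returns one 55 | # column per class: P(no purchase), P(purchase) 56 | y_prob = classifier.predict_proba(X_test) 57 | print("First 5 probability pairs:", y_prob[:5])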
-*- 3 | """ 4 | Created on Wed Dec 19 17:30:09 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(np.float) 23 | 24 | # Step 3 - Split Data 25 | from sklearn.model_selection import train_test_split 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 27 | 28 | # Step 4 - Fit the classifier 29 | from sklearn.tree import DecisionTreeClassifier 30 | classifier = DecisionTreeClassifier(criterion = "entropy", random_state=0) 31 | classifier.fit(X_train, y_train) 32 | 33 | # Step 5 - Predict 34 | y_pred = classifier.predict(X_test) 35 | 36 | 37 | # Step 6 - Evaluate the model performance 38 | from sklearn import metrics 39 | cm = metrics.confusion_matrix(y_test, y_pred) 40 | print(cm) 41 | accuracy = metrics.accuracy_score(y_test, y_pred) 42 | print("Accuracy score:",accuracy) 43 | precision = metrics.precision_score(y_test, y_pred) 44 | print("Precision score:",precision) 45 | recall = metrics.recall_score(y_test, y_pred) 46 | print("Recall score:",recall) -------------------------------------------------------------------------------- /project_15_decision_tree_classifier/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 
| Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | 
Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | 
Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_16_random_forest_classifier/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | 
Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | 
Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_16_random_forest_classifier/random_forest_classifier.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Spyder Editor 4 | 5 | This is a temporary script file. 
6 | """ 7 | 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | # Optional - if you want to convert X to float data type 21 | import numpy as np 22 | X = np.vstack(X[:, :]).astype(np.float) 23 | 24 | # Step 3 - Split Data 25 | from sklearn.model_selection import train_test_split 26 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0) 27 | 28 | 29 | # Step 4 - Fit Classifier 30 | from sklearn.ensemble import RandomForestClassifier 31 | classifier = RandomForestClassifier(n_estimators=100, criterion="entropy", random_state=0) 32 | classifier.fit(X_train, y_train) 33 | 34 | # Step 5 - Predict 35 | y_pred = classifier.predict(X_test) 36 | 37 | # Step 6 - Metrics 38 | #from sklearn import metrics 39 | #cm = metrics.confusion_matrix(y_test, y_pred) ## 5,3 errors 40 | #accuracy = metrics.accuracy_score(y_test, y_pred) ## 0.92 41 | #precision = metrics.precision_score(y_test, y_pred) ## 0.85 42 | #recall = metrics.recall_score(y_test, y_pred) ## 0.90 43 | 44 | # Step 6 - Evaluate the model performance 45 | from sklearn import metrics 46 | cm = metrics.confusion_matrix(y_test, y_pred) 47 | print(cm) 48 | accuracy = metrics.accuracy_score(y_test, y_pred) 49 | print("Accuracy score:",accuracy) 50 | precision = metrics.precision_score(y_test, y_pred) 51 | print("Precision score:",precision) 52 | recall = metrics.recall_score(y_test, y_pred) 53 | print("Recall score:",recall) 54 | 55 | -------------------------------------------------------------------------------- /project_17_compare_classification_algorithms/compare_classification_algos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 19 17:30:09 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | 21 | # Step 3 - Feature Scaling 22 | from sklearn.preprocessing import StandardScaler 23 | sc = StandardScaler() 24 | X = sc.fit_transform(X) 25 | 26 | # Step 4 - Compare Classification Algorithms 27 | from sklearn.model_selection import KFold 28 | from sklearn.model_selection import cross_val_score 29 | from sklearn.linear_model import LogisticRegression 30 | from sklearn.tree import DecisionTreeClassifier 31 | from sklearn.neighbors import KNeighborsClassifier 32 | from sklearn.ensemble import RandomForestClassifier 33 | from sklearn.naive_bayes import GaussianNB 34 | from sklearn.svm import SVC 35 | 36 | classification_models = [] 37 | classification_models.append(('Logistic Regression', LogisticRegression(solver="liblinear"))) 38 | classification_models.append(('K Nearest Neighbor', KNeighborsClassifier(n_neighbors=5, metric="minkowski",p=2))) 39 | classification_models.append(('Kernel SVM', SVC(kernel = 'rbf',gamma='scale'))) 40 | classification_models.append(('Naive Bayes', GaussianNB())) 41 
-------------------------------------------------------------------------------- /project_17_compare_classification_algorithms/compare_classification_algos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 19 17:30:09 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("iphone_purchase_records.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:, 3].values 14 | 15 | # Step 2 - Convert Gender to number 16 | from sklearn.preprocessing import LabelEncoder 17 | labelEncoder_gender = LabelEncoder() 18 | X[:,0] = labelEncoder_gender.fit_transform(X[:,0]) 19 | 20 | 21 | # Step 3 - Feature Scaling 22 | from sklearn.preprocessing import StandardScaler 23 | sc = StandardScaler() 24 | X = sc.fit_transform(X) 25 | 26 | # Step 4 - Compare Classification Algorithms 27 | from sklearn.model_selection import KFold 28 | from sklearn.model_selection import cross_val_score 29 | from sklearn.linear_model import LogisticRegression 30 | from sklearn.tree import DecisionTreeClassifier 31 | from sklearn.neighbors import KNeighborsClassifier 32 | from sklearn.ensemble import RandomForestClassifier 33 | from sklearn.naive_bayes import GaussianNB 34 | from sklearn.svm import SVC 35 | 36 | classification_models = [] 37 | classification_models.append(('Logistic Regression', LogisticRegression(solver="liblinear"))) 38 | classification_models.append(('K Nearest Neighbor', KNeighborsClassifier(n_neighbors=5, metric="minkowski",p=2))) 39 | classification_models.append(('Kernel SVM', SVC(kernel = 'rbf',gamma='scale'))) 40 | classification_models.append(('Naive Bayes', GaussianNB())) 41 | classification_models.append(('Decision Tree', DecisionTreeClassifier(criterion = "entropy"))) 42 | classification_models.append(('Random Forest', RandomForestClassifier(n_estimators=100, criterion="entropy"))) 43 | 44 | for name, model in classification_models: 45 |     kfold = KFold(n_splits=10, shuffle=True, random_state=7) # shuffle=True is required when a random_state is set 46 |     result = cross_val_score(model, X, y, cv=kfold, scoring='accuracy') 47 |     print("%s: Mean Accuracy = %.2f%% - SD Accuracy = %.2f%%" % (name, result.mean()*100, result.std()*100)) -------------------------------------------------------------------------------- /project_17_compare_classification_algorithms/iphone_purchase_records.csv: -------------------------------------------------------------------------------- 1 | Gender,Age,Salary,Purchase Iphone 2 | Male,19,19000,0 3 | Male,35,20000,0 4 | Female,26,43000,0 5 | Female,27,57000,0 6 | Male,19,76000,0 7 | Male,27,58000,0 8 | Female,27,84000,0 9 | Female,32,150000,1 10 | Male,25,33000,0 11 | Female,35,65000,0 12 | Female,26,80000,0 13 | Female,26,52000,0 14 | Male,20,86000,0 15 | Male,32,18000,0 16 | Male,18,82000,0 17 | Male,29,80000,0 18 | Male,47,25000,1 19 | Male,45,26000,1 20 | Male,46,28000,1 21 | Female,48,29000,1 22 | Male,45,22000,1 23 | Female,47,49000,1 24 | Male,48,41000,1 25 | Female,45,22000,1 26 | Male,46,23000,1 27 | Male,47,20000,1 28 | Male,49,28000,1 29 | Female,47,30000,1 30 | Male,29,43000,0 31 | Male,31,18000,0 32 | Male,31,74000,0 33 | Female,27,137000,1 34 | Female,21,16000,0 35 | Female,28,44000,0 36 | Male,27,90000,0 37 | Male,35,27000,0 38 | Female,33,28000,0 39 | Male,30,49000,0 40 | Female,26,72000,0 41 | Female,27,31000,0 42 | Female,27,17000,0 43 | Female,33,51000,0 44 | Male,35,108000,0 45 | Male,30,15000,0 46 | Female,28,84000,0 47 | Male,23,20000,0 48 | Male,25,79000,0 49 | Female,27,54000,0 50 | Male,30,135000,1 51 | Female,31,89000,0 52 | Female,24,32000,0 53 | Female,18,44000,0 54 | Female,29,83000,0 55 | Female,35,23000,0 56 | Female,27,58000,0 57 | Female,24,55000,0 58 | Female,23,48000,0 59 | Male,28,79000,0 60 | Male,22,18000,0 61 | Female,32,117000,0 62 | Male,27,20000,0 63 | Male,25,87000,0 64 | Female,23,66000,0 65 | Male,32,120000,1 66 | Female,59,83000,0 67 | Male,24,58000,0 68 | Male,24,19000,0 69 | Female,23,82000,0 70 | Female,22,63000,0 71 | Female,31,68000,0 72 | Male,25,80000,0 73 | Female,24,27000,0 74 | Female,20,23000,0 75 | Female,33,113000,0 76 | Male,32,18000,0 77 | Male,34,112000,1 78 | Male,18,52000,0 79 | Female,22,27000,0 80 | Female,28,87000,0 81 | Female,26,17000,0 82 | Male,30,80000,0 83 | Male,39,42000,0 84 | Male,20,49000,0 85 | Male,35,88000,0 86 | Female,30,62000,0 87 | Female,31,118000,1 88 | Male,24,55000,0 89 | Female,28,85000,0 90 | Male,26,81000,0 91 | Male,35,50000,0 92 | Male,22,81000,0 93 | Female,30,116000,0 94 | Male,26,15000,0 95 | Female,29,28000,0 96 | Female,29,83000,0 97 | Female,35,44000,0 98 | Female,35,25000,0 99 | Male,28,123000,1 100 | Male,35,73000,0 101 | Female,28,37000,0 102 | Male,27,88000,0 103 | Male,28,59000,0 104 | Female,32,86000,0 105 | Female,33,149000,1 106 | Female,19,21000,0 107 | Male,21,72000,0 108 | Female,26,35000,0 109 | Male,27,89000,0 110 | Male,26,86000,0 111 | Female,38,80000,0 112 | Female,39,71000,0 113 | Female,37,71000,0 114 | Male,38,61000,0 115 | Male,37,55000,0 116 | Male,42,80000,0 117 | Male,40,57000,0 118 | Male,35,75000,0 119 | Male,36,52000,0 120 | Male,40,59000,0 121 | Male,41,59000,0 122 | Female,36,75000,0 123 | Male,37,72000,0 124 | Female,40,75000,0 125 | Male,35,53000,0 126 | Female,41,51000,0 127 | 
Female,39,61000,0 128 | Male,42,65000,0 129 | Male,26,32000,0 130 | Male,30,17000,0 131 | Female,26,84000,0 132 | Male,31,58000,0 133 | Male,33,31000,0 134 | Male,30,87000,0 135 | Female,21,68000,0 136 | Female,28,55000,0 137 | Male,23,63000,0 138 | Female,20,82000,0 139 | Male,30,107000,1 140 | Female,28,59000,0 141 | Male,19,25000,0 142 | Male,19,85000,0 143 | Female,18,68000,0 144 | Male,35,59000,0 145 | Male,30,89000,0 146 | Female,34,25000,0 147 | Female,24,89000,0 148 | Female,27,96000,1 149 | Female,41,30000,0 150 | Male,29,61000,0 151 | Male,20,74000,0 152 | Female,26,15000,0 153 | Male,41,45000,0 154 | Male,31,76000,0 155 | Female,36,50000,0 156 | Male,40,47000,0 157 | Female,31,15000,0 158 | Male,46,59000,0 159 | Male,29,75000,0 160 | Male,26,30000,0 161 | Female,32,135000,1 162 | Male,32,100000,1 163 | Male,25,90000,0 164 | Female,37,33000,0 165 | Male,35,38000,0 166 | Female,33,69000,0 167 | Female,18,86000,0 168 | Female,22,55000,0 169 | Female,35,71000,0 170 | Male,29,148000,1 171 | Female,29,47000,0 172 | Male,21,88000,0 173 | Male,34,115000,0 174 | Female,26,118000,0 175 | Female,34,43000,0 176 | Female,34,72000,0 177 | Female,23,28000,0 178 | Female,35,47000,0 179 | Male,25,22000,0 180 | Male,24,23000,0 181 | Female,31,34000,0 182 | Male,26,16000,0 183 | Female,31,71000,0 184 | Female,32,117000,1 185 | Male,33,43000,0 186 | Female,33,60000,0 187 | Male,31,66000,0 188 | Female,20,82000,0 189 | Female,33,41000,0 190 | Male,35,72000,0 191 | Male,28,32000,0 192 | Male,24,84000,0 193 | Female,19,26000,0 194 | Male,29,43000,0 195 | Male,19,70000,0 196 | Male,28,89000,0 197 | Male,34,43000,0 198 | Female,30,79000,0 199 | Female,20,36000,0 200 | Male,26,80000,0 201 | Male,35,22000,0 202 | Male,35,39000,0 203 | Male,49,74000,0 204 | Female,39,134000,1 205 | Female,41,71000,0 206 | Female,58,101000,1 207 | Female,47,47000,0 208 | Female,55,130000,1 209 | Female,52,114000,0 210 | Female,40,142000,1 211 | Female,46,22000,0 212 | Female,48,96000,1 213 | Male,52,150000,1 214 | Female,59,42000,0 215 | Male,35,58000,0 216 | Male,47,43000,0 217 | Female,60,108000,1 218 | Male,49,65000,0 219 | Male,40,78000,0 220 | Female,46,96000,0 221 | Male,59,143000,1 222 | Female,41,80000,0 223 | Male,35,91000,1 224 | Male,37,144000,1 225 | Male,60,102000,1 226 | Female,35,60000,0 227 | Male,37,53000,0 228 | Female,36,126000,1 229 | Male,56,133000,1 230 | Female,40,72000,0 231 | Female,42,80000,1 232 | Female,35,147000,1 233 | Male,39,42000,0 234 | Male,40,107000,1 235 | Male,49,86000,1 236 | Female,38,112000,0 237 | Male,46,79000,1 238 | Male,40,57000,0 239 | Female,37,80000,0 240 | Female,46,82000,0 241 | Female,53,143000,1 242 | Male,42,149000,1 243 | Male,38,59000,0 244 | Female,50,88000,1 245 | Female,56,104000,1 246 | Female,41,72000,0 247 | Female,51,146000,1 248 | Female,35,50000,0 249 | Female,57,122000,1 250 | Male,41,52000,0 251 | Female,35,97000,1 252 | Female,44,39000,0 253 | Male,37,52000,0 254 | Female,48,134000,1 255 | Female,37,146000,1 256 | Female,50,44000,0 257 | Female,52,90000,1 258 | Female,41,72000,0 259 | Male,40,57000,0 260 | Female,58,95000,1 261 | Female,45,131000,1 262 | Female,35,77000,0 263 | Male,36,144000,1 264 | Female,55,125000,1 265 | Female,35,72000,0 266 | Male,48,90000,1 267 | Female,42,108000,1 268 | Male,40,75000,0 269 | Male,37,74000,0 270 | Female,47,144000,1 271 | Male,40,61000,0 272 | Female,43,133000,0 273 | Female,59,76000,1 274 | Male,60,42000,1 275 | Male,39,106000,1 276 | Female,57,26000,1 277 | Male,57,74000,1 278 | Male,38,71000,0 279 | 
Male,49,88000,1 280 | Female,52,38000,1 281 | Female,50,36000,1 282 | Female,59,88000,1 283 | Male,35,61000,0 284 | Male,37,70000,1 285 | Female,52,21000,1 286 | Male,48,141000,0 287 | Female,37,93000,1 288 | Female,37,62000,0 289 | Female,48,138000,1 290 | Male,41,79000,0 291 | Female,37,78000,1 292 | Male,39,134000,1 293 | Male,49,89000,1 294 | Male,55,39000,1 295 | Male,37,77000,0 296 | Female,35,57000,0 297 | Female,36,63000,0 298 | Male,42,73000,1 299 | Female,43,112000,1 300 | Male,45,79000,0 301 | Male,46,117000,1 302 | Female,58,38000,1 303 | Male,48,74000,1 304 | Female,37,137000,1 305 | Male,37,79000,1 306 | Female,40,60000,0 307 | Male,42,54000,0 308 | Female,51,134000,0 309 | Female,47,113000,1 310 | Male,36,125000,1 311 | Female,38,50000,0 312 | Female,42,70000,0 313 | Male,39,96000,1 314 | Female,38,50000,0 315 | Female,49,141000,1 316 | Female,39,79000,0 317 | Female,39,75000,1 318 | Female,54,104000,1 319 | Male,35,55000,0 320 | Male,45,32000,1 321 | Male,36,60000,0 322 | Female,52,138000,1 323 | Female,53,82000,1 324 | Male,41,52000,0 325 | Female,48,30000,1 326 | Female,48,131000,1 327 | Female,41,60000,0 328 | Male,41,72000,0 329 | Female,42,75000,0 330 | Male,36,118000,1 331 | Female,47,107000,1 332 | Male,38,51000,0 333 | Female,48,119000,1 334 | Male,42,65000,0 335 | Male,40,65000,0 336 | Male,57,60000,1 337 | Female,36,54000,0 338 | Male,58,144000,1 339 | Male,35,79000,0 340 | Female,38,55000,0 341 | Male,39,122000,1 342 | Female,53,104000,1 343 | Male,35,75000,0 344 | Female,38,65000,0 345 | Female,47,51000,1 346 | Male,47,105000,1 347 | Female,41,63000,0 348 | Male,53,72000,1 349 | Female,54,108000,1 350 | Male,39,77000,0 351 | Male,38,61000,0 352 | Female,38,113000,1 353 | Male,37,75000,0 354 | Female,42,90000,1 355 | Female,37,57000,0 356 | Male,36,99000,1 357 | Male,60,34000,1 358 | Male,54,70000,1 359 | Female,41,72000,0 360 | Male,40,71000,1 361 | Male,42,54000,0 362 | Male,43,129000,1 363 | Female,53,34000,1 364 | Female,47,50000,1 365 | Female,42,79000,0 366 | Male,42,104000,1 367 | Female,59,29000,1 368 | Female,58,47000,1 369 | Male,46,88000,1 370 | Male,38,71000,0 371 | Female,54,26000,1 372 | Female,60,46000,1 373 | Male,60,83000,1 374 | Female,39,73000,0 375 | Male,59,130000,1 376 | Female,37,80000,0 377 | Female,46,32000,1 378 | Female,46,74000,0 379 | Female,42,53000,0 380 | Male,41,87000,1 381 | Female,58,23000,1 382 | Male,42,64000,0 383 | Male,48,33000,1 384 | Female,44,139000,1 385 | Male,49,28000,1 386 | Female,57,33000,1 387 | Male,56,60000,1 388 | Female,49,39000,1 389 | Male,39,71000,0 390 | Male,47,34000,1 391 | Female,48,35000,1 392 | Male,48,33000,1 393 | Male,47,23000,1 394 | Female,45,45000,1 395 | Male,60,42000,1 396 | Female,39,59000,0 397 | Female,46,41000,1 398 | Male,51,23000,1 399 | Female,50,20000,1 400 | Male,36,33000,0 401 | Female,49,36000,1 -------------------------------------------------------------------------------- /project_1_simple_linear_regression/Salary_Data.csv: -------------------------------------------------------------------------------- 1 | YearsExperience,Salary 2 | 1.1,39343.00 3 | 1.3,46205.00 4 | 1.5,37731.00 5 | 2.0,43525.00 6 | 2.2,39891.00 7 | 2.9,56642.00 8 | 3.0,60150.00 9 | 3.2,54445.00 10 | 3.2,64445.00 11 | 3.7,57189.00 12 | 3.9,63218.00 13 | 4.0,55794.00 14 | 4.0,56957.00 15 | 4.1,57081.00 16 | 4.5,61111.00 17 | 4.9,67938.00 18 | 5.1,66029.00 19 | 5.3,83088.00 20 | 5.9,81363.00 21 | 6.0,93940.00 22 | 6.8,91738.00 23 | 7.1,98273.00 24 | 7.9,101302.00 25 | 8.2,113812.00 26 | 8.7,109431.00 27 | 
9.0,105582.00 28 | 9.5,116969.00 29 | 9.6,112635.00 30 | 10.3,122391.00 31 | 10.5,121872.00 32 | -------------------------------------------------------------------------------- /project_1_simple_linear_regression/project_1: -------------------------------------------------------------------------------- 1 | I'm basically writing this blog for myself because I've been wanting to learn Machine Learning for a while now but have never really gotten around to it. So this blog is more like a journal for me to write about my daily progress - (hopefully I will be making some progress every day). 2 | 3 | #100DaysOfMLCode #100ProjectsInML 4 | 5 | The best approach for me to learn anything is by working on sample projects. No matter how simple the project is, it helps me better understand the concepts. So I will be working through some small mini projects as part of this learning journey. 6 | 7 | There are hundreds of excellent resources out there to help you get started. I stumbled upon the A-Z Machine Learning course on Udemy and I'll be walking through those examples in the first few weeks. 8 | 9 | 10 | 11 | Today I'll be going through "Simple Linear Regression" 12 | 13 | Dataset 14 | First, let's look at the dataset. It is Salary_Data.csv and can be found here 15 | It has 2 columns - "Years of Experience" and "Salary" - for 30 employees in a company. So in this example, we will train a Simple Linear Regression model to learn the correlation between the number of years of experience of each employee and their respective salary. Once the model is trained, we will be able to do some sample predictions. 16 | 17 | Below is a sample screenshot of the dataset. 18 | 19 | 20 | So let's get started. 21 | 22 | Step 1: Load the Dataset 23 | 24 | Below is the code snippet for loading the dataset. 25 | We will be using a pandas dataframe. 26 | Here X is the independent variable, which is "Years of Experience", 27 | and y is the dependent variable, which is "Salary" 28 | 29 | So for X, we specify dataset.iloc[:, :-1].values 30 | which simply means take all rows and all columns except the last one 31 | 32 | And for y, we specify dataset.iloc[:, 1].values 33 | which simply means take all rows and only the column with index 1 - in Python indexes begin at 0 - so index 1 here is the second column, which is Salary 34 | 35 | # Step 1 Load Data 36 | import pandas as pd 37 | dataset = pd.read_csv('Salary_Data.csv') 38 | X = dataset.iloc[:, :-1].values 39 | y = dataset.iloc[:,1].values 40 | 41 | Below is a sample screenshot of X and y 42 | 43 | Step 2: Split dataset into training set and test set 44 | 45 | Next we have to split the dataset into training and testing. We will use the training dataset for training the model and then check the performance of the model on the test dataset. 46 | 47 | For this we will use the train_test_split method from the model_selection library 48 | We are providing a test_size of 1/3, which means the test set will contain 10 observations and the training set will contain 20 observations 49 | The random_state=0 is required only if you want to compare your results with mine. 50 | 51 | # Step 2: Split data into training and testing 52 | from sklearn.model_selection import train_test_split 53 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0) 54 | 55 | Below is a sample screenshot of X_train, y_train, X_test and y_test 56 | 57 | Step 3: Fit Simple Linear Regression model to training set 58 | 59 | This is a very simple step. 
We will be using the LinearRegression class from the sklearn.linear_model library. First we create an object of the LinearRegression class and call the fit method, passing the X_train and y_train. 60 | 61 | # Step 3: Fit Simple Linear Regression to Training Data 62 | from sklearn.linear_model import LinearRegression 63 | regressor = LinearRegression() 64 | regressor.fit(X_train, y_train) 65 | 66 | 67 | Step 4: Predict the test set 68 | Using the regressor we trained in the previous step, we will now use it to predict the results of the test set and compare the predicted values with the actual values 69 | 70 | # Step 4: Make Prediction 71 | y_pred = regressor.predict(X_test) 72 | 73 | Now we have y_pred, which are the predicted values from our model, and y_test, which are the actual values. 74 | Let us compare and see how well our model did. As you can see from the screenshot below - our basic model did pretty well. 75 | 76 | If we take the first employee - the actual salary is 37731 and our model predicted 40835.1 - which is not too bad. There are some predictions that are off but some are pretty close. 77 | 
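If you want to print this comparison yourself rather than rely on the screenshot, two lines of pandas will do it (a small extra snippet, not part of the original script - pd is the pandas import from Step 1):

comparison = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison)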
78 | Step 5 - Visualizing the training set 79 | 80 | Let's visualize the results. 81 | First we'll plot the actual data points of the training set - X_train and y_train 82 | plt.scatter(X_train, y_train, color = 'red') 83 | 84 | Next we'll plot the regression line - which is the predicted values for X_train 85 | plt.plot(X_train, regressor.predict(X_train), color='blue') 86 | 87 | # Step 5 - Visualize training set results 88 | import matplotlib.pyplot as plt 89 | # plot the actual data points of training set 90 | plt.scatter(X_train, y_train, color = 'red') 91 | # plot the regression line 92 | plt.plot(X_train, regressor.predict(X_train), color='blue') 93 | plt.title('Salary vs Experience (Training set)') 94 | plt.xlabel('Years of Experience') 95 | plt.ylabel('Salary') 96 | plt.show() 97 | 98 | 99 | Step 6 - Visualizing the test set 100 | 101 | Let's visualize the results. 102 | First we'll plot the actual data points of the test set - X_test and y_test 103 | plt.scatter(X_test, y_test, color = 'red') 104 | 105 | Next we'll plot the regression line - which is the same line as above 106 | plt.plot(X_train, regressor.predict(X_train), color='blue') 107 | 108 | # Step 6 - Visualize test set results 109 | import matplotlib.pyplot as plt 110 | # plot the actual data points of test set 111 | plt.scatter(X_test, y_test, color = 'red') 112 | # plot the regression line 113 | plt.plot(X_train, regressor.predict(X_train), color='blue') 114 | plt.title('Salary vs Experience (Test set)') 115 | plt.xlabel('Years of Experience') 116 | plt.ylabel('Salary') 117 | plt.show() 118 | 119 | Step 7 - Make new predictions 120 | We can also make brand new predictions for data points that do not exist in the dataset. 121 | For example, for a person with 15 years of experience: 122 | 123 | new_salary_pred = regressor.predict([[15]]) 124 | 125 | # Step 7 - Make new prediction 126 | new_salary_pred = regressor.predict([[15]]) 127 | 128 | Here is the full source code 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /project_1_simple_linear_regression/simple_linear_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Sep 1 19:14:35 2019 5 | @author: omairaasim 6 | """ 7 | 8 | # Step 1 Load Data 9 | import pandas as pd 10 | dataset = pd.read_csv('Salary_Data.csv') 11 | X = dataset.iloc[:, :-1].values 12 | y = dataset.iloc[:,1].values 13 | 14 | # Step 2: Split data into training and testing 15 | from sklearn.model_selection import train_test_split 16 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0) 17 | 18 | # Step 3: Fit Simple Linear Regression to Training Data 19 | from sklearn.linear_model import LinearRegression 20 | regressor = LinearRegression() 21 | regressor.fit(X_train, y_train) 22 | 23 | # Step 4: Make Prediction 24 | y_pred = regressor.predict(X_test) 25 | 26 | # Step 5 - Visualize training set results 27 | import matplotlib.pyplot as plt 28 | # plot the actual data points of training set 29 | plt.scatter(X_train, y_train, color = 'red') 30 | # plot the regression line 31 | plt.plot(X_train, regressor.predict(X_train), color='blue') 32 | plt.title('Salary vs Experience (Training set)') 33 | plt.xlabel('Years of Experience') 34 | plt.ylabel('Salary') 35 | plt.show() 36 | 37 | # Step 6 - Visualize test set results 38 | import matplotlib.pyplot as plt 39 | # plot the actual data points of test set 40 | plt.scatter(X_test, y_test, color = 'red') 41 | # plot the regression line (same as above) 42 | plt.plot(X_train, regressor.predict(X_train), color='blue') 43 | plt.title('Salary vs Experience (Test set)') 44 | plt.xlabel('Years of Experience') 45 | plt.ylabel('Salary') 46 | plt.show() 47 | 48 | # Step 7 - Make new prediction 49 | new_salary_pred = regressor.predict([[15]]) 50 | print('The predicted salary of a person with 15 years experience is ',new_salary_pred) 51 | -------------------------------------------------------------------------------- /project_2_multiple_linear_regression/50_Startups.csv: -------------------------------------------------------------------------------- 1 | R&D Spend,Administration,Marketing Spend,State,Profit 2 | 165349.2,136897.8,471784.1,New York,192261.83 3 | 162597.7,151377.59,443898.53,California,191792.06 4 | 153441.51,101145.55,407934.54,Florida,191050.39 5 | 144372.41,118671.85,383199.62,New York,182901.99 6 | 142107.34,91391.77,366168.42,Florida,166187.94 7 | 131876.9,99814.71,362861.36,New York,156991.12 8 | 134615.46,147198.87,127716.82,California,156122.51 9 | 130298.13,145530.06,323876.68,Florida,155752.6 10 | 120542.52,148718.95,311613.29,New York,152211.77 11 | 123334.88,108679.17,304981.62,California,149759.96 12 | 101913.08,110594.11,229160.95,Florida,146121.95 13 | 100671.96,91790.61,249744.55,California,144259.4 14 | 93863.75,127320.38,249839.44,Florida,141585.52 15 | 91992.39,135495.07,252664.93,California,134307.35 16 | 119943.24,156547.42,256512.92,Florida,132602.65 17 | 114523.61,122616.84,261776.23,New York,129917.04 18 | 78013.11,121597.55,264346.06,California,126992.93 19 | 94657.16,145077.58,282574.31,New York,125370.37 20 | 
91749.16,114175.79,294919.57,Florida,124266.9 21 | 86419.7,153514.11,0,New York,122776.86 22 | 76253.86,113867.3,298664.47,California,118474.03 23 | 78389.47,153773.43,299737.29,New York,111313.02 24 | 73994.56,122782.75,303319.26,Florida,110352.25 25 | 67532.53,105751.03,304768.73,Florida,108733.99 26 | 77044.01,99281.34,140574.81,New York,108552.04 27 | 64664.71,139553.16,137962.62,California,107404.34 28 | 75328.87,144135.98,134050.07,Florida,105733.54 29 | 72107.6,127864.55,353183.81,New York,105008.31 30 | 66051.52,182645.56,118148.2,Florida,103282.38 31 | 65605.48,153032.06,107138.38,New York,101004.64 32 | 61994.48,115641.28,91131.24,Florida,99937.59 33 | 61136.38,152701.92,88218.23,New York,97483.56 34 | 63408.86,129219.61,46085.25,California,97427.84 35 | 55493.95,103057.49,214634.81,Florida,96778.92 36 | 46426.07,157693.92,210797.67,California,96712.8 37 | 46014.02,85047.44,205517.64,New York,96479.51 38 | 28663.76,127056.21,201126.82,Florida,90708.19 39 | 44069.95,51283.14,197029.42,California,89949.14 40 | 20229.59,65947.93,185265.1,New York,81229.06 41 | 38558.51,82982.09,174999.3,California,81005.76 42 | 28754.33,118546.05,172795.67,California,78239.91 43 | 27892.92,84710.77,164470.71,Florida,77798.83 44 | 23640.93,96189.63,148001.11,California,71498.49 45 | 15505.73,127382.3,35534.17,New York,69758.98 46 | 22177.74,154806.14,28334.72,California,65200.33 47 | 1000.23,124153.04,1903.93,New York,64926.08 48 | 1315.46,115816.21,297114.46,Florida,49490.75 49 | 0,135426.92,0,California,42559.73 50 | 542.05,51743.15,0,New York,35673.41 51 | 0,116983.8,45173.06,California,14681.4 -------------------------------------------------------------------------------- /project_2_multiple_linear_regression/multiple_linear_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Nov 30 19:45:38 2018 5 | 6 | @author: omairaasim 7 | """ 8 | 9 | # Step 1 - Load Data 10 | import pandas as pd 11 | dataset = pd.read_csv("50_Startups.csv") 12 | X = dataset.iloc[:,:-1].values 13 | y = dataset.iloc[:,4].values 14 | 15 | # Step 2 - Encode Categorical Data 16 | from sklearn.preprocessing import OneHotEncoder 17 | from sklearn.compose import ColumnTransformer 18 | import numpy as np 19 | ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[3])], remainder='passthrough') 20 | X = np.array(ct.fit_transform(X)) 21 | 22 | # Step 3 - Dummy Trap 23 | X = X[:,1:] 24 | 25 | # Step 4 - Split Data 26 | from sklearn.model_selection import train_test_split 27 | X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0) 28 | 29 | # Step 5 - Fit Regressor 30 | from sklearn.linear_model import LinearRegression 31 | regressor = LinearRegression() 32 | regressor.fit(X_train, y_train) 33 | 34 | # Step 6 - Predict 35 | y_pred = regressor.predict(X_test) 36 | -------------------------------------------------------------------------------- /project_2_multiple_linear_regression/project_2: -------------------------------------------------------------------------------- 1 | In project 2 of Machine Learning, I'm going to be looking at Multiple Linear Regression. Unlike Simple Linear Regression where there is one independent variable and one dependent variable - in Multiple Linear Regression there are several independent variables that could have an effect on determining the dependent variable. 
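In equation form, that means fitting y = b0 + b1 * x1 + b2 * x2 + ... + bn * xn, with one coefficient b for every independent variable x - the same equation we will come back to in the backward elimination section below.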
2 | 3 | I'll be using the example from the A-Z Machine Learning course from Udemy. 4 | 5 | Let's dive right in. 6 | 7 | Dataset 8 | The dataset we will be using for this project can be found here**. 9 | It contains data about 50 startups. 10 | It has 5 columns - "R&D Spend", "Administration", "Marketing Spend", "State", "Profit" 11 | The first 3 columns indicate how much each startup spends on Research and Development, how much they spend on Administration and how much they spend on Marketing. 12 | The State column indicates which state the startup is based in. And the last column states the profit made by the startup. 13 | 14 | Project Objective 15 | We want our model to predict the profit based on the independent variables described above. So Profit is the dependent variable and the other 4 are independent variables. 16 | 17 | Step 1: Load the Dataset 18 | 19 | Below is the code snippet for loading the dataset. 20 | We will be using a pandas dataframe. 21 | Here X contains all the independent variables, which are "R&D Spend", "Administration", "Marketing Spend" and "State" 22 | and y is the dependent variable, which is the "Profit" 23 | 24 | So for X, we specify dataset.iloc[:, :-1].values 25 | which simply means take all rows and all columns except the last one 26 | 27 | And for y, we specify dataset.iloc[:, 4].values 28 | which simply means take all rows and only the column with index 4 - in Python indexes begin at 0 - so index 4 here is the fifth column, which is "Profit" 29 | 30 | # Step 1 - Load Data 31 | import pandas as pd 32 | dataset = pd.read_csv("50_Startups.csv") 33 | X = dataset.iloc[:,:-1].values 34 | y = dataset.iloc[:,4].values 35 | 36 | *************************************************** 37 | 38 | Step 2: Convert text variable to numbers 39 | We can see that in our dataset we have a categorical variable - "State" - which we have to encode, because the regression can only work with numbers. 40 | Here the "State" variable is at index 3 41 | Older tutorials did this with a LabelEncoder followed by OneHotEncoder(categorical_features=[3]), but the categorical_features argument has since been removed from scikit-learn. 42 | 43 | There is also a conceptual problem with stopping at a plain label encoding: 44 | if New York is assigned the value 2 and California the value 0, 45 | the model might assume New York is "higher" than California, which is not right for categories. 46 | 47 | **************************************************** 48 | 49 | Step 3: Use OneHotEncoder to introduce Dummy variables 50 | To avoid this we introduce dummy variables - one 0/1 column per state. The current way to do it is a ColumnTransformer wrapping a OneHotEncoder, exactly as in the accompanying multiple_linear_regression.py: 51 | 52 | # Step 2 - Encode Categorical Data 53 | from sklearn.preprocessing import OneHotEncoder 54 | from sklearn.compose import ColumnTransformer 55 | import numpy as np 56 | ct = ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[3])], remainder='passthrough') 57 | X = np.array(ct.fit_transform(X)) 58 | 59 | After running the above code snippet - let's examine the dataset. 60 | We can see that 3 dummy variables have been added as we had 3 different States. 61 | They appear as the first three columns, in alphabetical order of the category names: California, Florida, New York. 62 | 63 | 64 | Let's compare the X dataset with the original dataset. 
65 | - Let's look at the first entry, at index 0 - in the original dataset the state was "New York" - and after encoding, the 3rd dummy variable has the value 1, which means the 3rd dummy variable represents the state New York 66 | - Let's look at the second entry, at index 1 - in the original dataset the state was "California" - and after encoding, the 1st dummy variable has the value 1, which means the 1st dummy variable represents the state California 67 | - Let's look at the third entry, at index 2 - in the original dataset the state was "Florida" - and after encoding, the 2nd dummy variable has the value 1, which means the 2nd dummy variable represents the state Florida 68 | 69 | 70 | Step 4: Dummy Variable Trap 71 | We have to remove one of the dummy variables - keeping all three would make the columns linearly dependent, which is known as the dummy variable trap. 72 | In the below code snippet - we are removing the first column. 73 | 74 | # Step 4 - Dummy Trap 75 | X = X[:,1:] 76 | 77 | 78 | Step 5: Split dataset into training set and test set 79 | 80 | Next we have to split the dataset into training and testing. We will use the training dataset for training the model and then check the performance of the model on the test dataset. 81 | 82 | For this we will use the train_test_split method from the model_selection library 83 | We are providing a test_size of 0.2, which means the test set will contain 10 observations and the training set will contain 40 observations 84 | The random_state=0 is required only if you want to compare your results with mine. 85 | 86 | # Step 5 - Split Data 87 | from sklearn.model_selection import train_test_split 88 | X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=0) 89 | 90 | Below is a sample screenshot of X_train, y_train, X_test and y_test 91 | 92 | Step 6: Fit Multiple Linear Regression model to training set 93 | This is a very simple step. We will be using the same LinearRegression class from the sklearn.linear_model library - it handles multiple features out of the box. First we create an object of the LinearRegression class and call the fit method, passing the X_train and y_train. 94 | 95 | # Step 6 - Fit Regressor 96 | from sklearn.linear_model import LinearRegression 97 | regressor = LinearRegression() 98 | regressor.fit(X_train, y_train) 99 | 100 | Step 7: Predict the test set 101 | Using the regressor we trained in the previous step, we will now use it to predict the results of the test set and compare the predicted values with the actual values 102 | 103 | # Step 7 - Predict 104 | y_pred = regressor.predict(X_test) 105 | 106 | Now we have y_pred, which are the predicted values from our model, and y_test, which are the actual values. 107 | Let us compare and see how well our model did. As you can see from the screenshot below - our basic model did pretty well. 108 | 109 | If we take the first startup - the actual profit is 103282 and our model predicted 103015 - which is almost perfect. There are some predictions that are off, like the second startup - the actual profit is 144259 and our model predicted 132582. 110 | 111 | Step 8: Backward Elimination 112 | In the model that we just built, we used all the independent variables, but it's possible that some independent variables are more significant than others and have a greater impact on the profit, while some are not significant at all - meaning that if we remove them from the model, we may get better predictions. 113 | 114 | So we are going to use the backward elimination process to see which independent variables we must include in the model and which to exclude. 
Step 8: Backward Elimination
In the model that we just built, we used all the independent variables, but it's possible that some independent variables are more significant than others and have a greater impact on the profit, while some are not significant - meaning if we remove them from the model, we may get better predictions.

So we are going to use the backward elimination process to see which independent variables we must include in the model and which to exclude.

The first step is to add a column of 1's to our X dataset as the first column. We add this column of ones to account for the y-intercept.
This column corresponds to x0 = 1, associated with the constant b0 in the multiple linear regression equation:
y = b0 + b1*x1 + b2*x2 + ... + bn*xn

# Add ones
import numpy as np
ones = np.ones(shape=(50,1), dtype=int)
X = np.append(arr=ones, values=X, axis=1)

Now we will start the backward elimination process. Since we will be creating a new optimal matrix of features, we will call it X_opt. This will contain only the independent features that are significant in predicting profit.

To begin with, we include all independent variables in X_opt:
X_opt = X[:,[0,1,2,3,4,5]]

Next we need to select a significance level (SL) - here we decide on a significance level of 0.05. If the p-value of an independent variable is greater than SL, we will remove that independent variable and repeat the process with the remaining independent variables.

Next we create a new regressor of the OLS class (Ordinary Least Squares) from the statsmodels library.
It takes 2 arguments:
- endog: the dependent variable
- exog: the matrix containing all independent variables

Now we need to fit the OLS algorithm as shown below:

regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()

Then we will look at the summary to see which independent variable has a p-value higher than SL (0.05):

regressor_OLS.summary()

Below all the steps are outlined.

# Backward Elimination
import statsmodels.api as sm  # older tutorials import this as statsmodels.formula.api
X_opt = X[:,[0,1,2,3,4,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output:
x1 and x2 are the 2 dummy variables we added for State
x3 is R&D spend
x4 is Admin spend
x5 is Marketing spend

We have to look for the highest p-value greater than 0.05, which in this case is 0.99 (99%) for x2.
So we have to remove x2 (2nd dummy variable for State), which has index 2.

X_opt = X[:,[0,1,3,4,5]]

Now let's repeat the process after removing the independent variable with the highest p-value.

X_opt = X[:,[0,1,3,4,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output. We have to look for the highest p-value greater than 0.05, which in this case is 0.94 (94%) for x1.
So we have to remove x1 (1st dummy variable for State), which has index 1.

X_opt = X[:,[0,3,4,5]]

Now let's repeat the process after removing the independent variable with the highest p-value.

X_opt = X[:,[0,3,4,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output. We again look for the highest p-value greater than 0.05, which in this case is 0.602 (60%) for x2.
So we have to remove x2 (Admin spend), which has index 4.

X_opt = X[:,[0,3,5]]

Now let's repeat the process after removing the independent variable with the highest p-value.

X_opt = X[:,[0,3,5]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Here is the screenshot of the summary.

Let's examine the output. We again look for the highest p-value greater than 0.05, which in this case is 0.06 (6%) for x2 - just barely above our SL.
So we have to remove x2 (Marketing spend), which has index 5 in X_opt.

X_opt = X[:,[0,3]]

Now let's repeat the process one more time.

X_opt = X[:,[0,3]]
regressor_OLS = sm.OLS(endog=y, exog=X_opt).fit()
regressor_OLS.summary()

Finally we are left with only 1 independent variable, which is R&D spend.

So we can build our model again, this time using only R&D spend as the independent variable, make the prediction, and our results should be better than the first time.
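The manual passes above follow a fixed recipe - fit, inspect the highest p-value, drop, repeat - so they can be automated. Below is a sketch of that loop; the loop structure and column bookkeeping are my additions, not part of the original article.

# Automated backward elimination (sketch); assumes X already has the ones column
import numpy as np
import statsmodels.api as sm

SL = 0.05
cols = list(range(X.shape[1]))  # start with all columns, including the constant
while True:
    regressor_OLS = sm.OLS(endog=y, exog=X[:, cols].astype(float)).fit()
    pvalues = regressor_OLS.pvalues
    worst = int(np.argmax(pvalues))
    if pvalues[worst] <= SL:
        break                   # every remaining variable is significant
    del cols[worst]             # drop the least significant column
# In practice you may want to always keep column 0 (the constant).
print("Columns kept:", cols)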
--------------------------------------------------------------------------------
/project_3_polynomial_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_3_polynomial_regression/poly_regression.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""

@author: omairaasim
"""

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Step 2 - Fitting Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Step 3 - Visualize Linear Regression Results
import matplotlib.pyplot as plt

plt.scatter(X, y, color="red")
plt.plot(X, lin_reg.predict(X))
plt.title("Linear Regression")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 4 - Linear Regression prediction
lin_reg.predict([[6.5]])

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Step 7 - Visualize Poly Regression Results
plt.scatter(X, y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression - Degree 4")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 8 - Polynomial Regression prediction
new_salary_pred = lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))
print('The predicted salary of a person at 6.5 Level is ', new_salary_pred)
--------------------------------------------------------------------------------
/project_3_polynomial_regression/project_3:
--------------------------------------------------------------------------------
#100DaysOfMLCode #100ProjectsInML

Today I'll be looking at the Polynomial Regression example from the A-Z Machine Learning course on Udemy.

If you look at the image above, which lists the equations for all 3 types of regression, you will notice that in Polynomial Regression we have the same variable x1, but raised to different powers.

For example:
- instead of x2 - we have x1 raised to the power 2
- instead of x3 - we have x1 raised to the power 3

Let's explore the dataset.

Dataset
First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.

Project Objective
Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify dataset.iloc[:, 2].values
which simply means take all rows and only the column with index 2 - in Python indexes begin at 0 - so index 2 here is the third column, which is Salary.

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

*********************************

Step 2: Fit Linear Regression model to dataset

First we will build a simple linear regression model to see what prediction it makes and then compare it to the prediction made by Polynomial Regression, to see which is more accurate.

We will be using the LinearRegression class from sklearn.linear_model. We create an object of the LinearRegression class and call the fit method, passing X and y.

# Step 2 - Fitting Linear Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X,y)

********************************

Step 3: Visualize Linear Regression Results

Let's plot the graph to look at the results for Linear Regression.

# Step 3 - Visualize Linear Regression Results
import matplotlib.pyplot as plt

plt.scatter(X,y, color="red")
plt.plot(X, lin_reg.predict(X))
plt.title("Linear Regression")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

If we look at the graph, we can see that the straight line suggests a salary of around $300k for a person at level 6.5. We will confirm this in the next step.

***********************************

Step 4: Predict Linear Regression Results

# Step 4 prediction
lin_reg.predict([[6.5]])

We can see that the prediction is way off, as it predicts $330k.

Now let's check the predictions by implementing Polynomial Regression.

***********************************

Step 5: Convert X to polynomial format

For Polynomial Regression, we need to transform our matrix X to X_poly, where X_poly contains X raised to powers up to the degree we choose. If we choose degree 2, then X_poly will contain X and X to the power 2. If we choose degree 3, then X_poly will contain X, X to the power 2 and X to the power 3.

We will be using the PolynomialFeatures class from sklearn.preprocessing for this purpose. When we create an object of this class, we have to pass the degree parameter. Let's begin by choosing degree 2. Then we call the fit_transform method to transform matrix X.

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=2)
X_poly = poly_reg.fit_transform(X)

Let's look at X_poly.

If you see, the 2nd column is the actual levels from 1 to 10 present in X.
The 3rd column contains X raised to the power 2 (as we chose degree 2).
The first column contains just 1's - this is automatically added by the PolynomialFeatures class to include the constant b0.
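A quick way to see this column layout for yourself is to transform a single value - the example value 3 below is mine, purely for illustration:

# What PolynomialFeatures(degree=2) produces for a single level
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)
print(poly.fit_transform([[3]]))  # [[1. 3. 9.]] -> constant, x, x^2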
*********************************

Step 6: Fitting Polynomial Regression

Now we will create a new linear regression object called lin_reg_2 and pass X_poly to it, instead of the X that we passed in Step 2.

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly,y)

*********************************

Step 7: Visualize Poly Regression Results

Let's plot the graph to look at the results for Polynomial Regression.

# Step 7 - Visualize Poly Regression Results
plt.scatter(X,y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression Degree 2")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

If we look at the graph, we can see that a person at level 6.5 should be offered a salary of around $190k. We will confirm this in the next step.

********************************

Step 8: Predict Polynomial Regression Results

We get a prediction of $189k.

# Step 8 prediction
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))
********************************

Step 9 - Change degree to 3 and run steps 5-8

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=3)
X_poly = poly_reg.fit_transform(X)

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly,y)

# Step 7 - Visualize Poly Regression Results
plt.scatter(X,y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression Degree 3")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 8 prediction
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))

We get a prediction of $133k.

*********************************

Step 10 - Change degree to 4 and run steps 5-8

# Step 5 - Convert X to polynomial format
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)

# Step 6 - Passing X_poly to LinearRegression
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly,y)

# Step 7 - Visualize Poly Regression Results
plt.scatter(X,y, color="red")
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)))
plt.title("Poly Regression Degree 4")
plt.xlabel("Level")
plt.ylabel("Salary")
plt.show()

# Step 8 prediction
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))

We get a prediction of $158k, which looks reasonable based on our dataset.

So in this case, by using Linear Regression we got a prediction of $330k, and by using Polynomial Regression we got a prediction of $158k.
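Since Steps 9 and 10 only change the degree, the whole comparison can also be written as a small loop. This is my sketch rather than the course code, assuming X and y from Step 1:

# Compare the Level 6.5 prediction across degrees
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

for degree in (2, 3, 4):
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X)
    model = LinearRegression().fit(X_poly, y)
    pred = model.predict(poly.transform([[6.5]]))
    print(f"degree {degree}: predicted salary {pred[0]:,.0f}")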
Here is the full source code.
--------------------------------------------------------------------------------
/project_4_support_vector_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_4_support_vector_regression/project_4:
--------------------------------------------------------------------------------
Today I'll be looking at the Support Vector Regression (SVR) example from the A-Z Machine Learning course on Udemy.

#100DaysOfMLCode #100ProjectsInML

We will be working on the same problem that we worked on in Project 3. Here, instead of using Polynomial Regression, we will use Support Vector Regression and see whether the prediction is better or worse compared to Polynomial Regression.

Let's explore the dataset.

Dataset
First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.

Project Objective

Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify

y = dataset.iloc[:, 2:].values

which simply means take all rows and only the column with index 2, which is Salary.

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[: ,1:2].values
y = dataset.iloc[:, 2:].values


Step 2 - Feature Scaling

Unlike LinearRegression, the SVR class does not scale features internally, so we standardize X and y ourselves - without scaling, the large salary values would dominate the RBF kernel.

# Step 2 - Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)


Step 3 - Fit SVR

We will be using the SVR class from sklearn.svm. First we create an object of the SVR class, passing the kernel parameter as "rbf" (Radial Basis Function), and then call the fit method, passing X and y.

# Step 3 - Fit SVR
from sklearn.svm import SVR
regressor = SVR(kernel = "rbf")
regressor.fit(X, y.ravel())  # ravel: fit expects a 1-D target


Step 4 - Visualization

# Step 4 - Visualization
import matplotlib.pyplot as plt
plt.scatter(X, y, color="red")
plt.plot(X, regressor.predict(X), color="blue")
plt.title("SVR")
plt.xlabel("Position")
plt.ylabel("Salary")
plt.show()

Step 5 - Make Predictions

Since we want to predict the salary for an employee at level 6.5, we first have to apply the same feature scaling to the value 6.5.
Then we do the prediction.
Finally, since the predicted value is still on the scaled axis, we do an inverse transformation to get the actual value.
These steps are outlined below.

# Step 5 - Predictions
import numpy as np
# First transform 6.5 with the scaler fitted on X
sc_X_val = sc_X.transform(np.array([[6.5]]))
# Second predict the (scaled) value
scaled_y_pred = regressor.predict(sc_X_val)
# Third - since this is scaled - we have to inverse transform
# (reshape to 2-D: newer scikit-learn versions require it here)
y_pred = sc_y.inverse_transform(scaled_y_pred.reshape(-1, 1))

We can see that the predicted value is $170k, whereas with Polynomial Regression we got $158k.
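As an aside: the manual scale / predict / inverse-transform dance can be wrapped up with standard scikit-learn utilities. The sketch below is my alternative, not the course's code - Pipeline handles the X scaling and TransformedTargetRegressor handles the y scaling and its inverse automatically.

# Hedged sketch: same SVR model, scaling handled automatically
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

svr = TransformedTargetRegressor(
    regressor=make_pipeline(StandardScaler(), SVR(kernel="rbf")),
    transformer=StandardScaler(),
)
svr.fit(X, y.ravel())         # X, y as loaded in Step 1 (unscaled)
print(svr.predict([[6.5]]))  # inverse-transformed back to salary units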
--------------------------------------------------------------------------------
/project_4_support_vector_regression/svr_2019.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 1 19:28:27 2018

@author: omairaasim
"""

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[: ,1:2].values
y = dataset.iloc[:, 2:].values

# Step 2 - Feature Scaling (SVR does not scale features itself)
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)

# Step 3 - Fit SVR
from sklearn.svm import SVR
regressor = SVR(kernel = "rbf")
regressor.fit(X, y.ravel())  # ravel: fit expects a 1-D target

# Step 4 - Visualization
import matplotlib.pyplot as plt
plt.scatter(X, y, color="red")
plt.plot(X, regressor.predict(X), color="blue")
plt.title("SVR")
plt.xlabel("Position")
plt.ylabel("Salary")
plt.show()

# Step 5 - Predict Results
import numpy as np
# First transform 6.5 with the scaler fitted on X
sc_X_val = sc_X.transform(np.array([[6.5]]))
# Second predict the (scaled) value
scaled_y_pred = regressor.predict(sc_X_val)
# Third - since this is scaled - we have to inverse transform
# (reshape to 2-D: newer scikit-learn versions require it here)
y_pred = sc_y.inverse_transform(scaled_y_pred.reshape(-1, 1))
print('The predicted salary of a person at 6.5 Level is ', y_pred)
--------------------------------------------------------------------------------
/project_5_decision_tree_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_5_decision_tree_regression/decision_tree.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 1 20:35:24 2018

@author: omairaasim
"""

# Step 1 - Load Dataset
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Step 2 - Fit Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(criterion="squared_error")  # called "mse" in scikit-learn < 1.0
regressor.fit(X, y)

# Step 3 - Visualize
# Plot on a fine grid of levels so the stepwise shape of the tree is visible
import matplotlib.pyplot as plt
import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Decision Tree Regressor") 30 | plt.xlabel("Position") 31 | plt.ylabel("Salary") 32 | plt.show() 33 | 34 | # Step 4 - Predict 35 | y_pred = regressor.predict([[6.5]]) 36 | print('The predicted salary of a person at 6.5 Level is ',y_pred) 37 | -------------------------------------------------------------------------------- /project_5_decision_tree_regression/project_5: -------------------------------------------------------------------------------- 1 | Today we will be looking at the one of the most popular regression models called Decision Tree. 2 | 3 | #100DaysOfMLCode #100ProjectsInML 4 | 5 | I will be solving the same problem about predicting salary of a new employee based on his position level. 6 | 7 | I have solved the same problem in project 3 using Polynomial Regression - You can check it out here. 8 | And the same problem has been solved in project 4 using Support Vector Regression - You can check that project here. 9 | 10 | Let's understand Decision Trees. 11 | 12 | Decision tree regression model is Non Linear and a Non continuous model. 13 | 14 | Below is a scatter plot which represents our dataset. It has 2 independent variables X1 and X2 and what we are trying to predict is a 3rd dependent variable y. 15 | 16 | Insert image 17 | 18 | Now once we run the decision tree algorithm, the scatter plot will be split up into segments. Each one of these splits is called a leaf. The way the splits are made is based on the principle of information entropy. It is a mathematical concept and is quite complex. If you want to learn more about that - you can read up on the concept of information entropy. 19 | 20 | 21 | Let's walk through an example scenario so we understand how decision tree's work. Let's say the algorithm makes the first split at X1 = 20 - so the scatter plot is divided into 2 segments - first segment is when X1 < 20 and second segment is when X1 > 20. 22 | 23 | Insert image 24 | 25 | Insert image 26 | 27 | 28 | Now let's say split 2 happens at X2 = 170 - but it only happens to points where X1 > 20 29 | 30 | Insert image 31 | 32 | Insert image 33 | 34 | Next, split 3 happens at X2 = 200 - but it happens to points X1 < 20 35 | 36 | Insert image 37 | 38 | Insert image 39 | 40 | 41 | Finally, split 4 happens at X1 = 40 - but it applies to points where X1 > 20 and X2 < 170 42 | 43 | Insert image 44 | 45 | So now our decision tree is done. 46 | 47 | Now how do we determine the value of a new data point. It's very simple - we take the average of each of our terminal leaves. The diagram below shows an example of average for each of the terminal leaves. 48 | 49 | Insert image 50 | 51 | Now lets say we have a new data point where X1 = 30 and X2 = 50, it falls in the leaf whose average is -64.1 - so the decision tree algorithm will predict the value of y as -64.1. From the below diagram, we can see how it arrives at that value. 52 | 53 | Insert image 54 | 55 | 56 | Dataset 57 | First lets look at the dataset. It is Position_Salaries.csv and can be found here 58 | It has 3 columns - "Position", "Level" and "Salary" and describes the approximate salary range for an employee based on what level he falls under. 59 | 60 | For example if an employee is a Manager - he falls in Level 4 and should get around $80,000. 61 | 62 | Below is the screenshot of the dataset. 63 | 64 | Project Objective 65 | 66 | Lets assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. 
Dataset
First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.

Project Objective

Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.


Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify

dataset.iloc[:, 2].values

which simply means take all rows and only the column with index 2 - in Python indexes begin at 0 - so index 2 here is the third column, which is Salary.

Step 2 - Fit Decision Tree Regressor

We will be using the DecisionTreeRegressor class from sklearn.tree. First we create an object of the DecisionTreeRegressor class, passing the criterion parameter as "squared_error" (Mean Squared Error - older scikit-learn versions called this "mse"), and then call the fit method, passing X and y.
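Unlike the earlier write-ups, this article does not repeat the snippet inline, so for reference here is the corresponding code from decision_tree.py above:

# Step 2 - Fit Decision Tree Regressor (from decision_tree.py)
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(criterion="squared_error")  # "mse" in scikit-learn < 1.0
regressor.fit(X, y)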
Step 3 - Visualize
Let's plot the graph to look at the results for Decision Tree Regression. For decision trees we have to plot over a fine grid of continuous points, otherwise the stepwise shape of the model will not show up.

Step 4: Predict Decision Tree Regression Results

We get a prediction of $150k.
--------------------------------------------------------------------------------
/project_6_random_forest_regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_6_random_forest_regression/project_6:
--------------------------------------------------------------------------------
Today I will be writing about the Random Forest Regression model. Random Forest is a form of ensemble learning. Ensemble learning, in simple terms, is when you take the same algorithm multiple times and put the results together to make it more powerful than the original version. Unlike the Decision Tree model, where we built a single decision tree to predict the value for a new data point, in Random Forest we build many decision trees (a typical default is 500 trees).

So instead of getting 1 prediction, in Random Forest we get many predictions for y (say 500 trees give 500 predictions). We then take the average of all the predictions and assign that to y.

#100DaysOfMLCode #100ProjectsInML

I solved the same problem in project 3 using Polynomial Regression - you can check it out here.
We then solved it using Support Vector Regression - you can check that project here.
And in the last project, we used Decision Tree Regression - it's available here.

Today, we will use the Random Forest model and see how good our prediction is.

Dataset

First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.


Project Objective

Let's assume the above table is what the HR team of a company uses to determine what salary to offer to a new employee. For our project, let's take an example: an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for 2 years. So based on the table above - he falls between level 6 and level 7 - let's say he falls under level 6.5.

We want to build a model to predict what salary we should offer this new employee.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls under Level 6.5 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

So for X, we specify

X = dataset.iloc[:, 1:2].values

which simply means take all rows and all columns from index 1 up to index 2 but not including index 2 (the upper bound of the range is not included).

And for y, we specify

dataset.iloc[:, 2].values

which simply means take all rows and only the column with index 2 - in Python indexes begin at 0 - so index 2 here is the third column, which is Salary.

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

Step 2 - Fit Random Forest Regressor

We will be using the RandomForestRegressor class from sklearn.ensemble. First we create an object of the RandomForestRegressor class.

When initializing the class, we need to specify the number of trees - the n_estimators parameter. The second parameter, random_state=0, is just so that our results match. We then call the fit method, passing X and y.

First let's run with n_estimators set to 10 trees.

# Step 2 - Fit Regressor
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X, y)
Step 3 - Visualize
Let's plot the graph to look at the results for Random Forest Regression. For Random Forest, too, we have to plot over continuous points.

# Step 3 - Visualize
import matplotlib.pyplot as plt
import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 10 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

Step 4: Predict Random Forest Regression Results

We get a prediction of $167k.

# Step 4 - Predict
regressor.predict([[6.5]])

Step 5: Increase number of trees to 100

regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(X, y)

import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))
plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 100 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

regressor.predict([[6.5]])

We get a prediction of $158k.

Step 6: Increase number of trees to 300

regressor = RandomForestRegressor(n_estimators=300, random_state=0)
regressor.fit(X, y)

import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))
plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 300 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

regressor.predict([[6.5]])

We get a prediction of $160k.

So, to compare our results with previous regression models:
Polynomial Regression gave a prediction of $158k
Support Vector Regression gave a prediction of $170k
Decision Tree Regression gave a prediction of $150k
Random Forest Regression with 300 trees gave a prediction of $160k
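Steps 2 through 6 differ only in n_estimators, so the whole comparison can also be reproduced with a small loop. This is my sketch, not the course code, assuming X and y from Step 1:

# Compare the Level 6.5 prediction across forest sizes
from sklearn.ensemble import RandomForestRegressor

for n in (10, 100, 300):
    forest = RandomForestRegressor(n_estimators=n, random_state=0).fit(X, y)
    print(n, "trees ->", forest.predict([[6.5]]))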
--------------------------------------------------------------------------------
/project_6_random_forest_regression/random_forest.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 2 11:54:30 2018

@author: omairaasim
"""
# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

# Step 2 - Fit Regressor
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=0)
regressor.fit(X, y)

# Step 3 - Visualize
import matplotlib.pyplot as plt
import numpy as np
X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, regressor.predict(X_grid), color="blue")
plt.title("Random Forest Regressor - 100 Trees")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()

# Step 4 - Predict
y_pred = regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level is ', y_pred)
--------------------------------------------------------------------------------
/project_7_compare_regression_models/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/project_7_compare_regression_models/compare.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 3 11:39:26 2018

@author: omairaasim
"""

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values


###########################
### Linear Regression   ###
###########################
from sklearn.linear_model import LinearRegression
linear_regressor = LinearRegression()
linear_regressor.fit(X, y)

# Predict
lin_pred = linear_regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level with Linear Regression is ', lin_pred)

################################
### Polynomial Regression   ###
################################

# ** NOTE - convert X to X_poly of the required degree
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=4)
X_poly = poly_features.fit_transform(X)

from sklearn.linear_model import LinearRegression
poly_regressor = LinearRegression()
poly_regressor.fit(X_poly, y)

# Predict - have to convert 6.5 to poly format
poly_pred = poly_regressor.predict(poly_features.fit_transform([[6.5]]))
print('The predicted salary of a person at 6.5 Level with Polynomial Regression is ', poly_pred)

################################
### SVR Regression          ###
################################

# ** NOTE - SVR does not apply feature scaling itself, so scale X and y manually
from sklearn.preprocessing import StandardScaler
ss_x = StandardScaler()
ss_y = StandardScaler()
X_scaled = ss_x.fit_transform(X)
y_scaled = ss_y.fit_transform(y.reshape(-1,1))


from sklearn.svm import SVR
svr_regressor = SVR(kernel="rbf")
svr_regressor.fit(X_scaled, y_scaled.ravel())  # ravel: fit expects a 1-D target

# Predict - since we did feature scaling -
# we have to scale/transform 6.5 also
position_val = ss_x.transform([[6.5]])
pred_val_scaled = svr_regressor.predict(position_val)
# The above statement returns a scaled predicted value,
# so convert it back using inverse transform
# (reshape to 2-D: newer scikit-learn versions require it here)
svr_pred = ss_y.inverse_transform(pred_val_scaled.reshape(-1, 1))
print('The predicted salary of a person at 6.5 Level with Support Vector Regression is ', svr_pred)

################################
### Decision Tree Regression ###
################################
from sklearn.tree import DecisionTreeRegressor
tree_regressor = DecisionTreeRegressor(criterion="squared_error")  # "mse" in scikit-learn < 1.0
tree_regressor.fit(X, y)

# Predict
tree_pred = tree_regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level with Decision Tree Regression is ', tree_pred)

################################
### Random Forest Regression ###
################################
from sklearn.ensemble import RandomForestRegressor
forest_regressor = RandomForestRegressor(n_estimators=300, random_state=0)
forest_regressor.fit(X, y)

# Predict
forest_pred = forest_regressor.predict([[6.5]])
print('The predicted salary of a person at 6.5 Level with Random Forest Regression is ', forest_pred)


################################
### Visualizations           ###
################################
import matplotlib.pyplot as plt
import numpy as np

X_grid = np.arange(min(X), max(X), 0.01)
X_grid = X_grid.reshape((len(X_grid),1))

plt.scatter(X, y, color="red")
plt.plot(X_grid, linear_regressor.predict(X_grid), color="blue")
plt.plot(X_grid, poly_regressor.predict(poly_features.fit_transform(X_grid)), color="green")
plt.plot(X_grid, ss_y.inverse_transform(svr_regressor.predict(ss_x.transform(X_grid)).reshape(-1, 1)), color="orange")
plt.plot(X_grid, tree_regressor.predict(X_grid), color="black")
plt.plot(X_grid, forest_regressor.predict(X_grid), color="purple")
plt.title("Regression")
plt.xlabel("Position")
plt.ylabel("Salaries")
plt.show()
--------------------------------------------------------------------------------
/project_7_compare_regression_models/project_7:
--------------------------------------------------------------------------------
In the last 6 articles, I've covered some of the most popular regression models:
- Simple Linear Regression
- Multiple Linear Regression
- Polynomial Regression
- Support Vector Regression
- Decision Tree Regression
- Random Forest Regression

#100DaysOfMLCode #100ProjectsInML

The articles are for absolute beginners and have been presented in the simplest form. In programming terms, they are like the "Hello World" examples of the different types of regression. The intent is to get developers acquainted with the high-level concept of regression and, in the process, get their hands dirty by implementing a very simple project.

In reality, building the model and making a prediction are the simplest steps. All it involves is just 3 lines of code:
- Create an object of the regression class.
- Call the fit method.
- Call the predict method.

But if it were this simple, every other person would be a data scientist. The bulk of the work involves exploring and understanding the data, cleaning it up and imputing missing values. Basically, understanding and preparing the data is most of the work.

In the upcoming projects on regression, I will be implementing some end-to-end projects which will include all the steps mentioned above.

For now, let's recap the different types of regression models that we built and see which model gives us the best prediction.

Dataset

First let's look at the dataset. It is Position_Salaries.csv and can be found here.
It has 3 columns - "Position", "Level" and "Salary" - and describes the approximate salary range for an employee based on what level he falls under.

For example, if an employee is a Manager - he falls in Level 4 and should get around $80,000.

Below is the screenshot of the dataset.
Project Objective

A company "XYZ" uses the dataset above to determine what salary to offer a new employee. Let's say an employee has applied for the role of a Regional Manager and has already worked as a Regional Manager for the past 2 years. So based on the table above - he falls between level 6 and level 7.

The new employee says he is currently drawing a salary of $160,000.

We want to build a model to predict whether he is telling the truth.

Let's get started.

Step 1: Load the Dataset

If we look at the dataset, we need to predict the salary for an employee who falls between Level 6 and 7 - so we really do not need the first column, "Position".

Here X is the independent variable, which is the "Level",
and y is the dependent variable, which is the "Salary".

# Step 1 - Load Data
import pandas as pd
dataset = pd.read_csv("Position_Salaries.csv")
X = dataset.iloc[:, 1:2].values
y = dataset.iloc[:, 2].values

Step 2: Apply Linear Regression Model and make prediction

###########################
### Linear Regression   ###
###########################
from sklearn.linear_model import LinearRegression
linear_regressor = LinearRegression()
linear_regressor.fit(X, y)

# Predict
lin_pred = linear_regressor.predict([[6.5]])

Step 3: Apply Polynomial Regression Model and make prediction

################################
### Polynomial Regression   ###
################################

# ** NOTE - convert X to X_poly of the required degree
from sklearn.preprocessing import PolynomialFeatures
poly_features = PolynomialFeatures(degree=4)
X_poly = poly_features.fit_transform(X)

from sklearn.linear_model import LinearRegression
poly_regressor = LinearRegression()
poly_regressor.fit(X_poly, y)

# Predict - have to convert 6.5 to poly format
poly_pred = poly_regressor.predict(poly_features.fit_transform([[6.5]]))


Step 4: Apply Support Vector Regression Model and make prediction

################################
### SVR Regression          ###
################################

# ** NOTE - SVR does not apply feature scaling itself, so scale X and y manually
from sklearn.preprocessing import StandardScaler
ss_x = StandardScaler()
ss_y = StandardScaler()
X_scaled = ss_x.fit_transform(X)
y_scaled = ss_y.fit_transform(y.reshape(-1,1))

from sklearn.svm import SVR
svr_regressor = SVR(kernel="rbf")
svr_regressor.fit(X_scaled, y_scaled.ravel())  # ravel: fit expects a 1-D target

# Predict - since we did feature scaling - we have to scale/transform 6.5 also
position_val = ss_x.transform([[6.5]])

# Predict
pred_val_scaled = svr_regressor.predict(position_val)

# The above statement returns a scaled predicted value - convert it back using inverse transform
svr_pred = ss_y.inverse_transform(pred_val_scaled.reshape(-1, 1))

Step 5: Apply Decision Tree Regression Model and make prediction

################################
### Decision Tree Regression ###
################################
from sklearn.tree import DecisionTreeRegressor
tree_regressor = DecisionTreeRegressor(criterion="squared_error")  # "mse" in scikit-learn < 1.0
tree_regressor.fit(X, y)

# Predict
tree_pred = tree_regressor.predict([[6.5]])


Step 6: Apply Random Forest Regression Model and make prediction

################################
### Random Forest Regression ###
################################
from sklearn.ensemble import RandomForestRegressor
forest_regressor = RandomForestRegressor(n_estimators=300, random_state=0)
forest_regressor.fit(X, y)

# Predict
forest_pred = forest_regressor.predict([[6.5]])

Step 7: Compare Prediction Results

The table below shows the prediction results obtained from the different regression models we tried above.
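The original screenshot of the table is not reproduced here; the figures below are the rounded predictions reported in the earlier articles of this series:

Model                               Predicted salary at Level 6.5
Linear Regression                   ~$330k
Polynomial Regression (degree 4)    ~$158k
Support Vector Regression           ~$170k
Decision Tree Regression            ~$150k
Random Forest (300 trees)           ~$160k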
Step 8: Conclusion

According to the problem statement, we know that the new employee says he is currently making $160k.

Based on the models that we ran, we can see from the table above that Polynomial Regression and Random Forest made predictions very close to that figure. So we can conclude that the new employee is telling the truth.
--------------------------------------------------------------------------------
/project_8_predict_weight/Height_Weight_single_variable_data_101_series_1.0.csv:
--------------------------------------------------------------------------------
Height,Weight
121.92,28
124.46,28.1
127,28.2
129.54,28.3
132.08,28.4
134.62,28.5
137.16,35.2
139.7,37.6
142.24,40
144.78,42.6
147.32,44.9
149.86,47.6
152.4,49.9
154.94,52.6
157.48,54.9
160.02,57.6
162.56,59.9
165.1,62.6
167.64,64.8
170.18,67.6
172.72,69.8
175.26,72.6
177.8,74.8
180.34,77.5
182.88,79.8
185.42,82.5
187.96,84.8
190.5,87.5
193.04,89.8
195.58,92.5
198.12,94.8
200.66,97.5
203.2,99.8
205,102.5
208,104.8
--------------------------------------------------------------------------------
/project_8_predict_weight/project_8:
--------------------------------------------------------------------------------
I thought of working on a Kaggle dataset today, but at the same time I was looking for a very simple project. I came across this dataset that lists the heights and weights of people. You can check it out here.

We will go over some more complex and larger datasets in the upcoming projects. But in this project I will introduce two new topics - checking for null values and evaluating a regression model.

So in this project, we have to train our model on this data and then predict the weight of a person given his height.

#100DaysOfMLCode #100ProjectsInML

Let's get started.

Step 1 - Load Dataset
First we will load the dataset. The file is Height_Weight_single_variable_data_101_series_1.0.csv.

# Step 1 : Load Dataset
import pandas as pd
dataset = pd.read_csv("Height_Weight_single_variable_data_101_series_1.0.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values

Step 2 - Check for missing values
This is one of the most important steps in data preparation. We have to see if the dataset has any missing values and then figure out the best possible way to impute them.

# Step 2: Check for missing values
dataset.isnull().sum()

As we can see, it shows there are 0 missing values in the Height and Weight columns. So there is nothing to be done here.
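For completeness - our data has no missing values, but if it did, one simple strategy is to fill each gap with the column mean. A minimal sketch using scikit-learn's SimpleImputer (my addition, not part of the original article):

# Fill missing values with the column mean
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)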
Step 3 - Split dataset into training set and test set

There are 35 entries in the dataset. Let's take 20% of the data as test data.

# Step 3: Split dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

Step 4 - Fit Regression Model

We will first fit a Linear Regression model and see how well it fits the data. If the performance is not good, we can experiment with other regression models.

# Step 4: Fit Linear Regression Model
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

Step 5 - Predict test set values

Now that the Linear Regression model is trained, let's predict the X_test values.

# Step 5: Predict values for test data
lin_pred = lin_reg.predict(X_test)

Step 6 - Evaluate performance of the Model

We can see from the image below that the predicted values are very close to the real values.

IMAGE y_test, y_pred

After we fit the model, we have to evaluate how well it fits the data. For this purpose, we will look at "R squared" and the Mean Squared Error.

- "R squared": This value is between 0 and 1, i.e. between 0% and 100%. Generally speaking, the closer it is to 1 the better - 1 means perfect correlation.

Wikipedia defines it as:
R squared is the proportion of the variance in the dependent variable that is predictable from the independent variable(s).

So if it is 100%, the two variables are perfectly correlated.

- Mean Squared Error (MSE): This is the average of the squares of the errors. "Error" here means the difference between an actual value and the corresponding predicted value; we square each difference. So if the mean squared error is large, the errors are large. The lower the value, the better the model, and we can use the MSE value when selecting one model over another.

from sklearn import metrics
print('R square = ', metrics.r2_score(y_test, lin_pred))
print('Mean squared Error = ', metrics.mean_squared_error(y_test, lin_pred))
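One small addition of my own: taking the square root of the MSE gives the RMSE, which is in the same unit as the target (kilograms here), so it is often easier to interpret.

# RMSE - same unit as the Weight column
import numpy as np
print('Root mean squared error = ', np.sqrt(metrics.mean_squared_error(y_test, lin_pred)))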
--------------------------------------------------------------------------------
/project_8_predict_weight/project_8_predict_weight.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 8 08:49:06 2019

@author: omairaasim
"""

# Step 1 : Load Dataset
import pandas as pd
dataset = pd.read_csv("Height_Weight_single_variable_data_101_series_1.0.csv")
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 1].values

# Step 2: Check for missing values
dataset.isnull().sum()

# Step 3: Split dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Step 4: Fit Linear Regression Model
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Step 5: Predict values for test data
lin_pred = lin_reg.predict(X_test)

# Step 6: Compare predictions with real results
from sklearn import metrics
print('R square = ', metrics.r2_score(y_test, lin_pred))
print('Mean squared Error = ', metrics.mean_squared_error(y_test, lin_pred))


# Step 7: Visualize Training set
import matplotlib.pyplot as plt
plt.scatter(X_train, y_train, color="red")
plt.plot(X_train, lin_reg.predict(X_train), color="blue")
plt.title("Height and Weight - Training Set")
plt.xlabel("Height")
plt.ylabel("Weight")
plt.show()

# Step 8: Visualize Test set
import matplotlib.pyplot as plt
plt.scatter(X_test, y_test, color="red")
plt.plot(X_train, lin_reg.predict(X_train), color="blue")
plt.title("Height and Weight - Test Set")
plt.xlabel("Height")
plt.ylabel("Weight")
plt.show()

# Step 9: Make new Prediction
lin_pred_new = lin_reg.predict([[166]])
print('If a person has height 166, the predicted weight is ', lin_pred_new)
--------------------------------------------------------------------------------
/project_9_predict_weight_sex/predict_weight.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 9 07:41:20 2019

@author: omairaasim
"""

# Step 1 - Load data
import pandas as pd
dataset = pd.read_csv("weight-height.csv")

# Step 2 - Analyze data
dataset.info()
dataset.describe()
dataset.isnull().sum()

# Step 3 - Convert Gender to number
# Option A: LabelEncoder (comment this section if using the other option)
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 2].values
from sklearn.preprocessing import LabelEncoder
labelEncoder_gender = LabelEncoder()
X[:,0] = labelEncoder_gender.fit_transform(X[:,0])

import numpy as np
X = np.vstack(X[:, :]).astype(float)  # np.float was removed in NumPy 1.24; plain float works

############ OR ##############

# Step 3 - Convert Gender to number
# Option B: Replace directly in the dataframe
# dataset['Gender'].replace('Female', 0, inplace=True)
# dataset['Gender'].replace('Male', 1, inplace=True)
# X = dataset.iloc[:, :-1].values
# y = dataset.iloc[:, 2].values

# Step 4 - Split data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Step 5 - Fit Regression Model
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Step 6 - Make Prediction using test data
lin_pred = lin_reg.predict(X_test)


# Step 7 - Model Accuracy
from sklearn import metrics
print('R square = ', metrics.r2_score(y_test, lin_pred))
print('Mean squared Error = ', metrics.mean_squared_error(y_test, lin_pred))
print('Mean absolute Error = ', metrics.mean_absolute_error(y_test, lin_pred))

# Step 8 - Predict my weight
my_weight_pred = lin_reg.predict([[0,74]])
print('My predicted weight = ', my_weight_pred)
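An optional illustration of my own, not part of the original script: with Gender encoded as 0 = Female and 1 = Male (as LabelEncoder produces here), the difference between these two predictions at the same height is exactly the model's learned Gender coefficient.

# Illustration: effect of the Gender feature at a fixed height of 74
female_pred = lin_reg.predict([[0, 74]])
male_pred = lin_reg.predict([[1, 74]])
print('Predicted weight difference (male - female) at height 74 = ', male_pred - female_pred)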
--------------------------------------------------------------------------------