├── .gitignore
├── README.md
├── Session01
│   ├── Internet_DataSet.csv
│   ├── crisp_dm.png
│   ├── data_preparation.ipynb
│   └── preparation_data.csv
├── Session02
│   ├── CS583-association-rules.ppt
│   ├── association_rules_data_set.csv
│   ├── challenge.ipynb
│   └── simple_association_rules.ipynb
├── Session03
│   ├── ML.pptx
│   ├── deployment_data.csv
│   ├── house-prices-advanced-regression-techniques.zip
│   ├── linear_regression-Challenge.ipynb
│   ├── linear_regression.ipynb
│   ├── linear_regression.pptx
│   └── linear_regression_data.csv
├── Session04
│   ├── LogisticRegression
│   │   ├── 02_logestic_reg_pic.PNG
│   │   ├── Logistic_Regression_to_Teach.ipynb
│   │   ├── data_scoring.csv
│   │   ├── data_training.csv
│   │   ├── ex_1.PNG
│   │   ├── ex_2.PNG
│   │   ├── fruit_dat_with_colors.txt
│   │   ├── linear_reg_first_pic.PNG
│   │   └── logisitic_regression_challenge.ipynb
│   ├── NeuralNetwork.pptx
│   └── NeuralNetwork
│       ├── data_scoring.csv
│       ├── data_training.csv
│       ├── first_pic.PNG
│       ├── forth_pic.PNG
│       ├── neural_network_challenge.ipynb
│       ├── neural_network_to_teach.ipynb
│       ├── second_pic.PNG
│       └── third_pic.PNG
├── Session05
│   ├── hyper-parameter.pptx
│   ├── imbalance-dataset.pptx
│   ├── metrics.pptx
│   └── mnist.ipynb
└── Session06
    ├── K-means.pptx
    ├── KNN.ppt
    ├── k-means
    │   ├── k_means.ipynb
    │   └── k_means_dataset.csv
    ├── mnist.ipynb
    ├── text.ppt
    └── text
        ├── .ipynb_checkpoints
        │   └── text_clustering_to_teach-checkpoint.ipynb
        ├── Sentences.txt
        ├── text_clustering_to_teach.ipynb
        └── text_clustering_to_teach_students.ipynb
/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.ipynb_checkpoints 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataMiningwithPython 2 | 3 | This repository was created for the Data Mining with Python course, which was held at Khaje Nasir Toosi University in fall 2019.
4 | 5 | You can find the videos of the course in the following YouTube channel: 6 | 7 | [Youtube-Channel](https://www.youtube.com/watch?v=JM6yP9GTLqI&list=PL2g_5adpoaeKH7NJRjcFzfMLMNHZSR5_D&index=2&t=0s) 8 | 9 | I'm Pooya Mohammadi and you can contact me in one of the following ways: 10 | 11 | 12 | LinkedIn: https://www.linkedin.com/in/pooya-mohammadi/ 13 | 14 | email: pooyamohammadikazaj@gmail.com 15 | -------------------------------------------------------------------------------- /Session01/Internet_DataSet.csv: -------------------------------------------------------------------------------- 1 | Gender,Race,Birth_Year,Marital_Status,Years_on_Internet,Hours_Per_Day,Preferred_Browser,Preferred_Search_Engine,Preferred_Email,Read_News,Online_Shopping,Online_Gaming,Facebook,Twitter,Other_Social_Network 2 | M,White,1972,M,8,1,Firefox,Google,Yahoo,Y,N,N,Y,N, 3 | M,Hispanic,1981,S,14,2,Chrome,Google,Hotmail,Y,N,N,Y,N, 4 | F,African American,1977,S,6,2,Firefox,Yahoo,Yahoo,Y,Y,,Y,N, 5 | F,White,1961,D,8,6,Firefox,Google,Hotmail,N,Y,N,N,Y, 6 | M,White,1954,M,2,3,Internet Explorer,Bing,Hotmail,Y,Y,N,Y,N, 7 | M,African American,1982,D,15,4,Internet Explorer,Google,Yahoo,Y,N,Y,N,N, 8 | M,African American,1981,D,11,2,Firefox,Google,Yahoo,,Y,,Y,Y,LinkedIn 9 | M,White,1977,S,3,3,Internet Explorer,Yahoo,Yahoo,Y,,,Y,99,LinkedIn 10 | F,African American,1969,M,6,2,Firefox,Google,Gmail,N,Y,N,N,N, 11 | M,White,1987,S,12,1,Safari,Yahoo,Yahoo,Y,,Y,Y,N,MySpace 12 | F,Hispanic,1959,D,12,5,Chrome,Google,Gmail,Y,N,N,Y,N,Google+ -------------------------------------------------------------------------------- /Session01/crisp_dm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session01/crisp_dm.png -------------------------------------------------------------------------------- /Session01/preparation_data.csv: -------------------------------------------------------------------------------- 1 | Weight,Cholesterol,Gender 2 | 102,111,1 3 | 115,135,1 4 | 115,136,1 5 | 140,167,0 6 | 1130,158,1 7 | 198,227,1 8 | 114,131,1 9 | 145,176,0 10 | 191,223,0 11 | 186,221,1 12 | 104,116,0 13 | 188,222,1 14 | 96,102,0 15 | 156,192,0 16 | 125,152,3 17 | 178,213,0 18 | 109,125,0 19 | 168,204,1 20 | 152,189,0 21 | 133,163,0 22 | 153,189,0 23 | 107,122,0 24 | 199,228,1 25 | -140,168,0 26 | 95,102,1 27 | 183,218,0 28 | 108,123,0 29 | 190,222,0 30 | 174,208,1 31 | 149,183,1 32 | 169,204,1 33 | 138,167,1 34 | 151,188,1 35 | 109,126,1 36 | 178,213,0 37 | 106,122,1 38 | 195,225,1 39 | 96,105,1 40 | 129,155,0 41 | 166,203,1 42 | 197,225,1 43 | 148,177,1 44 | 117,139,1 45 | 193,224,0 46 | 170,207,1 47 | 130,-158,0 48 | 183,218,1 49 | 134,164,0 50 | 128,154,0 51 | 105,118,0 52 | 115,138,0 53 | 166,199,1 54 | 183,219,1 55 | 149,183,1 56 | 110,128,0 57 | 164,197,0 58 | 157,196,0 59 | 170,1208,0 60 | 124,152,1 61 | 145,175,1 62 | 113,129,0 63 | 150,185,0 64 | 100,107,1 65 | 178,211,3 66 | 139,167,0 67 | 150,185,1 68 | 101,110,1 69 | 157,194,1 70 | 187,221,0 71 | 137,166,0 72 | 121,143,1 73 | 132,163,0 74 | 130,158,1 75 | 156,191,0 76 | 180,216,0 77 | 100,106,1 78 | 122,146,0 79 | 185,220,0 80 | 113,131,0 81 | 123,147,0 82 | 119,141,0 83 | 113,130,1 84 | 168,204,1 85 | 126,152,1 86 | 102,111,1 87 | 116,139,1 88 | 144,172,1 89 | 140,168,0 90 | 124,148,0 91 | 108,123,0 92 | 154,189,0 93 | 185,220,0 94 | 106,120,1 95 | 97,106,1 96 | 96,105,0 97 | 129,157,1 98 | 124,149,1 99 | 149,185,0 100 |
97,-106,0 101 | 102,115,1 102 | 146,177,1 103 | 199,230,0 104 | 100,108,0 105 | 118,140,0 106 | 179,214,1 107 | 179,216,1 108 | 113,130,0 109 | 142,169,0 110 | 101,110,1 111 | 174,209,1 112 | 131,162,1 113 | 179,214,1 114 | 122,145,1 115 | 105,116,1 116 | 176,209,0 117 | 103,116,1 118 | 120,139,1 119 | 118,143,1 120 | 143,168,0 121 | 134,158,1 122 | 203,232,1 123 | 116,136,1 124 | 150,180,0 125 | 192,228,0 126 | 187,222,1 127 | 107,117,0 128 | 192,224,1 129 | 97,1104,0 130 | 157,195,0 131 | 128,156,0 132 | 183,213,0 133 | 112,133,0 134 | 173,208,1 135 | 156,195,0 136 | 137,163,0 137 | 156,195,0 138 | 112,124,0 139 | 203,231,1 140 | 141,172,0 141 | 97,104,1 142 | 186,221,0 143 | 113,128,0 144 | 195,222,0 145 | 175,214,1 146 | 154,184,1 147 | 173,204,1 148 | 141,171,1 149 | 152,195,4 150 | 114,132,1 151 | 181,214,0 152 | 111,123,1 153 | 200,233,1 154 | 100,107,1 155 | 134,162,0 156 | 167,204,1 157 | 198,227,1 158 | 151,185,1 159 | 121,142,1 160 | 196,225,0 161 | 171,212,1 162 | 135,161,0 163 | 186,220,1 164 | 138,168,0 165 | 133,155,0 166 | 107,125,0 167 | 120,144,0 168 | 168,201,1 169 | 184,222,1 170 | 154,189,1 171 | 115,132,0 172 | 168,204,0 173 | 161,201,0 174 | 171,211,0 175 | 129,156,1 176 | 150,179,1 177 | 118,129,0 178 | 154,185,0 179 | 103,107,1 180 | 180,213,1 181 | 141,172,0 182 | 152,193,1 183 | 104,114,1 184 | 159,202,1 185 | 191,222,0 186 | 142,168,0 187 | 123,146,1 188 | 135,170,0 189 | 132,165,1 190 | 158,196,0 191 | 181,224,0 192 | 105,108,1 193 | 126,149,0 194 | 186,224,0 195 | 116,136,0 196 | 124,152,0 197 | 122,143,0 198 | 115,135,1 199 | 173,211,1 200 | 128,156,1 201 | 103,115,1 202 | 120,143,1 203 | 146,179,1 204 | 145,173,0 205 | 125,148,0 206 | 109,127,0 207 | 156,194,0 208 | 187,225,0 209 | 110,127,1 210 | 100,106,1 211 | 97,112,0 212 | 133,164,1 213 | 128,151,1 214 | 154,191,0 215 | 100,110,0 216 | 104,116,1 217 | 149,180,1 218 | 203,235,0 219 | 104,108,0 220 | 119,141,0 221 | 181,219,1 222 | 180,219,1 223 | 116,138,0 224 | 144,169,0 225 | 102,-116,1 226 | 176,212,1 227 | 135,162,1 228 | 184,220,1 229 | 124,151,1 230 | 110,119,1 231 | 180,212,0 232 | 103,111,1 233 | 117,142,1 234 | 120,138,1 235 | 145,172,0 236 | 133,158,1 237 | 201,228,1 238 | 117,134,1 239 | 146,181,0 240 | 195,231,0 241 | 188,223,1 242 | 108,121,0 243 | 191,222,1 244 | 100,102,0 245 | 158,195,0 246 | 127,156,0 247 | 181,214,0 248 | 113,132,0 249 | 171,204,1 250 | 153,190,0 251 | 137,167,0 252 | 154,195,0 253 | 112,127,0 254 | 201,235,1 255 | 141,171,0 256 | 100,110,1 257 | 185,221,0 258 | 111,128,0 259 | 191,226,0 260 | 176,208,1 261 | 150,191,1 262 | 171,205,1 263 | 140,173,1 264 | 153,191,1 265 | 111,128,1 266 | 180,220,0 267 | 107,126,1 268 | 197,232,1 269 | 101,108,1 270 | 132,159,0 271 | 168,211,1 272 | 200,232,1 273 | 150,182,1 274 | 119,145,1 275 | 197,224,0 276 | 172,209,1 277 | 134,160,0 278 | 185,219,1 279 | 135,170,0 280 | 132,161,0 281 | 1,122,0 282 | 116,141,0 283 | 170,204,1 284 | 187,226,1 285 | 154,187,1 286 | 111,129,0 287 | 169,199,0 288 | 161,199,0 289 | 174,214,0 290 | 125,159,1 291 | 148,176,1 292 | 117,129,0 293 | 154,187,0 294 | 104,109,1 295 | 183,212,1 296 | 142,170,0 297 | 155,191,1 298 | 105,116,1 299 | 160,202,1 300 | 191,226,0 301 | 140,166,0 302 | 123,151,1 303 | 136,170,0 304 | 132,166,1 305 | 158,193,0 306 | 182,218,0 307 | 104,109,1 308 | 126,152,0 309 | 188,220,0 310 | 118,133,0 311 | 127,152,0 312 | 123,142,0 313 | 118,131,1 314 | 169,204,1 315 | 128,153,1 316 | 106,114,1 317 | 119,144,1 318 | 147,172,1 319 | 141,176,0 320 | 125,150,0 321 | 109,129,0 322 | 
158,190,0 323 | 190,226,0 324 | 110,128,1 325 | 99,106,1 326 | 98,107,0 327 | 134,159,1 328 | 129,157,1 329 | 154,192,0 330 | 100,110,0 331 | 107,115,1 332 | 149,183,1 333 | 203,233,0 334 | 101,112,0 335 | 119,146,0 336 | 182,222,1 337 | 181,216,1 338 | 115,134,0 339 | 145,174,0 340 | 102,114,1 341 | 176,211,1 342 | 134,167,1 343 | 183,214,1 344 | 123,145,1 345 | 108,120,1 346 | 177,217,0 347 | 104,113,1 348 | 117,141,1 349 | 117,140,1 350 | 145,171,0 351 | 134,163,1 352 | 199,232,1 353 | 115,131,1 354 | 149,184,0 355 | 192,225,0 356 | 188,223,1 357 | 107,121,0 358 | 190,223,1 359 | 98,105,0 360 | 158,197,0 361 | 128,160,0 362 | 180,218,0 363 | 110,131,0 364 | 172,211,1 365 | 157,194,0 366 | 137,169,0 367 | 158,189,0 368 | 110,128,0 369 | 203,228,1 370 | 145,169,0 371 | 96,108,1 372 | 186,219,0 373 | 110,124,0 374 | 194,224,0 375 | 177,214,1 376 | 152,190,1 377 | 172,205,1 378 | 140,167,1 379 | 154,190,1 380 | 113,128,1 381 | 179,217,0 382 | 111,pooya,1 383 | 196,228,1 384 | 101,105,1 385 | 131,162,0 386 | 171,211,1 387 | 199,227,1 388 | 151,178,1 389 | 121,146,1 390 | 196,227,0 391 | 174,208,1 392 | 135,163,0 393 | 184,225,1 394 | 137,167,0 395 | 129,162,0 396 | 108,125,0 397 | 116,144,0 398 | 168,199,1 399 | 184,226,1 400 | 151,183,1 401 | 114,136,0 402 | 167,199,0 403 | 160,204,0 404 | 175,215,0 405 | 127,155,1 406 | 146,179,1 407 | 118,133,0 408 | 151,186,0 409 | 105,111,1 410 | 180,219,1 411 | 140,173,0 412 | 155,185,1 413 | 106,118,1 414 | 160,199,1 415 | 192,226,0 416 | 138,169,0 417 | 122,144,1 418 | 135,166,0 419 | 132,166,1 420 | 158,198,0 421 | 184,222,0 422 | 101,110,1 423 | 126,154,0 424 | 188,226,0 425 | 118,135,0 426 | 127,151,0 427 | 123,146,0 428 | 117,133,1 429 | 172,205,1 430 | 127,154,1 431 | 104,116,1 432 | 121,145,1 433 | 148,174,1 434 | 141,168,0 435 | 128,156,0 436 | 113,124,0 437 | 155,196,0 438 | 189,227,0 439 | 108,122,1 440 | 98,106,1 441 | menoo,110,0 442 | 134,158,1 443 | 125,154,1 444 | 151,190,0 445 | 100,110,0 446 | 107,120,1 447 | 147,180,1 448 | 200,232,0 449 | 101,110,0 450 | 120,142,0 451 | 181,222,1 452 | 182,218,1 453 | 117,138,0 454 | 145,176,0 455 | 106,115,1 456 | 179,211,1 457 | 134,168,1 458 | 180,216,1 459 | 126,147,1 460 | 110,124,1 461 | 180,215,0 462 | 103,117,1 463 | 116,139,1 464 | 119,138,1 465 | 144,171,0 466 | 133,158,1 467 | 199,232,1 468 | 115,139,1 469 | 150,178,0 470 | 192,224,0 471 | 188,224,1 472 | 109,116,0 473 | 189,223,1 474 | 101,109,0 475 | 159,193,0 476 | 128,155,0 477 | 182,221,0 478 | 111,130,0 479 | 171,212,1 480 | 154,190,0 481 | 134,169,0 482 | 157,191,0 483 | 112,122,0 484 | 203,234,1 485 | 142,175,0 486 | 97,108,1 487 | 187,220,0 488 | 111,126,0 489 | 194,227,0 490 | 176,216,1 491 | 153,188,1 492 | 171,205,1 493 | 142,172,1 494 | 153,192,1 495 | 113,129,1 496 | 183,219,0 497 | 108,127,1 498 | 200,228,1 499 | 99,105,1 500 | 131,159,0 501 | 171,205,1 502 | 200,228,1 503 | 152,178,1 504 | 119,139,1 505 | 196,232,0 506 | 173,212,1 507 | 132,162,0 508 | 185,226,1 509 | 138,171,0 510 | 132,159,0 511 | 110,120,0 512 | 119,143,0 513 | 167,200,1 514 | 188,221,1 515 | 151,184,1 516 | 112,134,0 517 | 166,200,0 518 | 158,201,0 519 | 172,210,0 520 | 127,159,1 521 | 150,178,1 522 | 115,137,0 523 | 151,190,0 524 | 103,110,1 525 | 181,213,1 526 | 141,170,0 527 | 152,193,1 528 | 105,113,1 529 | 162,196,1 530 | 190,222,0 531 | 141,174,0 532 | 122,143,1 533 | 136,169,0 534 | 131,165,1 535 | 157,194,0 536 | 181,217,0 537 | 103,113,1 538 | 127,150,0 539 | 189,223,0 540 | 114,133,0 541 | 125,149,0 542 | 123,149,0 543 | 116,130,1 544 | 
172,207,1 545 | -172,157,1 546 | 107,115,1 547 | 117,147,1 548 | -------------------------------------------------------------------------------- /Session02/CS583-association-rules.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session02/CS583-association-rules.ppt -------------------------------------------------------------------------------- /Session02/challenge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Mining\n", 8 | "\n", 9 | "## Association Rules\n", 10 | "\n", 11 | "### After completing materials of this notebook, you should be able to:\n", 12 | "\n", 13 | "* Explain what association rules are, how they are found and the benefits of using them.\n", 14 | "* Recognize the necessary format for data in order to create association rules.\n", 15 | "* Develop an association rule model.\n", 16 | "* Interpret the rules generated by an association rule model and explain their significance, if any :D\n", 17 | "\n", 18 | "### What are Association Rules?\n", 19 | " Association rules are a data mining methodology that seeks to find frequent connections between attributes in a data set.\n", 20 | " * shopping basket analysis\n", 21 | " * products purchase frequency\n", 22 | " * recommendation systems: items are found to be __associated__ with the item you are looking for, and that association is so frequent in the web site’s data set, that the association might be considered a __rule__. Thus is born the name of this data mining approach: “association rules”\n", 23 | "\n", 24 | "#### Business Understanding\n", 25 | "we want to identify and then try to take advantage of existing connections in our local\n", 26 | "community to get some work done that will benefit the entire community.\n", 27 | "\n", 28 | "#### Data Understanding\n", 29 | "\n", 30 | "* __Elapsed_Time__: This is the amount of time each respondent spent completing our survey. It is expressed in decimal minutes (e.g.
4.5 in this attribute would be four minutes, thirty seconds).\n", 31 | "* __Time_in_Community__: This question on the survey asked the person if they have lived in the area for 0-2 years, 3-9 years, or 10+ years; and is recorded in the data set as Short, Medium, or Long respectively.\n", 32 | "* __Gender__: The survey respondent’s gender.\n", 33 | "* __Working__: A yes/no column indicating whether or not the respondent currently has a paid job.\n", 34 | "* __Age__: The survey respondent’s age in years.\n", 35 | "* __Family__: A yes/no column indicating whether or not the respondent is currently a member of a family-oriented community organization, such as Big Brothers/Big Sisters, children’s recreation or sports leagues, genealogy groups, etc.\n", 36 | "* __Hobbies__: A yes/no column indicating whether or not the respondent is currently a member of a hobby-oriented community organization, such as amateur radio, outdoor recreation, motorcycle or bicycle riding, etc.\n", 37 | "* __Social_Club__: A yes/no column indicating whether or not the respondent is currently a member of a community social organization, such as Rotary International, Lion’s Club, etc.\n", 38 | "* __Political__: A yes/no column indicating whether or not the respondent is currently a member of a political organization with regular meetings in the community, such as a political party, a grass-roots action group, a lobbying effort, etc.\n", 39 | "* __Professional__: A yes/no column indicating whether or not the respondent is currently a member of a professional organization with local chapter meetings, such as a chapter of a law or medical society, a small business owner’s group, etc.\n", 40 | "* __Religious__: A yes/no column indicating whether or not the respondent is currently a member of a church in the community.\n", 41 | "* __Support_Group__: A yes/no column indicating whether or not the respondent is currently a member of a support-oriented community organization, such as Alcoholics Anonymous, an anger management group, etc.\n", 42 | "\n", 43 | "#### Data Preparation" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 1, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import pandas as pd" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 2, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "df = pd.read_csv('association_rules_data_set.csv')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 3, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/html": [ 72 |
\n", 73 | "\n", 86 | "\n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
Elapsed_TimeTime_in_CommunityGenderWorkingAgeFamilyHobbiesSocial_ClubPoliticalProfessionalReligiousSupport_Group
08.71ShortMNo531000000
15.24MediumFNo310000011
24.22MediumMNo421100100
34.81LongFNo300000000
43.95LongMYes290001101
\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " Elapsed_Time Time_in_Community Gender Working Age Family Hobbies \\\n", 186 | "0 8.71 Short M No 53 1 0 \n", 187 | "1 5.24 Medium F No 31 0 0 \n", 188 | "2 4.22 Medium M No 42 1 1 \n", 189 | "3 4.81 Long F No 30 0 0 \n", 190 | "4 3.95 Long M Yes 29 0 0 \n", 191 | "\n", 192 | " Social_Club Political Professional Religious Support_Group \n", 193 | "0 0 0 0 0 0 \n", 194 | "1 0 0 0 1 1 \n", 195 | "2 0 0 1 0 0 \n", 196 | "3 0 0 0 0 0 \n", 197 | "4 0 1 1 0 1 " 198 | ] 199 | }, 200 | "execution_count": 3, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "df.head()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 4, 212 | "metadata": {}, 213 | "outputs": [ 214 | { 215 | "data": { 216 | "text/plain": [ 217 | "Counter({'Short': 714, 'Medium': 1304, 'Long': 1465})" 218 | ] 219 | }, 220 | "execution_count": 4, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "from collections import Counter\n", 227 | "Counter(df.Time_in_Community)" 228 | ] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Python 3", 234 | "language": "python", 235 | "name": "python3" 236 | }, 237 | "language_info": { 238 | "codemirror_mode": { 239 | "name": "ipython", 240 | "version": 3 241 | }, 242 | "file_extension": ".py", 243 | "mimetype": "text/x-python", 244 | "name": "python", 245 | "nbconvert_exporter": "python", 246 | "pygments_lexer": "ipython3", 247 | "version": "3.7.4" 248 | } 249 | }, 250 | "nbformat": 4, 251 | "nbformat_minor": 2 252 | } 253 | -------------------------------------------------------------------------------- /Session03/ML.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session03/ML.pptx -------------------------------------------------------------------------------- /Session03/house-prices-advanced-regression-techniques.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session03/house-prices-advanced-regression-techniques.zip -------------------------------------------------------------------------------- /Session03/linear_regression-Challenge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Mining\n", 8 | "\n", 9 | "## Linear Regression\n", 10 | "\n", 11 | "### After completing materials of this notebook, you should be able to:\n", 12 | "\n", 13 | "* Explain what linear regression is, how it is used and the benefits of using it.\n", 14 | "* Recognize the necessary format for data in order to perform predictive linear regression.\n", 15 | "* Explain the basic algebraic formula for calculating linear regression.\n", 16 | "* Develop a linear regression data mining model using a training data set.\n", 17 | "* Interpret the model’s coefficients and apply them to a scoring data set in order to deploy the model.\n", 18 | "\n", 19 | "#### ORGANIZATIONAL UNDERSTANDING\n", 20 | " we are trying to predict heating oil usage for new customers\n", 21 | "\n", 22 | "#### Data Understanding\n", 23 | "* __Insulation__: This is a density rating, ranging from one to ten, indicating the 
thickness of each home’s insulation. A home with a density rating of one is poorly insulated, while a home with a density of ten has excellent insulation.\n", 24 | "* __Temperature__: This is the average outdoor ambient temperature at each home for the most recent year, measured in degrees Fahrenheit.\n", 25 | "* __Heating_Oil__: This is the total number of units of heating oil purchased by the owner of each home in the most recent year.\n", 26 | "* __Num_Occupants__: This is the total number of occupants living in each home.\n", 27 | "* __Avg_Age__: This is the average age of those occupants.\n", 28 | "* __Home_Size__: This is a rating, on a scale of one to eight, of the home’s overall size. The higher the number, the larger the home.\n", 29 | "\n", 30 | "#### Data Preparation\n", 31 | " When using linear regression as a predictive model, it is extremely important to remember that the ranges for all attributes in the scoring data must be within the ranges for the corresponding attributes in the training data" 32 | ] 33 | } 34 | ], 35 | "metadata": { 36 | "kernelspec": { 37 | "display_name": "Python 3", 38 | "language": "python", 39 | "name": "python3" 40 | }, 41 | "language_info": { 42 | "codemirror_mode": { 43 | "name": "ipython", 44 | "version": 3 45 | }, 46 | "file_extension": ".py", 47 | "mimetype": "text/x-python", 48 | "name": "python", 49 | "nbconvert_exporter": "python", 50 | "pygments_lexer": "ipython3", 51 | "version": "3.7.4" 52 | } 53 | }, 54 | "nbformat": 4, 55 | "nbformat_minor": 2 56 | } 57 | -------------------------------------------------------------------------------- /Session03/linear_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Mining\n", 8 | "\n", 9 | "## Linear Regression\n", 10 | "\n", 11 | "### After completing materials of this notebook, you should be able to:\n", 12 | "\n", 13 | "* Explain what linear regression is, how it is used and the benefits of using it.\n", 14 | "* Recognize the necessary format for data in order to perform predictive linear regression.\n", 15 | "* Explain the basic algebraic formula for calculating linear regression.\n", 16 | "* Develop a linear regression data mining model using a training data set.\n", 17 | "* Interpret the model’s coefficients and apply them to a scoring data set in order to deploy the model.\n", 18 | "\n", 19 | "#### ORGANIZATIONAL UNDERSTANDING\n", 20 | " we are trying to predict heating oil usage for new customers\n", 21 | "\n", 22 | "#### Data Understanding\n", 23 | "* __Insulation__: This is a density rating, ranging from one to ten, indicating the thickness of each home’s insulation. A home with a density rating of one is poorly insulated, while a home with a density of ten has excellent insulation.\n", 24 | "* __Temperature__: This is the average outdoor ambient temperature at each home for the most recent year, measured in degrees Fahrenheit.\n", 25 | "* __Heating_Oil__: This is the total number of units of heating oil purchased by the owner of each home in the most recent year.\n", 26 | "* __Num_Occupants__: This is the total number of occupants living in each home.\n", 27 | "* __Avg_Age__: This is the average age of those occupants.\n", 28 | "* __Home_Size__: This is a rating, on a scale of one to eight, of the home’s overall size.
The higher the number, the larger the home.\n", 29 | "\n", 30 | "#### Data Preparation\n", 31 | " When using linear regression as a predictive model, it is extremely important to remember that the ranges for all attributes in the scoring data must be within the ranges for the corresponding attributes in the training data" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 49, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "import pandas as pd\n", 41 | "training_data = pd.read_csv('linear_regression_data.csv')\n", 42 | "scoring_data = pd.read_csv('deployment_data.csv')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 30, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "Is there any null value in training_data?? False\n" 55 | ] 56 | }, 57 | { 58 | "data": { 59 | "text/html": [ 60 |
\n", 61 | "\n", 74 | "\n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | "
InsulationTemperatureHeating_OilNum_OccupantsAvg_AgeHome_Size
\n", 89 | "
" 90 | ], 91 | "text/plain": [ 92 | "Empty DataFrame\n", 93 | "Columns: [Insulation, Temperature, Heating_Oil, Num_Occupants, Avg_Age, Home_Size]\n", 94 | "Index: []" 95 | ] 96 | }, 97 | "execution_count": 30, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "# Check for Missing data\n", 104 | "training_data.isnull().values.any()\n", 105 | "print(f'Is there any null value in training_data?? {training_data.isnull().values.any()}')\n", 106 | "training_data[training_data.isnull().any(axis = 1)]" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 31, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Is there any null value in scoring_data?? False\n" 119 | ] 120 | }, 121 | { 122 | "data": { 123 | "text/html": [ 124 | "
\n", 125 | "\n", 138 | "\n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | "
InsulationTemperatureNum_OccupantsAvg_AgeHome_SizePredicted_Heatin_Oil
\n", 153 | "
" 154 | ], 155 | "text/plain": [ 156 | "Empty DataFrame\n", 157 | "Columns: [Insulation, Temperature, Num_Occupants, Avg_Age, Home_Size, Predicted_Heatin_Oil]\n", 158 | "Index: []" 159 | ] 160 | }, 161 | "execution_count": 31, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "# Check for Missing data\n", 168 | "scoring_data.isnull().values.any()\n", 169 | "print(f'Is there any null value in scoring_data?? {scoring_data.isnull().values.any()}')\n", 170 | "scoring_data[scoring_data.isnull().any(axis = 1)]" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 32, 176 | "metadata": { 177 | "scrolled": false 178 | }, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/html": [ 183 | "
\n", 184 | "\n", 197 | "\n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | "
InsulationTemperatureHeating_OilNum_OccupantsAvg_AgeHome_Size
count1218.0000001218.0000001218.0000001218.0000001218.0000001218.000000
mean3.78571465.078818197.3940893.11330042.7064044.649425
std2.76809416.93242556.2482671.69060515.0511372.321226
min0.00000038.000000114.0000001.00000015.1000001.000000
25%1.00000049.000000148.2500002.00000029.7000003.000000
50%4.00000060.000000185.0000003.00000042.9000005.000000
75%6.00000081.000000253.0000004.00000055.6000007.000000
max8.00000090.000000301.00000010.00000072.2000008.000000
\n", 284 | "
" 285 | ], 286 | "text/plain": [ 287 | " Insulation Temperature Heating_Oil Num_Occupants Avg_Age \\\n", 288 | "count 1218.000000 1218.000000 1218.000000 1218.000000 1218.000000 \n", 289 | "mean 3.785714 65.078818 197.394089 3.113300 42.706404 \n", 290 | "std 2.768094 16.932425 56.248267 1.690605 15.051137 \n", 291 | "min 0.000000 38.000000 114.000000 1.000000 15.100000 \n", 292 | "25% 1.000000 49.000000 148.250000 2.000000 29.700000 \n", 293 | "50% 4.000000 60.000000 185.000000 3.000000 42.900000 \n", 294 | "75% 6.000000 81.000000 253.000000 4.000000 55.600000 \n", 295 | "max 8.000000 90.000000 301.000000 10.000000 72.200000 \n", 296 | "\n", 297 | " Home_Size \n", 298 | "count 1218.000000 \n", 299 | "mean 4.649425 \n", 300 | "std 2.321226 \n", 301 | "min 1.000000 \n", 302 | "25% 3.000000 \n", 303 | "50% 5.000000 \n", 304 | "75% 7.000000 \n", 305 | "max 8.000000 " 306 | ] 307 | }, 308 | "execution_count": 32, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "training_data.describe()" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 33, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/html": [ 325 | "
\n", 326 | "\n", 339 | "\n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | "
InsulationTemperatureNum_OccupantsAvg_AgeHome_SizePredicted_Heatin_Oil
count42650.00000042650.00000042650.00000042650.00000042650.00000042650.000000
mean4.01099663.9620875.48928544.0401314.495193198.285437
std2.57551115.3133512.87461216.7369012.29091137.057353
min0.00000038.0000001.00000015.0000001.00000096.666505
25%2.00000051.0000003.00000029.5000003.000000169.616597
50%4.00000064.0000005.00000044.1000004.000000198.386502
75%6.00000077.0000008.00000058.6000006.000000226.893676
max8.00000090.00000010.00000073.0000008.000000300.891633
\n", 426 | "
" 427 | ], 428 | "text/plain": [ 429 | " Insulation Temperature Num_Occupants Avg_Age Home_Size \\\n", 430 | "count 42650.000000 42650.000000 42650.000000 42650.000000 42650.000000 \n", 431 | "mean 4.010996 63.962087 5.489285 44.040131 4.495193 \n", 432 | "std 2.575511 15.313351 2.874612 16.736901 2.290911 \n", 433 | "min 0.000000 38.000000 1.000000 15.000000 1.000000 \n", 434 | "25% 2.000000 51.000000 3.000000 29.500000 3.000000 \n", 435 | "50% 4.000000 64.000000 5.000000 44.100000 4.000000 \n", 436 | "75% 6.000000 77.000000 8.000000 58.600000 6.000000 \n", 437 | "max 8.000000 90.000000 10.000000 73.000000 8.000000 \n", 438 | "\n", 439 | " Predicted_Heatin_Oil \n", 440 | "count 42650.000000 \n", 441 | "mean 198.285437 \n", 442 | "std 37.057353 \n", 443 | "min 96.666505 \n", 444 | "25% 169.616597 \n", 445 | "50% 198.386502 \n", 446 | "75% 226.893676 \n", 447 | "max 300.891633 " 448 | ] 449 | }, 450 | "execution_count": 33, 451 | "metadata": {}, 452 | "output_type": "execute_result" 453 | } 454 | ], 455 | "source": [ 456 | "scoring_data.describe()" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 35, 462 | "metadata": {}, 463 | "outputs": [ 464 | { 465 | "name": "stdout", 466 | "output_type": "stream", 467 | "text": [ 468 | "Index(['Insulation', 'Temperature', 'Num_Occupants', 'Avg_Age', 'Home_Size',\n", 469 | " 'Predicted_Heatin_Oil'],\n", 470 | " dtype='object')\n" 471 | ] 472 | } 473 | ], 474 | "source": [ 475 | "print(scoring_data.columns)" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 36, 481 | "metadata": {}, 482 | "outputs": [ 483 | { 484 | "data": { 485 | "text/plain": [ 486 | "Index(['Insulation', 'Temperature', 'Heating_Oil', 'Num_Occupants', 'Avg_Age',\n", 487 | " 'Home_Size'],\n", 488 | " dtype='object')" 489 | ] 490 | }, 491 | "execution_count": 36, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | } 495 | ], 496 | "source": [ 497 | "training_data.columns" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 17, 503 | "metadata": {}, 504 | "outputs": [], 505 | "source": [ 506 | "# we want to predict heating-oil so...\n", 507 | "x = training_data.drop(['Heating_Oil'],axis=1)\n", 508 | "y = training_data.Heating_Oil" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 37, 514 | "metadata": {}, 515 | "outputs": [ 516 | { 517 | "data": { 518 | "text/html": [ 519 | "
\n", 520 | "\n", 533 | "\n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | "
InsulationTemperatureNum_OccupantsAvg_AgeHome_Size
0474423.84
1043456.74
2781228.06
3150445.13
4880520.82
\n", 587 | "
" 588 | ], 589 | "text/plain": [ 590 | " Insulation Temperature Num_Occupants Avg_Age Home_Size\n", 591 | "0 4 74 4 23.8 4\n", 592 | "1 0 43 4 56.7 4\n", 593 | "2 7 81 2 28.0 6\n", 594 | "3 1 50 4 45.1 3\n", 595 | "4 8 80 5 20.8 2" 596 | ] 597 | }, 598 | "execution_count": 37, 599 | "metadata": {}, 600 | "output_type": "execute_result" 601 | } 602 | ], 603 | "source": [ 604 | "x.head()" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "#### Modeling \n", 612 | "class sklearn.linear_model.LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)\n", 613 | "\n", 614 | "__Parameters__:\n", 615 | "\n", 616 | "__fit_intercept__ : boolean, optional, default True\n", 617 | "\n", 618 | " whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (e.g. data is expected to be already centered).\n", 619 | "\n", 620 | "__normalize__ : boolean, optional, default False\n", 621 | "\n", 622 | " This parameter is ignored when fit_intercept is set to False. If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm. If you wish to standardize, please use sklearn.preprocessing.StandardScaler before calling fit on an estimator with normalize=False.\n", 623 | "\n", 624 | "__copy_X__ : boolean, optional, default True\n", 625 | "\n", 626 | " If True, X will be copied; else, it may be overwritten.\n", 627 | "\n", 628 | "__n_jobs__ : int, optional, default 1\n", 629 | "\n", 630 | " The number of jobs to use for the computation. If -1 all CPUs are used. This will only provide speedup for n_targets > 1 and sufficient large problems.\n", 631 | "\n", 632 | "__Attributes__:\t\n", 633 | "\n", 634 | "__coef___ : array, shape (n_features, ) or (n_targets, n_features)\n", 635 | "\n", 636 | " Estimated coefficients for the linear regression problem. 
If multiple targets are passed during the fit (y 2D), this is a 2D array of shape (n_targets, n_features), while if only one target is passed, this is a 1D array of length n_features.\n", 637 | "\n", 638 | "__intercept___ : array\n", 639 | "\n", 640 | " Independent term in the linear model.\n" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "execution_count": 38, 646 | "metadata": {}, 647 | "outputs": [], 648 | "source": [ 649 | "from sklearn.linear_model import LinearRegression\n", 650 | "from sklearn.model_selection import train_test_split" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "execution_count": 40, 656 | "metadata": {}, 657 | "outputs": [], 658 | "source": [ 659 | "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": 43, 665 | "metadata": {}, 666 | "outputs": [], 667 | "source": [ 668 | "lr = LinearRegression()\n", 669 | "lr.fit(x_train, y_train)\n", 670 | "y_train_predicted = lr.predict(x_train)\n", 671 | "y_test_predicted = lr.predict(x_test)" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 41, 677 | "metadata": {}, 678 | "outputs": [ 679 | { 680 | "data": { 681 | "text/plain": [ 682 | "array([189.51712098, 151.71171326, 170.03182213, 148.17719489,\n", 683 | " 245.69163745, 252.04296693, 194.80430361, 127.42658043,\n", 684 | " 126.92259429, 174.27547566])" 685 | ] 686 | }, 687 | "execution_count": 41, 688 | "metadata": {}, 689 | "output_type": "execute_result" 690 | } 691 | ], 692 | "source": [ 693 | "y_test_predicted[:10]" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "### Evaluation" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": 44, 706 | "metadata": {}, 707 | "outputs": [ 708 | { 709 | "name": "stdout", 710 | "output_type": "stream", 711 | "text": [ 712 | "train_mse: 564.689979963641\n", 713 | "test_mse: 588.3367577896204\n" 714 | ] 715 | } 716 | ], 717 | "source": [ 718 | "from sklearn.metrics import mean_squared_error\n", 719 | "train_mse = mean_squared_error(y_true = y_train, y_pred = y_train_predicted)\n", 720 | "test_mse = mean_squared_error(y_true = y_test, y_pred = y_test_predicted)\n", 721 | "print('train_mse:', train_mse)\n", 722 | "print('test_mse:', test_mse)" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 45, 728 | "metadata": { 729 | "scrolled": true 730 | }, 731 | "outputs": [ 732 | { 733 | "name": "stdout", 734 | "output_type": "stream", 735 | "text": [ 736 | "Insulation | -3.2884 |\n", 737 | "Temperature | -0.8544 |\n", 738 | "Num_Occupants | -0.2390 |\n", 739 | "Avg_Age | 1.9871 |\n", 740 | "Home_Size | 3.2789 |\n" 741 | ] 742 | } 743 | ], 744 | "source": [ 745 | "for col, coef in zip(x_train.columns, lr.coef_):  # the feature frame was named x (lowercase) above\n", 746 | " print(f'{col :15s}|{coef :^15.4f}|')" 747 | ] 748 | }, 749 | { 750 | "cell_type": "markdown", 751 | "metadata": {}, 752 | "source": [ 753 | "* Insulation has a negative coefficient: so if a house has thick insulation, the amount of heating oil it needs decreases\n", 754 | "* Num_Occupants: The effect of Num_Occupants is very small, so we can ignore it\n", 755 | "* Home_Size: As the home size increases, so does the heating oil required for the house\n", 756 | "* Avg_Age: Older people spend more time in the shower and mostly want to keep the house warmer than younger people..."
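To tie these coefficients back to the objective of explaining "the basic algebraic formula for calculating linear regression": a prediction is simply the intercept plus the coefficient-weighted sum of the feature values. A minimal sketch of checking that by hand, assuming the `lr` model and `x_test` split from the cells above (`numpy` is imported here only for the sketch; the notebook itself never imports it):

```python
import numpy as np

# y_hat = intercept + coef_1 * x_1 + ... + coef_n * x_n
row = x_test.iloc[0]                                   # one held-out home
manual_prediction = lr.intercept_ + np.dot(lr.coef_, row.values)

# The hand-computed value should match the fitted model's own prediction.
model_prediction = lr.predict(row.to_frame().T)[0]
assert np.isclose(manual_prediction, model_prediction)
```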
757 | ] 758 | }, 759 | { 760 | "cell_type": "markdown", 761 | "metadata": {}, 762 | "source": [ 763 | "#### Deployment" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 48, 769 | "metadata": {}, 770 | "outputs": [ 771 | { 772 | "data": { 773 | "text/plain": [ 774 | "array([247.26731977, 216.40815181, 222.73635144, ..., 150.58341369,\n", 775 | " 250.07287595, 220.48505967])" 776 | ] 777 | }, 778 | "execution_count": 48, 779 | "metadata": {}, 780 | "output_type": "execute_result" 781 | } 782 | ], 783 | "source": [ 784 | "y_predicted" 785 | ] 786 | }, 787 | { 788 | "cell_type": "code", 789 | "execution_count": 50, 790 | "metadata": {}, 791 | "outputs": [ 792 | { 793 | "data": { 794 | "text/html": [ 795 | "
\n", 796 | "\n", 809 | "\n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | "
InsulationTemperatureNum_OccupantsAvg_AgeHome_SizePredicted_Heating_Oil
05691070.17251.195384
1580166.71217.518543
2689967.87226.488073
3381952.46209.307842
4658822.97163.991065
.....................
42645363959.18244.558154
42646384143.65187.891206
42647867127.34150.304390
42648258165.73250.750871
42649385556.98222.346274
\n", 923 | "

42650 rows × 6 columns

\n", 924 | "
" 925 | ], 926 | "text/plain": [ 927 | " Insulation Temperature Num_Occupants Avg_Age Home_Size \\\n", 928 | "0 5 69 10 70.1 7 \n", 929 | "1 5 80 1 66.7 1 \n", 930 | "2 6 89 9 67.8 7 \n", 931 | "3 3 81 9 52.4 6 \n", 932 | "4 6 58 8 22.9 7 \n", 933 | "... ... ... ... ... ... \n", 934 | "42645 3 63 9 59.1 8 \n", 935 | "42646 3 84 1 43.6 5 \n", 936 | "42647 8 67 1 27.3 4 \n", 937 | "42648 2 58 1 65.7 3 \n", 938 | "42649 3 85 5 56.9 8 \n", 939 | "\n", 940 | " Predicted_Heating_Oil \n", 941 | "0 251.195384 \n", 942 | "1 217.518543 \n", 943 | "2 226.488073 \n", 944 | "3 209.307842 \n", 945 | "4 163.991065 \n", 946 | "... ... \n", 947 | "42645 244.558154 \n", 948 | "42646 187.891206 \n", 949 | "42647 150.304390 \n", 950 | "42648 250.750871 \n", 951 | "42649 222.346274 \n", 952 | "\n", 953 | "[42650 rows x 6 columns]" 954 | ] 955 | }, 956 | "execution_count": 50, 957 | "metadata": {}, 958 | "output_type": "execute_result" 959 | } 960 | ], 961 | "source": [ 962 | "y_predicted = lr.predict(scoring_data)\n", 963 | "scoring_data['Predicted_Heating_Oil'] = y_predicted\n", 964 | "scoring_data" 965 | ] 966 | }, 967 | { 968 | "cell_type": "markdown", 969 | "metadata": {}, 970 | "source": [ 971 | "How much oil we will need, how much oil we will need for each house in average???" 972 | ] 973 | }, 974 | { 975 | "cell_type": "code", 976 | "execution_count": 29, 977 | "metadata": { 978 | "scrolled": true 979 | }, 980 | "outputs": [ 981 | { 982 | "name": "stdout", 983 | "output_type": "stream", 984 | "text": [ 985 | " sum of heating oils: 8456873.9019 \n", 986 | " mean of heating oils: 198.2854 \n" 987 | ] 988 | } 989 | ], 990 | "source": [ 991 | "print(f' sum of heating oils: {scoring_data.Predicted_Heatin_Oil.sum() :^15.4f} \\n '\n", 992 | " f'mean of heating oils: {scoring_data.Predicted_Heatin_Oil.mean():^15.4f}')" 993 | ] 994 | }, 995 | { 996 | "cell_type": "code", 997 | "execution_count": null, 998 | "metadata": {}, 999 | "outputs": [], 1000 | "source": [ 1001 | "scoring_data.to_csv('results.csv', index_label=False)" 1002 | ] 1003 | }, 1004 | { 1005 | "cell_type": "markdown", 1006 | "metadata": {}, 1007 | "source": [ 1008 | "*:)*" 1009 | ] 1010 | } 1011 | ], 1012 | "metadata": { 1013 | "kernelspec": { 1014 | "display_name": "Python 3", 1015 | "language": "python", 1016 | "name": "python3" 1017 | }, 1018 | "language_info": { 1019 | "codemirror_mode": { 1020 | "name": "ipython", 1021 | "version": 3 1022 | }, 1023 | "file_extension": ".py", 1024 | "mimetype": "text/x-python", 1025 | "name": "python", 1026 | "nbconvert_exporter": "python", 1027 | "pygments_lexer": "ipython3", 1028 | "version": "3.7.4" 1029 | } 1030 | }, 1031 | "nbformat": 4, 1032 | "nbformat_minor": 2 1033 | } 1034 | -------------------------------------------------------------------------------- /Session03/linear_regression.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session03/linear_regression.pptx -------------------------------------------------------------------------------- /Session03/linear_regression_data.csv: -------------------------------------------------------------------------------- 1 | Insulation,Temperature,Heating_Oil,Num_Occupants,Avg_Age,Home_Size 2 | 4,74,132,4,23.8,4 3 | 0,43,263,4,56.7,4 4 | 7,81,145,2,28,6 5 | 1,50,196,4,45.1,3 6 | 8,80,131,5,20.8,2 7 | 5,76,129,3,21.5,3 8 | 5,72,131,4,23.5,3 9 | 4,88,161,2,38.2,6 10 | 5,77,184,3,42.5,3 11 | 0,42,225,3,51.1,1 12 | 
4,90,178,2,42.1,2 13 | 7,83,121,1,19.8,2 14 | 0,43,186,5,45.1,6 15 | 2,59,206,2,50.1,8 16 | 6,86,179,5,41.4,6 17 | 6,80,156,3,32.8,3 18 | 6,78,135,4,22.8,5 19 | 6,76,186,1,50.5,4 20 | 0,47,282,2,62,6 21 | 0,55,193,4,47.1,8 22 | 0,55,295,2,66.9,5 23 | 5,75,136,1,23,7 24 | 5,75,142,2,26.7,2 25 | 0,57,262,1,58.6,6 26 | 5,73,120,1,21.3,2 27 | 3,56,275,1,59.5,5 28 | 1,43,207,4,23,3 29 | 0,45,243,2,54.5,5 30 | 2,58,250,1,54.6,5 31 | 2,53,262,3,59.3,2 32 | 7,83,147,1,30.4,4 33 | 6,77,137,4,24.1,1 34 | 3,57,299,3,68.6,7 35 | 0,58,288,1,65.1,7 36 | 1,39,274,3,58.9,7 37 | 6,81,177,2,38.8,7 38 | 1,57,205,5,49.9,1 39 | 5,83,186,2,44.7,7 40 | 2,40,234,2,53.5,3 41 | 7,81,133,3,24.8,1 42 | 6,84,185,1,43.5,2 43 | 8,86,137,5,23.2,5 44 | 2,56,264,4,57.9,6 45 | 6,87,189,2,45.8,6 46 | 3,57,298,1,69.5,8 47 | 5,73,131,2,22.2,1 48 | 8,89,118,4,19.8,1 49 | 5,87,179,4,41.3,2 50 | 0,44,258,1,57.8,5 51 | 6,79,161,2,34.1,1 52 | 1,48,267,4,58.1,8 53 | 5,76,124,2,21.2,4 54 | 0,42,226,2,53.3,3 55 | 5,86,125,2,19.6,1 56 | 0,47,287,5,64.5,6 57 | 7,82,117,4,17.1,4 58 | 5,76,137,1,24.5,4 59 | 8,72,154,1,31.8,7 60 | 2,52,283,5,61.2,8 61 | 0,50,244,5,56.9,3 62 | 7,80,157,2,32.2,4 63 | 0,45,271,2,60.8,6 64 | 1,55,287,4,65.2,5 65 | 0,57,195,1,43.7,3 66 | 7,86,138,3,25.7,2 67 | 0,52,223,1,51.6,6 68 | 3,40,235,2,52.6,1 69 | 1,60,232,5,53.1,7 70 | 1,59,298,5,68.5,6 71 | 8,39,181,5,41.1,4 72 | 0,46,242,2,52.4,1 73 | 8,73,156,2,34.9,8 74 | 0,55,196,5,47,6 75 | 8,89,156,3,32.7,5 76 | 0,56,195,5,46.9,6 77 | 3,45,264,2,60.2,6 78 | 2,52,294,2,67.6,5 79 | 2,43,248,3,56.4,5 80 | 0,43,273,1,60.4,6 81 | 4,75,159,4,34.7,8 82 | 7,83,121,5,17.3,2 83 | 2,48,209,5,49.6,7 84 | 1,60,248,3,55.3,4 85 | 6,85,168,3,37.3,2 86 | 8,89,116,5,20.3,4 87 | 7,75,142,1,26.5,3 88 | 4,88,183,5,40.5,3 89 | 5,83,184,1,41.8,2 90 | 0,41,274,8,59.8,7 91 | 6,77,133,5,23,3 92 | 2,48,207,5,50.7,4 93 | 8,81,172,3,39.5,7 94 | 2,47,238,4,55.9,6 95 | 5,89,180,4,39.7,4 96 | 0,46,288,2,63.9,6 97 | 0,49,207,3,49.6,4 98 | 5,75,138,1,24.7,6 99 | 8,80,134,4,24.2,6 100 | 1,47,224,1,51.9,3 101 | 6,85,168,3,37,6 102 | 1,59,230,3,50.8,5 103 | 1,48,203,4,47.4,8 104 | 6,81,142,5,27.1,5 105 | 0,55,278,1,60.5,5 106 | 6,72,123,2,18.1,4 107 | 6,76,130,5,24.2,3 108 | 0,53,292,1,66.3,5 109 | 2,54,256,4,57.5,8 110 | 0,57,256,2,55.4,4 111 | 5,73,134,4,24.6,6 112 | 6,75,140,3,25.2,6 113 | 5,75,180,2,39.6,3 114 | 7,87,189,5,44,7 115 | 7,80,178,3,41,2 116 | 7,81,175,2,38.4,5 117 | 0,53,182,2,43.5,1 118 | 1,56,274,4,60,7 119 | 3,59,208,5,50.4,6 120 | 1,49,207,4,21.3,7 121 | 7,74,143,10,26.9,5 122 | 3,44,264,4,57.2,8 123 | 8,52,163,5,36.6,7 124 | 0,53,290,4,19.5,8 125 | 5,73,188,4,43.6,1 126 | 3,46,266,3,57.8,7 127 | 3,42,256,2,55.6,1 128 | 8,86,120,4,20.6,3 129 | 5,76,142,6,28.6,5 130 | 0,54,206,3,49.1,5 131 | 3,43,228,2,50.5,4 132 | 5,75,178,5,39.3,7 133 | 5,75,178,4,40.2,6 134 | 2,56,300,2,66.7,8 135 | 0,60,288,4,66.3,6 136 | 5,78,130,3,24,1 137 | 7,87,129,1,20.8,3 138 | 0,56,197,10,46.4,7 139 | 6,85,146,2,28,5 140 | 0,50,219,4,49.2,5 141 | 0,50,197,6,48.4,3 142 | 7,75,145,3,28.1,5 143 | 3,53,224,5,50.9,3 144 | 1,58,251,3,55.6,3 145 | 0,40,275,5,61,5 146 | 7,74,118,1,68.1,2 147 | 7,75,145,3,29.7,5 148 | 1,51,285,1,64.6,6 149 | 6,76,117,3,16.9,4 150 | 7,90,172,3,39.3,4 151 | 6,74,120,3,17.4,3 152 | 5,86,173,1,54.1,8 153 | 0,43,225,4,53.1,8 154 | 6,87,181,1,43,3 155 | 6,77,116,3,17.5,2 156 | 6,90,154,3,31.6,6 157 | 3,56,289,2,67.4,7 158 | 1,48,238,4,56,4 159 | 8,87,130,2,23.3,1 160 | 6,81,154,3,33.1,8 161 | 7,82,123,2,20.2,4 162 | 8,90,117,2,18.1,3 163 | 0,40,264,4,56.1,8 164 | 1,48,204,1,46.4,1 
165 | 4,43,274,4,59.9,7 166 | 6,87,156,5,31.6,5 167 | 8,46,287,4,65.3,7 168 | 0,52,225,5,52.2,8 169 | 6,86,179,3,38.3,4 170 | 0,49,253,3,56,3 171 | 5,50,220,2,70.1,2 172 | 0,40,278,1,61.5,7 173 | 4,87,125,2,24,3 174 | 0,55,298,5,66.5,6 175 | 0,53,202,1,49.2,4 176 | 6,73,148,3,29,2 177 | 7,50,245,5,54,1 178 | 8,76,143,5,27.8,4 179 | 6,87,180,3,39.7,7 180 | 6,78,148,5,72.2,4 181 | 7,78,184,8,41.6,5 182 | 3,53,224,10,54.4,3 183 | 3,51,294,2,66.7,8 184 | 6,76,115,4,16.2,3 185 | 8,79,134,1,24.6,1 186 | 0,48,279,2,64.6,6 187 | 8,82,148,4,29.8,2 188 | 8,74,155,5,32.3,1 189 | 2,46,185,1,42,1 190 | 5,75,131,2,21.8,2 191 | 5,88,152,4,31,1 192 | 3,59,291,1,66.3,8 193 | 6,86,188,1,46.6,8 194 | 0,45,270,3,58.4,7 195 | 0,57,201,3,49.6,1 196 | 0,54,204,1,48.6,2 197 | 0,38,200,1,48.2,6 198 | 2,47,287,3,66.7,5 199 | 1,40,273,2,59.3,8 200 | 2,57,289,3,64.3,6 201 | 6,83,123,5,18.1,3 202 | 0,45,272,4,59.2,6 203 | 2,41,284,5,63.4,8 204 | 0,48,279,4,61.6,5 205 | 0,40,287,1,63.3,6 206 | 0,56,280,3,62.8,7 207 | 0,47,285,1,64.7,6 208 | 0,43,189,4,44.8,7 209 | 1,39,277,1,17.5,8 210 | 1,49,204,4,48.7,6 211 | 1,58,262,1,58.5,5 212 | 7,88,186,4,42.8,7 213 | 6,78,162,1,36.4,7 214 | 7,78,123,1,21.1,3 215 | 4,87,182,4,41.6,4 216 | 5,85,159,2,36.5,8 217 | 7,76,115,5,17.4,2 218 | 6,89,131,6,21.6,2 219 | 7,74,148,3,32.3,2 220 | 1,50,196,5,45.2,5 221 | 6,83,117,4,17.5,2 222 | 2,44,248,2,55.2,7 223 | 5,90,134,4,25.1,2 224 | 7,52,164,5,37.8,2 225 | 5,76,142,4,27.2,7 226 | 7,55,248,1,57.2,7 227 | 5,76,186,1,44.7,7 228 | 0,58,275,4,59,8 229 | 7,82,147,2,27.6,6 230 | 3,56,288,3,66.9,6 231 | 0,56,294,5,64.6,6 232 | 8,80,123,4,19.2,4 233 | 5,74,129,4,22.8,2 234 | 7,82,114,1,17.1,2 235 | 7,82,175,1,39.5,4 236 | 5,76,185,1,44.1,6 237 | 7,86,170,6,37.8,4 238 | 6,78,162,2,36.5,5 239 | 5,86,176,2,62.8,4 240 | 5,90,181,5,39.1,8 241 | 1,53,290,2,16.5,8 242 | 5,84,159,4,34.6,5 243 | 0,48,185,5,43.4,5 244 | 2,43,261,2,59.2,5 245 | 0,45,244,3,44.7,8 246 | 8,81,125,1,21.6,3 247 | 1,56,259,5,53.9,1 248 | 3,59,293,3,67.5,6 249 | 8,77,164,1,34.9,1 250 | 0,48,238,1,53.9,5 251 | 4,89,172,1,36.4,1 252 | 6,78,125,4,21.9,4 253 | 8,80,134,4,24.4,2 254 | 8,82,124,3,18.2,4 255 | 4,84,140,3,26.5,6 256 | 2,44,263,4,58.7,2 257 | 7,79,129,2,21.8,1 258 | 4,75,182,3,39.6,4 259 | 1,46,230,3,53.7,8 260 | 0,42,205,1,50.1,7 261 | 1,48,266,5,59.8,5 262 | 6,77,186,3,46.1,3 263 | 7,88,187,2,43.2,3 264 | 7,82,147,2,27.8,2 265 | 1,48,267,1,58.9,7 266 | 5,89,184,5,42.7,1 267 | 0,43,208,4,52.4,2 268 | 0,55,248,1,57.3,6 269 | 7,89,155,3,34.5,2 270 | 7,74,117,4,18.4,4 271 | 2,42,203,5,49.2,2 272 | 2,45,266,3,56.7,7 273 | 7,86,169,1,37.5,1 274 | 8,76,163,3,36.6,8 275 | 3,55,285,5,62.6,8 276 | 4,84,141,2,29,8 277 | 7,90,150,1,32.1,1 278 | 2,58,206,2,49.3,3 279 | 7,86,118,2,19.8,1 280 | 5,72,170,1,38.7,5 281 | 7,77,162,3,35.5,1 282 | 4,76,132,2,21.5,4 283 | 1,42,284,5,64.4,7 284 | 1,52,259,3,69.8,7 285 | 6,84,148,5,29.1,8 286 | 8,75,137,1,26.3,6 287 | 0,50,244,3,52.8,4 288 | 6,76,136,5,24.9,5 289 | 3,51,182,4,42.5,1 290 | 4,77,171,1,39,1 291 | 6,80,156,4,34.6,2 292 | 6,84,185,3,44,6 293 | 4,73,134,4,24.4,6 294 | 1,56,257,4,57.7,5 295 | 1,43,264,1,64,4 296 | 6,52,183,3,43.4,8 297 | 4,75,141,4,26.6,7 298 | 6,78,156,5,34.6,1 299 | 5,77,152,4,34.1,5 300 | 6,80,130,3,22,3 301 | 5,84,146,2,31,4 302 | 5,74,131,3,25.2,4 303 | 5,82,158,6,34.7,4 304 | 0,51,217,2,51.1,1 305 | 2,42,224,5,50.9,3 306 | 7,85,138,4,25,3 307 | 4,74,134,1,26.2,1 308 | 1,44,270,3,61.4,6 309 | 0,48,276,3,23.9,6 310 | 0,54,214,3,51,4 311 | 7,83,114,3,18.1,3 312 | 2,58,298,2,67.3,7 313 | 6,77,182,1,41.8,2 314 | 
0,55,298,2,67.3,6 315 | 3,42,278,1,64.6,5 316 | 0,49,207,3,18,1 317 | 4,73,134,3,24.5,2 318 | 0,47,278,4,64.5,7 319 | 3,49,206,1,49.3,4 320 | 5,73,188,10,46.5,2 321 | 2,55,286,1,62.9,8 322 | 8,79,134,1,23.1,4 323 | 0,57,197,3,46.5,6 324 | 2,55,267,4,61.2,4 325 | 4,75,158,3,34.8,3 326 | 5,88,174,5,40.2,3 327 | 8,89,154,4,32.7,1 328 | 2,57,289,2,19.7,8 329 | 6,89,132,5,24.7,4 330 | 7,81,147,5,31.7,2 331 | 0,43,266,2,58.7,7 332 | 8,80,134,3,24.3,4 333 | 1,53,292,9,65.4,5 334 | 0,44,256,3,57.5,8 335 | 3,46,185,4,43,5 336 | 0,49,207,5,23.9,8 337 | 2,52,181,5,42.3,4 338 | 3,43,188,1,46.4,4 339 | 7,74,157,1,35.2,1 340 | 0,42,267,2,45.4,4 341 | 7,73,120,5,19.4,2 342 | 5,88,171,1,38.1,8 343 | 0,43,264,1,63,2 344 | 1,55,215,1,53,7 345 | 7,78,156,3,34.9,3 346 | 2,48,217,3,50.1,4 347 | 4,74,129,4,19.5,4 348 | 3,49,197,2,47.3,1 349 | 2,55,288,5,65.2,7 350 | 1,49,300,5,66.8,7 351 | 5,82,160,2,35.8,2 352 | 1,58,201,3,46.9,2 353 | 7,81,174,4,39.2,5 354 | 6,77,126,2,23.2,1 355 | 4,88,161,4,35.2,5 356 | 8,75,136,3,24.9,8 357 | 8,72,154,1,32.8,8 358 | 0,45,195,2,46.7,2 359 | 5,76,179,3,41.5,3 360 | 8,89,182,4,41.7,7 361 | 8,51,164,1,34.8,7 362 | 3,42,256,3,57.4,2 363 | 0,42,206,2,50.2,5 364 | 8,73,155,2,32.4,2 365 | 8,89,147,5,29.6,8 366 | 7,77,158,8,34.6,5 367 | 0,49,198,3,46.5,4 368 | 6,86,187,2,43.3,3 369 | 6,81,177,5,40.9,4 370 | 5,88,171,4,37,3 371 | 7,75,156,3,33.6,4 372 | 6,76,184,5,40.7,1 373 | 2,55,288,3,67,5 374 | 6,83,126,4,21.2,4 375 | 6,89,172,5,38.6,8 376 | 5,88,139,4,26.2,4 377 | 4,90,172,1,40.6,7 378 | 1,59,231,4,51.9,5 379 | 6,84,160,2,38,5 380 | 2,40,230,3,47,6 381 | 3,52,181,4,39.8,5 382 | 5,89,183,1,43.2,2 383 | 0,41,286,5,62.9,7 384 | 0,55,214,3,50.8,7 385 | 6,74,118,5,56.2,3 386 | 0,60,229,4,52.6,2 387 | 5,79,162,1,35.5,5 388 | 7,77,183,3,41.5,3 389 | 1,40,264,5,56.3,6 390 | 6,52,183,3,42.3,4 391 | 0,50,243,4,54.6,6 392 | 7,79,133,2,24.9,2 393 | 0,40,275,1,61.2,5 394 | 6,83,121,4,20.3,3 395 | 2,38,293,1,64.5,5 396 | 3,51,293,5,68.5,7 397 | 7,75,154,5,33.2,5 398 | 2,52,260,5,55.2,7 399 | 8,82,173,1,39.1,7 400 | 7,82,116,1,19.3,4 401 | 1,54,204,5,49.5,7 402 | 5,72,130,3,22.2,3 403 | 2,41,277,1,61.3,8 404 | 7,79,157,2,51.8,8 405 | 4,76,129,5,22.8,3 406 | 7,75,158,1,35.8,4 407 | 5,83,123,4,18,3 408 | 7,81,177,1,39.8,1 409 | 0,59,273,1,46.2,5 410 | 8,82,173,1,65.2,3 411 | 8,49,253,5,55.8,7 412 | 3,41,283,5,62.1,5 413 | 0,45,194,1,44.7,2 414 | 7,89,152,2,32,8 415 | 3,49,208,1,48.3,3 416 | 1,39,292,5,66.4,6 417 | 5,83,145,4,28.1,4 418 | 7,82,146,4,27.8,7 419 | 1,53,200,2,48.1,7 420 | 4,74,186,2,45,3 421 | 0,54,217,3,51.1,2 422 | 1,48,218,1,50.2,3 423 | 2,43,245,2,57,5 424 | 7,43,257,4,69.7,8 425 | 7,89,179,3,41.3,8 426 | 2,53,259,3,55.6,8 427 | 3,58,275,4,23.8,5 428 | 6,85,149,4,32.6,7 429 | 2,56,299,3,66.7,6 430 | 3,58,291,2,67.3,5 431 | 5,84,161,5,36.3,4 432 | 6,76,126,1,22.3,2 433 | 7,89,172,5,38.3,5 434 | 0,48,240,1,53.3,7 435 | 8,83,121,10,20.6,1 436 | 1,60,230,6,50.9,4 437 | 6,76,143,3,29.7,2 438 | 0,50,196,4,46.3,3 439 | 4,90,173,1,38.7,3 440 | 3,56,265,5,58.5,7 441 | 4,87,158,1,44.7,1 442 | 6,75,140,2,27.5,3 443 | 0,42,188,2,45.6,6 444 | 7,88,180,5,40.5,8 445 | 0,50,220,7,53.7,4 446 | 4,78,171,4,39.1,2 447 | 2,43,223,3,53.6,2 448 | 1,59,248,4,55.5,2 449 | 0,56,294,2,66.6,8 450 | 0,42,252,3,54.8,5 451 | 5,78,170,2,37.6,7 452 | 3,42,226,7,52.2,2 453 | 1,48,268,4,58.2,1 454 | 6,80,157,4,35,6 455 | 4,74,133,4,22.9,2 456 | 6,88,182,4,41.6,8 457 | 1,45,229,4,52.7,1 458 | 3,57,290,3,65.9,6 459 | 1,48,301,2,70,8 460 | 0,40,287,1,64.3,5 461 | 1,49,255,5,56.3,8 462 | 3,56,285,1,41.7,6 463 | 
1,49,196,4,47.2,7 464 | 1,58,288,4,63.9,6 465 | 6,84,125,3,21.1,4 466 | 5,72,169,5,38.5,8 467 | 1,59,275,4,61,5 468 | 0,59,188,1,45.6,3 469 | 5,88,131,2,21.6,3 470 | 1,59,248,4,55.4,2 471 | 2,45,183,2,42.4,1 472 | 6,76,136,3,26,5 473 | 3,39,235,1,52.7,8 474 | 8,80,123,4,48.5,2 475 | 5,85,161,1,36.4,2 476 | 6,77,186,3,43.8,2 477 | 8,79,131,3,22.1,1 478 | 6,77,126,2,20,2 479 | 7,52,163,3,34.6,2 480 | 8,73,118,1,19.8,4 481 | 1,58,200,4,49.4,3 482 | 4,83,138,1,28,1 483 | 7,83,119,5,20,4 484 | 3,58,292,2,66.3,8 485 | 0,49,255,3,55.3,4 486 | 7,80,131,1,23.1,3 487 | 0,48,267,1,60.9,2 488 | 0,55,280,5,64.9,8 489 | 3,55,253,5,57,7 490 | 2,49,196,5,48.4,2 491 | 2,56,286,2,64.9,5 492 | 8,79,159,1,35.6,6 493 | 7,80,134,2,25.4,8 494 | 6,82,147,1,28.8,4 495 | 7,82,145,1,28.6,5 496 | 0,54,248,1,54.3,5 497 | 4,73,133,2,24,4 498 | 6,78,137,2,24.5,2 499 | 6,86,150,5,31.8,4 500 | 3,46,288,2,66.1,6 501 | 2,54,223,3,53.6,5 502 | 5,73,131,4,22.8,1 503 | 8,89,150,2,30.7,1 504 | 4,75,181,3,39.1,8 505 | 6,73,151,1,31.6,2 506 | 0,39,202,2,48.1,8 507 | 7,77,159,1,37.2,2 508 | 4,77,170,5,36.6,3 509 | 5,89,139,1,26.1,1 510 | 6,84,133,4,25.8,3 511 | 5,90,154,5,34.8,2 512 | 7,74,156,1,34.9,8 513 | 7,89,189,6,45.9,7 514 | 5,89,153,4,32.5,4 515 | 5,76,141,4,26.6,6 516 | 6,48,185,3,44.2,3 517 | 5,87,130,4,21,4 518 | 8,77,165,4,38.9,1 519 | 4,88,183,4,40.2,2 520 | 8,84,119,8,16.9,3 521 | 2,45,229,2,52.6,3 522 | 2,48,220,2,51.4,6 523 | 3,54,253,3,55,6 524 | 6,78,163,3,35.7,7 525 | 1,58,297,2,65.2,7 526 | 6,73,150,5,30.1,6 527 | 7,75,120,3,60.6,4 528 | 0,55,196,3,45.9,3 529 | 5,87,123,2,19,1 530 | 6,48,185,4,44.3,6 531 | 0,45,195,3,55.3,5 532 | 3,49,196,5,45,1 533 | 7,80,154,3,33.2,5 534 | 6,84,148,5,29.2,1 535 | 6,74,142,2,66.8,4 536 | 0,59,188,4,45.6,4 537 | 6,76,184,1,40.6,8 538 | 2,51,285,2,64.9,6 539 | 7,90,172,2,38.4,3 540 | 2,51,282,2,64,7 541 | 0,42,267,4,58.9,8 542 | 0,42,206,5,48.5,4 543 | 5,84,160,1,36.1,2 544 | 1,57,258,3,58.9,6 545 | 4,76,159,4,35.3,6 546 | 6,74,159,4,37,4 547 | 5,76,160,4,37,4 548 | 6,80,155,3,33.4,8 549 | 5,89,154,2,33.1,1 550 | 3,59,208,3,66.9,1 551 | 8,76,138,4,24.9,6 552 | 7,83,146,1,27.8,7 553 | 7,80,156,5,35.9,7 554 | 4,83,141,5,29,8 555 | 3,49,207,2,23.7,1 556 | 3,58,273,2,66,6 557 | 6,84,187,6,43.2,2 558 | 0,53,201,5,48.6,5 559 | 0,57,260,2,59.2,4 560 | 6,86,178,3,39.2,4 561 | 3,55,273,3,61.6,7 562 | 8,89,150,5,29.9,1 563 | 2,59,232,1,43,2 564 | 5,90,134,4,23.1,8 565 | 6,75,118,4,18.8,2 566 | 6,88,132,3,21.2,1 567 | 1,40,277,3,21.3,7 568 | 5,84,133,1,22.9,2 569 | 2,54,223,1,51.7,8 570 | 6,84,134,1,26.3,2 571 | 0,57,263,4,57.7,6 572 | 2,42,287,3,64.5,7 573 | 1,45,194,1,47.2,7 574 | 4,75,128,5,20.3,4 575 | 7,80,129,5,20.4,4 576 | 6,84,148,5,30,2 577 | 7,83,124,2,21.2,1 578 | 0,48,208,3,50.2,1 579 | 7,89,187,1,43.2,7 580 | 4,73,189,5,46.8,6 581 | 4,75,131,5,24.1,3 582 | 0,58,200,3,42.4,3 583 | 1,40,288,1,65,7 584 | 1,55,236,3,55.8,3 585 | 7,81,125,2,18.7,1 586 | 7,89,180,3,42.5,7 587 | 0,56,193,10,46.1,7 588 | 2,50,254,1,57,8 589 | 1,47,225,5,51.2,8 590 | 8,82,148,4,29.2,3 591 | 6,72,131,2,23.2,4 592 | 7,80,156,1,32.6,8 593 | 8,88,131,1,22.9,2 594 | 6,89,174,3,37.4,7 595 | 2,51,293,3,66.4,5 596 | 0,52,222,3,51.4,4 597 | 3,51,293,3,67.4,7 598 | 1,47,283,3,63.3,7 599 | 1,40,229,3,54.5,4 600 | 5,77,150,3,32,1 601 | 3,56,280,4,62.8,6 602 | 0,42,209,4,51.7,6 603 | 6,90,133,5,26,2 604 | 2,47,240,1,65.2,2 605 | 5,89,131,1,23.1,2 606 | 5,87,125,4,17.4,3 607 | 3,43,204,3,48.5,3 608 | 4,55,295,2,65.8,6 609 | 0,59,190,1,45,5 610 | 2,53,225,4,51.2,1 611 | 6,86,189,2,45.9,8 612 | 7,81,177,4,38,8 613 | 
5,89,178,5,41,1 614 | 1,40,288,5,63.9,7 615 | 6,74,159,5,35.4,7 616 | 2,55,291,3,66,6 617 | 0,54,237,4,54.9,6 618 | 0,49,209,3,49.6,3 619 | 5,90,180,2,40.6,2 620 | 0,40,270,5,59.3,5 621 | 7,77,118,2,18.6,4 622 | 0,42,228,1,50.5,1 623 | 5,88,184,1,43.9,8 624 | 6,77,124,5,21.5,3 625 | 7,80,129,5,21.8,2 626 | 6,81,146,2,30.1,5 627 | 6,83,115,5,45.7,3 628 | 7,88,154,2,31.6,6 629 | 3,48,206,4,49.4,6 630 | 7,76,153,2,31.2,1 631 | 4,56,279,2,63.6,7 632 | 4,83,141,1,28.7,8 633 | 0,58,199,1,46.9,4 634 | 6,85,173,5,38.8,2 635 | 0,58,287,3,63.4,8 636 | 3,51,295,1,64.8,6 637 | 0,47,190,4,43,7 638 | 8,81,178,1,42.3,7 639 | 1,40,236,3,52.8,2 640 | 7,81,133,4,22.9,2 641 | 1,56,276,2,19.2,7 642 | 2,41,278,3,63.5,7 643 | 8,80,124,2,19.4,1 644 | 1,44,270,1,61.4,1 645 | 0,58,206,1,49.3,6 646 | 6,72,167,1,38,4 647 | 5,89,170,3,36.8,1 648 | 8,82,123,5,22,1 649 | 1,47,226,5,50.8,4 650 | 0,40,277,3,64.4,8 651 | 3,43,262,1,56.6,3 652 | 2,50,195,1,45.9,4 653 | 6,76,145,5,28.3,3 654 | 1,55,216,3,51,8 655 | 2,44,264,2,57.3,7 656 | 0,60,288,1,65.9,6 657 | 0,44,259,5,59.1,7 658 | 3,58,273,4,59.6,6 659 | 0,45,271,3,57.7,6 660 | 5,72,170,5,49,6 661 | 1,39,231,9,53,2 662 | 7,86,137,1,27.1,8 663 | 3,56,288,2,64.9,5 664 | 0,39,264,3,59.3,8 665 | 6,43,255,4,57.3,2 666 | 5,85,173,3,39.8,1 667 | 8,81,124,1,19.9,2 668 | 0,53,184,1,42.7,1 669 | 7,88,129,3,21.4,1 670 | 2,51,285,5,62.7,8 671 | 1,56,256,3,57.3,5 672 | 6,86,150,8,31.9,3 673 | 7,73,117,10,19.5,1 674 | 7,80,130,2,22.4,4 675 | 7,76,137,3,27.7,4 676 | 5,75,181,4,38.8,1 677 | 4,87,160,3,35,2 678 | 5,83,148,1,28.8,8 679 | 5,88,152,1,31.8,7 680 | 1,49,300,1,66.9,7 681 | 0,55,273,4,60.4,7 682 | 4,89,142,4,28.4,3 683 | 8,88,151,4,31.7,7 684 | 3,43,225,1,53.2,6 685 | 5,86,126,2,21.3,4 686 | 6,75,142,4,28.4,8 687 | 0,59,199,2,49,8 688 | 7,81,145,4,27.4,3 689 | 5,48,256,2,58.5,8 690 | 2,42,277,5,17.8,8 691 | 1,53,206,2,50.2,1 692 | 7,88,152,2,31.9,5 693 | 7,90,173,5,37.8,1 694 | 1,57,252,2,55.7,1 695 | 7,74,157,2,33.4,3 696 | 0,43,266,1,60.5,1 697 | 2,43,254,2,56.2,5 698 | 5,86,182,4,41.8,1 699 | 8,47,279,5,63.6,6 700 | 2,41,284,5,65.3,7 701 | 6,77,133,2,23,2 702 | 6,78,158,4,35.5,8 703 | 7,83,116,4,22.3,1 704 | 8,76,154,3,32.6,8 705 | 0,54,292,1,65.3,8 706 | 6,81,142,3,27.2,1 707 | 7,76,120,2,18.4,1 708 | 0,55,277,1,19.1,6 709 | 5,84,134,3,24.4,7 710 | 2,55,288,2,64.1,6 711 | 5,76,141,2,25.9,6 712 | 0,59,232,4,52.3,1 713 | 1,55,275,2,23.2,6 714 | 3,42,277,2,68.6,6 715 | 2,54,253,4,56.9,2 716 | 5,85,149,2,29.5,3 717 | 0,44,259,9,56.9,6 718 | 2,54,255,4,57.2,1 719 | 8,75,154,4,33,4 720 | 7,81,172,1,39.2,5 721 | 5,77,151,2,31.5,6 722 | 1,55,238,5,55.9,5 723 | 6,77,130,7,22,1 724 | 3,42,278,4,64.5,5 725 | 2,43,263,10,56.8,1 726 | 1,55,286,1,65.1,8 727 | 0,39,289,4,67.4,7 728 | 2,48,300,4,68.8,6 729 | 0,40,278,1,41.5,7 730 | 0,42,208,5,50.3,6 731 | 6,88,131,2,21,2 732 | 2,39,295,1,68,7 733 | 7,72,155,4,34.4,7 734 | 3,42,224,1,50.9,3 735 | 0,48,267,3,60.1,2 736 | 6,80,145,3,27.6,7 737 | 1,58,287,4,63.7,8 738 | 5,76,179,6,41.3,3 739 | 8,76,154,5,31.9,6 740 | 7,82,173,5,40.1,5 741 | 2,46,243,8,55.6,4 742 | 6,77,118,3,17.9,2 743 | 3,59,208,3,51.4,1 744 | 7,78,151,4,31.4,4 745 | 2,46,266,4,60.7,1 746 | 0,51,220,5,52.3,1 747 | 8,90,154,2,32.7,7 748 | 8,76,162,4,37.5,7 749 | 6,90,153,2,32.5,6 750 | 2,52,283,1,64.3,6 751 | 8,88,149,2,31.6,7 752 | 0,47,188,3,43.7,5 753 | 8,80,159,1,35.9,8 754 | 0,39,183,1,42.3,8 755 | 0,40,287,2,62.9,5 756 | 8,89,115,3,24.8,1 757 | 7,38,201,2,48.9,4 758 | 7,76,164,1,36.8,2 759 | 7,89,171,2,39.2,7 760 | 2,58,233,1,52.5,4 761 | 1,50,256,5,55.4,1 762 | 
5,73,123,5,20.9,3 763 | 0,38,184,3,44.8,3 764 | 0,48,204,3,48.7,3 765 | 5,76,183,2,41.3,7 766 | 6,59,289,1,64.3,7 767 | 5,76,142,2,29.3,4 768 | 3,56,290,1,66.9,5 769 | 0,39,271,1,57.9,5 770 | 7,87,130,2,20,1 771 | 0,40,271,2,59.5,5 772 | 5,88,141,1,25.7,7 773 | 6,76,132,1,23.8,3 774 | 6,79,129,3,21.7,1 775 | 0,48,203,2,46.2,4 776 | 4,87,159,6,37.2,6 777 | 2,58,273,3,61.3,7 778 | 7,77,123,5,42.8,4 779 | 1,58,253,4,55,8 780 | 1,53,201,2,50,8 781 | 1,59,202,3,48,6 782 | 5,77,150,2,32,7 783 | 3,55,289,3,16.1,6 784 | 7,84,123,1,66.7,1 785 | 1,49,254,4,58.2,4 786 | 1,60,188,3,43.6,4 787 | 2,58,234,5,55.6,4 788 | 1,57,263,4,58.9,7 789 | 7,81,175,2,39.6,6 790 | 7,82,147,1,29.6,6 791 | 0,39,276,6,17.9,8 792 | 2,53,226,2,51.3,5 793 | 7,80,156,1,31.9,3 794 | 1,41,284,5,64.4,8 795 | 2,56,275,2,62.3,8 796 | 1,42,207,4,17.7,4 797 | 0,39,262,1,55.4,4 798 | 5,73,131,5,22.1,3 799 | 0,48,183,5,42.9,5 800 | 0,40,277,4,62.4,5 801 | 8,86,119,1,19.9,1 802 | 8,84,120,4,18.2,1 803 | 2,40,231,1,52,3 804 | 2,45,227,2,52.4,4 805 | 1,46,224,3,52.9,1 806 | 7,90,150,5,32,6 807 | 0,43,265,5,57.4,7 808 | 1,39,236,1,54.7,8 809 | 8,57,198,1,45.7,1 810 | 2,56,297,3,53.2,6 811 | 2,57,291,4,67.2,7 812 | 5,83,136,1,25.9,7 813 | 7,90,173,5,37.6,1 814 | 1,39,262,4,58.6,8 815 | 6,78,159,2,35.7,8 816 | 0,57,200,2,60.8,8 817 | 0,46,289,3,63.4,8 818 | 7,90,150,1,31.7,7 819 | 3,48,188,2,46.5,7 820 | 6,78,186,3,42.1,3 821 | 7,85,170,3,38.8,6 822 | 4,83,140,3,27.4,3 823 | 8,83,148,4,32.3,7 824 | 2,39,236,3,54.8,8 825 | 7,84,117,5,20.2,1 826 | 5,87,183,2,41,5 827 | 6,77,181,5,42.1,2 828 | 0,50,198,3,47.9,5 829 | 0,55,247,1,54.1,6 830 | 3,40,235,5,51.7,7 831 | 7,74,144,2,26,2 832 | 5,72,122,2,21.9,1 833 | 1,55,284,4,64.5,8 834 | 5,77,136,2,25.8,8 835 | 6,73,167,4,37,7 836 | 0,57,296,1,69.1,5 837 | 7,80,127,4,23.3,4 838 | 6,77,131,3,24.2,3 839 | 7,86,187,5,44.2,4 840 | 5,84,125,5,20.4,1 841 | 2,56,284,3,63.4,5 842 | 7,83,116,5,19.6,3 843 | 7,79,150,3,32.7,6 844 | 0,57,196,1,45.9,5 845 | 3,58,294,3,67.6,5 846 | 2,43,253,2,56.9,2 847 | 6,74,151,3,30.7,4 848 | 5,78,161,1,36.1,8 849 | 0,51,219,3,51.2,6 850 | 7,89,174,1,38.4,7 851 | 0,48,188,1,43.7,7 852 | 8,55,193,5,45,8 853 | 5,77,170,4,38.8,4 854 | 7,86,137,1,25.5,2 855 | 0,41,287,1,63.7,7 856 | 1,58,234,5,55.6,5 857 | 3,45,265,1,60.4,5 858 | 6,75,142,2,27.3,2 859 | 1,47,226,4,53.3,7 860 | 5,83,160,2,36.8,8 861 | 7,88,129,5,21.9,1 862 | 8,89,155,1,32.4,6 863 | 7,81,145,2,27.6,3 864 | 2,39,233,5,52.4,1 865 | 8,77,165,4,38.9,8 866 | 7,48,276,4,45.2,6 867 | 6,76,186,1,43,7 868 | 1,45,230,2,54.7,1 869 | 6,89,151,3,30.5,3 870 | 0,47,187,2,45.2,1 871 | 5,85,176,1,38.6,8 872 | 3,57,299,5,66.7,8 873 | 8,81,123,2,19.2,2 874 | 7,87,157,5,34.3,8 875 | 1,58,251,7,54.7,4 876 | 7,80,155,5,35.3,4 877 | 1,57,262,3,56.3,2 878 | 1,59,274,8,60.7,7 879 | 5,89,174,4,39.3,6 880 | 1,55,287,2,63.4,8 881 | 0,57,194,3,44.3,7 882 | 8,81,125,4,17,4 883 | 2,42,284,2,62.4,7 884 | 4,50,242,4,54.4,8 885 | 7,73,119,3,18.9,2 886 | 3,42,253,5,56,8 887 | 0,49,201,5,48.5,5 888 | 0,47,240,4,55.2,5 889 | 0,56,296,4,66.1,7 890 | 2,56,288,1,66.2,5 891 | 7,74,143,2,25.9,5 892 | 5,89,174,3,41.2,8 893 | 7,82,147,4,30.6,2 894 | 1,40,235,1,54.7,7 895 | 0,56,297,2,67.1,8 896 | 0,39,202,2,48,1 897 | 5,76,141,3,24.7,8 898 | 0,38,182,3,65.7,1 899 | 4,48,254,5,55.2,1 900 | 0,57,288,4,65,7 901 | 0,54,246,3,56.1,1 902 | 5,77,126,5,21.1,2 903 | 8,77,162,1,36.4,2 904 | 4,88,151,3,56.1,2 905 | 7,75,137,2,24.4,6 906 | 7,81,142,5,26.6,2 907 | 3,57,199,2,47.1,4 908 | 8,75,144,2,26,2 909 | 2,42,202,3,49.1,7 910 | 5,84,146,4,31.2,7 911 | 
0,48,282,1,63.9,8 912 | 6,80,155,5,32.4,8 913 | 6,88,187,1,42.4,5 914 | 6,88,181,2,41.3,8 915 | 0,43,188,5,44.7,4 916 | 6,81,177,3,37.8,7 917 | 6,88,174,1,41.3,6 918 | 6,74,145,1,30,7 919 | 7,81,177,1,39.8,3 920 | 1,60,188,2,43.7,4 921 | 4,87,183,3,40.9,7 922 | 7,76,161,3,34.2,8 923 | 0,40,274,5,58.7,7 924 | 2,49,194,2,44.6,5 925 | 0,42,271,3,58.9,6 926 | 7,78,184,5,42.7,6 927 | 4,75,159,3,34.9,7 928 | 7,83,114,1,15.1,2 929 | 0,47,239,5,56,7 930 | 4,76,129,4,21.7,1 931 | 7,56,193,1,46.1,5 932 | 1,41,285,3,63.9,7 933 | 0,39,200,3,48.1,3 934 | 7,82,145,8,26.1,7 935 | 6,86,173,1,38.9,7 936 | 8,88,179,1,42.3,4 937 | 7,79,151,1,31.6,8 938 | 8,83,117,2,18.2,3 939 | 0,48,239,2,54,6 940 | 2,53,259,1,56.1,1 941 | 4,89,172,1,38.2,3 942 | 0,39,264,1,57.1,5 943 | 0,54,203,5,49.3,1 944 | 8,75,138,2,24.8,5 945 | 0,38,183,3,42.2,7 946 | 0,42,207,7,49.6,6 947 | 3,59,276,4,24.3,8 948 | 2,44,246,2,56.1,5 949 | 2,47,220,2,52.3,6 950 | 0,60,290,1,24.8,8 951 | 0,57,287,1,64.4,8 952 | 5,88,153,8,34.3,1 953 | 4,73,132,5,25.5,3 954 | 6,80,130,2,23.3,3 955 | 5,89,152,5,29.9,6 956 | 2,49,194,3,46.6,1 957 | 0,60,187,1,44.3,3 958 | 7,83,116,4,15.8,4 959 | 5,89,185,5,44.4,5 960 | 5,77,132,2,24.7,4 961 | 3,43,225,4,52.2,8 962 | 7,89,186,7,41.7,7 963 | 1,40,275,5,62.5,5 964 | 2,53,261,2,56.2,6 965 | 4,87,183,5,43.2,4 966 | 0,45,243,1,52.6,7 967 | 7,80,159,5,34.4,1 968 | 7,77,132,2,21.3,4 969 | 0,59,288,4,64.1,5 970 | 5,42,272,2,59.1,8 971 | 6,86,149,1,30.5,7 972 | 7,73,156,3,32.6,8 973 | 8,90,173,5,39.1,3 974 | 6,78,150,8,32,8 975 | 0,55,237,3,53.9,1 976 | 4,76,182,5,40.4,8 977 | 1,42,265,3,59.5,2 978 | 4,76,141,1,26.7,5 979 | 0,44,259,1,57.9,8 980 | 4,57,297,2,68.1,6 981 | 7,87,180,5,39.7,4 982 | 2,58,207,4,43.4,4 983 | 0,53,183,2,43.5,5 984 | 0,59,201,4,46,1 985 | 3,56,275,9,61.4,8 986 | 4,73,134,4,23.1,2 987 | 3,44,245,4,53.9,4 988 | 0,43,271,2,58.7,8 989 | 7,82,144,1,30,1 990 | 1,50,254,1,57.2,3 991 | 8,86,119,1,18.1,3 992 | 5,88,131,3,21.7,4 993 | 1,47,285,4,64.6,8 994 | 5,88,142,4,26.7,8 995 | 6,76,117,2,16.1,4 996 | 2,58,252,3,56.8,7 997 | 6,76,152,4,33.1,7 998 | 5,74,187,2,43.4,6 999 | 1,43,269,2,59.3,7 1000 | 0,49,184,4,40.7,4 1001 | 1,47,240,1,54.3,8 1002 | 2,58,294,3,66.6,7 1003 | 0,50,244,4,55.9,6 1004 | 6,76,182,2,49.8,8 1005 | 6,74,119,3,18.1,2 1006 | 2,40,234,2,52.6,7 1007 | 6,79,155,1,32.4,8 1008 | 0,55,247,5,56.1,7 1009 | 1,59,297,2,66.3,8 1010 | 0,42,253,1,57.9,2 1011 | 5,86,177,1,37.9,4 1012 | 7,86,118,5,71,4 1013 | 7,74,116,4,16.8,1 1014 | 0,57,205,5,49,1 1015 | 2,39,229,3,52.6,7 1016 | 7,77,116,5,19.2,3 1017 | 4,87,125,5,21.9,3 1018 | 7,73,117,3,19.5,3 1019 | 2,40,235,3,53.7,2 1020 | 7,78,150,5,31.2,8 1021 | 0,57,258,1,58.8,8 1022 | 3,40,284,3,62.4,7 1023 | 5,76,159,6,35.9,8 1024 | 0,43,186,3,44.2,3 1025 | 8,72,153,1,32.5,8 1026 | 7,81,122,2,19.8,4 1027 | 5,83,126,2,22.3,1 1028 | 2,50,254,2,57.1,1 1029 | 0,42,254,1,56.2,6 1030 | 5,73,131,3,23,1 1031 | 2,54,255,4,56.3,6 1032 | 0,54,298,3,65.4,6 1033 | 4,90,180,5,40.6,4 1034 | 5,88,142,4,27.6,6 1035 | 7,82,146,5,27,3 1036 | 7,74,157,1,34.1,5 1037 | 7,76,120,2,19.3,3 1038 | 5,76,142,4,26,7 1039 | 8,75,142,8,28.2,5 1040 | 1,59,250,2,54.5,5 1041 | 5,87,177,2,40,8 1042 | 2,47,220,4,51.4,8 1043 | 5,85,150,3,33.2,6 1044 | 2,45,186,3,46,1 1045 | 6,76,185,3,42.3,5 1046 | 6,83,146,7,29,5 1047 | 1,55,274,2,59.8,8 1048 | 3,56,273,4,61.4,6 1049 | 1,41,276,1,18.3,6 1050 | 6,76,125,2,18.9,3 1051 | 4,75,178,3,39.1,8 1052 | 0,41,207,4,21.2,6 1053 | 7,81,177,3,40.9,6 1054 | 7,52,163,5,36.6,2 1055 | 2,56,264,1,57.2,2 1056 | 0,40,274,1,63,5 1057 | 7,73,148,3,31.1,2 1058 
| 7,52,163,5,36.6,3 1059 | 4,74,130,5,22.2,3 1060 | 1,45,194,4,44.3,1 1061 | 6,86,167,5,37,3 1062 | 2,49,299,4,67.7,8 1063 | 0,39,271,1,59.8,8 1064 | 6,47,187,2,45.4,8 1065 | 6,78,124,3,20.2,2 1066 | 6,43,207,5,20.6,7 1067 | 1,50,195,2,43.7,2 1068 | 1,46,226,1,52.3,5 1069 | 6,54,296,1,68,7 1070 | 5,88,160,5,34.9,4 1071 | 6,87,155,3,31.4,7 1072 | 1,54,201,1,46.8,8 1073 | 8,49,185,1,48.5,8 1074 | 0,54,236,5,53.8,3 1075 | 8,81,133,5,23.9,4 1076 | 2,39,231,3,52.9,5 1077 | 8,88,149,4,29.4,4 1078 | 6,83,159,3,33.9,5 1079 | 8,89,174,1,39.4,3 1080 | 5,72,131,4,23.2,4 1081 | 1,41,286,3,64.1,5 1082 | 1,42,264,5,45.6,6 1083 | 8,89,149,1,30.6,1 1084 | 6,81,146,5,29.7,1 1085 | 1,59,250,5,56.6,1 1086 | 8,73,155,3,35.5,1 1087 | 2,43,201,2,47,3 1088 | 1,40,271,2,58,5 1089 | 0,52,224,3,49.9,1 1090 | 5,88,132,4,23.3,4 1091 | 2,46,227,3,52.3,2 1092 | 0,52,185,4,43.2,6 1093 | 7,89,181,3,41.3,6 1094 | 3,46,185,4,44.6,2 1095 | 2,51,179,5,39.4,8 1096 | 5,89,153,1,70.8,5 1097 | 0,38,183,5,41.9,3 1098 | 2,56,287,2,58.6,7 1099 | 7,81,146,4,30.9,8 1100 | 3,42,256,1,58.8,1 1101 | 5,78,171,7,38.1,5 1102 | 6,89,173,1,40.1,4 1103 | 1,41,285,1,63.5,8 1104 | 6,78,132,4,22.8,3 1105 | 2,43,203,8,48.3,4 1106 | 2,45,183,3,42.9,4 1107 | 0,60,248,9,55.4,4 1108 | 7,42,254,5,56.2,3 1109 | 1,58,299,4,67.7,8 1110 | 6,84,146,4,28.2,2 1111 | 5,75,144,3,27,1 1112 | 0,55,236,5,51.8,4 1113 | 5,89,173,5,37.8,8 1114 | 3,56,275,4,61.3,6 1115 | 2,58,232,4,52,4 1116 | 0,50,198,2,47.5,2 1117 | 2,41,283,1,62.2,6 1118 | 0,60,289,5,23.2,7 1119 | 8,78,162,1,38.4,1 1120 | 0,42,225,3,51.2,3 1121 | 7,83,146,2,27.8,6 1122 | 3,42,201,1,47.8,1 1123 | 7,86,136,4,23.9,2 1124 | 0,54,200,2,46.5,4 1125 | 7,77,123,3,18.2,4 1126 | 7,75,154,1,32.2,4 1127 | 1,55,214,1,52.8,6 1128 | 0,41,208,4,48.5,8 1129 | 7,82,147,1,28.5,3 1130 | 7,88,156,3,34.5,8 1131 | 0,57,204,1,48.4,7 1132 | 1,40,290,4,23.3,8 1133 | 6,79,130,4,21.3,3 1134 | 0,40,276,10,21.3,8 1135 | 8,84,119,5,21,2 1136 | 5,75,183,3,44.7,5 1137 | 4,73,189,2,46,5 1138 | 8,80,158,1,34.6,7 1139 | 1,59,274,4,59.8,6 1140 | 2,59,299,3,67.7,8 1141 | 7,83,148,2,30.3,2 1142 | 0,51,198,1,47.6,2 1143 | 0,57,204,2,48.5,6 1144 | 0,42,272,2,59,7 1145 | 3,52,181,4,41,7 1146 | 6,77,135,6,23.7,8 1147 | 0,40,275,3,24.6,5 1148 | 7,87,156,2,33.6,8 1149 | 6,75,158,4,34.9,5 1150 | 7,88,181,5,40,7 1151 | 3,46,266,1,57.6,6 1152 | 7,89,117,4,16.3,4 1153 | 0,52,223,3,53.5,3 1154 | 6,83,122,5,16.6,3 1155 | 7,80,127,4,22.3,1 1156 | 4,52,223,2,50.6,1 1157 | 7,76,164,5,37.8,3 1158 | 0,45,241,5,56.3,8 1159 | 1,41,287,5,65.8,6 1160 | 1,38,293,3,66.6,5 1161 | 8,73,119,1,17.1,3 1162 | 0,50,196,4,44.3,3 1163 | 2,44,246,4,56.1,5 1164 | 6,79,128,4,65.5,3 1165 | 4,47,241,5,56.3,6 1166 | 5,75,184,1,43.6,8 1167 | 5,83,157,6,34.3,1 1168 | 5,76,138,3,26.1,3 1169 | 2,55,267,5,57.9,5 1170 | 8,42,253,2,55,5 1171 | 1,38,295,3,67.9,5 1172 | 0,57,201,3,48.7,8 1173 | 2,48,237,4,53.8,3 1174 | 2,48,240,3,54.2,7 1175 | 6,73,121,4,18.9,4 1176 | 6,85,147,4,31.2,1 1177 | 5,84,146,3,30.9,6 1178 | 7,74,142,3,26.7,5 1179 | 6,73,122,1,19.8,4 1180 | 6,84,121,5,24.7,4 1181 | 5,84,136,5,68,8 1182 | 1,48,220,3,50.2,5 1183 | 0,55,276,3,21,7 1184 | 1,51,197,3,47.5,1 1185 | 4,86,182,3,40.7,2 1186 | 1,58,205,2,48.1,5 1187 | 6,77,151,2,31.6,7 1188 | 2,58,232,5,52.3,1 1189 | 6,76,142,1,25,3 1190 | 8,86,118,2,18.7,4 1191 | 1,42,267,2,41.4,8 1192 | 1,40,275,3,61.1,5 1193 | 1,38,293,4,67.5,6 1194 | 0,58,273,1,59.4,5 1195 | 1,54,206,3,49.4,3 1196 | 1,47,283,3,63.1,8 1197 | 0,45,195,2,45.8,5 1198 | 1,40,236,3,53.8,7 1199 | 8,90,118,3,16.7,4 1200 | 7,78,165,4,37.9,2 1201 | 
5,77,131,3,24.2,4 1202 | 6,84,186,2,42.8,5 1203 | 3,40,283,3,63.1,8 1204 | 6,78,132,4,22.6,2 1205 | 6,77,132,3,23.8,1 1206 | 1,48,301,1,68,5 1207 | 1,42,285,2,64.6,7 1208 | 5,82,157,5,41.7,2 1209 | 7,39,202,7,48.1,8 1210 | 6,82,145,1,27.3,4 1211 | 0,48,254,4,58.2,2 1212 | 3,51,180,5,41.6,3 1213 | 5,72,168,3,35.1,7 1214 | 1,54,291,5,66.9,6 1215 | 3,56,264,5,58.2,5 1216 | 5,78,129,1,22.5,1 1217 | 5,77,138,3,26.8,1 1218 | 7,89,156,2,34,2 1219 | 3,59,273,1,61.4,5 1220 | -------------------------------------------------------------------------------- /Session04/LogisticRegression/02_logestic_reg_pic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/LogisticRegression/02_logestic_reg_pic.PNG -------------------------------------------------------------------------------- /Session04/LogisticRegression/data_scoring.csv: -------------------------------------------------------------------------------- 1 | Age,Marital_Status,Gender,Weight_Category,Cholesterol,Stress_Management,Trait_Anxiety 2 | 61,0,1,1,139,1,50 3 | 55,2,1,2,163,0,40 4 | 53,1,1,1,172,0,55 5 | 58,1,1,2,206,0,70 6 | 62,2,1,1,148,1,50 7 | 70,1,0,0,172,0,60 8 | 52,1,0,0,171,1,35 9 | 50,1,1,1,172,0,55 10 | 67,2,1,1,172,0,60 11 | 62,1,1,1,166,1,50 12 | 66,2,1,2,220,0,60 13 | 56,2,1,0,141,0,45 14 | 77,2,1,2,181,1,80 15 | 64,2,1,1,174,0,60 16 | 67,2,1,1,146,1,50 17 | 62,3,1,1,171,0,55 18 | 68,2,1,1,239,0,60 19 | 48,1,0,0,175,0,60 20 | 67,1,0,0,168,0,60 21 | 69,2,1,1,236,0,60 22 | 51,1,0,0,190,1,65 23 | 73,3,0,1,236,0,65 24 | 59,0,0,1,140,1,50 25 | 74,3,0,1,234,0,65 26 | 51,1,0,0,191,1,65 27 | 65,2,1,1,173,0,60 28 | 68,2,1,1,177,0,60 29 | 66,2,1,1,169,0,60 30 | 70,2,1,1,239,0,60 31 | 63,0,1,1,142,1,50 32 | 81,3,1,1,224,0,80 33 | 63,0,1,1,137,1,50 34 | 63,0,1,1,138,1,50 35 | 58,2,0,0,140,0,45 36 | 55,1,0,0,177,0,60 37 | 58,1,1,2,205,0,70 38 | 71,3,0,1,237,0,65 39 | 56,1,0,0,174,0,35 40 | 77,1,0,2,180,0,75 41 | 53,3,0,0,126,1,45 42 | 52,1,0,0,187,1,65 43 | 50,1,1,0,171,1,40 44 | 59,2,1,0,144,0,45 45 | 68,2,1,1,172,0,60 46 | 50,1,1,0,171,1,40 47 | 57,1,1,1,173,0,50 48 | 52,1,1,0,173,1,40 49 | 69,2,1,1,148,1,50 50 | 69,2,1,1,171,0,60 51 | 49,1,0,0,187,1,65 52 | 50,1,1,0,177,1,35 53 | 52,1,0,0,179,1,35 54 | 52,1,1,0,173,1,35 55 | 74,1,0,2,180,0,75 56 | 76,2,1,2,183,1,80 57 | 76,2,1,2,178,1,80 58 | 68,1,0,0,126,1,45 59 | 63,3,1,1,172,0,55 60 | 60,1,1,2,199,0,70 61 | 50,1,0,0,178,1,35 62 | 60,2,0,1,150,1,50 63 | 58,1,1,1,171,0,50 64 | 61,2,1,2,239,1,80 65 | 74,2,1,2,178,1,80 66 | 64,0,1,1,138,1,50 67 | 53,2,1,2,158,0,40 68 | 74,1,0,2,183,1,75 69 | 51,1,0,0,174,1,35 70 | 75,1,0,2,180,0,75 71 | 60,1,1,1,168,1,50 72 | 60,3,0,0,185,1,65 73 | 59,2,1,0,145,0,45 74 | 55,2,1,2,159,0,40 75 | 60,2,0,1,153,1,50 76 | 50,1,0,0,176,1,35 77 | 64,2,1,2,220,0,60 78 | 70,1,0,0,169,0,60 79 | 56,1,0,0,172,1,35 80 | 65,3,0,0,126,1,45 81 | 64,3,1,1,176,0,55 82 | 64,2,1,2,167,0,40 83 | 56,2,1,0,142,0,45 84 | 59,2,1,0,146,1,45 85 | 67,3,1,1,173,0,55 86 | 57,2,1,0,145,0,45 87 | 74,1,0,2,184,0,75 88 | 70,3,0,1,233,0,65 89 | 62,2,0,1,148,1,50 90 | 73,2,1,1,236,0,60 91 | 62,2,1,2,218,0,60 92 | 66,2,1,1,173,0,60 93 | 60,2,1,0,142,0,45 94 | 50,1,0,0,185,1,65 95 | 69,2,1,1,149,1,50 96 | 70,3,0,0,188,1,65 97 | 54,1,1,0,176,1,40 98 | 72,3,0,0,187,1,65 99 | 56,3,0,0,122,1,45 100 | 62,1,1,1,167,1,50 101 | 62,2,0,1,152,1,50 102 | 63,0,1,1,138,1,50 103 | 57,1,1,2,201,0,70 104 | 66,0,1,1,140,1,50 105 | 63,3,1,1,175,0,55 106 | 
63,2,1,2,164,0,40 107 | 81,3,1,1,227,0,80 108 | 63,1,1,1,167,1,50 109 | 60,1,1,2,203,0,70 110 | 67,3,0,0,122,1,45 111 | 55,1,0,0,169,1,35 112 | 68,3,1,1,178,0,55 113 | 69,2,1,2,163,0,40 114 | 55,1,0,0,174,1,35 115 | 58,1,1,2,200,0,70 116 | 62,1,1,1,165,1,50 117 | 66,2,1,2,218,0,60 118 | 71,3,0,1,232,0,65 119 | 53,3,0,0,122,1,45 120 | 68,2,1,1,173,0,60 121 | 70,3,0,0,187,1,65 122 | 61,0,1,1,139,1,50 123 | 48,1,1,0,172,1,40 124 | 60,1,1,1,170,0,50 125 | 71,2,1,1,170,0,60 126 | 72,2,1,1,235,0,60 127 | 49,1,1,0,175,1,40 128 | 69,2,0,1,172,0,60 129 | 73,2,1,2,162,0,40 130 | 81,3,1,1,229,0,80 131 | 66,1,1,0,170,0,60 132 | 50,1,0,0,186,1,65 133 | 48,1,0,0,192,1,65 134 | 54,1,0,0,168,1,35 135 | 65,2,1,2,221,0,60 136 | 58,2,0,0,139,0,45 137 | 63,2,0,0,124,0,45 138 | 65,2,1,2,219,0,60 139 | 71,2,1,2,159,0,40 140 | 72,3,0,1,235,0,65 141 | 49,2,1,2,164,0,40 142 | 66,2,1,2,224,0,60 143 | 71,2,1,2,164,0,40 144 | 69,2,1,1,170,0,60 145 | 64,1,0,0,123,1,45 146 | 72,2,1,1,237,0,60 147 | 54,1,1,1,171,0,55 148 | 54,3,0,0,123,1,45 149 | 52,1,0,0,174,1,35 150 | 62,2,1,1,148,1,50 151 | 62,2,1,0,142,0,45 152 | 61,0,1,1,138,1,50 153 | 60,2,0,1,144,1,50 154 | 71,2,1,1,232,0,60 155 | 74,2,1,1,236,0,60 156 | 61,2,1,0,146,0,45 157 | 77,2,1,2,180,1,80 158 | 70,2,1,2,163,0,40 159 | 52,1,1,0,178,0,40 160 | 71,2,0,1,239,0,65 161 | 72,2,1,2,181,1,80 162 | 59,2,1,2,239,1,80 163 | 51,1,1,0,176,1,40 164 | 62,1,0,0,168,0,60 165 | 75,2,1,1,237,0,60 166 | 52,1,0,0,175,1,35 167 | 61,1,1,1,165,1,50 168 | 61,2,1,2,239,1,80 169 | 74,1,0,2,181,0,75 170 | 65,3,0,0,127,1,45 171 | 66,2,1,2,223,0,60 172 | 70,2,1,1,238,0,60 173 | 49,1,1,0,178,1,40 174 | 51,1,1,0,170,1,40 175 | 60,1,0,0,170,0,60 176 | 61,3,0,0,125,1,45 177 | 61,0,1,1,140,1,50 178 | 54,3,0,0,122,1,45 179 | 50,1,0,0,177,1,35 180 | 65,2,1,2,226,0,60 181 | 50,2,0,2,160,0,40 182 | 66,2,1,1,173,0,60 183 | 67,2,1,1,172,0,60 184 | 70,1,0,0,167,0,60 185 | 81,3,1,1,226,0,80 186 | 52,2,1,2,157,0,40 187 | 68,3,0,0,186,1,65 188 | 62,2,0,2,235,1,80 189 | 74,2,1,1,239,0,60 190 | 66,2,1,2,222,0,60 191 | 65,3,1,1,170,0,55 192 | 63,2,1,2,239,1,80 193 | 70,2,1,1,150,1,50 194 | 65,2,0,0,126,0,45 195 | 54,1,1,0,173,1,40 196 | 68,2,0,1,170,0,60 197 | 49,1,1,0,173,1,40 198 | 62,2,1,2,223,0,60 199 | 58,1,1,2,202,0,70 200 | 70,2,1,1,144,1,50 201 | 57,2,1,0,139,0,45 202 | 54,3,0,0,122,1,45 203 | 59,1,1,1,171,1,50 204 | 65,2,1,2,223,0,60 205 | 50,1,1,0,180,1,35 206 | 61,0,0,1,141,1,50 207 | 80,3,1,1,226,0,80 208 | 53,1,1,0,173,1,40 209 | 66,0,1,1,139,1,50 210 | 61,2,0,0,138,0,45 211 | 55,1,0,0,173,1,35 212 | 72,2,1,1,239,0,60 213 | 71,1,0,2,185,0,75 214 | 66,2,1,2,221,0,60 215 | 54,1,0,0,173,0,35 216 | 54,1,1,0,174,1,40 217 | 62,1,0,1,167,1,50 218 | 64,0,0,1,141,1,50 219 | 79,3,1,1,226,0,80 220 | 76,2,1,2,174,1,80 221 | 67,2,1,1,171,0,60 222 | 60,1,1,1,169,0,50 223 | 72,2,1,1,237,0,60 224 | 63,1,1,1,174,1,50 225 | 74,3,0,1,233,0,65 226 | 53,1,1,0,175,0,40 227 | 74,2,1,1,239,0,60 228 | 71,2,1,1,174,0,60 229 | 65,0,1,1,143,1,50 230 | 48,1,1,0,178,1,40 231 | 50,1,0,0,171,1,35 232 | 60,0,0,1,136,1,50 233 | 66,2,1,1,173,0,60 234 | 59,1,1,1,169,1,50 235 | 58,1,1,2,203,0,70 236 | 74,2,1,2,179,1,80 237 | 67,1,0,2,204,1,70 238 | 59,1,0,0,172,1,60 239 | 42,1,0,0,123,1,45 240 | 73,1,0,2,181,0,75 241 | 72,1,1,2,181,0,75 242 | 57,2,1,0,140,0,45 243 | 65,0,1,1,135,1,50 244 | 42,1,0,0,125,1,45 245 | 53,2,1,2,162,0,40 246 | 73,1,0,2,182,1,75 247 | 52,1,1,0,176,1,40 248 | 68,2,1,1,238,0,60 249 | 68,3,1,1,174,0,55 250 | 57,1,1,1,171,0,50 251 | 69,1,0,0,172,0,60 252 | 52,1,0,0,174,1,35 253 | 62,2,1,2,165,0,40 254 | 
57,1,1,2,199,0,70 255 | 71,1,0,2,185,0,75 256 | 75,2,1,2,182,1,80 257 | 68,2,1,1,238,0,60 258 | 61,0,0,1,137,1,50 259 | 61,2,0,1,150,1,50 260 | 65,3,1,1,172,0,55 261 | 47,1,0,0,175,0,60 262 | 66,3,1,1,173,0,55 263 | 66,2,1,2,222,0,60 264 | 71,3,0,0,188,1,65 265 | 76,2,1,2,179,1,80 266 | 67,2,1,1,174,0,60 267 | 63,2,1,2,239,1,80 268 | 65,2,1,2,223,0,60 269 | 49,1,1,0,172,1,40 270 | 54,2,1,2,164,0,40 271 | 65,3,1,1,175,0,55 272 | 58,1,1,2,205,0,70 273 | 74,1,1,2,179,0,75 274 | 67,2,0,1,172,0,60 275 | 60,3,0,0,186,1,65 276 | 69,2,1,1,237,0,60 277 | 61,1,1,2,205,0,70 278 | 67,3,1,1,169,1,60 279 | 54,1,1,0,175,1,40 280 | 61,1,0,0,168,1,60 281 | 53,1,1,0,174,1,35 282 | 64,2,1,2,227,1,60 283 | 58,1,0,0,171,0,60 284 | 59,2,1,0,139,0,45 285 | 67,3,1,1,172,0,55 286 | 79,3,1,1,223,0,80 287 | 63,0,1,1,140,1,50 288 | 70,3,0,0,185,1,65 289 | 58,1,1,1,170,1,50 290 | 59,2,1,0,143,1,45 291 | 66,3,1,1,165,1,60 292 | 61,0,0,1,139,1,50 293 | 69,2,0,1,239,0,65 294 | 62,2,1,2,222,0,60 295 | 64,2,0,2,238,1,80 296 | 70,3,0,1,236,0,65 297 | 60,2,0,1,149,1,50 298 | 74,1,0,2,180,0,75 299 | 75,2,1,2,178,1,80 300 | 65,3,1,1,172,0,55 301 | 70,1,0,2,182,0,75 302 | 79,3,1,1,229,0,80 303 | 57,1,0,1,173,1,50 304 | 73,2,1,2,175,1,80 305 | 50,1,0,0,170,1,35 306 | 60,2,0,1,148,1,50 307 | 72,2,1,1,238,0,60 308 | 69,2,0,1,146,1,50 309 | 66,2,1,2,223,0,60 310 | 76,2,1,2,178,1,80 311 | 70,2,0,1,172,0,60 312 | 64,1,0,0,172,0,60 313 | 80,3,1,1,227,0,80 314 | 50,1,1,0,175,1,40 315 | 64,0,1,1,137,1,50 316 | 59,2,0,1,147,1,50 317 | 68,3,1,1,176,0,55 318 | 52,2,1,2,164,0,40 319 | 73,2,1,2,161,0,40 320 | 48,1,1,0,174,1,40 321 | 63,1,1,1,173,0,55 322 | 69,1,0,0,173,0,60 323 | 77,1,0,2,179,1,75 324 | 61,2,0,1,150,1,50 325 | 64,1,0,0,171,0,60 326 | 60,1,1,1,170,1,50 327 | 65,0,1,1,137,1,50 328 | 73,3,0,1,234,0,65 329 | 65,0,1,1,137,1,50 330 | 70,3,0,1,238,0,65 331 | 71,2,1,1,232,0,60 332 | 62,2,0,2,239,1,80 333 | 58,2,1,0,142,0,45 334 | 63,3,0,0,127,1,45 335 | 56,2,1,0,137,0,45 336 | 66,2,1,2,220,0,60 337 | 55,3,0,0,125,1,45 338 | 67,3,1,1,167,1,60 339 | 77,1,0,2,181,1,75 340 | 42,1,0,0,125,1,45 341 | 58,2,1,0,144,0,45 342 | 66,3,1,1,175,0,55 343 | 62,0,1,1,138,1,50 344 | 58,1,0,0,171,0,60 345 | 48,1,0,0,170,0,60 346 | 60,1,0,1,174,1,50 347 | 68,2,1,1,170,0,60 348 | 64,2,1,2,224,1,60 349 | 74,3,0,1,237,0,65 350 | 64,3,0,0,184,1,65 351 | 72,2,1,1,239,0,60 352 | 58,2,0,1,152,1,50 353 | 62,0,1,1,143,1,50 354 | 72,3,0,1,239,0,65 355 | 63,2,1,2,226,1,60 356 | 72,3,0,1,238,0,65 357 | 56,1,1,1,169,1,50 358 | 67,2,1,1,173,0,60 359 | 67,1,0,0,169,0,60 360 | 66,2,1,2,217,0,60 361 | 59,1,1,1,167,1,50 362 | 64,3,0,0,124,1,45 363 | 66,1,0,0,126,1,45 364 | 65,0,1,1,141,1,50 365 | 54,1,1,1,174,0,55 366 | 52,2,1,2,160,0,40 367 | 54,1,1,1,172,0,55 368 | 73,3,0,1,239,0,65 369 | 66,2,0,0,126,0,45 370 | 73,2,1,1,239,0,60 371 | 63,0,1,1,137,1,50 372 | 70,3,0,1,237,0,65 373 | 62,1,1,1,166,1,50 374 | 56,2,1,0,142,0,45 375 | 54,1,1,0,173,1,40 376 | 60,1,1,1,169,1,50 377 | 70,2,1,1,233,0,60 378 | 50,2,1,2,159,0,40 379 | 65,2,0,2,238,1,80 380 | 63,3,0,0,190,1,65 381 | 59,2,1,2,239,1,80 382 | 60,2,1,1,150,1,50 383 | 67,2,0,1,147,1,50 384 | 57,1,1,2,204,0,70 385 | 60,1,1,1,165,1,50 386 | 57,2,0,1,147,1,50 387 | 53,3,0,0,124,1,45 388 | 63,1,1,1,169,1,50 389 | 70,3,0,1,235,0,65 390 | 62,2,1,2,166,0,40 391 | 58,1,1,2,204,0,70 392 | 66,1,1,0,176,0,60 393 | 75,1,0,2,178,0,75 394 | 71,1,0,2,205,1,70 395 | 63,1,1,1,175,0,55 396 | 65,2,1,1,176,0,60 397 | 65,3,0,0,126,1,45 398 | 67,3,0,0,122,1,45 399 | 63,0,1,1,140,1,50 400 | 71,2,1,1,173,0,60 401 | 54,2,1,2,162,0,40 402 | 
69,3,0,0,189,1,65 403 | 66,2,1,1,174,0,60 404 | 70,3,0,1,239,0,65 405 | 61,2,1,0,140,1,45 406 | 67,1,1,0,169,0,60 407 | 64,1,0,0,124,1,45 408 | 58,2,1,0,142,0,45 409 | 67,2,1,2,222,0,60 410 | 58,1,1,2,203,0,70 411 | 58,1,0,0,170,1,60 412 | 62,1,1,1,171,1,50 413 | 63,1,1,1,174,0,55 414 | 71,2,1,1,238,0,60 415 | 71,2,1,2,159,0,40 416 | 71,3,0,1,237,0,65 417 | 60,1,0,0,175,0,60 418 | 51,2,1,2,160,0,40 419 | 57,1,0,0,173,0,60 420 | 57,1,1,2,201,0,70 421 | 61,1,1,2,200,0,70 422 | 70,3,1,1,173,0,55 423 | 66,1,0,0,169,0,60 424 | 49,1,1,0,175,1,40 425 | 52,1,1,1,173,0,55 426 | 53,2,1,2,162,0,40 427 | 59,1,1,1,173,1,50 428 | 69,2,1,1,239,0,60 429 | 68,3,1,1,173,0,55 430 | 73,2,1,2,180,1,80 431 | 71,2,0,1,145,1,50 432 | 56,1,0,0,168,1,35 433 | 57,1,0,0,173,0,60 434 | 74,3,1,2,177,0,75 435 | 62,2,1,0,143,0,45 436 | 54,1,0,0,171,1,35 437 | 67,2,1,2,223,0,60 438 | 49,1,0,0,172,0,60 439 | 66,1,1,0,175,0,60 440 | 66,3,0,0,123,1,45 441 | 52,1,0,0,173,1,35 442 | 64,3,1,1,174,0,55 443 | 66,3,1,1,175,0,55 444 | 52,1,0,0,171,1,35 445 | 68,1,0,0,168,0,60 446 | 55,1,0,0,172,1,35 447 | 62,2,0,0,139,0,45 448 | 65,2,1,1,172,0,60 449 | 59,2,1,0,141,1,45 450 | 53,1,1,0,173,1,40 451 | 54,1,0,0,171,1,35 452 | 61,2,0,1,153,1,50 453 | 71,3,0,1,239,0,65 454 | 65,0,1,1,141,1,50 455 | 68,2,1,1,234,0,60 456 | 75,2,1,1,238,0,60 457 | 75,2,1,2,175,1,80 458 | 52,1,0,0,173,1,35 459 | 61,0,1,1,139,1,50 460 | 64,2,1,2,220,0,60 461 | 71,2,1,2,162,0,40 462 | 74,2,1,1,239,0,60 463 | 53,1,0,0,188,1,65 464 | 64,2,1,1,177,0,60 465 | 59,1,1,2,198,0,70 466 | 65,2,1,2,223,0,60 467 | 57,3,0,0,124,1,45 468 | 67,1,1,0,171,0,60 469 | 60,1,1,1,166,1,50 470 | 66,2,1,2,219,0,60 471 | 62,2,1,1,145,1,50 472 | 59,2,0,1,149,1,50 473 | 62,1,1,1,170,1,50 474 | 70,3,0,0,187,1,65 475 | 73,3,1,2,181,0,75 476 | 66,2,1,2,226,1,60 477 | 53,3,0,0,125,1,45 478 | 63,2,1,2,222,0,60 479 | 62,1,1,1,165,1,50 480 | 62,0,1,1,141,1,50 481 | 56,1,0,0,172,0,35 482 | 49,1,1,0,177,1,40 483 | 75,2,1,2,175,1,80 484 | 66,2,1,2,223,0,60 485 | 71,2,1,1,146,1,50 486 | 68,3,1,1,172,0,55 487 | 71,3,1,1,173,0,55 488 | 70,2,0,1,145,1,50 489 | 66,0,1,1,137,1,50 490 | 53,2,0,2,158,0,40 491 | 53,1,0,0,192,1,65 492 | 72,2,1,1,234,0,60 493 | 64,2,1,2,239,1,80 494 | 58,2,1,0,142,0,45 495 | 75,2,1,1,233,0,60 496 | 51,2,0,2,161,0,40 497 | 72,1,0,2,184,0,75 498 | 61,3,0,0,123,1,45 499 | 50,1,1,0,179,1,40 500 | 51,1,1,0,174,1,40 501 | 79,3,1,1,228,0,80 502 | 66,2,1,1,147,1,50 503 | 77,2,1,2,178,1,80 504 | 63,1,0,1,166,1,50 505 | 70,2,1,2,164,0,40 506 | 72,2,1,1,238,0,60 507 | 70,1,0,0,174,0,60 508 | 58,1,1,1,169,1,50 509 | 58,2,1,1,150,1,50 510 | 73,2,1,2,180,1,80 511 | 74,1,0,2,183,0,75 512 | 69,3,0,0,190,1,65 513 | 63,2,1,2,167,0,40 514 | 42,1,0,0,126,1,45 515 | 52,1,1,0,173,1,40 516 | 58,2,0,1,147,1,50 517 | 71,1,0,2,207,1,70 518 | 64,2,0,2,238,1,80 519 | 59,2,0,1,150,1,50 520 | 61,1,1,2,200,0,70 521 | 81,3,1,1,225,0,80 522 | 68,2,1,1,175,0,60 523 | 68,2,0,1,239,0,65 524 | 52,2,1,2,161,0,40 525 | 72,2,1,1,239,0,60 526 | 72,2,1,1,239,0,60 527 | 54,1,1,0,171,1,40 528 | 69,2,0,1,239,0,65 529 | 51,1,0,0,175,1,35 530 | 70,2,1,1,167,0,60 531 | 58,1,0,0,172,1,60 532 | 54,1,0,0,174,0,35 533 | 65,1,0,0,128,1,45 534 | 58,2,1,0,143,0,45 535 | 43,1,0,0,125,1,45 536 | 74,3,0,1,239,0,65 537 | 65,2,1,1,167,0,60 538 | 65,2,1,1,171,0,60 539 | 60,0,0,1,136,1,50 540 | 61,1,0,0,175,1,60 541 | 50,1,0,0,172,1,35 542 | 65,1,0,0,170,0,60 543 | 63,0,1,1,142,1,50 544 | 72,2,1,1,238,0,60 545 | 61,1,0,1,168,1,50 546 | 52,1,1,0,172,0,40 547 | 62,0,1,1,142,1,50 548 | 72,2,0,1,235,0,65 549 | 61,1,1,2,202,0,70 550 | 
70,2,1,2,161,0,40 551 | 50,1,0,0,179,1,35 552 | 56,2,1,0,143,0,45 553 | 63,2,1,2,224,0,60 554 | 52,2,0,2,158,0,40 555 | 70,3,0,1,236,0,65 556 | 62,2,1,2,239,1,80 557 | 59,2,0,1,148,1,50 558 | 69,3,0,0,192,1,65 559 | 69,3,1,1,176,0,55 560 | 65,3,1,1,172,1,60 561 | 65,3,0,0,125,1,45 562 | 61,2,0,1,147,1,50 563 | 58,2,1,0,140,0,45 564 | 63,2,1,2,239,1,80 565 | 54,1,0,0,172,1,35 566 | 69,2,0,1,146,1,50 567 | 63,2,1,2,220,0,60 568 | 75,1,0,2,183,0,75 569 | 73,2,1,1,238,0,60 570 | 64,3,0,0,128,1,45 571 | 63,0,0,1,140,1,50 572 | 81,3,1,1,229,0,80 573 | 62,1,0,1,163,1,50 574 | 63,1,1,1,170,0,55 575 | 61,3,0,0,128,1,45 576 | 77,1,0,2,183,1,75 577 | 65,2,1,2,219,0,60 578 | 55,1,0,0,176,1,35 579 | 46,1,0,0,173,0,60 580 | 66,1,0,0,175,0,60 581 | 53,2,1,2,159,0,40 582 | 74,2,1,2,181,1,80 583 | 81,3,1,1,229,0,80 584 | 56,1,1,1,172,0,55 585 | 65,3,1,1,166,1,60 586 | 73,2,1,2,176,1,80 587 | 72,2,1,1,236,0,60 588 | 55,2,1,2,161,0,40 589 | 67,1,0,2,205,1,70 590 | 66,2,1,2,219,0,60 591 | 64,2,1,2,223,0,60 592 | 70,2,0,1,170,0,60 593 | 60,2,1,0,140,0,45 594 | 69,3,0,1,233,0,65 595 | 58,1,0,0,175,0,60 596 | 72,3,0,1,235,0,65 597 | 62,2,1,2,226,1,60 598 | 62,3,1,1,177,0,55 599 | 70,3,0,0,185,1,65 600 | 69,2,1,1,146,1,50 601 | 49,1,1,0,176,1,40 602 | 51,2,1,2,161,0,40 603 | 47,1,1,0,173,1,40 604 | 61,2,0,1,152,1,50 605 | 61,3,0,0,187,1,65 606 | 74,3,1,2,175,0,75 607 | 56,1,0,0,172,1,35 608 | 72,1,1,2,178,0,75 609 | 71,2,1,1,237,0,60 610 | 52,1,1,1,171,0,55 611 | 60,1,1,2,200,0,70 612 | 63,2,1,2,222,0,60 613 | 65,2,1,2,221,0,60 614 | 81,3,1,1,221,0,80 615 | 58,2,1,0,144,0,45 616 | 50,1,1,0,177,0,40 617 | 74,1,1,2,180,0,75 618 | 60,1,0,0,170,0,60 619 | 71,1,0,2,206,1,70 620 | 61,2,0,0,137,0,45 621 | 79,3,1,1,220,0,80 622 | 61,1,1,2,200,0,70 623 | 52,1,0,0,178,0,35 624 | 58,2,0,1,147,1,50 625 | 52,1,0,0,169,1,35 626 | 52,1,1,1,171,0,55 627 | 63,1,0,1,165,1,50 628 | 56,1,1,1,170,0,55 629 | 64,3,0,0,126,1,45 630 | 69,3,0,0,183,1,65 631 | 52,1,1,0,176,0,40 632 | 69,2,1,1,238,0,60 633 | 50,1,1,0,173,1,40 634 | 69,2,1,2,221,0,60 635 | 69,3,0,0,188,1,65 636 | 61,1,1,2,203,0,70 637 | 56,1,0,1,173,1,50 638 | 56,2,1,2,163,0,40 639 | 61,1,1,2,203,0,70 640 | 71,3,0,0,182,1,65 641 | 59,2,1,0,139,0,45 642 | 70,3,0,0,183,1,65 643 | 73,1,0,2,180,0,75 644 | 52,2,1,2,164,0,40 645 | 60,2,1,0,145,0,45 646 | 60,1,1,2,199,0,70 647 | 73,3,0,1,239,0,65 648 | 64,1,1,1,171,0,55 649 | 64,0,1,1,135,1,50 650 | 49,1,1,0,169,1,40 651 | 59,2,1,2,239,1,80 652 | 57,2,1,0,136,0,45 653 | 60,1,0,1,172,1,50 654 | 56,1,0,1,170,1,50 655 | 61,1,1,1,166,1,50 656 | 54,1,1,0,178,1,35 657 | 65,2,0,0,125,0,45 658 | 51,1,1,0,171,1,40 659 | 61,2,1,0,139,0,45 660 | 68,2,1,1,150,1,50 661 | 66,2,1,1,174,0,60 662 | 50,2,1,2,161,0,40 663 | 61,0,0,1,134,1,50 664 | 48,1,1,0,175,1,40 665 | 70,1,0,0,165,0,60 666 | 73,1,1,2,179,0,75 667 | 66,2,0,0,127,0,45 668 | 57,2,1,0,142,0,45 669 | 68,3,0,1,239,0,65 670 | 67,2,1,1,145,1,50 671 | 68,2,1,1,168,0,60 672 | 66,2,1,2,222,0,60 673 | 51,1,1,0,171,1,40 674 | 51,1,0,0,192,1,65 675 | 71,2,1,1,238,0,60 676 | 60,1,1,2,200,0,70 677 | 51,2,0,2,162,0,40 678 | 76,3,1,2,174,0,75 679 | 56,2,1,0,136,0,45 680 | 63,0,1,1,140,1,50 681 | 65,3,0,0,126,1,45 682 | 70,3,1,1,173,0,55 683 | 52,1,1,0,175,1,40 684 | 69,2,1,1,239,0,60 685 | 56,2,1,0,140,0,45 686 | 60,2,1,0,143,1,45 687 | 73,2,1,1,237,0,60 688 | 73,3,1,2,177,0,75 689 | 59,1,1,1,169,1,50 690 | 63,0,0,1,139,1,50 691 | 70,3,1,1,174,0,55 692 | -------------------------------------------------------------------------------- /Session04/LogisticRegression/data_training.csv: 
-------------------------------------------------------------------------------- 1 | Age,Marital_Status,Gender,Weight_Category,Cholesterol,Stress_Management,Trait_Anxiety,2nd_Heart_Attack 2 | 60,2,0,1,150,1,50,Yes 3 | 69,2,1,1,170,0,60,Yes 4 | 52,1,0,0,174,1,35,No 5 | 66,2,1,1,169,0,60,Yes 6 | 70,3,0,1,237,0,65,Yes 7 | 52,1,0,0,174,1,35,No 8 | 58,2,1,0,140,0,45,No 9 | 59,2,1,0,143,0,45,Yes 10 | 60,2,0,0,139,0,45,No 11 | 51,1,1,0,174,1,40,No 12 | 52,1,0,0,189,1,65,No 13 | 70,2,1,1,147,1,50,Yes 14 | 52,2,1,2,160,0,40,Yes 15 | 74,3,1,2,178,0,75,Yes 16 | 64,2,1,2,236,1,80,Yes 17 | 69,2,0,1,146,1,50,Yes 18 | 58,2,0,0,141,0,45,No 19 | 68,1,0,0,172,0,60,No 20 | 66,1,0,0,172,0,60,No 21 | 63,0,1,1,138,1,50,No 22 | 50,1,1,0,174,1,40,No 23 | 60,2,0,1,146,1,50,Yes 24 | 70,2,1,1,238,0,60,Yes 25 | 54,1,0,0,172,1,35,No 26 | 75,1,0,2,178,0,75,Yes 27 | 72,3,0,1,236,0,65,Yes 28 | 59,1,1,2,202,0,70,Yes 29 | 60,2,1,0,140,0,45,No 30 | 51,1,0,0,173,1,35,No 31 | 65,2,0,0,124,1,45,No 32 | 64,2,1,2,224,0,60,Yes 33 | 59,1,1,2,203,0,70,Yes 34 | 58,1,0,1,169,1,50,No 35 | 52,1,1,0,175,1,40,No 36 | 64,2,1,2,220,0,60,Yes 37 | 67,2,1,1,169,0,60,Yes 38 | 42,1,0,0,125,1,45,No 39 | 54,2,1,2,162,0,40,Yes 40 | 63,2,1,2,162,0,40,Yes 41 | 73,2,1,1,238,0,60,Yes 42 | 60,1,0,0,170,0,60,No 43 | 66,1,0,0,171,0,60,No 44 | 71,3,0,0,187,1,65,No 45 | 72,1,0,2,182,0,75,Yes 46 | 72,2,1,1,235,0,60,Yes 47 | 55,3,0,0,123,1,45,No 48 | 66,3,1,1,172,0,55,No 49 | 63,0,1,1,139,1,50,No 50 | 59,1,1,2,199,0,70,Yes 51 | 51,1,1,0,175,1,40,No 52 | 69,1,1,2,203,0,70,Yes 53 | 51,2,1,2,161,0,40,Yes 54 | 58,2,1,0,139,0,45,No 55 | 63,0,1,1,139,1,50,No 56 | 72,3,0,1,236,0,65,Yes 57 | 72,2,1,1,236,0,60,Yes 58 | 71,3,0,1,233,0,65,Yes 59 | 61,1,1,1,165,1,50,No 60 | 64,0,1,1,139,1,50,No 61 | 57,1,0,0,172,0,60,No 62 | 52,1,1,0,172,1,40,No 63 | 65,3,0,0,122,1,45,No 64 | 74,1,0,2,179,0,75,No 65 | 70,3,0,0,186,1,65,No 66 | 58,2,1,0,141,0,45,No 67 | 59,2,0,1,148,1,50,Yes 68 | 59,1,1,2,203,0,70,Yes 69 | 61,0,0,1,138,1,50,No 70 | 61,2,1,2,239,1,80,Yes 71 | 71,3,1,1,174,0,55,Yes 72 | 71,2,1,2,161,0,40,Yes 73 | 69,2,1,1,169,0,60,Yes 74 | 75,1,0,2,180,0,75,Yes 75 | 53,2,1,2,159,0,40,Yes 76 | 49,1,1,0,172,1,40,No 77 | 75,2,1,2,178,1,80,Yes 78 | 72,2,1,2,161,0,40,Yes 79 | 65,2,1,2,220,0,60,Yes 80 | 64,2,1,2,220,0,60,Yes 81 | 80,3,1,1,223,0,80,Yes 82 | 61,1,1,1,169,1,50,No 83 | 76,2,1,2,178,1,80,Yes 84 | 54,1,1,1,172,0,55,No 85 | 69,2,1,1,171,0,60,Yes 86 | 71,2,1,1,237,0,60,Yes 87 | 58,1,1,1,167,1,50,No 88 | 67,2,1,1,170,0,60,Yes 89 | 64,0,1,1,137,1,50,No 90 | 54,1,0,0,174,1,35,No 91 | 58,1,1,2,200,0,70,Yes 92 | 61,2,1,0,141,0,45,No 93 | 64,2,1,2,221,0,60,Yes 94 | 66,1,0,0,125,1,45,No 95 | 66,3,1,1,174,0,55,No 96 | 63,3,1,1,173,0,55,No 97 | 64,2,1,2,220,0,60,Yes 98 | 70,3,0,1,236,0,65,Yes 99 | 49,1,0,0,188,1,65,No 100 | 71,3,0,0,186,1,65,No 101 | 50,1,1,0,172,1,40,No 102 | 66,2,1,2,222,0,60,Yes 103 | 70,2,1,1,236,0,60,Yes 104 | 60,1,1,1,165,1,50,No 105 | 52,1,1,1,173,0,55,No 106 | 67,2,1,1,169,0,60,Yes 107 | 66,3,1,1,172,0,55,No 108 | 61,1,0,1,165,1,50,No 109 | 53,2,1,2,161,0,40,Yes 110 | 62,2,1,2,239,1,80,Yes 111 | 60,2,1,1,146,1,50,Yes 112 | 66,2,1,1,173,0,60,Yes 113 | 63,3,0,0,126,1,45,No 114 | 75,2,1,2,177,1,80,Yes 115 | 72,3,0,1,233,0,65,No 116 | 68,2,0,1,148,1,50,Yes 117 | 68,1,0,0,168,0,60,No 118 | 50,1,1,0,172,1,40,No 119 | 62,0,1,1,137,1,50,No 120 | 58,1,1,1,169,1,50,No 121 | 54,1,0,0,173,1,35,No 122 | 63,3,0,0,125,1,45,No 123 | 64,0,1,1,137,1,50,No 124 | 73,2,1,2,178,1,80,Yes 125 | 73,2,1,1,239,0,60,Yes 126 | 81,3,1,1,224,0,80,Yes 127 | 73,1,0,2,181,0,75,Yes 128 | 
52,1,0,0,172,1,35,No 129 | 48,1,0,0,172,0,60,No 130 | 61,2,0,1,148,1,50,Yes 131 | 67,2,1,2,220,0,60,Yes 132 | 58,1,1,1,169,1,50,No 133 | 65,1,1,1,173,0,55,No 134 | 80,3,1,1,222,0,80,Yes 135 | 54,1,0,0,170,1,35,No 136 | 59,1,0,0,172,0,60,No 137 | 55,3,0,0,122,1,45,No 138 | 73,2,1,1,236,0,60,Yes 139 | 62,3,0,0,185,1,65,No 140 | -------------------------------------------------------------------------------- /Session04/LogisticRegression/ex_1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/LogisticRegression/ex_1.PNG -------------------------------------------------------------------------------- /Session04/LogisticRegression/ex_2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/LogisticRegression/ex_2.PNG -------------------------------------------------------------------------------- /Session04/LogisticRegression/fruit_dat_with_colors.txt: -------------------------------------------------------------------------------- 1 | fruit_label fruit_name fruit_subtype mass width height color_score 2 | 1 apple granny_smith 192 8.4 7.3 0.55 3 | 1 apple granny_smith 180 8.0 6.8 0.59 4 | 1 apple granny_smith 176 7.4 7.2 0.60 5 | 2 mandarin mandarin 86 6.2 4.7 0.80 6 | 2 mandarin mandarin 84 6.0 4.6 0.79 7 | 2 mandarin mandarin 80 5.8 4.3 0.77 8 | 2 mandarin mandarin 80 5.9 4.3 0.81 9 | 2 mandarin mandarin 76 5.8 4.0 0.81 10 | 1 apple braeburn 178 7.1 7.8 0.92 11 | 1 apple braeburn 172 7.4 7.0 0.89 12 | 1 apple braeburn 166 6.9 7.3 0.93 13 | 1 apple braeburn 172 7.1 7.6 0.92 14 | 1 apple braeburn 154 7.0 7.1 0.88 15 | 1 apple golden_delicious 164 7.3 7.7 0.70 16 | 1 apple golden_delicious 152 7.6 7.3 0.69 17 | 1 apple golden_delicious 156 7.7 7.1 0.69 18 | 1 apple golden_delicious 156 7.6 7.5 0.67 19 | 1 apple golden_delicious 168 7.5 7.6 0.73 20 | 1 apple cripps_pink 162 7.5 7.1 0.83 21 | 1 apple cripps_pink 162 7.4 7.2 0.85 22 | 1 apple cripps_pink 160 7.5 7.5 0.86 23 | 1 apple cripps_pink 156 7.4 7.4 0.84 24 | 1 apple cripps_pink 140 7.3 7.1 0.87 25 | 1 apple cripps_pink 170 7.6 7.9 0.88 26 | 3 orange spanish_jumbo 342 9.0 9.4 0.75 27 | 3 orange spanish_jumbo 356 9.2 9.2 0.75 28 | 3 orange spanish_jumbo 362 9.6 9.2 0.74 29 | 3 orange selected_seconds 204 7.5 9.2 0.77 30 | 3 orange selected_seconds 140 6.7 7.1 0.72 31 | 3 orange selected_seconds 160 7.0 7.4 0.81 32 | 3 orange selected_seconds 158 7.1 7.5 0.79 33 | 3 orange selected_seconds 210 7.8 8.0 0.82 34 | 3 orange selected_seconds 164 7.2 7.0 0.80 35 | 3 orange turkey_navel 190 7.5 8.1 0.74 36 | 3 orange turkey_navel 142 7.6 7.8 0.75 37 | 3 orange turkey_navel 150 7.1 7.9 0.75 38 | 3 orange turkey_navel 160 7.1 7.6 0.76 39 | 3 orange turkey_navel 154 7.3 7.3 0.79 40 | 3 orange turkey_navel 158 7.2 7.8 0.77 41 | 3 orange turkey_navel 144 6.8 7.4 0.75 42 | 3 orange turkey_navel 154 7.1 7.5 0.78 43 | 3 orange turkey_navel 180 7.6 8.2 0.79 44 | 3 orange turkey_navel 154 7.2 7.2 0.82 45 | 4 lemon spanish_belsan 194 7.2 10.3 0.70 46 | 4 lemon spanish_belsan 200 7.3 10.5 0.72 47 | 4 lemon spanish_belsan 186 7.2 9.2 0.72 48 | 4 lemon spanish_belsan 216 7.3 10.2 0.71 49 | 4 lemon spanish_belsan 196 7.3 9.7 0.72 50 | 4 lemon spanish_belsan 174 7.3 10.1 0.72 51 | 4 lemon unknown 132 5.8 8.7 0.73 52 | 4 lemon unknown 130 6.0 8.2 0.71 53 | 4 
lemon unknown 116 6.0 7.5 0.72 54 | 4 lemon unknown 118 5.9 8.0 0.72 55 | 4 lemon unknown 120 6.0 8.4 0.74 56 | 4 lemon unknown 116 6.1 8.5 0.71 57 | 4 lemon unknown 116 6.3 7.7 0.72 58 | 4 lemon unknown 116 5.9 8.1 0.73 59 | 4 lemon unknown 152 6.5 8.5 0.72 60 | 4 lemon unknown 118 6.1 8.1 0.70 -------------------------------------------------------------------------------- /Session04/LogisticRegression/linear_reg_first_pic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/LogisticRegression/linear_reg_first_pic.PNG -------------------------------------------------------------------------------- /Session04/LogisticRegression/logisitic_regression_challenge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Mining\n", 8 | "\n", 9 | "## Logistic Regression\n", 10 | "\n", 11 | "### After completing the materials in this notebook, you should be able to:\n", 12 | "\n", 13 | "* Explain what logistic regression is, how it is used, and the benefits of using it.\n", 14 | "* Recognize the necessary format for data in order to perform predictive logistic regression.\n", 15 | "* Develop a logistic regression data mining model using a training data set.\n", 16 | "* Interpret the model’s outputs and apply them to a scoring data set in order to deploy the model.\n", 17 | "\n", 18 | "#### Organizational Understanding\n", 19 | " We are trying to determine what kinds of programs we should develop to help victims of heart attacks avoid suffering a recurrence.\n", 20 | "\n", 21 | "#### Data Understanding\n", 22 | "\n", 23 | "* __Age__: The age in years of the person, rounded to the nearest whole year.\n", 24 | "* __Marital_Status__: The person’s current marital status, indicated by a coded number: 0–Single, never married; 1–Married; 2–Divorced; 3–Widowed.\n", 25 | "* __Gender__: The person’s gender: 0 for female; 1 for male.\n", 26 | "* __Weight_Category__: The person’s weight categorized into one of three levels: 0 for normal weight range; 1 for overweight; and 2 for obese.\n", 27 | "* __Cholesterol__: The person’s cholesterol level, as recorded at the time of their treatment for their most recent heart attack (their only heart attack, in the case of those individuals in the scoring data set).\n", 28 | "* __Stress_Management__: A binary attribute indicating whether or not the person has previously attended a stress management course: 0 for no; 1 for yes.\n", 29 | "* __Trait_Anxiety__: A score on a scale of 0 to 100 measuring each person’s natural stress level and ability to cope with stress. A short time after each person in each of the two data sets had recovered from their first heart attack, they were administered a standard test of natural anxiety. Their scores are tabulated and recorded in this attribute in five-point increments. A score of 0 would indicate that the person never feels anxiety, pressure, or stress in any situation, while a score of 100 would indicate that the person lives in a constant state of being overwhelmed and unable to deal with his or her circumstances.\n", 30 | "* __2nd_Heart_Attack__: This attribute exists only in the __training__ data set. It will be our label, the prediction or target attribute.
In the training data set, the attribute is set to ‘yes’ for individuals who have suffered second heart attacks, and ‘no’ for those who have not.\n", 31 | "\n", 32 | "#### Data Preparation\n", 33 | " When using logistic regression as a predictive model, it is extremely important to remember that the ranges for all attributes in the scoring data must be within the ranges for the corresponding attributes in the training data." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 1, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# data preparation" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "#### Modeling\n", 50 | "\n", 51 | "We are no longer\n", 52 | "calculating the slope of a straight line, but rather, we are trying to determine the likelihood\n", 53 | "of an observation falling at a given point along a curved, S-shaped line (the logistic function)\n", 54 | "through a data set.\n", 55 | "\n", 56 | "class sklearn.linear_model.__LogisticRegression__(penalty=’l2’, dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, class_weight=None, random_state=None, solver=’liblinear’, max_iter=100, multi_class=’ovr’, verbose=0, warm_start=False, n_jobs=1)\n", 57 | "\n", 58 | "\n", 59 | "__Logistic Regression__ (aka logit, MaxEnt) classifier.\n", 60 | "\n", 61 | "In the multiclass case, the training algorithm uses the one-vs-rest (OvR) scheme if the ‘multi_class’ option is set to ‘ovr’, and uses the cross-entropy loss if the ‘multi_class’ option is set to ‘multinomial’. (Currently the ‘multinomial’ option is supported only by the ‘lbfgs’, ‘sag’ and ‘newton-cg’ solvers.)\n", 62 | "\n", 63 | "This class implements regularized logistic regression using the ‘liblinear’ library, ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers. It can handle both dense and sparse input. Use C-ordered arrays or CSR matrices containing 64-bit floats for optimal performance; any other input format will be converted (and copied).\n", 64 | "\n", 65 | "The ‘newton-cg’, ‘sag’, and ‘lbfgs’ solvers support only L2 regularization with primal formulation. The ‘liblinear’ solver supports both L1 and L2 regularization, with a dual formulation only for the L2 penalty.\n", 66 | "\n", 67 | "__Parameters__:\t\n", 68 | "\n", 69 | "__penalty__ : str, ‘l1’ or ‘l2’, default: ‘l2’\n", 70 | "\n", 71 | " Used to specify the norm used in the penalization. The ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers support only l2 penalties.\n", 72 | "\n", 73 | " New in version 0.19: l1 penalty with SAGA solver (allowing ‘multinomial’ + L1)\n", 74 | "\n", 75 | "__dual__ : bool, default: False\n", 76 | "\n", 77 | " Dual or primal formulation. Dual formulation is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features.\n", 78 | "\n", 79 | "__tol__ : float, default: 1e-4\n", 80 | "\n", 81 | " Tolerance for stopping criteria.\n", 82 | "\n", 83 | "__C__: float, default: 1.0\n", 84 | "\n", 85 | " Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.\n", 86 | " \n", 87 | " Regularization term:\n", 88 | " Regularization does NOT improve the performance on the data set that the algorithm used to learn the model parameters (feature weights).
However, it can improve the generalization performance, i.e., the performance on new, unseen data, which is exactly what we want.\n", 89 | "\n", 90 | " In intuitive terms, we can think of regularization as a penalty against complexity. Increasing the regularization strength penalizes \"large\" weight coefficients -- our goal is to prevent our model from picking up \"peculiarities\" or \"noise,\" or from \"imagining a pattern where there is none.\"\n", 91 | "\n", 92 | " Again, we don't want the model to memorize the training dataset; we want a model that generalizes well to new, unseen data.\n", 93 | "\n", 94 | "__fit_intercept__ : bool, default: True\n", 95 | "\n", 96 | " Specifies if a constant (a.k.a. bias or intercept) should be added to the decision function.\n", 97 | "\n", 98 | "__intercept_scaling__ : float, default 1.\n", 99 | "\n", 100 | " Useful only when the solver ‘liblinear’ is used and self.fit_intercept is set to True. In this case, x becomes [x, self.intercept_scaling], i.e. a “synthetic” feature with constant value equal to intercept_scaling is appended to the instance vector. The intercept becomes intercept_scaling * synthetic_feature_weight.\n", 101 | "\n", 102 | " Note: the synthetic feature weight is subject to l1/l2 regularization, like all other features. To lessen the effect of regularization on the synthetic feature weight (and therefore on the intercept), intercept_scaling has to be increased.\n", 103 | "\n", 104 | "__class_weight__ : dict or ‘balanced’, default: None\n", 105 | "\n", 106 | " Weights associated with classes in the form {class_label: weight}. If not given, all classes are supposed to have weight one.\n", 107 | "\n", 108 | " The “balanced” mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as n_samples / (n_classes * np.bincount(y)).\n", 109 | "\n", 110 | " Note that these weights will be multiplied with sample_weight (passed through the fit method) if sample_weight is specified.\n", 111 | "\n", 112 | " New in version 0.17: class_weight=’balanced’\n", 113 | "\n", 114 | "__random_state__ : int, RandomState instance or None, optional, default: None\n", 115 | "\n", 116 | " The seed of the pseudo random number generator to use when shuffling the data. If int, random_state is the seed used by the random number generator; if RandomState instance, random_state is the random number generator; if None, the random number generator is the RandomState instance used by np.random. Used when solver == ‘sag’ or ‘liblinear’.\n", 117 | "\n", 118 | "__solver__ : {‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}, default: ‘liblinear’\n", 119 | "\n", 120 | " Algorithm to use in the optimization problem.\n", 121 | "\n", 122 | " For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and\n", 123 | " ‘saga’ are faster for large ones.\n", 124 | "\n", 125 | " For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’\n", 126 | " handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes.\n", 127 | "\n", 128 | " ‘newton-cg’, ‘lbfgs’ and ‘sag’ only handle L2 penalty, whereas\n", 129 | " ‘liblinear’ and ‘saga’ handle L1 penalty.\n", 130 | "\n", 131 | " Note that the fast convergence of ‘sag’ and ‘saga’ is only guaranteed on features with approximately the same scale.
You can preprocess the data with a scaler from sklearn.preprocessing.\n", 132 | "\n", 133 | " New in version 0.17: Stochastic Average Gradient descent solver.\n", 134 | "\n", 135 | " New in version 0.19: SAGA solver.\n", 136 | "\n", 137 | "__max_iter__ : int, default: 100\n", 138 | "\n", 139 | " Useful only for the newton-cg, sag and lbfgs solvers. Maximum number of iterations taken for the solvers to converge.\n", 140 | "\n", 141 | "__multi_class__ : str, {‘ovr’, ‘multinomial’}, default: ‘ovr’\n", 142 | "\n", 143 | " Multiclass option can be either ‘ovr’ or ‘multinomial’. If the option chosen is ‘ovr’, then a binary problem is fit for each label. Else the loss minimised is the multinomial loss fit across the entire probability distribution. Does not work for the liblinear solver.\n", 144 | "\n", 145 | " New in version 0.18: Stochastic Average Gradient descent solver for the ‘multinomial’ case.\n", 146 | "\n", 147 | "__verbose__ : int, default: 0\n", 148 | "\n", 149 | " For the liblinear and lbfgs solvers, set verbose to any positive number for verbosity.\n", 150 | "\n", 151 | "__warm_start__ : bool, default: False\n", 152 | "\n", 153 | " When set to True, reuse the solution of the previous call to fit as initialization; otherwise, just erase the previous solution. Useless for the liblinear solver.\n", 154 | "\n", 155 | " New in version 0.17: warm_start to support lbfgs, newton-cg, sag, saga solvers.\n", 156 | "\n", 157 | "__n_jobs__ : int, default: 1\n", 158 | "\n", 159 | " Number of CPU cores used when parallelizing over classes if multi_class=’ovr’. This parameter is ignored when the ``solver`` is set to ‘liblinear’ regardless of whether ‘multi_class’ is specified or not. If given a value of -1, all cores are used.\n", 160 | "\n", 161 | "__Attributes__:\t\n", 162 | "\n", 163 | "__coef___ : array, shape (1, n_features) or (n_classes, n_features)\n", 164 | "\n", 165 | " Coefficient of the features in the decision function.\n", 166 | "\n", 167 | " coef_ is of shape (1, n_features) when the given problem is binary.\n", 168 | "\n", 169 | "__intercept___ : array, shape (1,) or (n_classes,)\n", 170 | "\n", 171 | " Intercept (a.k.a. bias) added to the decision function.\n", 172 | "\n", 173 | " If fit_intercept is set to False, the intercept is set to zero. intercept_ is of shape (1,) when the problem is binary.\n", 174 | "\n", 175 | "__n_iter___ : array, shape (n_classes,) or (1, )\n", 176 | "\n", 177 | " Actual number of iterations for all classes. If binary or multinomial, it returns only 1 element. For the liblinear solver, only the maximum number of iterations across all classes is given.\n", 178 | "\n", 179 | " Changed in version 0.20: In SciPy <= 1.0.0 the number of lbfgs iterations may exceed max_iter.
n_iter_ will now report at most max_iter.\n" 180 | ] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.7.4" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /Session04/NeuralNetwork.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/NeuralNetwork.pptx -------------------------------------------------------------------------------- /Session04/NeuralNetwork/data_scoring.csv: -------------------------------------------------------------------------------- 1 | Player_Name,Position_ID,Shots,Makes,Personal_Points,Total_Points,Assists,Concessions,Blocks,Block_Assists,Fouls,Years_Exp,Career_Shots,Career_Makes,Career_PP,Career_TP,Career_Assists,Career_Con 2 | Gary Price,2,200,51,2,14,29,25,398,29,7,23,9778,2732,379,1272,1652,925 3 | Raul Little,6,216,54,0,21,18,15,103,84,5,18,7318,1926,46,796,627,483 4 | Roman Richards,7,284,64,14,30,42,24,96,4,4,18,7023,1925,348,986,1239,666 5 | Geoffrey Lloyd,3,313,84,9,42,30,39,127,221,7,17,6890,1833,224,1033,864,1087 6 | Jesus Huff,3,289,63,7,36,41,44,166,211,7,17,7402,1954,195,1115,919,1153 7 | Jan Becker,5,346,98,5,31,53,30,0,0,0,16,5913,1615,235,784,901,560 8 | John Mcguire,2,542,140,12,46,75,41,697,61,9,16,7099,2130,235,987,1089,431 9 | Robert Holloway,5,561,118,35,70,94,33,463,32,8,16,6677,1575,442,901,1210,608 10 | Herbert Watkins,2,277,71,2,27,29,14,360,32,5,15,5952,1647,60,753,596,259 11 | Stewart Chavez,5,336,93,9,35,46,23,0,0,0,15,5779,1610,128,730,741,497 12 | Ralph Sharp,0,235,61,3,24,39,21,425,43,4,14,3926,1029,35,441,401,333 13 | Drew Kelley,9,397,114,23,67,67,53,244,2,4,13,5589,1632,241,906,926,716 14 | Jessie Strickland,5,315,59,16,45,36,58,0,0,0,13,4677,1051,268,681,782,697 15 | Gerald Luna,2,407,104,6,57,43,65,912,88,9,12,5233,1478,100,643,658,653 16 | Fred Clarke,7,419,113,1,44,27,44,211,2,1,12,4484,1231,32,612,344,422 17 | Fernando Rowe,11,219,47,8,24,26,17,260,58,4,12,1188,286,23,100,125,63 18 | Jerry Reed,8,359,84,4,46,27,21,151,8,5,12,4992,1257,37,699,386,387 19 | Nicholas Blake,0,194,40,7,19,29,30,325,22,2,11,4183,1069,64,486,493,608 20 | Zachary Lawson,6,225,61,5,32,26,26,132,9,0,11,1568,408,25,202,185,257 21 | Lance Goodwin,0,395,106,16,48,56,35,709,41,7,10,2303,571,86,266,323,248 22 | Jack Hudson,3,209,45,0,38,19,42,132,205,5,10,3859,916,23,557,279,478 23 | Ian Tucker,2,517,141,27,70,87,52,1378,102,8,9,3571,994,215,545,652,337 24 | Luis Chapman,1,200,57,6,23,14,14,69,1,1,9,2516,684,46,371,230,195 25 | Harvey Dean,7,508,146,8,80,44,46,245,5,9,9,3148,915,41,571,289,326 26 | Paul Vega,1,195,55,5,24,33,30,83,2,1,8,1313,338,25,144,149,153 27 | Ernesto Johnston,4,278,69,3,24,21,29,142,210,10,8,2079,565,32,258,192,162 28 | Malcolm Robbins,6,317,88,3,40,32,19,220,16,4,8,2543,715,28,269,270,118 29 | Rodolfo Jacobs,7,580,194,9,91,62,78,270,13,6,8,3372,1028,48,604,314,469 30 | Toby Gomez,8,221,53,2,21,23,22,325,58,6,8,1063,283,15,107,124,106 31 | Alan 
Hunter,0,462,119,16,49,65,37,866,65,6,7,2131,583,69,244,288,150 32 | Tom Cook,1,307,80,1,42,36,29,145,2,2,7,2421,656,18,379,198,184 33 | Devin Coleman,0,22,10,1,4,2,1,812,84,11,6,84,26,2,9,9,3 34 | Lynn Williams,10,210,37,8,15,19,15,40,115,15,6,994,244,36,107,114,53 35 | Bobby Long,8,227,46,7,23,20,12,92,2,2,5,1325,324,44,156,158,67 36 | Jimmie Cohen,3,284,69,1,33,18,25,122,140,5,5,1407,361,6,139,98,111 37 | Johnny Dennis,2,551,160,23,86,90,87,1224,115,11,5,2235,602,75,278,328,273 38 | Dewey Osborne,10,181,46,1,19,18,17,37,98,9,5,937,238,9,88,95,104 39 | Dale Hines,4,161,36,0,19,10,17,70,149,12,4,1053,244,3,156,86,107 40 | Derek Mann,5,19,7,0,1,2,1,0,0,0,4,41,13,1,3,4,4 41 | Shane Clark,0,191,37,4,12,17,14,391,38,8,4,773,163,16,61,74,52 42 | Samuel French,8,401,100,2,60,19,28,193,11,4,4,876,238,2,126,44,55 43 | Tomas Cunningham,1,249,69,6,32,19,20,103,8,2,4,702,209,10,97,48,44 44 | Emilio Cole,1,243,53,4,18,26,27,107,3,3,4,853,228,23,101,110,76 45 | Jesse Martin,0,209,59,6,20,37,27,415,35,3,4,884,209,14,66,106,92 46 | Mark Copeland,2,330,76,12,35,41,47,512,30,5,4,1367,326,55,167,198,167 47 | Tim Cox,1,183,39,3,20,15,11,118,0,0,3,201,42,3,20,16,11 48 | Rene Todd,1,24,3,0,1,0,2,80,4,0,3,159,28,0,20,12,9 49 | Edwin Wilkerson,0,138,31,8,18,21,38,244,21,4,3,244,53,12,33,32,55 50 | Pete Ingram,7,420,95,23,55,58,37,206,10,7,3,646,139,31,77,77,61 51 | Randall Parsons,0,161,43,4,17,26,22,300,12,2,3,707,179,21,77,99,76 52 | Lowell Marsh,4,524,132,9,69,47,54,212,327,20,2,972,260,14,123,92,90 53 | Jose Neal,6,212,54,13,28,44,18,243,23,5,2,233,59,13,31,46,20 54 | Michael Padilla,9,303,84,4,35,32,23,179,5,3,2,312,87,4,39,32,23 55 | Randy Gonzales,9,16,2,0,1,0,0,247,4,8,2,28,4,0,1,0,0 56 | Virgil Schwartz,0,293,66,1,30,29,14,446,33,20,1,293,66,1,30,29,14 57 | Jeffery Lucas,3,241,61,1,34,12,14,166,172,10,1,241,61,1,34,12,14 58 | Marlon Tran,4,166,34,0,20,13,17,64,119,9,1,166,34,0,20,13,17 59 | Leon Weaver,8,33,6,0,2,4,7,205,5,4,1,33,6,0,2,4,7 60 | Nathaniel Garrett,1,186,44,7,28,16,11,99,3,1,1,186,44,7,28,16,11 61 | -------------------------------------------------------------------------------- /Session04/NeuralNetwork/data_training.csv: -------------------------------------------------------------------------------- 1 | Player_Name,Position_ID,Shots,Makes,Personal_Points,Total_Points,Assists,Concessions,Blocks,Block_Assists,Fouls,Years_Pro,Career_Shots,Career_Makes,Career_PP,Career_TP,Career_Assists,Career_Con,Team_Value 2 | Andrew Graham,5,627,177,25,98,81,70,240,482,13,6,3210,927,133,529,472,313,Superstar 3 | Fred Walton,1,388,103,15,59,47,39,182,9,4,6,2174,555,80,285,274,186,Contributor 4 | Ignacio Barber,4,526,163,12,88,50,77,250,11,1,4,1556,470,38,245,167,174,Franchise Player 5 | Jackie Ortiz,5,298,73,0,24,24,7,121,283,9,3,509,108,0,41,37,12,Role Player 6 | Cecil Bowen,11,589,170,40,107,108,69,368,20,3,6,2325,634,128,371,376,238,Superstar 7 | Lance Webster,4,278,86,4,33,38,45,102,4,2,1,278,86,4,33,38,45,Role Player 8 | Kristopher Andrews,4,600,144,33,85,117,65,319,4,14,2,696,173,38,101,130,69,Role Player 9 | Danny Wilson,3,521,142,20,67,86,45,107,242,23,4,815,205,22,99,103,78,Role Player 10 | Randolph Malone,0,143,39,5,18,30,15,138,15,1,9,639,151,16,80,97,61,Role Player 11 | Brent Burns,2,550,152,6,92,37,81,262,329,16,5,2308,633,32,349,182,308,Franchise Player 12 | Noel Castillo,7,155,41,12,21,29,22,165,9,1,16,5409,1338,181,746,805,875,Contributor 13 | Ricky Paul,1,205,43,2,24,17,20,131,6,1,7,854,219,12,105,99,71,Contributor 14 | Milton 
Wise,5,407,93,8,47,30,30,172,317,25,2,969,230,14,121,69,68,Role Player 15 | Oscar Swanson,1,404,92,11,54,49,18,222,5,5,6,1354,325,30,188,135,63,Contributor 16 | Chris Stanley,5,220,54,10,30,39,31,50,136,20,5,1185,299,40,145,154,128,Contributor 17 | Clark Carr,1,275,68,5,42,42,61,181,3,2,6,961,238,16,128,104,172,Contributor 18 | Mack Perkins,10,426,109,3,55,43,62,361,22,2,1,426,109,3,55,43,62,Role Player 19 | Jimmy Warren,7,220,66,5,20,28,13,281,21,3,3,290,80,5,27,31,15,Role Player 20 | Timothy Burns,1,288,76,7,34,37,15,203,3,3,4,1644,408,16,198,120,113,Contributor 21 | Aaron Newton,9,311,81,3,42,30,26,153,223,10,17,8247,2198,100,950,909,690,Contributor 22 | Kim Holloway,6,587,163,4,92,51,70,434,9,3,6,2695,747,17,442,198,317,Superstar 23 | Adrian Maxwell,11,491,141,11,77,47,37,239,8,2,15,4291,1240,84,615,430,340,Franchise Player 24 | Duane Roy,5,574,159,21,107,75,59,238,445,22,10,4631,1300,90,702,504,488,Franchise Player 25 | Mike Richards,1,490,150,21,69,58,35,96,5,3,14,6126,1839,121,983,707,600,Superstar 26 | Enrique Ward,6,299,75,6,38,23,26,212,1,2,3,580,160,8,71,33,44,Role Player 27 | Kurt Lowe,7,528,132,21,61,74,41,885,105,8,6,2641,671,97,273,383,226,Superstar 28 | Santiago Little,7,315,81,7,24,38,39,632,43,10,14,3449,835,69,321,414,375,Franchise Player 29 | Marcus Robertson,2,627,178,14,68,76,46,309,492,5,6,3146,902,74,494,345,242,Franchise Player 30 | Ralph Pearson,11,196,43,7,29,27,30,80,45,8,13,3231,825,36,376,290,238,Contributor 31 | Scott Hampton,3,313,83,9,43,41,30,58,141,23,14,5885,1543,104,751,714,535,Franchise Player 32 | Darrin Benson,10,209,56,12,22,36,19,201,6,3,2,216,58,12,24,37,19,Role Player 33 | Robin Mccarthy,9,217,46,7,32,19,9,307,25,1,4,694,160,32,86,76,32,Role Player 34 | Greg Rowe,7,281,76,3,42,25,20,106,144,7,8,2658,657,48,324,300,179,Contributor 35 | Alvin Norman,7,315,73,5,23,37,16,227,15,3,4,450,108,6,38,46,28,Role Player 36 | Terry Payne,6,589,149,21,89,86,64,371,6,6,7,3558,928,102,513,471,351,Superstar 37 | Keith Poole,6,608,160,28,130,74,89,426,4,6,8,4071,1182,103,862,417,708,Superstar 38 | Spencer Wong,11,418,113,13,48,61,47,211,11,7,4,1512,392,41,205,204,203,Franchise Player 39 | Jake Smith,5,160,39,8,18,31,22,33,3,0,14,2128,543,56,304,268,298,Contributor 40 | Raymond Maldonado,3,263,70,1,26,23,30,81,147,4,4,888,220,9,83,82,86,Contributor 41 | Erik Flores,4,357,96,7,50,45,39,167,2,4,5,1394,344,43,178,192,136,Contributor 42 | Evan Simpson,8,327,85,3,30,44,20,91,185,12,8,2140,568,16,216,208,93,Contributor 43 | Christian Rivera,11,584,158,15,70,84,42,331,20,4,5,2358,636,58,265,316,134,Franchise Player 44 | Abraham Shaw,2,387,124,1,67,27,36,186,290,17,7,1775,506,6,272,125,194,Franchise Player 45 | Carroll Gardner,4,600,139,0,94,29,60,300,12,9,2,1236,309,1,201,69,110,Role Player 46 | Marco Rodriquez,2,288,63,3,25,33,16,135,257,7,10,2682,667,38,315,259,204,Contributor 47 | Willie Barber,3,504,120,28,71,71,54,103,283,19,3,1085,259,54,150,167,114,Role Player 48 | Eric Crawford,7,510,147,10,56,52,53,810,99,18,7,2872,821,63,307,340,174,Superstar 49 | Brad Bailey,3,583,168,17,83,80,56,109,292,25,5,1646,452,44,219,208,136,Franchise Player 50 | Andrew Hawkins,4,415,115,27,97,71,68,274,2,7,3,711,184,45,156,119,99,Contributor 51 | Lyle Huff,7,268,60,5,24,25,15,442,59,6,2,350,78,5,34,29,18,Role Player 52 | Theodore Bush,3,475,126,3,61,43,52,37,113,7,6,1700,433,7,217,93,146,Contributor 53 | Terrell Steele,5,514,144,0,67,54,79,229,453,15,9,4739,1169,13,583,374,528,Superstar 54 | Jessie Flowers,3,20,1,0,0,0,0,78,220,6,2,41,9,2,6,7,4,Superstar 55 | Homer 
Murphy,4,280,82,16,44,45,47,148,4,2,2,428,113,25,61,70,63,Role Player 56 | Aubrey Holmes,6,399,102,3,56,34,34,211,9,3,5,670,167,4,89,48,54,Role Player 57 | Sergio Hernandez,1,283,70,8,33,37,27,156,2,2,12,4479,1222,94,557,483,307,Contributor 58 | Ian Mendoza,3,568,158,20,89,75,73,105,290,10,15,8068,2273,177,1045,993,732,Superstar 59 | Toby Webb,2,453,103,8,53,33,52,289,407,6,2,507,123,8,63,39,58,Role Player 60 | Jorge Sutton,2,441,113,5,76,52,76,160,290,11,5,1546,397,17,226,149,191,Contributor 61 | Samuel Bates,6,324,73,4,32,18,22,222,3,3,7,1931,491,13,291,108,180,Franchise Player 62 | Arthur Martin,2,497,136,7,58,38,26,304,347,10,11,3871,1066,40,450,367,241,Superstar 63 | Luke Christensen,2,492,136,5,76,50,94,313,381,20,12,5511,1511,39,897,451,875,Superstar 64 | Dana Summers,10,244,58,9,28,25,35,142,14,2,4,1335,333,49,164,179,194,Contributor 65 | Juan Carter,5,323,81,6,26,32,8,143,290,19,2,341,86,6,32,34,8,Role Player 66 | Darin Snyder,1,382,101,16,50,55,22,200,7,6,1,382,101,16,50,55,22,Role Player 67 | Paul Chapman,3,317,78,7,35,35,32,45,122,26,1,317,78,7,35,35,32,Role Player 68 | Perry Carlson,6,614,163,29,89,83,75,303,6,6,11,5017,1388,266,813,822,617,Superstar 69 | Lionel Bishop,10,155,44,6,21,23,15,53,88,3,16,6631,1634,98,698,661,777,Contributor 70 | Craig Barrett,0,573,144,9,85,60,78,1314,131,12,8,3198,857,97,470,420,332,Superstar 71 | Zachary Chambers,1,283,74,4,34,29,22,145,5,7,10,3919,1062,85,505,456,283,Franchise Player 72 | Enrique Hicks,5,510,126,2,42,44,35,207,358,20,11,5562,1578,44,703,519,256,Franchise Player 73 | Doyle Pittman,7,314,83,13,39,46,16,533,40,4,5,1457,405,28,156,159,76,Franchise Player 74 | Warren Allison,2,279,69,4,35,31,32,133,173,9,4,1359,355,31,180,148,158,Contributor 75 | John Baldwin,6,424,110,15,70,47,36,292,6,3,7,2130,544,38,335,174,258,Franchise Player 76 | Raymond Houston,3,256,70,13,42,36,44,41,118,8,16,7058,1845,312,965,1128,990,Superstar 77 | Forrest Manning,0,512,117,29,54,88,43,1236,98,18,6,1750,412,100,204,276,155,Role Player 78 | Kurt Walters,8,216,54,2,27,25,33,317,36,1,1,216,54,2,27,25,33,Role Player 79 | Curtis Hill,5,439,96,0,44,36,65,229,406,22,4,711,148,1,68,56,99,Role Player 80 | Derrick Foster,5,255,70,7,49,35,43,51,54,8,15,6311,1661,154,1019,608,820,Franchise Player 81 | Ernesto Sanders,5,279,64,0,31,26,30,107,205,16,1,279,64,0,31,26,30,Role Player 82 | Otis Miles,4,215,51,4,19,18,11,116,5,12,1,215,51,4,19,18,11,Role Player 83 | Joshua Bell,3,480,112,18,50,71,44,94,270,16,7,3031,771,110,338,406,239,Franchise Player 84 | Lowell Hubbard,5,313,78,6,32,41,12,106,206,7,12,3742,968,35,409,321,170,Contributor 85 | Jerome Wagner,5,599,183,10,80,74,32,231,374,18,5,2482,715,27,330,326,158,Superstar 86 | Gregory Garner,3,596,171,34,91,108,52,118,334,21,6,2862,728,107,361,401,224,Superstar 87 | Zachary Moody,3,591,168,19,80,72,39,67,147,4,9,4478,1307,113,634,563,319,Superstar 88 | Casey Sparks,0,574,152,31,91,101,64,1253,111,11,3,985,260,53,148,173,95,Contributor 89 | Myron Jenkins,4,216,53,1,31,15,22,73,152,11,4,926,210,9,118,69,114,Contributor 90 | Pablo Daniel,4,341,95,6,48,42,20,158,4,5,10,2964,808,81,379,428,221,Role Player 91 | Ray Williams,3,486,145,11,51,76,40,88,204,16,11,3967,1102,67,410,497,284,Franchise Player 92 | Joel Nelson,1,585,139,31,93,94,62,0,0,0,17,7546,1982,315,1141,1179,727,Superstar 93 | Pete Gibson,11,591,184,20,83,79,38,303,12,5,5,1689,462,40,219,195,82,Franchise Player 94 | Preston Allison,3,496,119,8,57,33,21,155,371,29,7,3358,882,36,365,280,165,Superstar 95 | Noel 
Rice,6,329,83,9,50,39,56,276,6,2,9,3828,948,145,575,528,635,Franchise Player 96 | Devin Hines,4,618,200,20,98,110,62,330,16,8,13,7127,2163,351,1104,1289,564,Superstar 97 | Martin Arnold,4,513,137,20,90,95,90,267,5,3,14,5201,1382,166,763,734,784,Superstar 98 | Luke Cannon,2,408,94,4,42,36,66,282,487,19,9,3573,866,59,429,365,410,Franchise Player 99 | Sylvester Holt,11,441,118,28,84,86,68,190,2,2,8,2723,750,126,433,420,309,Superstar 100 | Woodrow Patrick,1,209,54,3,25,14,12,102,6,3,1,209,54,3,25,14,12,Role Player 101 | Neil Carson,0,282,78,13,37,51,29,670,57,5,5,1649,453,73,211,280,138,Franchise Player 102 | Joshua Diaz,11,127,32,4,14,25,12,167,18,6,19,8396,2402,242,1048,1348,819,Franchise Player 103 | Carl Soto,5,581,145,17,66,68,21,320,465,32,2,831,210,21,106,86,40,Role Player 104 | Nicolas Phelps,4,344,85,24,69,64,88,0,0,0,7,911,214,64,150,156,187,Contributor 105 | Jeff Waters,7,165,39,2,13,9,16,332,19,2,3,196,44,2,18,10,18,Role Player 106 | Dennis Willis,11,511,138,25,76,96,61,157,7,8,3,592,164,28,87,110,71,Role Player 107 | Blake Knight,4,490,148,14,64,78,49,0,0,0,13,3400,1000,113,445,491,301,Franchise Player 108 | Ronnie Lane,1,381,110,9,61,45,32,228,7,5,7,3015,834,40,451,249,168,Superstar 109 | Ed Padilla,4,210,70,13,32,51,28,0,0,0,15,4040,1130,97,544,462,551,Contributor 110 | Gordon Fuller,11,565,148,24,90,104,77,292,9,5,14,7287,2083,305,1135,1234,791,Superstar 111 | Kent Newman,11,529,137,26,86,97,97,280,10,5,15,6661,1785,291,1082,949,989,Superstar 112 | Jeffery Garza,11,526,146,13,71,70,84,303,9,9,6,2648,715,77,352,342,289,Superstar 113 | Jonathon Collier,11,330,77,19,47,53,27,149,8,6,6,1928,516,90,247,288,161,Franchise Player 114 | Pete Hayes,0,550,147,29,85,91,71,1218,104,10,6,2816,815,117,405,474,319,Superstar 115 | Blake Page,6,351,97,4,55,29,39,226,7,3,4,1258,353,16,196,110,117,Contributor 116 | Jean Foster,6,413,92,16,72,48,65,280,9,5,1,413,92,16,72,48,65,Role Player 117 | Gary Copeland,2,239,60,0,30,11,22,121,151,6,6,1941,510,4,309,103,207,Franchise Player 118 | Claude Caldwell,3,578,138,1,56,59,34,133,371,20,3,1399,357,7,149,161,87,Role Player 119 | Guillermo Howell,11,593,152,23,69,75,53,315,10,6,6,2765,686,133,369,384,321,Superstar 120 | Cameron Goodwin,2,368,103,3,48,28,54,209,246,3,8,1897,493,9,207,162,198,Contributor 121 | Victor Nguyen,0,522,140,16,73,77,60,1320,166,17,4,730,185,22,93,106,86,Contributor 122 | Timothy Barker,7,359,80,15,45,48,63,682,93,13,7,1493,359,61,176,202,175,Franchise Player 123 | Edward Becker,4,370,96,21,49,46,60,137,5,9,15,6986,1972,231,1070,955,921,Franchise Player 124 | Ted Cruz,7,490,125,24,81,105,62,869,62,8,13,6063,1646,271,847,999,680,Superstar 125 | Guillermo Gonzales,5,312,68,2,32,22,24,86,150,15,1,312,68,2,32,22,24,Role Player 126 | Maurice Davidson,7,374,94,5,36,26,62,756,64,15,7,1968,519,26,181,199,288,Superstar 127 | Wallace Mcdonald,3,461,112,18,54,54,35,111,226,11,2,680,160,24,76,75,49,Role Player 128 | Rafael Hines,3,376,82,21,42,60,35,0,0,0,5,1770,408,115,238,299,157,Contributor 129 | Elijah Byrd,7,315,76,13,35,60,25,498,39,13,3,630,151,24,68,94,55,Role Player 130 | Anthony Quinn,7,127,32,8,16,22,14,202,22,2,8,727,180,24,67,82,56,Role Player 131 | Austin Underwood,7,254,68,2,28,26,22,359,30,4,6,999,236,21,108,117,118,Contributor 132 | Gregg Moran,4,438,103,2,65,32,71,276,7,9,2,440,103,2,67,32,71,Role Player 133 | Jay Lee,6,403,101,12,45,53,39,316,6,5,12,5150,1429,166,747,666,526,Franchise Player 134 | Cameron James,0,593,172,22,82,100,57,1222,139,15,1,593,172,22,82,100,57,Role Player 135 | Randolph 
Morgan,0,495,151,17,61,84,78,1045,88,13,10,5624,1679,275,884,1015,709,Superstar 136 | Johnathan Gilbert,7,327,84,22,53,62,38,483,48,6,10,4273,1123,212,577,700,334,Superstar 137 | Tyler Harper,4,369,93,9,43,42,49,149,1,6,5,1258,323,54,181,177,157,Contributor 138 | Rolando Turner,11,496,141,20,65,78,37,200,11,3,11,5628,1575,225,828,838,354,Franchise Player 139 | Lonnie Valdez,7,474,129,10,50,56,40,732,83,13,10,2331,604,61,246,327,166,Franchise Player 140 | Cedric Barton,4,479,133,10,48,72,55,237,5,4,17,7472,2147,153,980,1032,854,Superstar 141 | Lloyd Wallace,3,616,163,27,83,107,32,110,308,15,3,1437,377,65,181,227,82,Contributor 142 | Lionel Gibbs,7,19,4,1,2,3,1,692,70,8,1,19,4,1,2,3,1,Superstar 143 | Wendell Mitchell,3,348,90,11,50,45,43,60,176,6,10,2288,614,43,295,273,269,Franchise Player 144 | Michael Rose,0,232,55,9,34,23,45,623,35,3,12,4405,1213,194,702,705,625,Superstar 145 | Craig Morales,5,687,213,10,91,65,27,294,445,13,4,1518,448,15,196,137,89,Contributor 146 | Mike Jackson,11,466,108,33,75,86,72,286,8,8,3,652,142,44,102,109,102,Contributor 147 | Hubert Rodgers,2,523,135,8,52,44,52,367,475,19,9,3368,895,39,377,284,296,Franchise Player 148 | Barry Burton,1,205,52,8,31,27,17,155,3,2,12,5134,1323,56,643,445,459,Contributor 149 | Pablo Boone,9,459,113,20,59,57,68,0,0,0,12,5348,1369,155,713,660,735,Franchise Player 150 | Josh Jackson,7,288,65,8,30,36,27,259,30,10,9,2815,698,55,315,325,189,Franchise Player 151 | Ben Mack,3,537,147,23,58,88,47,92,257,20,10,2744,730,97,302,351,174,Contributor 152 | Salvatore Dunn,1,184,47,5,20,28,18,49,2,0,11,3327,890,74,419,382,304,Franchise Player 153 | Ramiro Gordon,3,441,128,16,70,73,80,97,218,16,14,6675,2095,209,1072,1050,695,Superstar 154 | Joseph Erickson,3,580,207,8,107,71,105,121,267,19,5,2778,978,32,474,322,417,Superstar 155 | Levi Beck,1,181,58,6,34,23,22,88,0,3,1,181,58,6,34,23,22,Role Player 156 | Brandon Hansen,1,343,103,6,48,36,40,211,56,13,15,4338,1193,70,581,421,325,Franchise Player 157 | Peter Wong,5,394,86,1,38,28,36,203,369,16,4,1089,267,3,94,71,76,Contributor 158 | Mark Alvarado,0,479,130,18,66,72,76,880,82,14,3,1624,457,63,224,266,263,Franchise Player 159 | Neil Coleman,1,236,56,6,41,19,21,172,1,4,5,1257,329,24,166,125,105,Role Player 160 | Cory Walsh,1,309,94,5,37,32,26,161,3,3,13,4618,1330,57,616,522,436,Franchise Player 161 | Danny Clarke,2,579,174,7,67,78,58,280,479,5,6,3053,880,32,366,337,218,Franchise Player 162 | Wm Mullins,6,539,139,5,93,58,69,462,9,7,5,1469,369,12,247,126,198,Contributor 163 | Garrett Wheeler,11,540,135,30,82,88,55,157,6,14,1,540,135,30,82,88,55,Role Player 164 | Shaun Summers,7,429,91,12,41,42,57,686,46,4,13,5590,1397,83,578,579,644,Superstar 165 | Tracy Patton,3,503,136,5,62,48,83,65,258,8,10,3423,970,20,408,303,414,Franchise Player 166 | Roman Becker,3,151,41,4,26,21,19,28,56,2,2,288,68,9,45,39,35,Role Player 167 | Ronald Patterson,6,631,170,9,77,44,31,408,4,3,11,4908,1457,30,775,357,249,Superstar 168 | Hubert Sharp,4,246,76,5,35,39,13,44,0,1,6,912,234,12,102,96,80,Contributor 169 | Daryl Watts,7,193,47,10,21,29,24,299,13,5,6,1136,256,42,129,139,106,Contributor 170 | Sheldon Barber,0,551,171,13,94,83,94,1199,149,5,13,6090,1840,128,969,900,917,Superstar 171 | Kenneth Jefferson,0,237,52,0,15,25,30,523,43,6,24,14053,4256,160,2165,1314,1566,Franchise Player 172 | Guy Wilkins,5,309,72,0,33,31,26,117,269,12,5,354,82,0,41,32,26,Role Player 173 | Jared Lewis,7,211,43,10,26,35,39,463,32,8,3,498,116,14,59,55,78,Role Player 174 | Steven Drake,3,382,119,13,54,58,36,59,156,9,12,2133,594,41,287,294,227,Franchise Player 
175 | Albert Bush,9,328,91,12,51,43,33,145,59,8,2,342,94,12,51,44,33,Role Player 176 | Alex Sherman,2,625,179,4,94,60,65,303,450,14,5,1696,476,12,216,163,166,Franchise Player 177 | Christopher Cunningham,2,566,154,22,76,84,43,316,439,10,14,6100,1583,131,743,693,300,Franchise Player 178 | Oscar Marshall,3,437,123,9,62,55,40,82,170,15,9,4139,1203,79,676,390,364,Superstar 179 | Lewis Bishop,2,591,157,16,90,78,26,290,440,25,4,2020,541,52,310,226,91,Franchise Player 180 | Loren Knight,2,549,149,7,73,47,42,255,450,17,1,549,149,7,73,47,42,Role Player 181 | Hector Sanchez,11,663,200,29,108,121,32,241,8,6,4,1447,404,57,210,222,68,Contributor 182 | Calvin Woods,4,340,84,11,62,33,47,185,8,4,5,1516,376,42,284,141,219,Contributor 183 | Richard Logan,9,416,113,24,58,69,16,203,70,10,1,416,113,24,58,69,16,Role Player 184 | Kevin Davis,11,489,131,19,77,55,34,310,9,9,7,2051,549,62,300,263,153,Superstar 185 | Ricardo Pratt,1,265,68,8,26,30,29,92,5,3,7,1337,339,32,135,163,128,Contributor 186 | Leroy Carter,7,327,68,13,42,29,45,659,53,7,18,3949,939,78,438,380,466,Contributor 187 | Charles Walters,3,271,77,5,35,29,33,62,90,3,12,4933,1358,48,630,435,403,Franchise Player 188 | Wade Schmidt,7,202,53,4,31,26,27,304,45,11,9,1876,467,15,192,186,161,Franchise Player 189 | Russell Lloyd,4,205,57,8,34,32,9,58,4,4,5,756,192,32,117,107,51,Contributor 190 | Marion Mccarthy,2,424,119,6,57,46,13,224,286,8,9,3651,1046,32,461,301,112,Superstar 191 | Conrad Palmer,6,419,108,6,55,36,22,226,7,4,3,591,149,8,80,46,31,Role Player 192 | Charlie Bell,6,560,161,26,89,96,66,332,9,8,4,1789,470,65,233,260,155,Franchise Player 193 | Max Hunter,7,204,49,6,23,25,12,419,46,5,7,1309,308,27,126,132,66,Contributor 194 | Jeremiah Weaver,5,236,56,0,27,15,11,125,199,13,4,1115,270,1,116,64,57,Contributor 195 | Clarence Becker,0,677,238,31,117,113,53,1377,100,6,5,2223,737,93,349,401,171,Superstar 196 | Ted Mcdaniel,5,339,96,4,37,29,23,104,213,9,4,1064,290,11,123,108,55,Contributor 197 | Randy Welch,0,321,87,10,39,42,30,805,40,4,2,396,101,12,48,46,33,Role Player 198 | Barry Russell,4,641,198,31,101,108,41,269,17,10,5,2129,610,92,297,319,117,Superstar 199 | Billy Adkins,7,258,60,8,28,33,18,358,32,8,3,638,170,17,80,75,36,Role Player 200 | Dwight Parker,4,442,131,18,68,77,33,233,7,7,6,1416,398,47,210,203,136,Franchise Player 201 | Julio Garner,1,278,70,7,22,37,18,0,0,0,18,7186,2081,190,935,1088,643,Contributor 202 | Cecil Todd,1,214,53,2,30,29,23,109,7,3,2,226,59,2,32,32,27,Role Player 203 | Michael Zimmerman,5,233,49,2,41,23,18,102,132,10,8,1350,336,7,166,122,106,Contributor 204 | Kent Wade,3,416,132,7,57,49,33,73,177,18,3,932,273,24,113,121,80,Contributor 205 | Alton Reyes,0,181,41,1,15,21,33,326,29,5,2,232,50,4,20,29,45,Role Player 206 | Karl Adams,6,572,152,18,105,49,65,325,13,3,2,978,249,36,168,91,101,Contributor 207 | Joseph Rodriguez,3,354,77,16,36,55,41,83,174,16,20,8716,2172,384,1172,1267,1057,Contributor 208 | Jamie Newton,0,610,186,19,107,98,74,1182,96,13,6,2728,753,69,399,366,286,Superstar 209 | Todd Romero,0,484,127,20,66,65,67,1231,80,7,7,3006,844,116,436,458,377,Superstar 210 | Cesar Watkins,6,431,127,8,77,45,58,283,8,3,2,667,187,9,117,64,88,Contributor 211 | Kelly Clark,5,594,169,4,74,51,35,282,421,25,11,4408,1133,19,501,336,194,Franchise Player 212 | Omar Townsend,2,445,99,1,46,24,29,278,415,16,4,618,129,1,72,31,48,Role Player 213 | Wilbert Kim,0,507,122,29,78,85,91,808,108,2,18,7761,1947,347,1175,1152,1380,Franchise Player 214 | Toby Cortez,4,380,120,5,54,51,31,237,8,1,8,3118,900,92,444,419,240,Franchise Player 215 | Carl 
May,0,325,76,16,33,52,37,726,87,3,5,1506,351,71,195,219,214,Contributor 216 | Ellis Gregory,0,629,168,18,73,102,40,1067,157,14,18,8424,2464,164,1008,1072,402,Superstar 217 | William Barker,1,213,61,4,17,22,3,178,45,4,17,4061,1145,83,488,491,244,Contributor 218 | Irving Love,4,338,92,18,42,60,21,0,0,0,3,682,185,36,88,112,50,Role Player 219 | Randall Gonzalez,5,306,104,14,50,58,25,116,222,15,7,2954,822,55,313,377,187,Franchise Player 220 | Claude Lucas,5,530,159,3,82,50,47,196,354,15,6,1619,426,11,218,149,163,Franchise Player 221 | Austin Porter,5,472,118,12,63,54,30,228,377,26,4,793,187,14,102,80,50,Role Player 222 | Javier Jordan,7,472,116,16,60,62,74,518,55,3,6,1924,489,67,242,251,240,Franchise Player 223 | Ruben Harris,2,633,210,6,91,56,59,367,432,16,6,3070,872,19,420,230,274,Role Player 224 | Ramiro Lawrence,5,547,137,2,58,47,12,261,459,22,2,1038,271,3,129,80,24,Role Player 225 | Johnny Gonzales,11,475,123,27,76,93,72,226,10,6,4,1810,471,108,292,343,267,Superstar 226 | Alex Walters,6,522,163,9,82,46,62,352,9,1,13,7037,2019,153,1043,827,535,Superstar 227 | Archie Bass,7,216,56,4,22,18,15,391,44,4,12,2796,665,43,266,304,198,Contributor 228 | Percy Rodriguez,2,464,128,28,67,94,52,0,0,0,13,5829,1552,210,740,840,452,Franchise Player 229 | Wayne Greene,6,497,127,7,65,48,37,325,9,3,5,2703,806,32,379,311,138,Franchise Player 230 | Hugh Vasquez,3,379,106,10,38,60,30,72,170,24,14,6207,1906,146,859,803,571,Superstar 231 | Grady Willis,2,584,157,20,95,73,63,276,421,11,10,4704,1320,93,724,522,576,Contributor 232 | Martin Neal,5,453,101,3,46,43,61,249,444,16,3,948,218,6,96,72,91,Contributor 233 | Pedro Marsh,11,642,211,14,107,59,52,337,19,4,5,2364,770,27,352,230,193,Franchise Player 234 | Jessie Cunningham,6,199,53,5,29,22,21,152,3,5,3,514,120,8,57,40,39,Role Player 235 | Stephen Woods,3,520,120,17,53,44,21,70,144,11,4,927,227,22,106,80,52,Role Player 236 | Glen Houston,10,199,52,9,26,28,21,235,22,5,6,805,191,30,113,119,87,Contributor 237 | Stewart Gray,0,557,142,21,58,81,23,1160,53,7,18,8759,2583,271,1138,1299,478,Superstar 238 | Jeffrey Wallace,10,401,92,17,49,66,65,0,0,0,13,5206,1332,253,784,890,866,Superstar 239 | Travis Copeland,11,586,159,12,72,79,53,181,13,4,9,3082,880,83,363,477,295,Superstar 240 | Dave Moore,1,283,77,14,45,47,26,144,6,5,16,6840,1910,259,915,1067,546,Franchise Player 241 | Dale Meyer,10,257,66,5,31,26,32,87,166,14,14,3910,979,33,518,324,382,Contributor 242 | Darrell Payne,9,419,101,18,65,58,92,0,0,0,20,9528,2510,548,1509,1659,1342,Franchise Player 243 | Tyler Black,1,172,42,3,17,14,15,65,0,0,10,4086,1150,57,579,363,406,Superstar 244 | Doug Rice,7,457,101,14,42,63,22,389,39,4,17,6521,1767,281,1003,977,619,Superstar 245 | Phillip Lewis,2,360,81,5,37,44,37,170,284,3,7,2268,566,41,279,257,246,Franchise Player 246 | Boyd Brock,3,512,131,26,69,96,52,119,216,12,14,5347,1397,221,712,815,548,Superstar 247 | Ron Silva,4,126,27,3,8,10,5,190,2,9,4,239,49,3,16,13,14,Role Player 248 | Norman Arnold,1,185,40,4,23,11,18,97,2,2,3,524,125,7,58,37,47,Role Player 249 | Trevor Carr,7,341,110,9,45,49,46,251,9,4,9,2331,658,50,249,322,274,Franchise Player 250 | Steven Perez,2,185,37,1,23,8,21,76,127,7,2,214,42,1,30,9,24,Role Player 251 | Delbert Lowe,5,190,46,2,24,8,15,102,177,16,5,479,102,5,65,23,39,Role Player 252 | Erick Barrett,0,473,154,6,61,48,29,846,84,9,6,1966,566,29,250,252,178,Franchise Player 253 | Matt Spencer,2,562,169,17,88,73,53,351,442,17,8,3181,841,61,450,342,373,Franchise Player 254 | Dean Vargas,5,528,122,1,67,45,51,209,372,17,4,1716,403,12,211,146,155,Contributor 255 | Irvin 
Christensen,1,405,102,18,49,85,20,161,10,3,6,950,231,29,99,138,64,Contributor 256 | Benny Young,0,408,117,11,66,41,34,942,72,11,1,408,117,11,66,41,34,Role Player 257 | Willis Pratt,6,576,167,8,89,49,57,325,12,8,4,822,232,19,132,83,79,Contributor 258 | Darrell Lloyd,11,570,169,21,72,88,38,295,15,5,7,3754,1077,140,492,589,263,Superstar 259 | Gerald Ramirez,3,208,57,8,32,25,18,42,94,13,3,653,170,17,98,54,62,Role Player 260 | Nathan Pittman,11,637,174,31,89,116,56,278,9,9,14,6727,2024,247,978,1093,495,Superstar 261 | Hugh Mathis,7,303,71,3,18,30,36,468,47,6,3,344,76,3,20,36,45,Role Player 262 | Bradford Hernandez,2,559,141,2,48,61,73,352,414,9,8,3162,874,16,421,349,359,Superstar 263 | Lawrence Graham,5,458,114,13,67,57,48,246,389,18,4,1350,298,28,160,123,122,Franchise Player 264 | Grant Anderson,6,680,223,31,119,96,34,429,8,6,3,1928,587,35,262,201,91,Contributor 265 | -------------------------------------------------------------------------------- /Session04/NeuralNetwork/first_pic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/NeuralNetwork/first_pic.PNG -------------------------------------------------------------------------------- /Session04/NeuralNetwork/forth_pic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/NeuralNetwork/forth_pic.PNG -------------------------------------------------------------------------------- /Session04/NeuralNetwork/neural_network_challenge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Mining\n", 8 | "\n", 9 | "## Neural Network\n", 10 | "\n", 11 | "### After completing materials of this notebook, you should be able to:\n", 12 | "\n", 13 | "* Explain what a neural network is, how it is used and the benefits of using it.\n", 14 | "* Recognize the necessary format for data in order to perform neural network data mining.\n", 15 | "* Develop a neural network data mining model using a training data set.\n", 16 | "* Interpret the model’s outputs and apply them to a scoring data set in order to deploy the model.\n", 17 | "\n", 18 | "#### ORGANIZATIONAL UNDERSTANDING\n", 19 | " we want to to mine a data set of all current players in the league in order to find those prospects that can bring the most excitement, scoring and defense to the team in order to reach the league championship.\n", 20 | "#### Data Understanding\n", 21 | "\n", 22 | "* __Player_Name__: This is the player’s name. In our data preparation phase, we will pop it, since it is not predictive in any way, but is important to keep in our data set so that we can quickly make our recommendations without having to match the data back to the players’ names later.\n", 23 | "* __Position_ID__: For the sport Juan’s team plays, there are 12 possible positions. Each one is represented as an integer from 0 to 11 in the data sets. 
\n", 24 | "* __Shots__: This the total number of shots, or scoring opportunities each player took in their most recent season.\n", 25 | "* __Makes__: This is the number times the athlete scored when shooting during the most recent season.\n", 26 | "* __Personal_Points__: This is the number of points the athlete personally scored during the most recent season.\n", 27 | "* __Total_Points__: This is the total number of points the athlete contributed to scoring in the most recent season. In the sport our team plays, this statistic is recorded for each point an athlete contributes to scoring. In other words, each time an athlete scores a personal point, their total points increase by one, and every time an athlete contributes to a teammate scoring, their total points increase by one as well.\n", 28 | "* __Assists__: This is a defensive statistic indicating the number of times the athlete helped his team get the ball away from the opposing team during the most recent season. \n", 29 | "* __Concessions__: This is the number of times the athlete’s play directly caused the opposing team to concede an offensive advantage during the most recent season. \n", 30 | "* __Blocks__: This is the number of times the athlete directly and independently blocked the opposing team’s shot during the most recent season.\n", 31 | "* __Block_Assists__: This is the number of times an athlete collaborated with a teammate to block the opposing team’s shot during the most recent season. If recorded as a block assist, two or more players must have been involved. If only one player blocked the shot, it is recorded as a block. Since the playing surface is large and the players are spread out, it is much more likely for an athlete to record a block than for two or more to record block assists.\n", 32 | "* __Fouls__: This is the number of times, in the most recent season, that the athlete committed a foul. Since fouling the other team gives them an advantage, the lower this number, the better the athlete’s performance for his own team.\n", 33 | "* __Years_Pro__: In the training data set, this is the number of years the athlete has played at the professional level. In the scoring data set, this is the number of year experience the athlete has, including years as a professional if any, and years in organized, competitive amateur leagues.\n", 34 | "* __Career_Shots__: This is the same as the Shots attribute, except it is cumulative for the athlete’s entire career. All career attributes are an attempt to assess the person’s ability to perform consistently over time.\n", 35 | "* __Career_Makes__: This is the same as the Makes attribute, except it is cumulative for the athlete’s entire career.\n", 36 | "* __Career_PP__: This is the same as the Personal Points attribute, except it is cumulative for the athlete’s entire career.\n", 37 | "* __Career_TP__: This is the same as the Total Points attribute, except it is cumulative for the athlete’s entire career.\n", 38 | "* __Career_Assists__: This is the same as the Career Assists attribute, except it is cumulative for the athlete’s entire career.\n", 39 | "* __Career_Con__: This is the same as the Career Concessions attribute, except it is cumulative for the athlete’s entire career.\n", 40 | "* __Team_Value__: This is a categorical attribute summarizing the athlete’s value to his team. It is present only in the training data, as it will serve as our label to predict a Team_Value for each observation in the scoring data set. 
There are four categories:\n", 41 | " * __Role Player__: This is an athlete who is good enough to play at the professional level, and may be really good in one area, but is not excellent overall.\n", 42 | " * __Contributor__: This is an athlete who contributes across several categories of defense and offense and can be counted on to regularly help the team win.\n", 43 | " * __Franchise Player__: This is an athlete whose skills are so broad, strong and consistent that the team will want to hang on to them for a long time. These players are of such a talent level that they can form the foundation of a really good, competitive team.\n", 44 | " * __Superstar__: This is that rare individual whose gifts are so superior that they make a difference in every game. Most teams in the league will have one such player, but teams with two or three always contend for the league title.\n", 45 | "\n", 46 | "#### Data Preparation" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# data preparation" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "#### Modeling\n", 63 | "\n", 64 | "class sklearn.__neural_network.MLPClassifier__(hidden_layer_sizes=(100, ), activation=’relu’, solver=’adam’, alpha=0.0001, batch_size=’auto’, learning_rate=’constant’, learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)\n", 65 | "\n", 66 | "__Parameters__:\t\n", 67 | "\n", 68 | "__hidden_layer_sizes__ : tuple, length = n_layers - 2, default (100,)\n", 69 | "\n", 70 | " The ith element represents the number of neurons in the ith hidden layer.\n", 71 | "\n", 72 | "__activation__ : {‘identity’, ‘logistic’, ‘tanh’, ‘relu’}, default ‘relu’\n", 73 | "\n", 74 | " Activation function for the hidden layer.\n", 75 | "\n", 76 | " ‘identity’, no-op activation, useful to implement linear bottleneck, returns f(x) = x\n", 77 | " ‘logistic’, the logistic sigmoid function, returns f(x) = 1 / (1 + exp(-x)).\n", 78 | " ‘tanh’, the hyperbolic tan function, returns f(x) = tanh(x).\n", 79 | " ‘relu’, the rectified linear unit function, returns f(x) = max(0, x)\n", 80 | "\n", 81 | "__solver__ : {‘lbfgs’, ‘sgd’, ‘adam’}, default ‘adam’\n", 82 | "\n", 83 | " The solver for weight optimization.\n", 84 | "\n", 85 | " ‘lbfgs’ is an optimizer in the family of quasi-Newton methods.\n", 86 | " ‘sgd’ refers to stochastic gradient descent.\n", 87 | " ‘adam’ refers to a stochastic gradient-based optimizer proposed by Diederik Kingma and Jimmy Ba\n", 88 | "\n", 89 | " Note: The default solver ‘adam’ works pretty well on relatively large datasets (with thousands of training samples or more) in terms of both training time and validation score. For small datasets, however, ‘lbfgs’ can converge faster and perform better.\n", 90 | "\n", 91 | "__alpha__ : float, optional, default 0.0001\n", 92 | "\n", 93 | " L2 penalty (regularization term) parameter.\n", 94 | "\n", 95 | "__batch_size__ : int, optional, default ‘auto’\n", 96 | "\n", 97 | " Size of minibatches for stochastic optimizers. If the solver is ‘lbfgs’, the classifier will not use minibatch. 
When set to “auto”, batch_size=min(200, n_samples)\n", 98 | "\n", 99 | "__learning_rate__ : {‘constant’, ‘invscaling’, ‘adaptive’}, default ‘constant’\n", 100 | "\n", 101 | " Learning rate schedule for weight updates.\n", 102 | "\n", 103 | " ‘constant’ is a constant learning rate given by ‘learning_rate_init’.\n", 104 | " ‘invscaling’ gradually decreases the learning rate learning_rate_ at each time step ‘t’ using an inverse scaling exponent of ‘power_t’. effective_learning_rate = learning_rate_init / pow(t, power_t)\n", 105 | " ‘adaptive’ keeps the learning rate constant to ‘learning_rate_init’ as long as training loss keeps decreasing. Each time two consecutive epochs fail to decrease training loss by at least tol, or fail to increase validation score by at least tol if ‘early_stopping’ is on, the current learning rate is divided by 5.\n", 106 | "\n", 107 | " Only used when solver='sgd'.\n", 108 | "\n", 109 | "__learning_rate_init__ : double, optional, default 0.001\n", 110 | "\n", 111 | " The initial learning rate used. It controls the step-size in updating the weights. Only used when solver=’sgd’ or ‘adam’.\n", 112 | "\n", 113 | "__power_t__ : double, optional, default 0.5\n", 114 | "\n", 115 | " The exponent for inverse scaling learning rate. It is used in updating effective learning rate when the learning_rate is set to ‘invscaling’. Only used when solver=’sgd’.\n", 116 | "\n", 117 | "__max_iter__ : int, optional, default 200\n", 118 | "\n", 119 | " Maximum number of iterations. The solver iterates until convergence (determined by ‘tol’) or this number of iterations. For stochastic solvers (‘sgd’, ‘adam’), note that this determines the number of epochs (how many times each data point will be used), not the number of gradient steps.\n", 120 | "\n", 121 | "__shuffle__ : bool, optional, default True\n", 122 | "\n", 123 | " Whether to shuffle samples in each iteration. Only used when solver=’sgd’ or ‘adam’.\n", 124 | "\n", 125 | "__random_state__ : int, RandomState instance or None, optional, default None\n", 126 | "\n", 127 | " If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by np.random.\n", 128 | "\n", 129 | "__tol__ : float, optional, default 1e-4\n", 130 | "\n", 131 | " Tolerance for the optimization. When the loss or score is not improving by at least tol for two consecutive iterations, unless learning_rate is set to ‘adaptive’, convergence is considered to be reached and training stops.\n", 132 | "\n", 133 | "__verbose__ : bool, optional, default False\n", 134 | "\n", 135 | " Whether to print progress messages to stdout.\n", 136 | "\n", 137 | "__warm_start__ : bool, optional, default False\n", 138 | "\n", 139 | " When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.\n", 140 | "\n", 141 | "__momentum__ : float, default 0.9\n", 142 | "\n", 143 | " Momentum for gradient descent update. Should be between 0 and 1. Only used when solver=’sgd’.\n", 144 | "\n", 145 | "__nesterovs_momentum__ : boolean, default True\n", 146 | "\n", 147 | " Whether to use Nesterov’s momentum. Only used when solver=’sgd’ and momentum > 0.\n", 148 | "\n", 149 | "__early_stopping__ : bool, default False\n", 150 | "\n", 151 | " Whether to use early stopping to terminate training when validation score is not improving. 
If set to true, it will automatically set aside 10% of training data as validation and terminate training when the validation score is not improving by at least tol for two consecutive epochs. Only effective when solver=’sgd’ or ‘adam’\n", 152 | "\n", 153 | "__validation_fraction__ : float, optional, default 0.1\n", 154 | "\n", 155 | " The proportion of training data to set aside as a validation set for early stopping. Must be between 0 and 1. Only used if early_stopping is True\n", 156 | "\n", 157 | "__beta_1__ : float, optional, default 0.9\n", 158 | "\n", 159 | " Exponential decay rate for estimates of first moment vector in adam, should be in [0, 1). Only used when solver=’adam’\n", 160 | "\n", 161 | "__beta_2__ : float, optional, default 0.999\n", 162 | "\n", 163 | " Exponential decay rate for estimates of second moment vector in adam, should be in [0, 1). Only used when solver=’adam’\n", 164 | "\n", 165 | "__epsilon__ : float, optional, default 1e-8\n", 166 | "\n", 167 | " Value for numerical stability in adam. Only used when solver=’adam’\n", 168 | "\n", 169 | "__Attributes__:\t\n", 170 | "\n", 171 | "__classes___ : array or list of array of shape (n_classes,)\n", 172 | "\n", 173 | " Class labels for each output.\n", 174 | "\n", 175 | "__loss___ : float\n", 176 | "\n", 177 | " The current loss computed with the loss function.\n", 178 | "\n", 179 | "__coefs___ : list, length n_layers - 1\n", 180 | "\n", 181 | " The ith element in the list represents the weight matrix corresponding to layer i.\n", 182 | "\n", 183 | "__intercepts___ : list, length n_layers - 1\n", 184 | "\n", 185 | " The ith element in the list represents the bias vector corresponding to layer i + 1.\n", 186 | "\n", 187 | "__n_iter___ : int\n", 188 | "\n", 189 | " The number of iterations the solver has run.\n", 190 | "\n", 191 | "__n_layers___ : int\n", 192 | "\n", 193 | " Number of layers.\n", 194 | "\n", 195 | "__n_outputs___ : int\n", 196 | "\n", 197 | " Number of outputs.\n", 198 | "\n", 199 | "__out_activation___ : string\n", 200 | "\n", 201 | " Name of the output activation function.\n" 202 | ] 203 | } 204 | ], 205 | "metadata": { 206 | "kernelspec": { 207 | "display_name": "Python 3", 208 | "language": "python", 209 | "name": "python3" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.7.4" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 2 226 | } 227 | -------------------------------------------------------------------------------- /Session04/NeuralNetwork/second_pic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/NeuralNetwork/second_pic.PNG -------------------------------------------------------------------------------- /Session04/NeuralNetwork/third_pic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session04/NeuralNetwork/third_pic.PNG -------------------------------------------------------------------------------- /Session05/hyper-parameter.pptx: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session05/hyper-parameter.pptx -------------------------------------------------------------------------------- /Session05/imbalance-dataset.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session05/imbalance-dataset.pptx -------------------------------------------------------------------------------- /Session05/metrics.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session05/metrics.pptx -------------------------------------------------------------------------------- /Session06/K-means.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session06/K-means.pptx -------------------------------------------------------------------------------- /Session06/KNN.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session06/KNN.ppt -------------------------------------------------------------------------------- /Session06/k-means/k_means_dataset.csv: -------------------------------------------------------------------------------- 1 | Weight,Cholesterol,Gender 2 | 102,111,1 3 | 115,135,1 4 | 115,136,1 5 | 140,167,0 6 | 130,158,1 7 | 198,227,1 8 | 114,131,1 9 | 145,176,0 10 | 191,223,0 11 | 186,221,1 12 | 104,116,0 13 | 188,222,1 14 | 96,102,0 15 | 156,192,0 16 | 125,152,0 17 | 178,213,0 18 | 109,125,0 19 | 168,204,1 20 | 152,189,0 21 | 133,163,0 22 | 153,189,0 23 | 107,122,0 24 | 199,228,1 25 | 140,168,0 26 | 95,102,1 27 | 183,218,0 28 | 108,123,0 29 | 190,222,0 30 | 174,208,1 31 | 149,183,1 32 | 169,204,1 33 | 138,167,1 34 | 151,188,1 35 | 109,126,1 36 | 178,213,0 37 | 106,122,1 38 | 195,225,1 39 | 96,105,1 40 | 129,155,0 41 | 166,203,1 42 | 197,225,1 43 | 148,177,1 44 | 117,139,1 45 | 193,224,0 46 | 170,207,1 47 | 130,158,0 48 | 183,218,1 49 | 134,164,0 50 | 128,154,0 51 | 105,118,0 52 | 115,138,0 53 | 166,199,1 54 | 183,219,1 55 | 149,183,1 56 | 110,128,0 57 | 164,197,0 58 | 157,196,0 59 | 170,208,0 60 | 124,152,1 61 | 145,175,1 62 | 113,129,0 63 | 150,185,0 64 | 100,107,1 65 | 178,211,1 66 | 139,167,0 67 | 150,185,1 68 | 101,110,1 69 | 157,194,1 70 | 187,221,0 71 | 137,166,0 72 | 121,143,1 73 | 132,163,0 74 | 130,158,1 75 | 156,191,0 76 | 180,216,0 77 | 100,106,1 78 | 122,146,0 79 | 185,220,0 80 | 113,131,0 81 | 123,147,0 82 | 119,141,0 83 | 113,130,1 84 | 168,204,1 85 | 126,152,1 86 | 102,111,1 87 | 116,139,1 88 | 144,172,1 89 | 140,168,0 90 | 124,148,0 91 | 108,123,0 92 | 154,189,0 93 | 185,220,0 94 | 106,120,1 95 | 97,106,1 96 | 96,105,0 97 | 129,157,1 98 | 124,149,1 99 | 149,185,0 100 | 97,106,0 101 | 102,115,1 102 | 146,177,1 103 | 199,230,0 104 | 100,108,0 105 | 118,140,0 106 | 179,214,1 107 | 179,216,1 108 | 113,130,0 109 | 142,169,0 110 | 101,110,1 111 | 174,209,1 112 | 131,162,1 113 | 179,214,1 114 | 122,145,1 115 | 105,116,1 116 | 176,209,0 117 | 103,116,1 118 | 120,139,1 119 | 118,143,1 120 | 143,168,0 121 | 134,158,1 122 | 203,232,1 123 | 116,136,1 124 | 150,180,0 125 | 192,228,0 126 | 
187,222,1 127 | 107,117,0 128 | 192,224,1 129 | 97,104,0 130 | 157,195,0 131 | 128,156,0 132 | 183,213,0 133 | 112,133,0 134 | 173,208,1 135 | 156,195,0 136 | 137,163,0 137 | 156,195,0 138 | 112,124,0 139 | 203,231,1 140 | 141,172,0 141 | 97,104,1 142 | 186,221,0 143 | 113,128,0 144 | 195,222,0 145 | 175,214,1 146 | 154,184,1 147 | 173,204,1 148 | 141,171,1 149 | 152,195,1 150 | 114,132,1 151 | 181,214,0 152 | 111,123,1 153 | 200,233,1 154 | 100,107,1 155 | 134,162,0 156 | 167,204,1 157 | 198,227,1 158 | 151,185,1 159 | 121,142,1 160 | 196,225,0 161 | 171,212,1 162 | 135,161,0 163 | 186,220,1 164 | 138,168,0 165 | 133,155,0 166 | 107,125,0 167 | 120,144,0 168 | 168,201,1 169 | 184,222,1 170 | 154,189,1 171 | 115,132,0 172 | 168,204,0 173 | 161,201,0 174 | 171,211,0 175 | 129,156,1 176 | 150,179,1 177 | 118,129,0 178 | 154,185,0 179 | 103,107,1 180 | 180,213,1 181 | 141,172,0 182 | 152,193,1 183 | 104,114,1 184 | 159,202,1 185 | 191,222,0 186 | 142,168,0 187 | 123,146,1 188 | 135,170,0 189 | 132,165,1 190 | 158,196,0 191 | 181,224,0 192 | 105,108,1 193 | 126,149,0 194 | 186,224,0 195 | 116,136,0 196 | 124,152,0 197 | 122,143,0 198 | 115,135,1 199 | 173,211,1 200 | 128,156,1 201 | 103,115,1 202 | 120,143,1 203 | 146,179,1 204 | 145,173,0 205 | 125,148,0 206 | 109,127,0 207 | 156,194,0 208 | 187,225,0 209 | 110,127,1 210 | 100,106,1 211 | 97,112,0 212 | 133,164,1 213 | 128,151,1 214 | 154,191,0 215 | 100,110,0 216 | 104,116,1 217 | 149,180,1 218 | 203,235,0 219 | 104,108,0 220 | 119,141,0 221 | 181,219,1 222 | 180,219,1 223 | 116,138,0 224 | 144,169,0 225 | 102,116,1 226 | 176,212,1 227 | 135,162,1 228 | 184,220,1 229 | 124,151,1 230 | 110,119,1 231 | 180,212,0 232 | 103,111,1 233 | 117,142,1 234 | 120,138,1 235 | 145,172,0 236 | 133,158,1 237 | 201,228,1 238 | 117,134,1 239 | 146,181,0 240 | 195,231,0 241 | 188,223,1 242 | 108,121,0 243 | 191,222,1 244 | 100,102,0 245 | 158,195,0 246 | 127,156,0 247 | 181,214,0 248 | 113,132,0 249 | 171,204,1 250 | 153,190,0 251 | 137,167,0 252 | 154,195,0 253 | 112,127,0 254 | 201,235,1 255 | 141,171,0 256 | 100,110,1 257 | 185,221,0 258 | 111,128,0 259 | 191,226,0 260 | 176,208,1 261 | 150,191,1 262 | 171,205,1 263 | 140,173,1 264 | 153,191,1 265 | 111,128,1 266 | 180,220,0 267 | 107,126,1 268 | 197,232,1 269 | 101,108,1 270 | 132,159,0 271 | 168,211,1 272 | 200,232,1 273 | 150,182,1 274 | 119,145,1 275 | 197,224,0 276 | 172,209,1 277 | 134,160,0 278 | 185,219,1 279 | 135,170,0 280 | 132,161,0 281 | 109,122,0 282 | 116,141,0 283 | 170,204,1 284 | 187,226,1 285 | 154,187,1 286 | 111,129,0 287 | 169,199,0 288 | 161,199,0 289 | 174,214,0 290 | 125,159,1 291 | 148,176,1 292 | 117,129,0 293 | 154,187,0 294 | 104,109,1 295 | 183,212,1 296 | 142,170,0 297 | 155,191,1 298 | 105,116,1 299 | 160,202,1 300 | 191,226,0 301 | 140,166,0 302 | 123,151,1 303 | 136,170,0 304 | 132,166,1 305 | 158,193,0 306 | 182,218,0 307 | 104,109,1 308 | 126,152,0 309 | 188,220,0 310 | 118,133,0 311 | 127,152,0 312 | 123,142,0 313 | 118,131,1 314 | 169,204,1 315 | 128,153,1 316 | 106,114,1 317 | 119,144,1 318 | 147,172,1 319 | 141,176,0 320 | 125,150,0 321 | 109,129,0 322 | 158,190,0 323 | 190,226,0 324 | 110,128,1 325 | 99,106,1 326 | 98,107,0 327 | 134,159,1 328 | 129,157,1 329 | 154,192,0 330 | 100,110,0 331 | 107,115,1 332 | 149,183,1 333 | 203,233,0 334 | 101,112,0 335 | 119,146,0 336 | 182,222,1 337 | 181,216,1 338 | 115,134,0 339 | 145,174,0 340 | 102,114,1 341 | 176,211,1 342 | 134,167,1 343 | 183,214,1 344 | 123,145,1 345 | 108,120,1 346 | 177,217,0 347 | 104,113,1 348 | 
117,141,1 349 | 117,140,1 350 | 145,171,0 351 | 134,163,1 352 | 199,232,1 353 | 115,131,1 354 | 149,184,0 355 | 192,225,0 356 | 188,223,1 357 | 107,121,0 358 | 190,223,1 359 | 98,105,0 360 | 158,197,0 361 | 128,160,0 362 | 180,218,0 363 | 110,131,0 364 | 172,211,1 365 | 157,194,0 366 | 137,169,0 367 | 158,189,0 368 | 110,128,0 369 | 203,228,1 370 | 145,169,0 371 | 96,108,1 372 | 186,219,0 373 | 110,124,0 374 | 194,224,0 375 | 177,214,1 376 | 152,190,1 377 | 172,205,1 378 | 140,167,1 379 | 154,190,1 380 | 113,128,1 381 | 179,217,0 382 | 111,126,1 383 | 196,228,1 384 | 101,105,1 385 | 131,162,0 386 | 171,211,1 387 | 199,227,1 388 | 151,178,1 389 | 121,146,1 390 | 196,227,0 391 | 174,208,1 392 | 135,163,0 393 | 184,225,1 394 | 137,167,0 395 | 129,162,0 396 | 108,125,0 397 | 116,144,0 398 | 168,199,1 399 | 184,226,1 400 | 151,183,1 401 | 114,136,0 402 | 167,199,0 403 | 160,204,0 404 | 175,215,0 405 | 127,155,1 406 | 146,179,1 407 | 118,133,0 408 | 151,186,0 409 | 105,111,1 410 | 180,219,1 411 | 140,173,0 412 | 155,185,1 413 | 106,118,1 414 | 160,199,1 415 | 192,226,0 416 | 138,169,0 417 | 122,144,1 418 | 135,166,0 419 | 132,166,1 420 | 158,198,0 421 | 184,222,0 422 | 101,110,1 423 | 126,154,0 424 | 188,226,0 425 | 118,135,0 426 | 127,151,0 427 | 123,146,0 428 | 117,133,1 429 | 172,205,1 430 | 127,154,1 431 | 104,116,1 432 | 121,145,1 433 | 148,174,1 434 | 141,168,0 435 | 128,156,0 436 | 113,124,0 437 | 155,196,0 438 | 189,227,0 439 | 108,122,1 440 | 98,106,1 441 | 100,110,0 442 | 134,158,1 443 | 125,154,1 444 | 151,190,0 445 | 100,110,0 446 | 107,120,1 447 | 147,180,1 448 | 200,232,0 449 | 101,110,0 450 | 120,142,0 451 | 181,222,1 452 | 182,218,1 453 | 117,138,0 454 | 145,176,0 455 | 106,115,1 456 | 179,211,1 457 | 134,168,1 458 | 180,216,1 459 | 126,147,1 460 | 110,124,1 461 | 180,215,0 462 | 103,117,1 463 | 116,139,1 464 | 119,138,1 465 | 144,171,0 466 | 133,158,1 467 | 199,232,1 468 | 115,139,1 469 | 150,178,0 470 | 192,224,0 471 | 188,224,1 472 | 109,116,0 473 | 189,223,1 474 | 101,109,0 475 | 159,193,0 476 | 128,155,0 477 | 182,221,0 478 | 111,130,0 479 | 171,212,1 480 | 154,190,0 481 | 134,169,0 482 | 157,191,0 483 | 112,122,0 484 | 203,234,1 485 | 142,175,0 486 | 97,108,1 487 | 187,220,0 488 | 111,126,0 489 | 194,227,0 490 | 176,216,1 491 | 153,188,1 492 | 171,205,1 493 | 142,172,1 494 | 153,192,1 495 | 113,129,1 496 | 183,219,0 497 | 108,127,1 498 | 200,228,1 499 | 99,105,1 500 | 131,159,0 501 | 171,205,1 502 | 200,228,1 503 | 152,178,1 504 | 119,139,1 505 | 196,232,0 506 | 173,212,1 507 | 132,162,0 508 | 185,226,1 509 | 138,171,0 510 | 132,159,0 511 | 110,120,0 512 | 119,143,0 513 | 167,200,1 514 | 188,221,1 515 | 151,184,1 516 | 112,134,0 517 | 166,200,0 518 | 158,201,0 519 | 172,210,0 520 | 127,159,1 521 | 150,178,1 522 | 115,137,0 523 | 151,190,0 524 | 103,110,1 525 | 181,213,1 526 | 141,170,0 527 | 152,193,1 528 | 105,113,1 529 | 162,196,1 530 | 190,222,0 531 | 141,174,0 532 | 122,143,1 533 | 136,169,0 534 | 131,165,1 535 | 157,194,0 536 | 181,217,0 537 | 103,113,1 538 | 127,150,0 539 | 189,223,0 540 | 114,133,0 541 | 125,149,0 542 | 123,149,0 543 | 116,130,1 544 | 172,207,1 545 | 129,157,1 546 | 107,115,1 547 | 117,147,1 548 | 148,176,1 549 | -------------------------------------------------------------------------------- /Session06/text.ppt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pooya-mohammadi/DataMiningwithPython/6ab2642f9f474ad316d5e8cc174921f94d0bd5e1/Session06/text.ppt 
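The k_means_dataset.csv above (Weight, Cholesterol and a binary Gender column) is the input for Session06's k_means.ipynb. What follows is a minimal sketch of clustering it with pandas and scikit-learn's KMeans; the choice of n_clusters=2 and of the feature columns are illustrative assumptions, not taken from the course notebook:

# A minimal k-means sketch for k_means_dataset.csv (illustrative, not the course notebook itself)
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("k_means_dataset.csv")
X = df[["Weight", "Cholesterol"]]               # cluster on the two numeric measurements

X_scaled = StandardScaler().fit_transform(X)    # k-means is distance-based, so standardize first

kmeans = KMeans(n_clusters=2, random_state=42)  # k=2 is an assumption; an elbow plot can guide the choice
df["Cluster"] = kmeans.fit_predict(X_scaled)

print(df.groupby("Cluster")[["Weight", "Cholesterol"]].mean())  # per-cluster means as a quick sanity check

Gender is left out of the feature matrix here because it is a label-like binary column; whether to include it in a Euclidean-distance clustering is a modeling choice.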
-------------------------------------------------------------------------------- /Session06/text/Sentences.txt: -------------------------------------------------------------------------------- 1 | 1. Cricket is a bat and ball game played between two teams of eleven players each on a cricket field. 2 | 2. Each phase of play is called an innings during which one team bats, attempting to score as many runs as possible. 3 | 3. The teams have one or two innings apiece and, when the first innings ends, the teams swap roles for the next innings 4 | 4. Before a match begins, the two team captains meet on the pitch for the toss of a coin to determine which team will bat first. 5 | 5. Two batsmen and eleven fielders then enter the field and play begins when a member of the fielding team, known as the bowler, delivers the ball. 6 | 6. The most common dismissal in cricket match are bowled, when the bowler hits the stumps directly with the ball and dislodges the bails. Batsman gets out. 7 | 7. Runs are scored by two main methods: either by hitting the ball hard enough for it to cross the boundary, or by the two batsmen swapping ends. 8 | 8. The main objective of each team is to score more runs than their opponents. 9 | 9. If the team batting last is all out having scored fewer runs than their opponents, they are said to have "lost by n runs". 10 | 10. The role of striker batsman is to prevent the ball from hitting the stumps by using his bat and, simultaneously, to strike it well enough to score runs 11 | 11. Artificial intelligence is intelligence exhibited by machines, rather than humans or other animals. 12 | 12. the field of AI research defines itself as the study of "intelligent agents": any device that perceives its environment and takes actions that maximize its chance of success at some goal 13 | 13. The overall research goal of artificial intelligence is to create technology that allows computers and machines to function in an intelligent manner. 14 | 14. Natural language processing[77] gives machines the ability to read and understand human language and extract intelligence from it. 15 | 15. AI researchers developed sophisticated mathematical tools to solve specific subproblems. These tools are truly scientific, in the sense that their results are both measurable and verifiable. 16 | 16. An intelligent agent is a system that perceives its environment and takes actions which maximize its chances of success. 17 | 17. AI techniques have become an essential part of the technology industry, helping to solve many challenging problems in computer science. 18 | 18. Recent advancements in AI, and specifically in machine learning, have contributed to the growth of Autonomous Things such as drones and self-driving cars. 19 | 19. AI research was revived by the commercial success of expert systems,[28] a form of AI program that simulated the knowledge and analytical skills of human experts. 20 | 20. Advanced statistical techniques (loosely known as deep learning), access to large amounts of data and faster computers enabled advances in machine learning and perception. 21 | 21. A compound is a pure chemical substance composed of more than one element and the properties of a compound bear little similarity to those of its elements. 22 | 22. Since the properties of an element are mostly determined by its electron configuration, the properties of the elements likewise show recurring patterns or periodic behaviour. 23 | 23. 
The property of inertness of noble gases makes them very suitable in chemicals where reactions are not wanted. 24 | 24. The atom is also the smallest entity that can be envisaged to retain the chemical properties of the element, such as electronegativity, ionization potential and preferred oxidation state. 25 | 25. The nucleus is made up of positively charged protons and uncharged neutrons (together called nucleons), while the electron cloud consists of negatively charged electrons which orbit the nucleus 26 | 26. The atom is the basic unit of chemistry. It consists of a dense core called the atomic nucleus surrounded by a space called the electron cloud. 27 | 27. A chemical reaction is a transformation of some substances into one or more different substances. 28 | 28. Chemistry is sometimes called the central science because it bridges other natural sciences, including physics, geology and biology. 29 | 29. Chemistry includes topics such as the properties of individual atoms and how atoms form chemical bonds to create chemical compounds. 30 | 30. Chemistry is a branch of physical science that studies the composition, structure of atoms, properties and change of matter. -------------------------------------------------------------------------------- /Session06/text/text_clustering_to_teach_students.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data Mining\n", 8 | "\n", 9 | "## Text Mining\n", 10 | "\n", 11 | "1. A simple text cleaning definition.\n", 12 | "2. Feature Extraction.\n", 13 | "3. Training the models.\n", 14 | "4. Testing the models.\n", 15 | "\n", 16 | "For training K-Means models, the following 30 sentences were collected from 3 categories, namely Cricket, Artificial Intelligence and Chemistry." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "1. Cricket is a bat and ball game played between two teams of eleven players each on a cricket field.\n", 24 | "2. Each phase of play is called an innings during which one team bats, attempting to score as many runs as possible.\n", 25 | "3. The teams have one or two innings apiece and, when the first innings ends, the teams swap roles for the next innings\n", 26 | "4. Before a match begins, the two team captains meet on the pitch for the toss of a coin to determine which team will bat first.\n", 27 | "5. Two batsmen and eleven fielders then enter the field and play begins when a member of the fielding team, known as the bowler, delivers the ball.\n", 28 | "6. The most common dismissal in cricket match are bowled, when the bowler hits the stumps directly with the ball and dislodges the bails. Batsman gets out.\n", 29 | "7. Runs are scored by two main methods: either by hitting the ball hard enough for it to cross the boundary, or by the two batsmen swapping ends.\n", 30 | "8. The main objective of each team is to score more runs than their opponents.\n", 31 | "9. If the team batting last is all out having scored fewer runs than their opponents, they are said to have \"lost by n runs\".\n", 32 | "10. The role of striker batsman is to prevent the ball from hitting the stumps by using his bat and, simultaneously, to strike it well enough to score runs\n", 33 | "11. Artificial intelligence is intelligence exhibited by machines, rather than humans or other animals. \n", 34 | "12. 
the field of AI research defines itself as the study of \"intelligent agents\": any device that perceives its environment and takes actions that maximize its chance of success at some goal\n", 35 | "13. The overall research goal of artificial intelligence is to create technology that allows computers and machines to function in an intelligent manner.\n", 36 | "14. Natural language processing[77] gives machines the ability to read and understand human language and extract intelligence from it.\n", 37 | "15. AI researchers developed sophisticated mathematical tools to solve specific subproblems. These tools are truly scientific, in the sense that their results are both measurable and verifiable.\n", 38 | "16. An intelligent agent is a system that perceives its environment and takes actions which maximize its chances of success.\n", 39 | "17. AI techniques have become an essential part of the technology industry, helping to solve many challenging problems in computer science.\n", 40 | "18. Recent advancements in AI, and specifically in machine learning, have contributed to the growth of Autonomous Things such as drones and self-driving cars.\n", 41 | "19. AI research was revived by the commercial success of expert systems,[28] a form of AI program that simulated the knowledge and analytical skills of human experts.\n", 42 | "20. Advanced statistical techniques (loosely known as deep learning), access to large amounts of data and faster computers enabled advances in machine learning and perception.\n", 43 | "21. A compound is a pure chemical substance composed of more than one element and the properties of a compound bear little similarity to those of its elements.\n", 44 | "22. Since the properties of an element are mostly determined by its electron configuration, the properties of the elements likewise show recurring patterns or periodic behaviour.\n", 45 | "23. The property of inertness of noble gases makes them very suitable in chemicals where reactions are not wanted.\n", 46 | "24. The atom is also the smallest entity that can be envisaged to retain the chemical properties of the element, such as electronegativity, ionization potential and preferred oxidation state.\n", 47 | "25. The nucleus is made up of positively charged protons and uncharged neutrons (together called nucleons), while the electron cloud consists of negatively charged electrons which orbit the nucleus\n", 48 | "26. The atom is the basic unit of chemistry. It consists of a dense core called the atomic nucleus surrounded by a space called the electron cloud.\n", 49 | "27. A chemical reaction is a transformation of some substances into one or more different substances.\n", 50 | "28. Chemistry is sometimes called the central science because it bridges other natural sciences, including physics, geology and biology.\n", 51 | "29. Chemistry includes topics such as the properties of individual atoms and how atoms form chemical bonds to create chemical compounds.\n", 52 | "30. Chemistry is a branch of physical science that studies the composition, structure of atoms, properties and change of matter." 
53 | ]
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {},
58 | "source": [
59 | "__Workflow__:\n",
60 | "\n",
61 | "\n",
62 | "Texts ==> Stop-word removal ==> Punctuation removal ==> Word lemmatization ==> Digit removal ==> Feature extraction (Tf-Idf) ==> Model training"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {},
68 | "source": [
69 | "### Text Cleaning (Data Preparation)\n",
70 | "* __Removal of stop words__\n",
71 | "Stop words like “and”, “if”, “the”, etc. are very common in English sentences and carry little weight in deciding the theme of an article, so these words can be removed from the articles.\n",
72 | "\n",
73 | "* __Removal of Punctuation Characters__\n",
74 | "Exclude all punctuation marks, i.e. every character in Python's `string.punctuation` (such as '!', '#', '%', '$', and so on).\n",
75 | "\n",
76 | "* __Lemmatization__\n",
77 | "It is the process of grouping together the different inflected forms of a word so they can be analyzed as a single item. For example, “include”, “includes,” and “included” would all be represented as “include”. The context of the sentence is also preserved in lemmatization, as opposed to stemming (another common text-mining technique, which chops off word endings without considering the meaning of the sentence).\n",
78 | "\n",
79 | "* __Removal of digits__ from the text sentences."
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": null,
85 | "metadata": {},
86 | "outputs": [],
87 | "source": [
88 | "!pip install nltk"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
98 | "from sklearn.cluster import KMeans\n",
99 | "from nltk.corpus import stopwords\n",
100 | "from nltk.stem.wordnet import WordNetLemmatizer\n",
101 | "import string\n",
102 | "import re\n",
103 | "import numpy as np\n",
104 | "from collections import Counter\n",
105 | "import nltk"
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": [
114 | "nltk.download('stopwords')"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 | "### Tf-Idf Feature Extraction\n",
122 | "The most popular and widely used word weighting scheme in text mining problems, __term frequency and inverse document frequency (tf-idf)__, is a numerical statistic that is intended to reflect how important a word is to a document in a collection or corpus. The tf-idf value increases proportionally to the number of times a word appears in the document (tf), but is often offset by the frequency of the word in the whole corpus (idf), which helps to adjust for the fact that some words appear more frequently in general.\n",
123 | "\n",
124 | "\n",
125 | "\n",
126 | "__TF__:\n",
127 | " Term Frequency, which measures how frequently a term occurs in a document. Since documents differ in length, a term may appear many more times in long documents than in shorter ones. Thus, the term frequency is often divided by the document length (i.e. the total number of terms in the document) as a way of normalization:\n",
128 | "\n",
129 | " TF(t) = (Number of times term t appears in a document) / (Total number of terms in the document).\n",
130 | "\n",
131 | "__IDF__:\n",
132 | " Inverse Document Frequency, which measures how important a term is. While computing TF, all terms are considered equally important. However, it is known that certain terms, such as \"is\", \"of\", and \"that\", may appear many times but have little importance. Thus we need to weigh down the frequent terms while scaling up the rare ones, by computing the following (using the base-10 logarithm here; many implementations use the natural logarithm instead):\n",
133 | "\n",
134 | " IDF(t) = log_10(Total number of documents / Number of documents with term t in it).\n",
135 | "\n",
136 | "__Example:__\n",
137 | " Consider a document containing 100 words wherein the word cat appears 3 times. The term frequency (i.e., tf) for cat is then (3 / 100) = 0.03. Now, assume we have 10 million documents and the word cat appears in one thousand of these. Then, the inverse document frequency (i.e., idf) is calculated as log_10(10,000,000 / 1,000) = 4. Thus, the Tf-idf weight is the product of these quantities: 0.03 * 4 = 0.12."
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": null,
143 | "metadata": {},
144 | "outputs": [],
145 | "source": [
146 | "nltk.download('wordnet')"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "### Training the Clustering (K-Means) Model\n",
154 | "As discussed earlier, text classification is a supervised learning task, whereas text clustering is an unsupervised one. We investigate one machine learning algorithm here: K-Means clustering.\n",
155 | "The goal of clustering is to discover the intrinsic grouping in a set of unlabeled data (feature vectors). In K-Means clustering, ‘K’ cluster centers are discovered, each of which is the centroid of the data points belonging to that cluster. A test data point (feature vector) is assigned to the cluster whose centroid is at the minimum Euclidean distance from it."
156 | ]
157 | },
158 | {
159 | "cell_type": "markdown",
160 | "metadata": {},
161 | "source": [
162 | "### Testing on Unseen Texts\n",
163 | "Once the model has been trained, we test it on unseen data. The cell below defines a few unseen sentences; a sketch of one possible end-to-end implementation follows it.\n"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {},
170 | "outputs": [],
171 | "source": [
172 | "# Unseen sentences for the testing phase\n",
173 | "test_sentences = [\"Chemical compounds are used for preparing bombs based on some reactions\",\\\n",
174 | "\"Cricket is a boring game where the batsman only enjoys the game\",\\\n",
175 | "\"Machine learning is an area of Artificial intelligence\"]"
176 | ]
177 | },
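178 | {
179 | "cell_type": "markdown",
180 | "metadata": {},
181 | "source": [
182 | "### One Possible Completion (Sketch)\n",
183 | "The two cells below are a minimal sketch of the full pipeline described above, not part of the original notebook: clean each sentence, extract Tf-Idf features, fit a 3-cluster K-Means model (one cluster per expected category) and assign the unseen test sentences to the learned clusters. The helper name `clean_text`, the variable `sentences`, the relative path to `Sentences.txt` and the choice of `random_state=42` are all illustrative assumptions."
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": null,
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "# A possible cleaning helper (assumed names, not from the original notebook):\n",
193 | "# lower-case, strip punctuation and digits, drop stop words, lemmatize.\n",
194 | "stop_words = set(stopwords.words('english'))\n",
195 | "lemmatizer = WordNetLemmatizer()\n",
196 | "\n",
197 | "def clean_text(text):\n",
198 | "    text = text.lower()\n",
199 | "    text = re.sub(r'[%s]' % re.escape(string.punctuation), ' ', text)\n",
200 | "    text = re.sub(r'\\d+', ' ', text)\n",
201 | "    words = [lemmatizer.lemmatize(w) for w in text.split() if w not in stop_words]\n",
202 | "    return ' '.join(words)\n",
203 | "\n",
204 | "# Load the 30 training sentences (assumes the notebook runs from Session06/text).\n",
205 | "with open('Sentences.txt') as f:\n",
206 | "    sentences = [clean_text(line) for line in f if line.strip()]"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {},
213 | "outputs": [],
214 | "source": [
215 | "# Tf-Idf features + K-Means with K=3 (one cluster per expected topic).\n",
216 | "vectorizer = TfidfVectorizer()\n",
217 | "X = vectorizer.fit_transform(sentences)\n",
218 | "\n",
219 | "kmeans = KMeans(n_clusters=3, random_state=42)\n",
220 | "kmeans.fit(X)\n",
221 | "print('Training cluster labels:', kmeans.labels_)\n",
222 | "\n",
223 | "# Assign the unseen test sentences to the learned clusters.\n",
224 | "X_test = vectorizer.transform([clean_text(s) for s in test_sentences])\n",
225 | "print('Test cluster labels:', kmeans.predict(X_test))"
226 | ]
227 | }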
228 | ],
229 | "metadata": {
230 | "kernelspec": {
231 | "display_name": "Python 3",
232 | "language": "python",
233 | "name": "python3"
234 | },
235 | "language_info": {
236 | "codemirror_mode": {
237 | "name": "ipython",
238 | "version": 3
239 | },
240 | "file_extension": ".py",
241 | "mimetype": "text/x-python",
242 | "name": "python",
243 | "nbconvert_exporter": "python",
244 | "pygments_lexer": "ipython3",
245 | "version": "3.7.4"
246 | }
247 | },
248 | "nbformat": 4,
249 | "nbformat_minor": 2
250 | }
251 | --------------------------------------------------------------------------------