├── 1-0_Prelude.ipynb ├── 1-1_Basic_NN-Titanic.ipynb ├── 2-1_Basic_NN-MNIST.ipynb ├── 2-2_Regularized_NN.ipynb ├── 2-3_CNN.ipynb ├── 3-3_RNN.ipynb ├── Appendix-SGD.ipynb ├── README.md ├── data ├── mnist.csv ├── rottentomatoes.csv └── titanic.csv └── presentation.pdf /1-0_Prelude.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Section 1-0 - Prelude" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "We'll start with the Kaggle Titanic dataset. The dataset is a list of Titanic passengers with features such as class, age, sex and fare. We'll use these features to train a model, and use the model to predict whether or not each passenger survived. A more detailed treatment of the dataset can be found here:\n", 15 | "\n", 16 | "https://github.com/savarin/python_for_ml" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import numpy as np\n", 26 | "import pandas as pd\n", 27 | "from sklearn.preprocessing import StandardScaler\n", 28 | "from time import time\n", 29 | "\n", 30 | "np.random.seed(1337)\n", 31 | "\n", 32 | "df = pd.read_csv('data/titanic.csv')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
" 112 | ], 113 | "text/plain": [ 114 | " Survived Class Sex Age Fare\n", 115 | "0 0 3 1 22.0 7.2500\n", 116 | "1 1 1 0 38.0 71.2833\n", 117 | "2 1 3 0 26.0 7.9250\n", 118 | "3 1 1 0 35.0 53.1000\n", 119 | "4 0 3 1 35.0 8.0500" 120 | ] 121 | }, 122 | "execution_count": 2, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "df.head()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "We split the data 80-20 into training and test sets. In addition, we scale the data so that each column has mean 0 and standard deviation 1, and create one-hot vectors with the labels (analogous to dummy variables)." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 3, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "df_train = df.iloc[:712, :]\n", 145 | "\n", 146 | "scaler = StandardScaler()\n", 147 | "features = ['Class', 'Sex', 'Age', 'Fare']\n", 148 | "\n", 149 | "X_train = scaler.fit_transform(df_train[features].values)\n", 150 | "y_train = df_train['Survived'].values\n", 151 | "y_train_onehot = pd.get_dummies(df_train['Survived']).values" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 4, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/plain": [ 162 | "array([[ 0.83290956, 0.74926865, -0.61259594, -0.51933199],\n", 163 | " [-1.55353553, -1.33463478, 0.6184268 , 0.79718222],\n", 164 | " [ 0.83290956, -1.33463478, -0.30484025, -0.5054541 ],\n", 165 | " [-1.55353553, -1.33463478, 0.38761004, 0.42333654],\n", 166 | " [ 0.83290956, 0.74926865, 0.38761004, -0.50288412]])" 167 | ] 168 | }, 169 | "execution_count": 4, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "X_train[:5, :]" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "data": { 185 | "text/plain": [ 186 | "array([0, 1, 1, 1, 0])" 187 | ] 188 | }, 189 | "execution_count": 5, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "y_train[:5]" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 6, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "data": { 205 | "text/plain": [ 206 | "array([[1, 0],\n", 207 | " [0, 1],\n", 208 | " [0, 1],\n", 209 | " [0, 1],\n", 210 | " [1, 0]], dtype=uint8)" 211 | ] 212 | }, 213 | "execution_count": 6, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "y_train_onehot[:5]" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 7, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "df_test = df.iloc[712:, :]\n", 229 | "\n", 230 | "X_test = scaler.transform(df_test[features].values)\n", 231 | "y_test = df_test['Survived'].values" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | " ## Benchmark" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "To create a basis for comparison, we train a Random Forest model and record the accuracy on the test set." 
246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 8, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "name": "stdout", 255 | "output_type": "stream", 256 | "text": [ 257 | "building tree 1 of 100\n", 258 | "building tree 2 of 100\n", 259 | "building tree 3 of 100\n", 260 | "building tree 4 of 100\n", 261 | "building tree 5 of 100\n", 262 | "building tree 6 of 100\n", 263 | "building tree 7 of 100\n", 264 | "building tree 8 of 100\n", 265 | "building tree 9 of 100\n", 266 | "building tree 10 of 100\n", 267 | "building tree 11 of 100\n", 268 | "building tree 12 of 100\n", 269 | "building tree 13 of 100\n", 270 | "building tree 14 of 100\n", 271 | "building tree 15 of 100\n", 272 | "building tree 16 of 100\n", 273 | "building tree 17 of 100\n", 274 | "building tree 18 of 100\n", 275 | "building tree 19 of 100\n", 276 | "building tree 20 of 100\n", 277 | "building tree 21 of 100\n", 278 | "building tree 22 of 100\n", 279 | "building tree 23 of 100\n", 280 | "building tree 24 of 100\n", 281 | "building tree 25 of 100\n", 282 | "building tree 26 of 100\n", 283 | "building tree 27 of 100\n", 284 | "building tree 28 of 100\n", 285 | "building tree 29 of 100\n", 286 | "building tree 30 of 100\n", 287 | "building tree 31 of 100\n", 288 | "building tree 32 of 100\n", 289 | "building tree 33 of 100\n", 290 | "building tree 34 of 100\n", 291 | "building tree 35 of 100\n", 292 | "building tree 36 of 100\n", 293 | "building tree 37 of 100\n", 294 | "building tree 38 of 100\n", 295 | "building tree 39 of 100\n", 296 | "building tree 40 of 100\n", 297 | "building tree 41 of 100\n", 298 | "building tree 42 of 100\n", 299 | "building tree 43 of 100\n", 300 | "building tree 44 of 100\n", 301 | "building tree 45 of 100\n", 302 | "building tree 46 of 100\n", 303 | "building tree 47 of 100\n", 304 | "building tree 48 of 100\n", 305 | "building tree 49 of 100\n", 306 | "building tree 50 of 100\n", 307 | "building tree 51 of 100\n", 308 | "building tree 52 of 100\n", 309 | "building tree 53 of 100\n", 310 | "building tree 54 of 100\n", 311 | "building tree 55 of 100\n", 312 | "building tree 56 of 100\n", 313 | "building tree 57 of 100\n", 314 | "building tree 58 of 100\n", 315 | "building tree 59 of 100\n", 316 | "building tree 60 of 100\n", 317 | "building tree 61 of 100\n", 318 | "building tree 62 of 100\n", 319 | "building tree 63 of 100\n", 320 | "building tree 64 of 100\n", 321 | "building tree 65 of 100\n", 322 | "building tree 66 of 100\n", 323 | "building tree 67 of 100\n", 324 | "building tree 68 of 100\n", 325 | "building tree 69 of 100\n", 326 | "building tree 70 of 100\n", 327 | "building tree 71 of 100\n", 328 | "building tree 72 of 100\n", 329 | "building tree 73 of 100\n", 330 | "building tree 74 of 100\n", 331 | "building tree 75 of 100\n", 332 | "building tree 76 of 100\n", 333 | "building tree 77 of 100\n", 334 | "building tree 78 of 100\n", 335 | "building tree 79 of 100\n", 336 | "building tree 80 of 100\n", 337 | "building tree 81 of 100\n", 338 | "building tree 82 of 100\n", 339 | "building tree 83 of 100\n", 340 | "building tree 84 of 100\n", 341 | "building tree 85 of 100\n", 342 | "building tree 86 of 100\n", 343 | "building tree 87 of 100\n", 344 | "building tree 88 of 100\n", 345 | "building tree 89 of 100\n", 346 | "building tree 90 of 100\n", 347 | "building tree 91 of 100\n", 348 | "building tree 92 of 100\n", 349 | "building tree 93 of 100\n", 350 | "building tree 94 of 100\n", 351 | "building tree 95 of 100\n", 352 | "building tree 96 of 100\n", 353 
| "building tree 97 of 100\n", 354 | "building tree 98 of 100\n", 355 | "building tree 99 of 100\n", 356 | "building tree 100 of 100\n", 357 | "\n", 358 | "accuracy 0.8324022346368715\n" 359 | ] 360 | }, 361 | { 362 | "name": "stderr", 363 | "output_type": "stream", 364 | "text": [ 365 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 366 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s\n", 367 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s\n", 368 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 0.1s finished\n", 369 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 370 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s\n", 371 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s\n", 372 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 0.0s finished\n" 373 | ] 374 | } 375 | ], 376 | "source": [ 377 | "from sklearn.ensemble import RandomForestClassifier\n", 378 | "\n", 379 | "model = RandomForestClassifier(random_state=0, verbose=3)\n", 380 | "model = model.fit(X_train, y_train)\n", 381 | "\n", 382 | "y_prediction = model.predict(X_test)\n", 383 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "## 1-layer Neural Network" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "For the input, we have a vector of length 4 that represents each passenger's features. As an example, we consider the first passenger in the dataset." 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 9, 403 | "metadata": {}, 404 | "outputs": [ 405 | { 406 | "name": "stdout", 407 | "output_type": "stream", 408 | "text": [ 409 | "[ 0.83290956 0.74926865 -0.61259594 -0.51933199]\n" 410 | ] 411 | } 412 | ], 413 | "source": [ 414 | "print(X_train[0])" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "For the output, we want a vector of length 2 to represent the survival probabilities. A simple way to create this mapping is by using a 2x4 matrix. To start off, we generate a random matrix representing feature weights and apply the matrix to our input. We'll also add a bias term." 
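Written out, the mapping just described is an affine map from the four scaled features to two scores; the weight and bias values themselves are the small random draws generated in the next cells:

$$
s = W x + b, \qquad W \in \mathbb{R}^{2 \times 4}, \quad x \in \mathbb{R}^{4}, \quad b, s \in \mathbb{R}^{2}.
$$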
422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 10, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "[[0.00262025 0.00158684 0.00278127 0.00459317]\n", 434 | " [0.00321001 0.00518393 0.00261943 0.00976085]]\n" 435 | ] 436 | } 437 | ], 438 | "source": [ 439 | "W = np.random.rand(2, 4) * 0.01\n", 440 | "\n", 441 | "print(W)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 11, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "name": "stdout", 451 | "output_type": "stream", 452 | "text": [ 453 | "[0.00732815 0.00115274]\n" 454 | ] 455 | } 456 | ], 457 | "source": [ 458 | "b = np.random.rand(2,) * 0.01\n", 459 | "\n", 460 | "print(b)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": 12, 466 | "metadata": {}, 467 | "outputs": [ 468 | { 469 | "name": "stdout", 470 | "output_type": "stream", 471 | "text": [ 472 | "[0.00661037 0.00103677]\n" 473 | ] 474 | } 475 | ], 476 | "source": [ 477 | "result = np.dot(W, X_train[0]) + b\n", 478 | "\n", 479 | "print(result)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "To get the output vector to sum to 1, we apply a softmax mapping. The first element would now represent the probability that the passenger did not survive, and the second element represents the probability the passenger survives." 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 13, 492 | "metadata": {}, 493 | "outputs": [], 494 | "source": [ 495 | "def softmax(x):\n", 496 | " return np.exp(x) / np.exp(x).sum()" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 14, 502 | "metadata": {}, 503 | "outputs": [ 504 | { 505 | "name": "stdout", 506 | "output_type": "stream", 507 | "text": [ 508 | "[0.5013934 0.4986066]\n" 509 | ] 510 | } 511 | ], 512 | "source": [ 513 | "result = softmax(result)\n", 514 | "\n", 515 | "print(result)" 516 | ] 517 | }, 518 | { 519 | "cell_type": "markdown", 520 | "metadata": {}, 521 | "source": [ 522 | "We can then compare the output vector to the actual label. We would have a 'good' model if the probability for the correct label was close to 1, and a 'bad' one if it was close to 0." 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": 15, 528 | "metadata": {}, 529 | "outputs": [ 530 | { 531 | "name": "stdout", 532 | "output_type": "stream", 533 | "text": [ 534 | "[1 0]\n" 535 | ] 536 | } 537 | ], 538 | "source": [ 539 | "print(y_train_onehot[0])" 540 | ] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "execution_count": 16, 545 | "metadata": {}, 546 | "outputs": [ 547 | { 548 | "name": "stdout", 549 | "output_type": "stream", 550 | "text": [ 551 | "0\n" 552 | ] 553 | } 554 | ], 555 | "source": [ 556 | "label_index = np.argmax(y_train_onehot[0])\n", 557 | "\n", 558 | "print(label_index)" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": 17, 564 | "metadata": {}, 565 | "outputs": [ 566 | { 567 | "name": "stdout", 568 | "output_type": "stream", 569 | "text": [ 570 | "predicted label-0 probability 0.5013933977819183\n" 571 | ] 572 | } 573 | ], 574 | "source": [ 575 | "print(\"predicted label-0 probability\", result[label_index])" 576 | ] 577 | }, 578 | { 579 | "cell_type": "markdown", 580 | "metadata": {}, 581 | "source": [ 582 | "We define loss to be the negative logarithm of the probability of the correct label. 
Taking the logarithm penalizes the model for having a high probability associated with the wrong label. Here we have the loss associated with the first passenger." 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 18, 588 | "metadata": {}, 589 | "outputs": [ 590 | { 591 | "name": "stdout", 592 | "output_type": "stream", 593 | "text": [ 594 | "loss for first passenger 0.6903642609116132\n" 595 | ] 596 | } 597 | ], 598 | "source": [ 599 | "loss = -np.log(result[label_index])\n", 600 | "\n", 601 | "print(\"loss for first passenger\", loss)" 602 | ] 603 | }, 604 | { 605 | "cell_type": "markdown", 606 | "metadata": {}, 607 | "source": [ 608 | "We run the calculation through all the passengers in the training set, and divide the total loss by the number of passengers to obtain the average loss." 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 19, 614 | "metadata": {}, 615 | "outputs": [ 616 | { 617 | "name": "stdout", 618 | "output_type": "stream", 619 | "text": [ 620 | "average loss across all passengers 0.6938940425068784\n" 621 | ] 622 | } 623 | ], 624 | "source": [ 625 | "for j in range(X_train.shape[0]):\n", 626 | " result = np.dot(W, X_train[j]) + b\n", 627 | " result = softmax(result)\n", 628 | " \n", 629 | " label_index = np.argmax(y_train_onehot[j])\n", 630 | " loss += -np.log(result[label_index])\n", 631 | "\n", 632 | "loss = loss / float(X_train.shape[0])\n", 633 | "\n", 634 | "print(\"average loss across all passengers\", loss)" 635 | ] 636 | }, 637 | { 638 | "cell_type": "markdown", 639 | "metadata": {}, 640 | "source": [ 641 | "Now we iterate through 1000 iterations for random values of W and b, and keep the pair which minimizes the average loss." 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": 20, 647 | "metadata": {}, 648 | "outputs": [ 649 | { 650 | "name": "stdout", 651 | "output_type": "stream", 652 | "text": [ 653 | "loss 0.711 accuracy 0.317 loop 0\n", 654 | "loss 0.698 accuracy 0.393 loop 1\n", 655 | "loss 0.67 accuracy 0.772 loop 2\n", 656 | "loss 0.663 accuracy 0.785 loop 4\n", 657 | "loss 0.661 accuracy 0.749 loop 156\n", 658 | "loss 0.659 accuracy 0.725 loop 452\n", 659 | "loss 0.658 accuracy 0.787 loop 715\n", 660 | "\n", 661 | "time taken 9.314749240875244 seconds\n" 662 | ] 663 | } 664 | ], 665 | "source": [ 666 | "min_loss = 1000\n", 667 | "best_weights = ()\n", 668 | "\n", 669 | "start = time()\n", 670 | "\n", 671 | "for i in range(1000):\n", 672 | " W = np.random.rand(2, 4) / 10\n", 673 | " b = np.random.rand(2,) / 10\n", 674 | "\n", 675 | " scores = []\n", 676 | " loss = 0\n", 677 | " \n", 678 | " for j in range(X_train.shape[0]):\n", 679 | " result = np.dot(W, X_train[j]) + b\n", 680 | " result = softmax(result)\n", 681 | " scores.append(list(result))\n", 682 | " \n", 683 | " label_index = np.argmax(y_train_onehot[j])\n", 684 | " loss += -np.log(result[label_index])\n", 685 | "\n", 686 | " loss = loss / float(X_train.shape[0])\n", 687 | " y_prediction = np.argmax(np.array(scores), axis=1)\n", 688 | " accuracy = np.sum(y_prediction == y_train) / float(len(y_train))\n", 689 | " \n", 690 | " if loss < min_loss:\n", 691 | " min_loss = loss\n", 692 | " best_weights = (W, b)\n", 693 | " print(\"loss %s accuracy %s loop %s\" % (round(loss, 3), round(accuracy, 3), i))\n", 694 | "\n", 695 | "print(\"\\ntime taken %s seconds\" % str(time() - start))" 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 21, 701 | "metadata": {}, 702 | "outputs": [ 703 | { 704 | 
"name": "stdout", 705 | "output_type": "stream", 706 | "text": [ 707 | "accuracy 0.7821229050279329\n" 708 | ] 709 | } 710 | ], 711 | "source": [ 712 | "W, b = best_weights\n", 713 | "scores = []\n", 714 | "\n", 715 | "for j in range(X_test.shape[0]):\n", 716 | " result = np.dot(W, X_test[j]) + b\n", 717 | " result = softmax(result)\n", 718 | " scores.append(list(result))\n", 719 | "\n", 720 | "y_prediction = np.argmax(np.array(scores), axis=1)\n", 721 | "\n", 722 | "print(\"accuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 723 | ] 724 | }, 725 | { 726 | "cell_type": "markdown", 727 | "metadata": {}, 728 | "source": [ 729 | "For each passenger, predictions for the test set were made by selecting the label with the highest probability. Despite the naïve approach, we obtain a prediction accuracy of 78%!" 730 | ] 731 | }, 732 | { 733 | "cell_type": "markdown", 734 | "metadata": {}, 735 | "source": [ 736 | "## 2-layer Neural Network" 737 | ] 738 | }, 739 | { 740 | "cell_type": "markdown", 741 | "metadata": {}, 742 | "source": [ 743 | "With the 1-layer neural network, we had a 2x4 weight matrix. To create more degrees of freedom, we can introduce intermediary matrices or 'layers'. For example, instead of having a mapping from a vector of length 4 to a vector of length 2, we'll have two mappings - first from 4 to 100, followed by 100 to 2." 744 | ] 745 | }, 746 | { 747 | "cell_type": "code", 748 | "execution_count": 22, 749 | "metadata": {}, 750 | "outputs": [], 751 | "source": [ 752 | "W_1 = np.random.rand(100, 4) * 0.01\n", 753 | "b_1 = np.random.rand(100,) * 0.01\n", 754 | "W_2 = np.random.rand(2, 100) * 0.01\n", 755 | "b_2 = np.random.rand(2,) * 0.01" 756 | ] 757 | }, 758 | { 759 | "cell_type": "code", 760 | "execution_count": 23, 761 | "metadata": {}, 762 | "outputs": [ 763 | { 764 | "name": "stdout", 765 | "output_type": "stream", 766 | "text": [ 767 | "[ 1.13174863e-02 3.27517674e-03 6.64636413e-03 6.67664294e-03\n", 768 | " 3.43098480e-03 1.36640330e-02 9.18247683e-03 1.17094685e-02\n", 769 | " 1.01565364e-03 -1.60432219e-03 9.35207856e-03 7.47293230e-03\n", 770 | " 1.52676682e-02 7.97936749e-03 6.55563928e-04 6.00362175e-03\n", 771 | " 7.23496560e-03 9.92625256e-03 4.14720990e-03 5.41832887e-03\n", 772 | " 1.18845724e-02 1.60057818e-02 7.64679793e-03 6.94767477e-03\n", 773 | " 1.33371235e-02 6.07771410e-03 1.96233598e-02 1.57180776e-02\n", 774 | " 9.84855859e-03 3.82086941e-03 3.77595128e-04 2.26082278e-03\n", 775 | " 1.45080114e-03 1.45319158e-02 7.90452470e-03 8.92775565e-03\n", 776 | " 1.61055072e-02 8.93604299e-03 1.02217040e-02 9.01988122e-03\n", 777 | " 1.02962602e-02 4.56788321e-03 1.14912002e-02 -2.16470496e-04\n", 778 | " 7.17207370e-03 1.32500054e-02 2.73586936e-03 4.19898797e-03\n", 779 | " 5.70306531e-03 9.32144683e-03 8.41222049e-03 1.03885822e-02\n", 780 | " 1.64928832e-02 -2.05164035e-03 1.72807609e-02 3.94223376e-03\n", 781 | " -3.86111353e-03 1.54912269e-02 7.65819757e-03 2.88205757e-05\n", 782 | " 8.11385633e-03 1.10081834e-02 1.88604176e-03 1.22567360e-02\n", 783 | " 5.81988306e-03 6.61609950e-03 1.30486432e-02 1.84395962e-03\n", 784 | " 1.78840358e-02 -1.46500280e-03 1.40330891e-02 8.37979073e-03\n", 785 | " 1.12545050e-02 -1.93523942e-03 1.10487202e-03 1.59472488e-02\n", 786 | " 2.13196859e-03 1.51578949e-02 7.99031206e-03 1.33981824e-02\n", 787 | " 9.99760788e-04 7.04023165e-03 1.21942983e-02 1.33117699e-02\n", 788 | " 1.14257228e-02 1.31578297e-02 1.03026592e-02 4.83051271e-03\n", 789 | " 1.40758009e-02 7.29455432e-03 1.50803904e-03 
7.33072912e-03\n", 790 | " 8.18612611e-03 1.45065360e-02 -5.13711067e-04 7.61443534e-03\n", 791 | " -2.83954620e-04 7.66773697e-03 2.88720973e-03 8.91764272e-03]\n" 792 | ] 793 | } 794 | ], 795 | "source": [ 796 | "result = np.dot(W_1, X_train[0]) + b_1\n", 797 | "\n", 798 | "print(result)" 799 | ] 800 | }, 801 | { 802 | "cell_type": "code", 803 | "execution_count": 24, 804 | "metadata": {}, 805 | "outputs": [ 806 | { 807 | "name": "stdout", 808 | "output_type": "stream", 809 | "text": [ 810 | "[0.00977321 0.00541989]\n" 811 | ] 812 | } 813 | ], 814 | "source": [ 815 | "result = np.dot(W_2, result) + b_2\n", 816 | "\n", 817 | "print(result)" 818 | ] 819 | }, 820 | { 821 | "cell_type": "markdown", 822 | "metadata": {}, 823 | "source": [ 824 | "As before we iterate through 1000 iterations of random values for W_1, b_1, W_2 and b_2, and keep the one which minimizes loss." 825 | ] 826 | }, 827 | { 828 | "cell_type": "code", 829 | "execution_count": 25, 830 | "metadata": {}, 831 | "outputs": [ 832 | { 833 | "name": "stdout", 834 | "output_type": "stream", 835 | "text": [ 836 | "loss 0.693 accuracy 0.375 loop 0\n", 837 | "loss 0.693 accuracy 0.61 loop 1\n", 838 | "loss 0.692 accuracy 0.61 loop 3\n", 839 | "loss 0.692 accuracy 0.61 loop 22\n", 840 | "loss 0.692 accuracy 0.61 loop 59\n", 841 | "loss 0.692 accuracy 0.61 loop 144\n", 842 | "loss 0.692 accuracy 0.61 loop 229\n", 843 | "loss 0.692 accuracy 0.61 loop 511\n", 844 | "loss 0.692 accuracy 0.61 loop 808\n", 845 | "loss 0.692 accuracy 0.61 loop 809\n", 846 | "\n", 847 | "time taken 10.838479995727539 seconds\n" 848 | ] 849 | } 850 | ], 851 | "source": [ 852 | "min_loss = 1000\n", 853 | "best_weights = ()\n", 854 | "\n", 855 | "start = time()\n", 856 | "\n", 857 | "for i in range(1000):\n", 858 | " W_1 = np.random.rand(100, 4) * 0.01\n", 859 | " b_1 = np.random.rand(100,) * 0.01\n", 860 | " W_2 = np.random.rand(2, 100) * 0.01\n", 861 | " b_2 = np.random.rand(2,) * 0.01\n", 862 | " \n", 863 | " scores = []\n", 864 | " loss = 0\n", 865 | "\n", 866 | " for j in range(X_train.shape[0]):\n", 867 | " result = np.dot(W_1, X_train[j]) + b_1\n", 868 | " result = np.dot(W_2, result) + b_2\n", 869 | " result = softmax(result)\n", 870 | " scores.append(list(result))\n", 871 | " \n", 872 | " label_index = np.argmax(y_train_onehot[j])\n", 873 | " loss += -np.log(result[label_index])\n", 874 | "\n", 875 | " loss = loss / float(X_train.shape[0])\n", 876 | " y_prediction = np.argmax(np.array(scores), axis=1)\n", 877 | " accuracy = np.sum(y_prediction == y_train) / float(len(y_train))\n", 878 | " \n", 879 | " if loss < min_loss:\n", 880 | " min_loss = loss\n", 881 | " best_weights = (W_1, b_1, W_2, b_2)\n", 882 | " print(\"loss %s accuracy %s loop %s\" % (round(loss, 3), round(accuracy, 3), i))\n", 883 | "\n", 884 | "print(\"\\ntime taken %s seconds\" % str(time() - start))" 885 | ] 886 | }, 887 | { 888 | "cell_type": "code", 889 | "execution_count": 26, 890 | "metadata": {}, 891 | "outputs": [ 892 | { 893 | "name": "stdout", 894 | "output_type": "stream", 895 | "text": [ 896 | "accuracy 0.6424581005586593\n" 897 | ] 898 | } 899 | ], 900 | "source": [ 901 | "W_1, b_1, W_2, b_2 = best_weights\n", 902 | "scores = []\n", 903 | "\n", 904 | "for j in range(X_test.shape[0]):\n", 905 | " result = np.dot(W_1, X_test[j]) + b_1\n", 906 | " result = np.dot(W_2, result) + b_2\n", 907 | " result = softmax(result)\n", 908 | " scores.append(list(result))\n", 909 | " \n", 910 | "y_prediction = np.argmax(np.array(scores), axis=1)\n", 911 | "\n", 912 | "print(\"accuracy\", 
np.sum(y_prediction == y_test) / float(len(y_test)))" 913 | ] 914 | }, 915 | { 916 | "cell_type": "markdown", 917 | "metadata": {}, 918 | "source": [ 919 | "Despite the greater degree of freedom, we get an accuracy score of only 64%." 920 | ] 921 | }, 922 | { 923 | "cell_type": "markdown", 924 | "metadata": {}, 925 | "source": [ 926 | "## 3-layer Neural Network" 927 | ] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "metadata": {}, 932 | "source": [ 933 | "We take this a step further by adding an additional layer, and similarly review model performance." 934 | ] 935 | }, 936 | { 937 | "cell_type": "code", 938 | "execution_count": 27, 939 | "metadata": {}, 940 | "outputs": [ 941 | { 942 | "name": "stdout", 943 | "output_type": "stream", 944 | "text": [ 945 | "loss 0.693 accuracy 0.61 loop 0\n", 946 | "loss 0.693 accuracy 0.61 loop 1\n", 947 | "loss 0.693 accuracy 0.61 loop 20\n", 948 | "loss 0.692 accuracy 0.61 loop 58\n", 949 | "loss 0.692 accuracy 0.61 loop 74\n", 950 | "loss 0.692 accuracy 0.61 loop 563\n", 951 | "loss 0.692 accuracy 0.61 loop 809\n", 952 | "loss 0.692 accuracy 0.61 loop 990\n", 953 | "\n", 954 | "time taken 15.281288862228394 seconds\n" 955 | ] 956 | } 957 | ], 958 | "source": [ 959 | "min_loss = 1000\n", 960 | "best_weights = ()\n", 961 | "\n", 962 | "start = time()\n", 963 | "\n", 964 | "for i in range(1000):\n", 965 | " W_1 = np.random.rand(100, 4) * 0.01\n", 966 | " b_1 = np.random.rand(100,) * 0.01\n", 967 | " W_2 = np.random.rand(100, 100) * 0.01\n", 968 | " b_2 = np.random.rand(100,) * 0.01\n", 969 | " W_3 = np.random.rand(2, 100) * 0.01\n", 970 | " b_3 = np.random.rand(2,) * 0.01\n", 971 | " \n", 972 | " scores = []\n", 973 | " loss = 0\n", 974 | "\n", 975 | " for j in range(X_train.shape[0]):\n", 976 | " result = np.dot(W_1, X_train[j]) + b_1\n", 977 | " result = np.dot(W_2, result) + b_2\n", 978 | " result = np.dot(W_3, result) + b_3\n", 979 | " result = softmax(result)\n", 980 | " scores.append(list(result))\n", 981 | " \n", 982 | " label_index = np.argmax(y_train_onehot[j])\n", 983 | " loss += -np.log(result[label_index])\n", 984 | " \n", 985 | " loss = loss / float(X_train.shape[0])\n", 986 | " y_prediction = np.argmax(np.array(scores), axis=1)\n", 987 | " accuracy = np.sum(y_prediction == y_train) / float(len(y_train)) \n", 988 | " \n", 989 | " if loss < min_loss:\n", 990 | " min_loss = loss\n", 991 | " best_weights = (W_1, b_1, W_2, b_2, W_3, b_3)\n", 992 | " print(\"loss %s accuracy %s loop %s\" % (round(loss, 3), round(accuracy, 3), i))\n", 993 | "\n", 994 | "print(\"\\ntime taken %s seconds\" % str(time() - start))" 995 | ] 996 | }, 997 | { 998 | "cell_type": "code", 999 | "execution_count": 28, 1000 | "metadata": {}, 1001 | "outputs": [ 1002 | { 1003 | "name": "stdout", 1004 | "output_type": "stream", 1005 | "text": [ 1006 | "accuracy 0.6424581005586593\n" 1007 | ] 1008 | } 1009 | ], 1010 | "source": [ 1011 | "W_1, b_1, W_2, b_2, W_3, b_3 = best_weights\n", 1012 | "scores = []\n", 1013 | "\n", 1014 | "for j in range(X_test.shape[0]):\n", 1015 | " result = np.dot(W_1, X_test[j]) + b_1\n", 1016 | " result = np.dot(W_2, result) + b_2\n", 1017 | " result = np.dot(W_3, result) + b_3 \n", 1018 | " result = softmax(result)\n", 1019 | " scores.append(list(result))\n", 1020 | " \n", 1021 | "y_prediction = np.argmax(np.array(scores), axis=1)\n", 1022 | "\n", 1023 | "print(\"accuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 1024 | ] 1025 | }, 1026 | { 1027 | "cell_type": "markdown", 1028 | "metadata": {}, 1029 | "source": [ 1030 | "We run 
into the same problem of suboptimal model performance, but this is not unexpected given the simplistic approach taken for illustrative purposes. We look to build on this in the next section by taking a systematic approach to optimizing the weight matrices and biases." 1031 | ] 1032 | } 1033 | ], 1034 | "metadata": { 1035 | "kernelspec": { 1036 | "display_name": "Python 3 (ipykernel)", 1037 | "language": "python", 1038 | "name": "python3" 1039 | }, 1040 | "language_info": { 1041 | "codemirror_mode": { 1042 | "name": "ipython", 1043 | "version": 3 1044 | }, 1045 | "file_extension": ".py", 1046 | "mimetype": "text/x-python", 1047 | "name": "python", 1048 | "nbconvert_exporter": "python", 1049 | "pygments_lexer": "ipython3", 1050 | "version": "3.11.1" 1051 | } 1052 | }, 1053 | "nbformat": 4, 1054 | "nbformat_minor": 4 1055 | } 1056 | -------------------------------------------------------------------------------- /1-1_Basic_NN-Titanic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Section 1-1 - Basic Neural Network - Titanic" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In the previous section, we simply iterated through randomly-generated matrices and chose the best-performing one. We build on this approach by reducing loss in a systematic way via stochastic gradient descent. In particular, we'll be using TensorFlow, an open source library developed by Google, and Keras, a high-level wrapper on top of TensorFlow." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "from sklearn.preprocessing import StandardScaler\n", 26 | "from time import time\n", 27 | "\n", 28 | "np.random.seed(1337)\n", 29 | "\n", 30 | "df = pd.read_csv('data/titanic.csv')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df_train = df.iloc[:712, :]\n", 40 | "\n", 41 | "scaler = StandardScaler()\n", 42 | "features = ['Class', 'Sex', 'Age', 'Fare']\n", 43 | "\n", 44 | "X_train = scaler.fit_transform(df_train[features].values)\n", 45 | "y_train = df_train['Survived'].values\n", 46 | "y_train_onehot = pd.get_dummies(df_train['Survived']).values" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "df_test = df.iloc[712:, :]\n", 56 | "\n", 57 | "X_test = scaler.transform(df_test[features].values)\n", 58 | "y_test = df_test['Survived'].values" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "## Benchmark" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 4, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "building tree 1 of 100\n", 78 | "building tree 2 of 100\n", 79 | "building tree 3 of 100\n", 80 | "building tree 4 of 100\n", 81 | "building tree 5 of 100\n", 82 | "building tree 6 of 100\n", 83 | "building tree 7 of 100\n", 84 | "building tree 8 of 100\n", 85 | "building tree 9 of 100\n", 86 | "building tree 10 of 100\n", 87 | "building tree 11 of 100\n", 88 | "building tree 12 of 100\n", 89 | "building tree 13 of 100\n", 90 | "building tree 14 of 100\n", 91 | "building tree 15 of 
100\n", 92 | "building tree 16 of 100\n", 93 | "building tree 17 of 100\n", 94 | "building tree 18 of 100\n", 95 | "building tree 19 of 100\n", 96 | "building tree 20 of 100\n", 97 | "building tree 21 of 100\n", 98 | "building tree 22 of 100\n", 99 | "building tree 23 of 100\n", 100 | "building tree 24 of 100\n", 101 | "building tree 25 of 100\n", 102 | "building tree 26 of 100\n", 103 | "building tree 27 of 100\n", 104 | "building tree 28 of 100\n", 105 | "building tree 29 of 100\n", 106 | "building tree 30 of 100\n", 107 | "building tree 31 of 100\n", 108 | "building tree 32 of 100\n", 109 | "building tree 33 of 100\n", 110 | "building tree 34 of 100\n", 111 | "building tree 35 of 100\n", 112 | "building tree 36 of 100\n", 113 | "building tree 37 of 100\n", 114 | "building tree 38 of 100\n", 115 | "building tree 39 of 100\n", 116 | "building tree 40 of 100\n", 117 | "building tree 41 of 100\n", 118 | "building tree 42 of 100\n", 119 | "building tree 43 of 100\n", 120 | "building tree 44 of 100\n", 121 | "building tree 45 of 100\n", 122 | "building tree 46 of 100\n", 123 | "building tree 47 of 100\n", 124 | "building tree 48 of 100\n", 125 | "building tree 49 of 100\n", 126 | "building tree 50 of 100\n", 127 | "building tree 51 of 100\n", 128 | "building tree 52 of 100\n", 129 | "building tree 53 of 100\n", 130 | "building tree 54 of 100\n", 131 | "building tree 55 of 100\n", 132 | "building tree 56 of 100\n", 133 | "building tree 57 of 100\n", 134 | "building tree 58 of 100\n", 135 | "building tree 59 of 100\n", 136 | "building tree 60 of 100\n", 137 | "building tree 61 of 100\n", 138 | "building tree 62 of 100\n", 139 | "building tree 63 of 100\n", 140 | "building tree 64 of 100\n", 141 | "building tree 65 of 100\n", 142 | "building tree 66 of 100\n", 143 | "building tree 67 of 100\n", 144 | "building tree 68 of 100\n", 145 | "building tree 69 of 100\n", 146 | "building tree 70 of 100\n", 147 | "building tree 71 of 100\n", 148 | "building tree 72 of 100\n", 149 | "building tree 73 of 100\n", 150 | "building tree 74 of 100\n", 151 | "building tree 75 of 100\n", 152 | "building tree 76 of 100\n", 153 | "building tree 77 of 100\n", 154 | "building tree 78 of 100\n", 155 | "building tree 79 of 100\n", 156 | "building tree 80 of 100\n", 157 | "building tree 81 of 100\n", 158 | "building tree 82 of 100\n", 159 | "building tree 83 of 100\n", 160 | "building tree 84 of 100\n", 161 | "building tree 85 of 100\n", 162 | "building tree 86 of 100\n", 163 | "building tree 87 of 100\n", 164 | "building tree 88 of 100\n", 165 | "building tree 89 of 100\n", 166 | "building tree 90 of 100\n", 167 | "building tree 91 of 100\n", 168 | "building tree 92 of 100\n", 169 | "building tree 93 of 100\n", 170 | "building tree 94 of 100\n", 171 | "building tree 95 of 100\n", 172 | "building tree 96 of 100\n", 173 | "building tree 97 of 100\n", 174 | "building tree 98 of 100\n", 175 | "building tree 99 of 100\n", 176 | "building tree 100 of 100\n", 177 | "\n", 178 | "accuracy 0.8324022346368715\n" 179 | ] 180 | }, 181 | { 182 | "name": "stderr", 183 | "output_type": "stream", 184 | "text": [ 185 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 186 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s\n", 187 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s\n", 188 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 0.1s finished\n", 189 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 190 | 
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s\n", 191 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s\n", 192 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 0.0s finished\n" 193 | ] 194 | } 195 | ], 196 | "source": [ 197 | "from sklearn.ensemble import RandomForestClassifier\n", 198 | "\n", 199 | "model = RandomForestClassifier(random_state=0, verbose=3)\n", 200 | "model = model.fit(X_train, df_train['Survived'].values)\n", 201 | "\n", 202 | "y_prediction = model.predict(X_test)\n", 203 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "## 1-layer Neural Network" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "Instead of generating a linear stack of layers with Numpy, we'll be implementing our model using Keras. We initialize our model, add a layer that inputs vectors of length 4 and outputs vectors of length 2, and finally add a softmax layer. We configure the learning process in the compilation step by specifying the optimizer, loss function and performance metric.\n", 218 | "\n", 219 | "Stochastic gradient descent acts by changing the weights gradually in the 'direction' that decreases the average loss. In other words, a particular weight would be increased if acts to decrease loss, or the weight decreased if it acts to increase loss. TensorFlow does the heavy-lifting by efficiently handling these numerical computations under the hood. A simple example of stochastic gradient descent is illustrated in the Appendix." 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 5, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stderr", 229 | "output_type": "stream", 230 | "text": [ 231 | "2023-03-24 10:35:29.824060: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 232 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" 233 | ] 234 | }, 235 | { 236 | "name": "stdout", 237 | "output_type": "stream", 238 | "text": [ 239 | "Epoch 1/12\n", 240 | "23/23 [==============================] - 0s 884us/step - loss: 1.4785 - accuracy: 0.2640\n", 241 | "Epoch 2/12\n", 242 | "23/23 [==============================] - 0s 828us/step - loss: 1.3076 - accuracy: 0.3006\n", 243 | "Epoch 3/12\n", 244 | "23/23 [==============================] - 0s 764us/step - loss: 1.1619 - accuracy: 0.3329\n", 245 | "Epoch 4/12\n", 246 | "23/23 [==============================] - 0s 841us/step - loss: 1.0360 - accuracy: 0.3624\n", 247 | "Epoch 5/12\n", 248 | "23/23 [==============================] - 0s 835us/step - loss: 0.9274 - accuracy: 0.4396\n", 249 | "Epoch 6/12\n", 250 | "23/23 [==============================] - 0s 797us/step - loss: 0.8380 - accuracy: 0.5183\n", 251 | "Epoch 7/12\n", 252 | "23/23 [==============================] - 0s 935us/step - loss: 0.7617 - accuracy: 0.5674\n", 253 | "Epoch 8/12\n", 254 | "23/23 [==============================] - 0s 873us/step - loss: 0.7028 - accuracy: 0.6110\n", 255 | "Epoch 9/12\n", 256 | "23/23 [==============================] - 0s 869us/step - loss: 0.6543 - accuracy: 0.6531\n", 257 | "Epoch 10/12\n", 258 | "23/23 [==============================] - 0s 848us/step - loss: 0.6172 - accuracy: 0.6910\n", 259 | "Epoch 
11/12\n", 260 | "23/23 [==============================] - 0s 838us/step - loss: 0.5871 - accuracy: 0.7022\n", 261 | "Epoch 12/12\n", 262 | "23/23 [==============================] - 0s 849us/step - loss: 0.5624 - accuracy: 0.7191\n", 263 | "Model: \"sequential\"\n", 264 | "_________________________________________________________________\n", 265 | " Layer (type) Output Shape Param # \n", 266 | "=================================================================\n", 267 | " dense (Dense) (None, 2) 10 \n", 268 | " \n", 269 | " activation (Activation) (None, 2) 0 \n", 270 | " \n", 271 | "=================================================================\n", 272 | "Total params: 10\n", 273 | "Trainable params: 10\n", 274 | "Non-trainable params: 0\n", 275 | "_________________________________________________________________\n", 276 | "\n", 277 | "time taken 0.6145882606506348 seconds\n" 278 | ] 279 | } 280 | ], 281 | "source": [ 282 | "from keras.models import Sequential\n", 283 | "from keras.layers import Dense, Activation\n", 284 | "\n", 285 | "start = time()\n", 286 | "\n", 287 | "model = Sequential()\n", 288 | "model.add(Dense(2))\n", 289 | "model.add(Activation(\"softmax\"))\n", 290 | "\n", 291 | "model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n", 292 | "\n", 293 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 294 | "\n", 295 | "model.summary()\n", 296 | "\n", 297 | "print('\\ntime taken %s seconds' % str(time() - start))" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 6, 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "name": "stdout", 307 | "output_type": "stream", 308 | "text": [ 309 | "6/6 [==============================] - 0s 870us/step\n", 310 | "\n", 311 | "accuracy 0.776536312849162\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "y_prediction = np.argmax(model.predict(X_test), axis=-1)\n", 317 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "We notice that the loss reduces systematically as the model 'learns' from the data. The rate of loss reduction, however, seems to indicate that loss could be further reduced." 
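One way to probe that, as a sketch rather than a recipe taken from the notebook: refit the same 1-layer model with more epochs and an explicit learning rate for the SGD optimizer. The values below (learning rate 0.05, 50 epochs) are illustrative assumptions, not tuned settings.

```python
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

# Same 1-layer architecture as above, reusing X_train and y_train_onehot.
model = Sequential()
model.add(Dense(2))
model.add(Activation("softmax"))

model.compile(loss='categorical_crossentropy',
              optimizer=SGD(learning_rate=0.05),  # illustrative learning rate
              metrics=['accuracy'])
model.fit(X_train, y_train_onehot, epochs=50, verbose=0)  # more epochs than the 12 used above
```

Whether the extra epochs actually help on the test set would still need to be checked against the benchmark.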
325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "## 2-layer Neural Network" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 7, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "name": "stdout", 341 | "output_type": "stream", 342 | "text": [ 343 | "Epoch 1/12\n", 344 | "23/23 [==============================] - 0s 921us/step - loss: 0.6226 - accuracy: 0.7388\n", 345 | "Epoch 2/12\n", 346 | "23/23 [==============================] - 0s 914us/step - loss: 0.5424 - accuracy: 0.7823\n", 347 | "Epoch 3/12\n", 348 | "23/23 [==============================] - 0s 910us/step - loss: 0.5081 - accuracy: 0.7795\n", 349 | "Epoch 4/12\n", 350 | "23/23 [==============================] - 0s 935us/step - loss: 0.4903 - accuracy: 0.7767\n", 351 | "Epoch 5/12\n", 352 | "23/23 [==============================] - 0s 925us/step - loss: 0.4817 - accuracy: 0.7753\n", 353 | "Epoch 6/12\n", 354 | "23/23 [==============================] - 0s 945us/step - loss: 0.4760 - accuracy: 0.7767\n", 355 | "Epoch 7/12\n", 356 | "23/23 [==============================] - 0s 951us/step - loss: 0.4731 - accuracy: 0.7767\n", 357 | "Epoch 8/12\n", 358 | "23/23 [==============================] - 0s 955us/step - loss: 0.4709 - accuracy: 0.7781\n", 359 | "Epoch 9/12\n", 360 | "23/23 [==============================] - 0s 955us/step - loss: 0.4690 - accuracy: 0.7781\n", 361 | "Epoch 10/12\n", 362 | "23/23 [==============================] - 0s 912us/step - loss: 0.4678 - accuracy: 0.7781\n", 363 | "Epoch 11/12\n", 364 | "23/23 [==============================] - 0s 947us/step - loss: 0.4670 - accuracy: 0.7781\n", 365 | "Epoch 12/12\n", 366 | "23/23 [==============================] - 0s 920us/step - loss: 0.4662 - accuracy: 0.7781\n", 367 | "Model: \"sequential_1\"\n", 368 | "_________________________________________________________________\n", 369 | " Layer (type) Output Shape Param # \n", 370 | "=================================================================\n", 371 | " dense_1 (Dense) (None, 100) 500 \n", 372 | " \n", 373 | " dense_2 (Dense) (None, 2) 202 \n", 374 | " \n", 375 | " activation_1 (Activation) (None, 2) 0 \n", 376 | " \n", 377 | "=================================================================\n", 378 | "Total params: 702\n", 379 | "Trainable params: 702\n", 380 | "Non-trainable params: 0\n", 381 | "_________________________________________________________________\n", 382 | "\n", 383 | "time taken 0.6059098243713379 seconds\n" 384 | ] 385 | } 386 | ], 387 | "source": [ 388 | "start = time()\n", 389 | "\n", 390 | "model = Sequential()\n", 391 | "model.add(Dense(100))\n", 392 | "model.add(Dense(2))\n", 393 | "model.add(Activation(\"softmax\"))\n", 394 | "\n", 395 | "model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n", 396 | "\n", 397 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 398 | "\n", 399 | "model.summary()\n", 400 | "\n", 401 | "print('\\ntime taken %s seconds' % str(time() - start))" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 8, 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "name": "stdout", 411 | "output_type": "stream", 412 | "text": [ 413 | "6/6 [==============================] - 0s 913us/step\n", 414 | "\n", 415 | "accuracy 0.8100558659217877\n" 416 | ] 417 | } 418 | ], 419 | "source": [ 420 | "y_prediction = np.argmax(model.predict(X_test), axis=-1)\n", 421 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 422 | ] 
423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "The loss reduction 'flattens out' more compared to the 1-layer example, and the accuracy improves to 80%." 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "## 3-layer Neural Network" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 9, 441 | "metadata": {}, 442 | "outputs": [ 443 | { 444 | "name": "stdout", 445 | "output_type": "stream", 446 | "text": [ 447 | "Epoch 1/12\n", 448 | "23/23 [==============================] - 0s 1ms/step - loss: 0.5475 - accuracy: 0.7346\n", 449 | "Epoch 2/12\n", 450 | "23/23 [==============================] - 0s 989us/step - loss: 0.4941 - accuracy: 0.7725\n", 451 | "Epoch 3/12\n", 452 | "23/23 [==============================] - 0s 1ms/step - loss: 0.4781 - accuracy: 0.7725\n", 453 | "Epoch 4/12\n", 454 | "23/23 [==============================] - 0s 946us/step - loss: 0.4724 - accuracy: 0.7767\n", 455 | "Epoch 5/12\n", 456 | "23/23 [==============================] - 0s 951us/step - loss: 0.4693 - accuracy: 0.7767\n", 457 | "Epoch 6/12\n", 458 | "23/23 [==============================] - 0s 946us/step - loss: 0.4675 - accuracy: 0.7795\n", 459 | "Epoch 7/12\n", 460 | "23/23 [==============================] - 0s 952us/step - loss: 0.4660 - accuracy: 0.7809\n", 461 | "Epoch 8/12\n", 462 | "23/23 [==============================] - 0s 1ms/step - loss: 0.4653 - accuracy: 0.7851\n", 463 | "Epoch 9/12\n", 464 | "23/23 [==============================] - 0s 965us/step - loss: 0.4654 - accuracy: 0.7809\n", 465 | "Epoch 10/12\n", 466 | "23/23 [==============================] - 0s 1ms/step - loss: 0.4650 - accuracy: 0.7837\n", 467 | "Epoch 11/12\n", 468 | "23/23 [==============================] - 0s 987us/step - loss: 0.4645 - accuracy: 0.7865\n", 469 | "Epoch 12/12\n", 470 | "23/23 [==============================] - 0s 1ms/step - loss: 0.4646 - accuracy: 0.7851\n", 471 | "Model: \"sequential_2\"\n", 472 | "_________________________________________________________________\n", 473 | " Layer (type) Output Shape Param # \n", 474 | "=================================================================\n", 475 | " dense_3 (Dense) (None, 100) 500 \n", 476 | " \n", 477 | " dense_4 (Dense) (None, 100) 10100 \n", 478 | " \n", 479 | " dense_5 (Dense) (None, 2) 202 \n", 480 | " \n", 481 | " activation_2 (Activation) (None, 2) 0 \n", 482 | " \n", 483 | "=================================================================\n", 484 | "Total params: 10,802\n", 485 | "Trainable params: 10,802\n", 486 | "Non-trainable params: 0\n", 487 | "_________________________________________________________________\n", 488 | "\n", 489 | "time taken 0.6536471843719482 seconds\n" 490 | ] 491 | } 492 | ], 493 | "source": [ 494 | "start = time()\n", 495 | "\n", 496 | "model = Sequential()\n", 497 | "model.add(Dense(100))\n", 498 | "model.add(Dense(100))\n", 499 | "model.add(Dense(2))\n", 500 | "model.add(Activation(\"softmax\"))\n", 501 | "\n", 502 | "model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n", 503 | "\n", 504 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 505 | "\n", 506 | "model.summary()\n", 507 | "\n", 508 | "print('\\ntime taken %s seconds' % str(time() - start))" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": 10, 514 | "metadata": {}, 515 | "outputs": [ 516 | { 517 | "name": "stdout", 518 | "output_type": "stream", 519 | "text": [ 520 | "6/6 
[==============================] - 0s 968us/step\n", 521 | "\n", 522 | "accuracy 0.8044692737430168\n" 523 | ] 524 | } 525 | ], 526 | "source": [ 527 | "y_prediction = np.argmax(model.predict(X_test), axis=-1)\n", 528 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 529 | ] 530 | }, 531 | { 532 | "cell_type": "markdown", 533 | "metadata": {}, 534 | "source": [ 535 | "We're not able to reduce loss on the training that much further; the best performance obtained does not beat the benchmark. Since the dataset is small, there isn't as much for the model to 'learn' from (or for that matter, predict on). We'll apply techniques developed so far on a much larger dataset in the next section." 536 | ] 537 | } 538 | ], 539 | "metadata": { 540 | "kernelspec": { 541 | "display_name": "Python 3 (ipykernel)", 542 | "language": "python", 543 | "name": "python3" 544 | }, 545 | "language_info": { 546 | "codemirror_mode": { 547 | "name": "ipython", 548 | "version": 3 549 | }, 550 | "file_extension": ".py", 551 | "mimetype": "text/x-python", 552 | "name": "python", 553 | "nbconvert_exporter": "python", 554 | "pygments_lexer": "ipython3", 555 | "version": "3.11.1" 556 | } 557 | }, 558 | "nbformat": 4, 559 | "nbformat_minor": 4 560 | } 561 | -------------------------------------------------------------------------------- /2-1_Basic_NN-MNIST.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Section 2-1 - Basic Neural Network - MNIST" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In this section, we'll apply techniques developed so far on the MNIST dataset. The MNIST dataset consists of hand-drawn digits from zero to nine. Each image is 28 pixels in height and 28 pixels in width, with the pixel value an integer between 0 and 255." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "from sklearn.preprocessing import StandardScaler\n", 26 | "from time import time\n", 27 | "\n", 28 | "np.random.seed(1337)\n", 29 | "\n", 30 | "df = pd.read_csv('data/mnist.csv')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "For illustrative purposes, the first example is shown with pixel values between 0 and 10." 
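The rescaling in the next cell divides each 0-255 pixel value by 26 and truncates to an integer, so the printed values fall in the 0-9 range; a quick check of the arithmetic:

```python
# 255 / 26 truncates to 9, so every pixel maps into the 0-9 range.
for pixel in (0, 26, 130, 255):
    print(pixel, '->', int(pixel / 26))   # 0 -> 0, 26 -> 1, 130 -> 5, 255 -> 9
```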
38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 50 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 51 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 52 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 53 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,9,3,0,0,0,0,0]\n", 54 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,9,9,3,0,0,0,0,0]\n", 55 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,9,9,6,0,0,0,0,0,0]\n", 56 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,9,9,8,0,0,0,0,0,0,0]\n", 57 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,7,9,9,2,0,0,0,0,0,0,0]\n", 58 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,9,9,3,0,0,0,0,0,0,0,0]\n", 59 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,9,9,9,6,0,0,0,0,0,0,0,0,0]\n", 60 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,9,9,6,0,0,0,0,0,0,0,0,0,0]\n", 61 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,8,9,9,9,3,0,0,0,0,0,0,0,0,0,0]\n", 62 | "[0,0,0,0,0,0,0,0,0,0,0,0,1,9,9,9,0,0,0,0,0,0,0,0,0,0,0,0]\n", 63 | "[0,0,0,0,0,0,0,0,0,0,0,0,7,9,9,7,0,0,0,0,0,0,0,0,0,0,0,0]\n", 64 | "[0,0,0,0,0,0,0,0,0,0,0,6,9,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 65 | "[0,0,0,0,0,0,0,0,0,0,0,7,9,9,2,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 66 | "[0,0,0,0,0,0,0,0,0,0,7,9,9,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 67 | "[0,0,0,0,0,0,0,0,0,3,9,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 68 | "[0,0,0,0,0,0,0,0,3,9,9,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 69 | "[0,0,0,0,0,0,0,0,8,9,9,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 70 | "[0,0,0,0,0,0,0,3,9,9,9,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 71 | "[0,0,0,0,0,0,0,3,9,9,9,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 72 | "[0,0,0,0,0,0,0,0,8,8,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 73 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 74 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 75 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n", 76 | "[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "for item in df.iloc[0, 1:].values.reshape(28,28)/26:\n", 82 | " print(''.join(str([int(term) for term in item]).split(' ')))" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "A hand-drawn 1 can clearly be seen from the visualization in grayscale." 
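The figure stored in the next cell's output is that same 28x28 array rendered as an image. A minimal sketch of how such a plot could be produced with matplotlib (the notebook's exact plotting call is not visible here, so treat this as an assumption):

```python
import matplotlib.pyplot as plt

# Reshape the first row's 784 pixel columns into a 28x28 array and display it in grayscale.
image = df.iloc[0, 1:].values.reshape(28, 28)
plt.imshow(image, cmap='gray')
plt.axis('off')
plt.show()
```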
90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "data": { 99 | "image/png": "<base64-encoded PNG omitted: matplotlib grayscale rendering of the first MNIST digit>"
33tPZs2f15JNPqqioSF1dXUM8aXw97DhIUnFxcdT5ceDAgSGcMP6CwaBKS0t15swZnThxQnfu3NGyZcvU2dkZ2WfHjh366KOPdOjQIQWDQV27dk2rV682nDr2HuU4SNKmTZuizofdu3cbTdwPlwAWLVrkSktLI1/39PS4QCDgKisrDacaehUVFS4nJ8d6DFOS3OHDhyNf9/b2Or/f7373u99FHmtvb3der9cdOHDAYMKhce9xcM659evXuxUrVpjMY+X69etOkgsGg865u//bjx8/3h06dCiyz3/+8x8nydXW1lqNGXf3HgfnnFuyZIn7yU9+YjfUIxj2V0C3b9/W+fPnVVhYGHlszJgxKiwsVG1treFkNq5cuaJAIKAZM2bolVdeUVNTk/VIphobG9Xa2hp1fvh8PuXm5o7K86Ompkbp6emaPXu2tm7dqhs3bliPFFehUEiSlJqaKkk6f/687ty5E3U+zJkzR9OmTRvR58O9x+ErH3zwgdLS0jR37lyVl5fr1q1bFuP1a9jdjPReX3zxhXp6epSRkRH1eEZGhj777DOjqWzk5uZq3759mj17tlpaWvTWW2/pxRdf1OXLl5WUlGQ9nonW1lZJ6vP8+GrbaFFcXKzVq1crOztbDQ0N+vnPf66SkhLV1tZq7Nix1uPFXG9vr7Zv367Fixdr7ty5ku6eDxMmTFBKSkrUviP5fOjrOEjSD3/4Q02fPl2BQECXLl3Sz372M9XV1elvf/ub4bTRhn2A8H9KSkoi/zx//nzl5uZq+vTp+utf/6qNGzcaTobhYN26dZF/njdvnubPn6+ZM2eqpqZGS5cuNZwsPkpLS3X58uVR8T7og/R3HDZv3hz553nz5ikzM1NLly5VQ0ODZs6cOdRj9mnY/wguLS1NY8eOve9TLG1tbfL7/UZTDQ8pKSl67rnnVF9fbz2Kma/OAc6P+82YMUNpaWkj8vzYtm2bjh07plOnTkX9+ha/36/bt2+rvb09av+Rej70dxz6kpubK0nD6nwY9gGaMGGCFixYoOrq6shjvb29qq6uVl5enuFk9m7evKmGhgZlZmZaj2ImOztbfr8/6vwIh8M6e/bsqD8/rl69qhs3boyo88M5p23btunw4cM6efKksrOzo7YvWLBA48ePjzof6urq1NTUNKLOh4cdh75cvHhRkobX+WD9KYhHcfDgQef1et2+ffvcv//9b7d582aXkpLiWltbrUcbUj/96U9dTU2Na2xsdP/85z9dYWGhS0tLc9evX7ceLa46OjrchQsX3IULF5wk9/bbb7sLFy64//3vf845537zm9+4lJQUd/ToUXfp0iW3YsUKl52d7b788kvjyWPrQceho6PDvfbaa662ttY1Nja6jz/+2H372992zz77rOvq6rIePWa2bt3qfD6fq6mpcS0tLZF169atyD5btmxx06ZNcydPnnTnzp1zeXl5Li8vz3Dq2HvYcaivr3e//OUv3blz51xjY6M7evSomzFjhsvPzzeePFpCBMg55/7whz+4adOmuQkTJrhFixa5M2fOWI805NauXesyMzPdhAkT3NNPP+3Wrl3r6uvrrceKu1OnTjlJ963169c75+5+FPvNN990GRkZzuv1uqVLl7q6ujrboePgQcfh1q1bbtmyZW7KlClu/Pjxbvr06W7Tpk0j7j/S+vr3l+T27t0b2efLL790P/7xj903vvEN98QTT7hVq1a5lpYWu6Hj4GHHoampyeXn57vU1FTn9XrdrFmz3Ouvv+5CoZDt4Pfg1zEAAEwM+/eAAAAjEwECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABg4v8BybhKo7JU7yoAAAAASUVORK5CYII=\n", 100 | "text/plain": [ 101 | "
" 102 | ] 103 | }, 104 | "metadata": {}, 105 | "output_type": "display_data" 106 | } 107 | ], 108 | "source": [ 109 | "%matplotlib inline\n", 110 | "\n", 111 | "import matplotlib.pyplot as plt\n", 112 | "\n", 113 | "plt.imshow(df.iloc[0, 1:].values.reshape(28,28), cmap=plt.get_cmap('gray', 5))\n", 114 | "plt.show()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "There are 42,000 images. We similarly split the images 80:20 into training and test sets, and scale the data through division by 255. " 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 4, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "df_train = df.iloc[:33600, :]\n", 131 | "\n", 132 | "X_train = df_train.iloc[:, 1:].values / 255.\n", 133 | "y_train = df_train['label'].values\n", 134 | "y_train_onehot = pd.get_dummies(df_train['label']).values" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 5, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "df_test = df.iloc[33600:, :]\n", 144 | "\n", 145 | "X_test = df_test.iloc[:, 1:].values / 255.\n", 146 | "y_test = df_test['label'].values" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "## Benchmark" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 6, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stderr", 163 | "output_type": "stream", 164 | "text": [ 165 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 166 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.2s remaining: 0.0s\n" 167 | ] 168 | }, 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | "building tree 1 of 100\n", 174 | "building tree 2 of 100\n" 175 | ] 176 | }, 177 | { 178 | "name": "stderr", 179 | "output_type": "stream", 180 | "text": [ 181 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.4s remaining: 0.0s\n" 182 | ] 183 | }, 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "building tree 3 of 100\n", 189 | "building tree 4 of 100\n", 190 | "building tree 5 of 100\n", 191 | "building tree 6 of 100\n", 192 | "building tree 7 of 100\n", 193 | "building tree 8 of 100\n", 194 | "building tree 9 of 100\n", 195 | "building tree 10 of 100\n", 196 | "building tree 11 of 100\n", 197 | "building tree 12 of 100\n", 198 | "building tree 13 of 100\n", 199 | "building tree 14 of 100\n", 200 | "building tree 15 of 100\n", 201 | "building tree 16 of 100\n", 202 | "building tree 17 of 100\n", 203 | "building tree 18 of 100\n", 204 | "building tree 19 of 100\n", 205 | "building tree 20 of 100\n", 206 | "building tree 21 of 100\n", 207 | "building tree 22 of 100\n", 208 | "building tree 23 of 100\n", 209 | "building tree 24 of 100\n", 210 | "building tree 25 of 100\n", 211 | "building tree 26 of 100\n", 212 | "building tree 27 of 100\n", 213 | "building tree 28 of 100\n", 214 | "building tree 29 of 100\n", 215 | "building tree 30 of 100\n", 216 | "building tree 31 of 100\n", 217 | "building tree 32 of 100\n", 218 | "building tree 33 of 100\n", 219 | "building tree 34 of 100\n", 220 | "building tree 35 of 100\n", 221 | "building tree 36 of 100\n", 222 | "building tree 37 of 100\n", 223 | "building tree 38 of 100\n", 224 | "building tree 39 of 100\n", 225 | "building tree 40 of 100\n", 226 | "building tree 41 of 100\n", 227 | "building tree 42 of 100\n", 228 | "building tree 43 of 
100\n", 229 | "building tree 44 of 100\n", 230 | "building tree 45 of 100\n", 231 | "building tree 46 of 100\n", 232 | "building tree 47 of 100\n", 233 | "building tree 48 of 100\n", 234 | "building tree 49 of 100\n", 235 | "building tree 50 of 100\n", 236 | "building tree 51 of 100\n", 237 | "building tree 52 of 100\n", 238 | "building tree 53 of 100\n", 239 | "building tree 54 of 100\n", 240 | "building tree 55 of 100\n", 241 | "building tree 56 of 100\n", 242 | "building tree 57 of 100\n", 243 | "building tree 58 of 100\n", 244 | "building tree 59 of 100\n", 245 | "building tree 60 of 100\n", 246 | "building tree 61 of 100\n", 247 | "building tree 62 of 100\n", 248 | "building tree 63 of 100\n", 249 | "building tree 64 of 100\n", 250 | "building tree 65 of 100\n", 251 | "building tree 66 of 100\n", 252 | "building tree 67 of 100\n", 253 | "building tree 68 of 100\n", 254 | "building tree 69 of 100\n", 255 | "building tree 70 of 100\n", 256 | "building tree 71 of 100\n", 257 | "building tree 72 of 100\n", 258 | "building tree 73 of 100\n", 259 | "building tree 74 of 100\n", 260 | "building tree 75 of 100\n", 261 | "building tree 76 of 100\n", 262 | "building tree 77 of 100\n", 263 | "building tree 78 of 100\n", 264 | "building tree 79 of 100\n", 265 | "building tree 80 of 100\n", 266 | "building tree 81 of 100\n", 267 | "building tree 82 of 100\n", 268 | "building tree 83 of 100\n", 269 | "building tree 84 of 100\n", 270 | "building tree 85 of 100\n", 271 | "building tree 86 of 100\n", 272 | "building tree 87 of 100\n", 273 | "building tree 88 of 100\n", 274 | "building tree 89 of 100\n", 275 | "building tree 90 of 100\n", 276 | "building tree 91 of 100\n", 277 | "building tree 92 of 100\n", 278 | "building tree 93 of 100\n", 279 | "building tree 94 of 100\n", 280 | "building tree 95 of 100\n", 281 | "building tree 96 of 100\n", 282 | "building tree 97 of 100\n", 283 | "building tree 98 of 100\n", 284 | "building tree 99 of 100\n", 285 | "building tree 100 of 100\n" 286 | ] 287 | }, 288 | { 289 | "name": "stderr", 290 | "output_type": "stream", 291 | "text": [ 292 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 19.2s finished\n", 293 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 294 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s\n", 295 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s\n" 296 | ] 297 | }, 298 | { 299 | "name": "stdout", 300 | "output_type": "stream", 301 | "text": [ 302 | "\n", 303 | "accuracy 0.965\n" 304 | ] 305 | }, 306 | { 307 | "name": "stderr", 308 | "output_type": "stream", 309 | "text": [ 310 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 0.2s finished\n" 311 | ] 312 | } 313 | ], 314 | "source": [ 315 | "from sklearn.ensemble import RandomForestClassifier\n", 316 | "\n", 317 | "model = RandomForestClassifier(random_state=0, verbose=3)\n", 318 | "model = model.fit(X_train, df_train['label'].values)\n", 319 | "\n", 320 | "y_prediction = model.predict(X_test)\n", 321 | "print(\"\\naccuracy\", np.sum(y_prediction == df_test['label'].values) / float(len(y_test)))" 322 | ] 323 | }, 324 | { 325 | "cell_type": "markdown", 326 | "metadata": {}, 327 | "source": [ 328 | "## 1-layer Neural Network" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 7, 334 | "metadata": {}, 335 | "outputs": [ 336 | { 337 | "name": "stderr", 338 | "output_type": "stream", 339 | "text": [ 340 | "2023-03-24 10:36:11.623274: I 
tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 341 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" 342 | ] 343 | }, 344 | { 345 | "name": "stdout", 346 | "output_type": "stream", 347 | "text": [ 348 | "Epoch 1/12\n", 349 | "1050/1050 [==============================] - 1s 738us/step - loss: 0.9533 - accuracy: 0.7783\n", 350 | "Epoch 2/12\n", 351 | "1050/1050 [==============================] - 1s 714us/step - loss: 0.5329 - accuracy: 0.8669\n", 352 | "Epoch 3/12\n", 353 | "1050/1050 [==============================] - 1s 708us/step - loss: 0.4576 - accuracy: 0.8801\n", 354 | "Epoch 4/12\n", 355 | "1050/1050 [==============================] - 1s 718us/step - loss: 0.4206 - accuracy: 0.8875\n", 356 | "Epoch 5/12\n", 357 | "1050/1050 [==============================] - 1s 714us/step - loss: 0.3972 - accuracy: 0.8918\n", 358 | "Epoch 6/12\n", 359 | "1050/1050 [==============================] - 1s 717us/step - loss: 0.3809 - accuracy: 0.8953\n", 360 | "Epoch 7/12\n", 361 | "1050/1050 [==============================] - 1s 727us/step - loss: 0.3684 - accuracy: 0.8982\n", 362 | "Epoch 8/12\n", 363 | "1050/1050 [==============================] - 1s 713us/step - loss: 0.3586 - accuracy: 0.9007\n", 364 | "Epoch 9/12\n", 365 | "1050/1050 [==============================] - 1s 715us/step - loss: 0.3506 - accuracy: 0.9027\n", 366 | "Epoch 10/12\n", 367 | "1050/1050 [==============================] - 1s 714us/step - loss: 0.3440 - accuracy: 0.9047\n", 368 | "Epoch 11/12\n", 369 | "1050/1050 [==============================] - 1s 711us/step - loss: 0.3382 - accuracy: 0.9062\n", 370 | "Epoch 12/12\n", 371 | "1050/1050 [==============================] - 1s 714us/step - loss: 0.3332 - accuracy: 0.9069\n", 372 | "Model: \"sequential\"\n", 373 | "_________________________________________________________________\n", 374 | " Layer (type) Output Shape Param # \n", 375 | "=================================================================\n", 376 | " dense (Dense) (32, 10) 7850 \n", 377 | " \n", 378 | " activation (Activation) (32, 10) 0 \n", 379 | " \n", 380 | "=================================================================\n", 381 | "Total params: 7,850\n", 382 | "Trainable params: 7,850\n", 383 | "Non-trainable params: 0\n", 384 | "_________________________________________________________________\n", 385 | "\n", 386 | "time taken 9.667016744613647 seconds\n" 387 | ] 388 | } 389 | ], 390 | "source": [ 391 | "from keras.models import Sequential\n", 392 | "from keras.layers import Dense, Activation\n", 393 | "\n", 394 | "start = time()\n", 395 | "\n", 396 | "model = Sequential()\n", 397 | "model.add(Dense(10))\n", 398 | "model.add(Activation(\"softmax\"))\n", 399 | "\n", 400 | "model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n", 401 | "\n", 402 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 403 | "\n", 404 | "model.summary()\n", 405 | "\n", 406 | "print('\\ntime taken %s seconds' % str(time() - start))" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 8, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "name": "stdout", 416 | "output_type": "stream", 417 | "text": [ 418 | "263/263 [==============================] - 0s 560us/step\n", 419 | "\n", 420 | "accuracy 0.909047619047619\n" 421 | ] 422 | } 423 | ], 424 | "source": [ 425 | "y_prediction = 
np.argmax(model.predict(X_test), axis=-1)\n", 426 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "## 2-layer Neural Network" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 9, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "name": "stdout", 443 | "output_type": "stream", 444 | "text": [ 445 | "Epoch 1/12\n", 446 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.7052 - accuracy: 0.8178\n", 447 | "Epoch 2/12\n", 448 | "1050/1050 [==============================] - 1s 999us/step - loss: 0.3979 - accuracy: 0.8877\n", 449 | "Epoch 3/12\n", 450 | "1050/1050 [==============================] - 1s 995us/step - loss: 0.3534 - accuracy: 0.8993\n", 451 | "Epoch 4/12\n", 452 | "1050/1050 [==============================] - 1s 999us/step - loss: 0.3312 - accuracy: 0.9051\n", 453 | "Epoch 5/12\n", 454 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.3170 - accuracy: 0.9093\n", 455 | "Epoch 6/12\n", 456 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.3072 - accuracy: 0.9117\n", 457 | "Epoch 7/12\n", 458 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.3000 - accuracy: 0.9153\n", 459 | "Epoch 8/12\n", 460 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2937 - accuracy: 0.9165\n", 461 | "Epoch 9/12\n", 462 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2889 - accuracy: 0.9182\n", 463 | "Epoch 10/12\n", 464 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2847 - accuracy: 0.9182\n", 465 | "Epoch 11/12\n", 466 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2813 - accuracy: 0.9211\n", 467 | "Epoch 12/12\n", 468 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2778 - accuracy: 0.9212\n", 469 | "Model: \"sequential_1\"\n", 470 | "_________________________________________________________________\n", 471 | " Layer (type) Output Shape Param # \n", 472 | "=================================================================\n", 473 | " dense_1 (Dense) (32, 100) 78500 \n", 474 | " \n", 475 | " dense_2 (Dense) (32, 10) 1010 \n", 476 | " \n", 477 | " activation_1 (Activation) (32, 10) 0 \n", 478 | " \n", 479 | "=================================================================\n", 480 | "Total params: 79,510\n", 481 | "Trainable params: 79,510\n", 482 | "Non-trainable params: 0\n", 483 | "_________________________________________________________________\n", 484 | "\n", 485 | "time taken 13.243598699569702 seconds\n" 486 | ] 487 | } 488 | ], 489 | "source": [ 490 | "start = time()\n", 491 | "\n", 492 | "model = Sequential()\n", 493 | "model.add(Dense(100))\n", 494 | "model.add(Dense(10))\n", 495 | "model.add(Activation(\"softmax\"))\n", 496 | "\n", 497 | "model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n", 498 | "\n", 499 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 500 | "\n", 501 | "model.summary()\n", 502 | "\n", 503 | "print('\\ntime taken %s seconds' % str(time() - start))" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 10, 509 | "metadata": {}, 510 | "outputs": [ 511 | { 512 | "name": "stdout", 513 | "output_type": "stream", 514 | "text": [ 515 | "263/263 [==============================] - 0s 765us/step\n", 516 | "\n", 517 | "accuracy 0.9204761904761904\n" 518 | ] 519 | } 520 | ], 521 | 
"source": [ 522 | "y_prediction = np.argmax(model.predict(X_test), axis=-1)\n", 523 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 524 | ] 525 | }, 526 | { 527 | "cell_type": "markdown", 528 | "metadata": {}, 529 | "source": [ 530 | "## 3-layer Neural Network" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 11, 536 | "metadata": {}, 537 | "outputs": [ 538 | { 539 | "name": "stdout", 540 | "output_type": "stream", 541 | "text": [ 542 | "Epoch 1/12\n", 543 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.6318 - accuracy: 0.8294\n", 544 | "Epoch 2/12\n", 545 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.3598 - accuracy: 0.8961\n", 546 | "Epoch 3/12\n", 547 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.3256 - accuracy: 0.9061\n", 548 | "Epoch 4/12\n", 549 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.3080 - accuracy: 0.9110\n", 550 | "Epoch 5/12\n", 551 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2976 - accuracy: 0.9146\n", 552 | "Epoch 6/12\n", 553 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2893 - accuracy: 0.9176\n", 554 | "Epoch 7/12\n", 555 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2827 - accuracy: 0.9187\n", 556 | "Epoch 8/12\n", 557 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2788 - accuracy: 0.9202\n", 558 | "Epoch 9/12\n", 559 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2744 - accuracy: 0.9222\n", 560 | "Epoch 10/12\n", 561 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2709 - accuracy: 0.9222\n", 562 | "Epoch 11/12\n", 563 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2677 - accuracy: 0.9241\n", 564 | "Epoch 12/12\n", 565 | "1050/1050 [==============================] - 1s 1ms/step - loss: 0.2649 - accuracy: 0.9251\n", 566 | "Model: \"sequential_2\"\n", 567 | "_________________________________________________________________\n", 568 | " Layer (type) Output Shape Param # \n", 569 | "=================================================================\n", 570 | " dense_3 (Dense) (32, 100) 78500 \n", 571 | " \n", 572 | " dense_4 (Dense) (32, 100) 10100 \n", 573 | " \n", 574 | " dense_5 (Dense) (32, 10) 1010 \n", 575 | " \n", 576 | " activation_2 (Activation) (32, 10) 0 \n", 577 | " \n", 578 | "=================================================================\n", 579 | "Total params: 89,610\n", 580 | "Trainable params: 89,610\n", 581 | "Non-trainable params: 0\n", 582 | "_________________________________________________________________\n", 583 | "\n", 584 | "time taken 13.777081727981567 seconds\n" 585 | ] 586 | } 587 | ], 588 | "source": [ 589 | "start = time()\n", 590 | "\n", 591 | "model = Sequential()\n", 592 | "model.add(Dense(100))\n", 593 | "model.add(Dense(100))\n", 594 | "model.add(Dense(10))\n", 595 | "model.add(Activation(\"softmax\"))\n", 596 | "\n", 597 | "model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])\n", 598 | "\n", 599 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 600 | "\n", 601 | "model.summary()\n", 602 | "\n", 603 | "print('\\ntime taken %s seconds' % str(time() - start))" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 12, 609 | "metadata": {}, 610 | "outputs": [ 611 | { 612 | "name": "stdout", 613 | "output_type": "stream", 614 | "text": [ 615 | "263/263 
[==============================] - 0s 735us/step\n", 616 | "\n", 617 | "accuracy 0.9204761904761904\n" 618 | ] 619 | } 620 | ], 621 | "source": [ 622 | "y_prediction = np.argmax(model.predict(X_test), axis=-1)\n", 623 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | "As in the previous section, we were able to drive loss down even further with additional layers. While we see improvements in accuracy, it's still not enough to beat the benchmark. We'll look into more advanced techniques to enhance model performance in next section." 631 | ] 632 | } 633 | ], 634 | "metadata": { 635 | "kernelspec": { 636 | "display_name": "Python 3 (ipykernel)", 637 | "language": "python", 638 | "name": "python3" 639 | }, 640 | "language_info": { 641 | "codemirror_mode": { 642 | "name": "ipython", 643 | "version": 3 644 | }, 645 | "file_extension": ".py", 646 | "mimetype": "text/x-python", 647 | "name": "python", 648 | "nbconvert_exporter": "python", 649 | "pygments_lexer": "ipython3", 650 | "version": "3.11.1" 651 | } 652 | }, 653 | "nbformat": 4, 654 | "nbformat_minor": 4 655 | } 656 | -------------------------------------------------------------------------------- /2-2_Regularized_NN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Section 2-2 - Regularized Neural Network" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In this section, we'll introduce two additional layers to our model. The first is called the rectified linear unit (hereafter, ReLU), which helps introduce non-linearity into the network. The second is called the dropout layer, which acts to regularize the network and prevent overfitting." 
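Before running the cells, it can help to see what these two layers do to a handful of activations. The following is a minimal NumPy sketch for illustration only — the input values are made up, and the 0.2 rate simply mirrors the dropout probability used in the Keras model later in this section; it is not the notebook's own code.

```python
import numpy as np

np.random.seed(1337)

x = np.array([-2.0, -0.5, 0.0, 1.5, 3.0])

# ReLU clips negative activations to zero: f(x) = max(0, x).
print(np.maximum(0.0, x))                     # [0.  0.  0.  1.5 3. ]

# Inverted dropout at training time: zero each activation with probability
# `rate`, then rescale the survivors by 1 / (1 - rate) so the expected
# magnitude of the activations is unchanged.
rate = 0.2
mask = (np.random.rand(x.shape[0]) >= rate).astype(x.dtype)
print(x * mask / (1.0 - rate))
```

Keras performs this masking and rescaling internally in its `Dropout` layer and disables it at prediction time, so the model code in this section only needs to specify the rate.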
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "from sklearn.preprocessing import StandardScaler\n", 26 | "from time import time\n", 27 | "\n", 28 | "np.random.seed(1337)\n", 29 | "\n", 30 | "df = pd.read_csv('data/mnist.csv')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df_train = df.iloc[:33600, :]\n", 40 | "\n", 41 | "X_train = df_train.iloc[:, 1:].values / 255.\n", 42 | "y_train = df_train['label'].values\n", 43 | "y_train_onehot = pd.get_dummies(df_train['label']).values" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "df_test = df.iloc[33600:, :]\n", 53 | "\n", 54 | "X_test = df_test.iloc[:, 1:].values / 255.\n", 55 | "y_test = df_test['label'].values" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Benchmark" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stderr", 72 | "output_type": "stream", 73 | "text": [ 74 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 75 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.2s remaining: 0.0s\n" 76 | ] 77 | }, 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "building tree 1 of 100\n", 83 | "building tree 2 of 100\n" 84 | ] 85 | }, 86 | { 87 | "name": "stderr", 88 | "output_type": "stream", 89 | "text": [ 90 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.4s remaining: 0.0s\n" 91 | ] 92 | }, 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "building tree 3 of 100\n", 98 | "building tree 4 of 100\n", 99 | "building tree 5 of 100\n", 100 | "building tree 6 of 100\n", 101 | "building tree 7 of 100\n", 102 | "building tree 8 of 100\n", 103 | "building tree 9 of 100\n", 104 | "building tree 10 of 100\n", 105 | "building tree 11 of 100\n", 106 | "building tree 12 of 100\n", 107 | "building tree 13 of 100\n", 108 | "building tree 14 of 100\n", 109 | "building tree 15 of 100\n", 110 | "building tree 16 of 100\n", 111 | "building tree 17 of 100\n", 112 | "building tree 18 of 100\n", 113 | "building tree 19 of 100\n", 114 | "building tree 20 of 100\n", 115 | "building tree 21 of 100\n", 116 | "building tree 22 of 100\n", 117 | "building tree 23 of 100\n", 118 | "building tree 24 of 100\n", 119 | "building tree 25 of 100\n", 120 | "building tree 26 of 100\n", 121 | "building tree 27 of 100\n", 122 | "building tree 28 of 100\n", 123 | "building tree 29 of 100\n", 124 | "building tree 30 of 100\n", 125 | "building tree 31 of 100\n", 126 | "building tree 32 of 100\n", 127 | "building tree 33 of 100\n", 128 | "building tree 34 of 100\n", 129 | "building tree 35 of 100\n", 130 | "building tree 36 of 100\n", 131 | "building tree 37 of 100\n", 132 | "building tree 38 of 100\n", 133 | "building tree 39 of 100\n", 134 | "building tree 40 of 100\n", 135 | "building tree 41 of 100\n", 136 | "building tree 42 of 100\n", 137 | "building tree 43 of 100\n", 138 | "building tree 44 of 100\n", 139 | "building tree 45 of 100\n", 140 | "building tree 46 of 100\n", 141 | "building tree 47 of 100\n", 142 | "building tree 48 of 100\n", 143 | "building tree 49 of 100\n", 144 | "building tree 50 of 
100\n", 145 | "building tree 51 of 100\n", 146 | "building tree 52 of 100\n", 147 | "building tree 53 of 100\n", 148 | "building tree 54 of 100\n", 149 | "building tree 55 of 100\n", 150 | "building tree 56 of 100\n", 151 | "building tree 57 of 100\n", 152 | "building tree 58 of 100\n", 153 | "building tree 59 of 100\n", 154 | "building tree 60 of 100\n", 155 | "building tree 61 of 100\n", 156 | "building tree 62 of 100\n", 157 | "building tree 63 of 100\n", 158 | "building tree 64 of 100\n", 159 | "building tree 65 of 100\n", 160 | "building tree 66 of 100\n", 161 | "building tree 67 of 100\n", 162 | "building tree 68 of 100\n", 163 | "building tree 69 of 100\n", 164 | "building tree 70 of 100\n", 165 | "building tree 71 of 100\n", 166 | "building tree 72 of 100\n", 167 | "building tree 73 of 100\n", 168 | "building tree 74 of 100\n", 169 | "building tree 75 of 100\n", 170 | "building tree 76 of 100\n", 171 | "building tree 77 of 100\n", 172 | "building tree 78 of 100\n", 173 | "building tree 79 of 100\n", 174 | "building tree 80 of 100\n", 175 | "building tree 81 of 100\n", 176 | "building tree 82 of 100\n", 177 | "building tree 83 of 100\n", 178 | "building tree 84 of 100\n", 179 | "building tree 85 of 100\n", 180 | "building tree 86 of 100\n", 181 | "building tree 87 of 100\n", 182 | "building tree 88 of 100\n", 183 | "building tree 89 of 100\n", 184 | "building tree 90 of 100\n", 185 | "building tree 91 of 100\n", 186 | "building tree 92 of 100\n", 187 | "building tree 93 of 100\n", 188 | "building tree 94 of 100\n", 189 | "building tree 95 of 100\n", 190 | "building tree 96 of 100\n", 191 | "building tree 97 of 100\n", 192 | "building tree 98 of 100\n", 193 | "building tree 99 of 100\n", 194 | "building tree 100 of 100\n" 195 | ] 196 | }, 197 | { 198 | "name": "stderr", 199 | "output_type": "stream", 200 | "text": [ 201 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 19.1s finished\n", 202 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 203 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s\n", 204 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s\n" 205 | ] 206 | }, 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "\n", 212 | "accuracy 0.965\n" 213 | ] 214 | }, 215 | { 216 | "name": "stderr", 217 | "output_type": "stream", 218 | "text": [ 219 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 0.3s finished\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "from sklearn.ensemble import RandomForestClassifier\n", 225 | "\n", 226 | "model = RandomForestClassifier(random_state=0, verbose=3)\n", 227 | "model = model.fit(X_train, df_train['label'].values)\n", 228 | "\n", 229 | "y_prediction = model.predict(X_test)\n", 230 | "print(\"\\naccuracy\", np.sum(y_prediction == df_test['label'].values) / float(len(y_test)))" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## Regularized NN" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "While matrix operations are linear, there could be a non-linear relationship between the features and the label. Introducing a ReLU layer, defined as f(x) = max(0, x), can help the model capture this interaction. 
ReLU is widely used as its simplicity allows for much faster training without a high cost to accuracy.\n", 245 | "\n", 246 | "The dropout layer can be thought of as a form of sampling, where output values are randomly set to zero by a pre-specified probability. This creates a more robust network as the process prevents interdependence, and as such the model is less likely to overfit on the training data. It is surprisingly effective, which has made it an active area of research.\n", 247 | "\n", 248 | "Following Andrej Karpathy's advice of \"don't be a hero\", the example shown here is from the Keras repository." 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 5, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "name": "stderr", 258 | "output_type": "stream", 259 | "text": [ 260 | "2023-03-24 10:37:54.651495: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 261 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" 262 | ] 263 | }, 264 | { 265 | "name": "stdout", 266 | "output_type": "stream", 267 | "text": [ 268 | "Epoch 1/12\n", 269 | "1050/1050 [==============================] - 5s 4ms/step - loss: 0.2653 - accuracy: 0.9188\n", 270 | "Epoch 2/12\n", 271 | "1050/1050 [==============================] - 5s 5ms/step - loss: 0.1267 - accuracy: 0.9634\n", 272 | "Epoch 3/12\n", 273 | "1050/1050 [==============================] - 5s 5ms/step - loss: 0.0960 - accuracy: 0.9729\n", 274 | "Epoch 4/12\n", 275 | "1050/1050 [==============================] - 5s 5ms/step - loss: 0.0808 - accuracy: 0.9770\n", 276 | "Epoch 5/12\n", 277 | "1050/1050 [==============================] - 5s 5ms/step - loss: 0.0695 - accuracy: 0.9810\n", 278 | "Epoch 6/12\n", 279 | "1050/1050 [==============================] - 5s 5ms/step - loss: 0.0618 - accuracy: 0.9822\n", 280 | "Epoch 7/12\n", 281 | "1050/1050 [==============================] - 6s 5ms/step - loss: 0.0523 - accuracy: 0.9857\n", 282 | "Epoch 8/12\n", 283 | "1050/1050 [==============================] - 6s 6ms/step - loss: 0.0462 - accuracy: 0.9868\n", 284 | "Epoch 9/12\n", 285 | "1050/1050 [==============================] - 6s 6ms/step - loss: 0.0409 - accuracy: 0.9893\n", 286 | "Epoch 10/12\n", 287 | "1050/1050 [==============================] - 6s 6ms/step - loss: 0.0376 - accuracy: 0.9901\n", 288 | "Epoch 11/12\n", 289 | "1050/1050 [==============================] - 6s 6ms/step - loss: 0.0354 - accuracy: 0.9910\n", 290 | "Epoch 12/12\n", 291 | "1050/1050 [==============================] - 6s 5ms/step - loss: 0.0320 - accuracy: 0.9911\n", 292 | "Model: \"sequential\"\n", 293 | "_________________________________________________________________\n", 294 | " Layer (type) Output Shape Param # \n", 295 | "=================================================================\n", 296 | " dense (Dense) (None, 512) 401920 \n", 297 | " \n", 298 | " dropout (Dropout) (None, 512) 0 \n", 299 | " \n", 300 | " dense_1 (Dense) (None, 512) 262656 \n", 301 | " \n", 302 | " dropout_1 (Dropout) (None, 512) 0 \n", 303 | " \n", 304 | " dense_2 (Dense) (None, 10) 5130 \n", 305 | " \n", 306 | "=================================================================\n", 307 | "Total params: 669,706\n", 308 | "Trainable params: 669,706\n", 309 | "Non-trainable params: 0\n", 310 | "_________________________________________________________________\n", 311 | "\n", 312 | "time 
taken 65.99748110771179 seconds\n" 313 | ] 314 | } 315 | ], 316 | "source": [ 317 | "# https://github.com/fchollet/keras/blob/master/examples/mnist_mlp.py\n", 318 | "\n", 319 | "from keras.models import Sequential\n", 320 | "from keras.layers import Dense, Activation, Dropout\n", 321 | "from tensorflow.keras.optimizers import RMSprop\n", 322 | "\n", 323 | "start = time()\n", 324 | "\n", 325 | "model = Sequential()\n", 326 | "model.add(Dense(512, activation='relu', input_shape=(784,)))\n", 327 | "model.add(Dropout(0.2))\n", 328 | "model.add(Dense(512, activation='relu'))\n", 329 | "model.add(Dropout(0.2))\n", 330 | "model.add(Dense(10, activation='softmax'))\n", 331 | "\n", 332 | "model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])\n", 333 | "\n", 334 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 335 | "\n", 336 | "model.summary()\n", 337 | "\n", 338 | "print('\\ntime taken %s seconds' % str(time() - start))" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 6, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "263/263 [==============================] - 0s 1ms/step\n", 351 | "\n", 352 | "accuracy 0.9783333333333334\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "y_prediction = np.argmax(model.predict(X_test), axis=-1)\n", 358 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "Introducing ReLU and dropout layers have enabled the model to outperform the benchmark by 1%! In the next section, we introduce new layers that take advantage of the 2D structure of the image to further improve model performance." 366 | ] 367 | } 368 | ], 369 | "metadata": { 370 | "kernelspec": { 371 | "display_name": "Python 3 (ipykernel)", 372 | "language": "python", 373 | "name": "python3" 374 | }, 375 | "language_info": { 376 | "codemirror_mode": { 377 | "name": "ipython", 378 | "version": 3 379 | }, 380 | "file_extension": ".py", 381 | "mimetype": "text/x-python", 382 | "name": "python", 383 | "nbconvert_exporter": "python", 384 | "pygments_lexer": "ipython3", 385 | "version": "3.11.1" 386 | } 387 | }, 388 | "nbformat": 4, 389 | "nbformat_minor": 4 390 | } 391 | -------------------------------------------------------------------------------- /2-3_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Section 2-3 - Convolutional Neural Network" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "The approach we've taken so far treats each image a single 'flat' vector of length 784. In this section, we'll introduce layers that take advantage the 2D structure of each 28x28 MNIST image, helping simplify computation and improve model performance." 
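Concretely, moving from 'flat' vectors to 2D inputs is only a reshape of the same pixel values. The snippet below is a small illustration rather than a new notebook cell: it assumes `X_train` is the (n, 784) array built in the cells that follow, and shows the (n, 28, 28, 1) layout expected by convolutional layers, where the trailing 1 is the single grayscale channel. The CNN cell later in this section performs this same reshape before training.

```python
# Illustration only: the same pixels, viewed as 28x28 single-channel images
# instead of flat vectors of length 784.
print(X_train.shape)                                      # (33600, 784)

X_train_images = X_train.reshape(X_train.shape[0], 28, 28, 1)
print(X_train_images.shape)                               # (33600, 28, 28, 1)
```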
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import numpy as np\n", 24 | "import pandas as pd\n", 25 | "from sklearn.preprocessing import StandardScaler\n", 26 | "from time import time\n", 27 | "\n", 28 | "np.random.seed(1337)\n", 29 | "\n", 30 | "df = pd.read_csv('data/mnist.csv')" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "df_train = df.iloc[:33600, :]\n", 40 | "\n", 41 | "X_train = df_train.iloc[:, 1:].values / 255.\n", 42 | "y_train = df_train['label'].values\n", 43 | "y_train_onehot = pd.get_dummies(df_train['label']).values" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "df_test = df.iloc[33600:, :]\n", 53 | "\n", 54 | "X_test = df_test.iloc[:, 1:].values / 255.\n", 55 | "y_test = df_test['label'].values" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "## Benchmark" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 4, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "name": "stderr", 72 | "output_type": "stream", 73 | "text": [ 74 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 75 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.2s remaining: 0.0s\n" 76 | ] 77 | }, 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "building tree 1 of 100\n", 83 | "building tree 2 of 100\n" 84 | ] 85 | }, 86 | { 87 | "name": "stderr", 88 | "output_type": "stream", 89 | "text": [ 90 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.4s remaining: 0.0s\n" 91 | ] 92 | }, 93 | { 94 | "name": "stdout", 95 | "output_type": "stream", 96 | "text": [ 97 | "building tree 3 of 100\n", 98 | "building tree 4 of 100\n", 99 | "building tree 5 of 100\n", 100 | "building tree 6 of 100\n", 101 | "building tree 7 of 100\n", 102 | "building tree 8 of 100\n", 103 | "building tree 9 of 100\n", 104 | "building tree 10 of 100\n", 105 | "building tree 11 of 100\n", 106 | "building tree 12 of 100\n", 107 | "building tree 13 of 100\n", 108 | "building tree 14 of 100\n", 109 | "building tree 15 of 100\n", 110 | "building tree 16 of 100\n", 111 | "building tree 17 of 100\n", 112 | "building tree 18 of 100\n", 113 | "building tree 19 of 100\n", 114 | "building tree 20 of 100\n", 115 | "building tree 21 of 100\n", 116 | "building tree 22 of 100\n", 117 | "building tree 23 of 100\n", 118 | "building tree 24 of 100\n", 119 | "building tree 25 of 100\n", 120 | "building tree 26 of 100\n", 121 | "building tree 27 of 100\n", 122 | "building tree 28 of 100\n", 123 | "building tree 29 of 100\n", 124 | "building tree 30 of 100\n", 125 | "building tree 31 of 100\n", 126 | "building tree 32 of 100\n", 127 | "building tree 33 of 100\n", 128 | "building tree 34 of 100\n", 129 | "building tree 35 of 100\n", 130 | "building tree 36 of 100\n", 131 | "building tree 37 of 100\n", 132 | "building tree 38 of 100\n", 133 | "building tree 39 of 100\n", 134 | "building tree 40 of 100\n", 135 | "building tree 41 of 100\n", 136 | "building tree 42 of 100\n", 137 | "building tree 43 of 100\n", 138 | "building tree 44 of 100\n", 139 | "building tree 45 of 100\n", 140 | "building tree 46 of 100\n", 141 | "building tree 47 of 100\n", 142 | "building tree 48 of 100\n", 143 | "building tree 49 of 100\n", 144 | "building tree 50 of 
100\n", 145 | "building tree 51 of 100\n", 146 | "building tree 52 of 100\n", 147 | "building tree 53 of 100\n", 148 | "building tree 54 of 100\n", 149 | "building tree 55 of 100\n", 150 | "building tree 56 of 100\n", 151 | "building tree 57 of 100\n", 152 | "building tree 58 of 100\n", 153 | "building tree 59 of 100\n", 154 | "building tree 60 of 100\n", 155 | "building tree 61 of 100\n", 156 | "building tree 62 of 100\n", 157 | "building tree 63 of 100\n", 158 | "building tree 64 of 100\n", 159 | "building tree 65 of 100\n", 160 | "building tree 66 of 100\n", 161 | "building tree 67 of 100\n", 162 | "building tree 68 of 100\n", 163 | "building tree 69 of 100\n", 164 | "building tree 70 of 100\n", 165 | "building tree 71 of 100\n", 166 | "building tree 72 of 100\n", 167 | "building tree 73 of 100\n", 168 | "building tree 74 of 100\n", 169 | "building tree 75 of 100\n", 170 | "building tree 76 of 100\n", 171 | "building tree 77 of 100\n", 172 | "building tree 78 of 100\n", 173 | "building tree 79 of 100\n", 174 | "building tree 80 of 100\n", 175 | "building tree 81 of 100\n", 176 | "building tree 82 of 100\n", 177 | "building tree 83 of 100\n", 178 | "building tree 84 of 100\n", 179 | "building tree 85 of 100\n", 180 | "building tree 86 of 100\n", 181 | "building tree 87 of 100\n", 182 | "building tree 88 of 100\n", 183 | "building tree 89 of 100\n", 184 | "building tree 90 of 100\n", 185 | "building tree 91 of 100\n", 186 | "building tree 92 of 100\n", 187 | "building tree 93 of 100\n", 188 | "building tree 94 of 100\n", 189 | "building tree 95 of 100\n", 190 | "building tree 96 of 100\n", 191 | "building tree 97 of 100\n", 192 | "building tree 98 of 100\n", 193 | "building tree 99 of 100\n", 194 | "building tree 100 of 100\n" 195 | ] 196 | }, 197 | { 198 | "name": "stderr", 199 | "output_type": "stream", 200 | "text": [ 201 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 19.2s finished\n", 202 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 203 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.0s remaining: 0.0s\n", 204 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.0s remaining: 0.0s\n" 205 | ] 206 | }, 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "\n", 212 | "accuracy 0.965\n" 213 | ] 214 | }, 215 | { 216 | "name": "stderr", 217 | "output_type": "stream", 218 | "text": [ 219 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 0.3s finished\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "from sklearn.ensemble import RandomForestClassifier\n", 225 | "\n", 226 | "model = RandomForestClassifier(random_state=0, verbose=3)\n", 227 | "model = model.fit(X_train, df_train['label'].values)\n", 228 | "\n", 229 | "y_prediction = model.predict(X_test)\n", 230 | "print(\"\\naccuracy\", np.sum(y_prediction == df_test['label'].values) / float(len(y_test)))" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "## CNN" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "The first layer we'll introduce is called the convolutional layer. Instead of having a weight matrix of shape (output length x input length), we'll instead consider a 3x3 weight matrix called a filter or kernel. We take the vector product of the filter with each (overlapping) 3x3 grid in the 28x28 image.\n", 245 | "\n", 246 | "Since there are 26x26 such grids, a single filter results in a 26x26 output. 
32 filters gives us an output 'volume' of shape 26x26x32. By making the 26x26x32 volume go through another convolutional layer, we end up with an output volume of shape 24x24x32.\n", 247 | "\n", 248 | "The second new layer we'll use is called the pooling layer. Here we divide up the input into non-overlapping grids of size 2x2, and take the maximum value from each grid. For an 24x24x32 input, this results in 12x12x32 output. This volume is then 'flattened' to a vector of length 4,608, which we manipulate the same way as any vector.\n", 249 | "\n", 250 | "The use of filters constraints the architecture of the network as each filter only focuses on a specific aspect of the data. This allows our model to scale better and be more translation-invariant. The pooling layer also reduces the number of parameters, helping reduce computation and limit overfitting.\n", 251 | "\n", 252 | "An excellent detailed discussion, with architectural variations and helpful illustrations, can be found on Stanford's CS231n course notes:\n", 253 | "\n", 254 | "http://cs231n.github.io/convolutional-networks/" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 5, 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "name": "stderr", 264 | "output_type": "stream", 265 | "text": [ 266 | "2023-03-24 10:47:02.755609: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 267 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" 268 | ] 269 | }, 270 | { 271 | "name": "stdout", 272 | "output_type": "stream", 273 | "text": [ 274 | "Epoch 1/12\n", 275 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.2648 - accuracy: 0.9194\n", 276 | "Epoch 2/12\n", 277 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.1033 - accuracy: 0.9699\n", 278 | "Epoch 3/12\n", 279 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0791 - accuracy: 0.9770\n", 280 | "Epoch 4/12\n", 281 | "1050/1050 [==============================] - 18s 18ms/step - loss: 0.0719 - accuracy: 0.9784\n", 282 | "Epoch 5/12\n", 283 | "1050/1050 [==============================] - 18s 18ms/step - loss: 0.0670 - accuracy: 0.9805\n", 284 | "Epoch 6/12\n", 285 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0611 - accuracy: 0.9816\n", 286 | "Epoch 7/12\n", 287 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0605 - accuracy: 0.9821\n", 288 | "Epoch 8/12\n", 289 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0569 - accuracy: 0.9834\n", 290 | "Epoch 9/12\n", 291 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0559 - accuracy: 0.9842\n", 292 | "Epoch 10/12\n", 293 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0552 - accuracy: 0.9841\n", 294 | "Epoch 11/12\n", 295 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0556 - accuracy: 0.9841\n", 296 | "Epoch 12/12\n", 297 | "1050/1050 [==============================] - 18s 17ms/step - loss: 0.0541 - accuracy: 0.9843\n", 298 | "Model: \"sequential\"\n", 299 | "_________________________________________________________________\n", 300 | " Layer (type) Output Shape Param # \n", 301 | "=================================================================\n", 302 | " conv2d (Conv2D) (None, 26, 26, 32) 320 \n", 303 | " \n", 304 | " 
conv2d_1 (Conv2D) (None, 24, 24, 32) 9248 \n", 305 | " \n", 306 | " max_pooling2d (MaxPooling2D (None, 12, 12, 32) 0 \n", 307 | " ) \n", 308 | " \n", 309 | " dropout (Dropout) (None, 12, 12, 32) 0 \n", 310 | " \n", 311 | " flatten (Flatten) (None, 4608) 0 \n", 312 | " \n", 313 | " dense (Dense) (None, 128) 589952 \n", 314 | " \n", 315 | " dropout_1 (Dropout) (None, 128) 0 \n", 316 | " \n", 317 | " dense_1 (Dense) (None, 10) 1290 \n", 318 | " \n", 319 | "=================================================================\n", 320 | "Total params: 600,810\n", 321 | "Trainable params: 600,810\n", 322 | "Non-trainable params: 0\n", 323 | "_________________________________________________________________\n", 324 | "\n", 325 | "time taken 219.8900339603424 seconds\n" 326 | ] 327 | } 328 | ], 329 | "source": [ 330 | "# https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py\n", 331 | "\n", 332 | "from keras.models import Sequential\n", 333 | "from keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, Flatten\n", 334 | "from keras.losses import categorical_crossentropy\n", 335 | "from tensorflow.keras.optimizers import RMSprop\n", 336 | "\n", 337 | "start = time()\n", 338 | "\n", 339 | "img_rows, img_cols = 28, 28\n", 340 | "nb_filters = 32\n", 341 | "pool_size = (2, 2)\n", 342 | "kernel_size = (3, 3)\n", 343 | "\n", 344 | "X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)\n", 345 | "X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)\n", 346 | "input_shape = (img_rows, img_cols, 1)\n", 347 | "\n", 348 | "model = Sequential()\n", 349 | "model.add(Conv2D(nb_filters, kernel_size=kernel_size,\n", 350 | " activation='relu',\n", 351 | " input_shape=input_shape))\n", 352 | "model.add(Conv2D(nb_filters, kernel_size, activation='relu'))\n", 353 | "model.add(MaxPooling2D(pool_size=pool_size))\n", 354 | "model.add(Dropout(0.25))\n", 355 | "model.add(Flatten())\n", 356 | "model.add(Dense(128, activation='relu'))\n", 357 | "model.add(Dropout(0.5))\n", 358 | "model.add(Dense(10, activation='softmax'))\n", 359 | "\n", 360 | "model.compile(loss=categorical_crossentropy,\n", 361 | " optimizer=RMSprop(),\n", 362 | " metrics=['accuracy'])\n", 363 | "\n", 364 | "model.fit(X_train, y_train_onehot, epochs=12)\n", 365 | "\n", 366 | "model.summary()\n", 367 | "\n", 368 | "print('\\ntime taken %s seconds' % str(time() - start))" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 6, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "263/263 [==============================] - 1s 4ms/step\n", 381 | "\n", 382 | "accuracy 0.9860714285714286\n" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "y_prediction = np.argmax(model.predict(X_test), axis=-1)\n", 388 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "This is our best result yet! It is worth experimenting with different network architectures to see how they affect loss and accuracy. In the next section, we'll continue to take advantage of inherent data structures but apply it to text data." 
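The shape bookkeeping described before the model cell — 28x28 inputs shrinking to 26x26 and then 24x24 under 3x3 'valid' convolutions, halving to 12x12 under 2x2 max pooling, and flattening to a vector of length 4,608 — can be checked with a few lines of arithmetic. This sketch is for illustration only and is not part of the notebook; the parameter count it produces matches the dense layer in the model summary above.

```python
# Illustration only: recompute the layer shapes described in the text.
size = 28
size = size - 3 + 1            # first 3x3 'valid' convolution:  28 -> 26
size = size - 3 + 1            # second 3x3 'valid' convolution: 26 -> 24
size = size // 2               # 2x2 max pooling:                24 -> 12

flattened = size * size * 32   # 12 * 12 * 32 = 4608
dense_params = flattened * 128 + 128
print(flattened, dense_params) # 4608 589952, matching the Flatten and Dense layers above
```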
396 | ] 397 | } 398 | ], 399 | "metadata": { 400 | "kernelspec": { 401 | "display_name": "Python 3 (ipykernel)", 402 | "language": "python", 403 | "name": "python3" 404 | }, 405 | "language_info": { 406 | "codemirror_mode": { 407 | "name": "ipython", 408 | "version": 3 409 | }, 410 | "file_extension": ".py", 411 | "mimetype": "text/x-python", 412 | "name": "python", 413 | "nbconvert_exporter": "python", 414 | "pygments_lexer": "ipython3", 415 | "version": "3.11.1" 416 | } 417 | }, 418 | "nbformat": 4, 419 | "nbformat_minor": 4 420 | } 421 | -------------------------------------------------------------------------------- /3-3_RNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Section 3-3 - Recurrent Neural Network" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "*WARNING: Cells 7 and 17 may require considerable training time.*\n", 15 | "\n", 16 | "We now consider text data, in the form of Rotten Tomatoes movie reviews. Each review is a sentence up to 48 words, with sentiments ranging from 0 (very bad) to 4 (very good). Similar to our approach with CNNs, we look to go further than simply treating the data as a 'flat' vector." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import numpy as np\n", 26 | "import pandas as pd\n", 27 | "from sklearn.feature_extraction.text import CountVectorizer\n", 28 | "from time import time\n", 29 | "\n", 30 | "np.random.seed(1337)\n", 31 | "\n", 32 | "df = pd.read_csv('data/rottentomatoes.csv')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
\n", 44 | "\n", 57 | "\n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | "
PhraseIdSentenceIdPhraseSentiment
011A series of escapades demonstrating the adage ...1
121A series of escapades demonstrating the adage ...2
231A series2
341A2
451series2
\n", 105 | "
" 106 | ], 107 | "text/plain": [ 108 | " PhraseId SentenceId Phrase \\\n", 109 | "0 1 1 A series of escapades demonstrating the adage ... \n", 110 | "1 2 1 A series of escapades demonstrating the adage ... \n", 111 | "2 3 1 A series \n", 112 | "3 4 1 A \n", 113 | "4 5 1 series \n", 114 | "\n", 115 | " Sentiment \n", 116 | "0 1 \n", 117 | "1 2 \n", 118 | "2 2 \n", 119 | "3 2 \n", 120 | "4 2 " 121 | ] 122 | }, 123 | "execution_count": 2, 124 | "metadata": {}, 125 | "output_type": "execute_result" 126 | } 127 | ], 128 | "source": [ 129 | "df.head()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 3, 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "'A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story .'" 141 | ] 142 | }, 143 | "execution_count": 3, 144 | "metadata": {}, 145 | "output_type": "execute_result" 146 | } 147 | ], 148 | "source": [ 149 | "df['Phrase'].values[0]" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 4, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "count = CountVectorizer(analyzer='word')\n", 159 | "\n", 160 | "df_train = df.iloc[:124800, :]\n", 161 | "\n", 162 | "X_train = count.fit_transform(df_train['Phrase'])\n", 163 | "y_train = df_train['Sentiment'].values\n", 164 | "y_train_onehot = pd.get_dummies(df_train['Sentiment']).values" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 5, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "df_test = df.iloc[124800:, :]\n", 174 | "\n", 175 | "X_test = count.transform(df_test['Phrase'])\n", 176 | "y_test = df_test['Sentiment'].values" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 6, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "250 ad\n", 189 | "251 adage\n", 190 | "252 adam\n", 191 | "253 adamant\n", 192 | "254 adams\n", 193 | "255 adaptation\n", 194 | "256 adaptations\n", 195 | "257 adapted\n", 196 | "258 adapts\n", 197 | "259 add\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "for i in range(10):\n", 203 | " print(i+250, count.get_feature_names_out()[i+250])" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "## Benchmark" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "To calculate our benchmark accuracy score, we take a 'bag-of-words' approach by having each column be the word count and train a Random Forest on the word-count matrix." 
218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 7, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "name": "stderr", 227 | "output_type": "stream", 228 | "text": [ 229 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n" 230 | ] 231 | }, 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "building tree 1 of 100\n" 237 | ] 238 | }, 239 | { 240 | "name": "stderr", 241 | "output_type": "stream", 242 | "text": [ 243 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 15.5s remaining: 0.0s\n" 244 | ] 245 | }, 246 | { 247 | "name": "stdout", 248 | "output_type": "stream", 249 | "text": [ 250 | "building tree 2 of 100\n" 251 | ] 252 | }, 253 | { 254 | "name": "stderr", 255 | "output_type": "stream", 256 | "text": [ 257 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 29.5s remaining: 0.0s\n" 258 | ] 259 | }, 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "building tree 3 of 100\n", 265 | "building tree 4 of 100\n", 266 | "building tree 5 of 100\n", 267 | "building tree 6 of 100\n", 268 | "building tree 7 of 100\n", 269 | "building tree 8 of 100\n", 270 | "building tree 9 of 100\n", 271 | "building tree 10 of 100\n", 272 | "building tree 11 of 100\n", 273 | "building tree 12 of 100\n", 274 | "building tree 13 of 100\n", 275 | "building tree 14 of 100\n", 276 | "building tree 15 of 100\n", 277 | "building tree 16 of 100\n", 278 | "building tree 17 of 100\n", 279 | "building tree 18 of 100\n", 280 | "building tree 19 of 100\n", 281 | "building tree 20 of 100\n", 282 | "building tree 21 of 100\n", 283 | "building tree 22 of 100\n", 284 | "building tree 23 of 100\n", 285 | "building tree 24 of 100\n", 286 | "building tree 25 of 100\n", 287 | "building tree 26 of 100\n", 288 | "building tree 27 of 100\n", 289 | "building tree 28 of 100\n", 290 | "building tree 29 of 100\n", 291 | "building tree 30 of 100\n", 292 | "building tree 31 of 100\n", 293 | "building tree 32 of 100\n", 294 | "building tree 33 of 100\n", 295 | "building tree 34 of 100\n", 296 | "building tree 35 of 100\n", 297 | "building tree 36 of 100\n", 298 | "building tree 37 of 100\n", 299 | "building tree 38 of 100\n", 300 | "building tree 39 of 100\n", 301 | "building tree 40 of 100\n", 302 | "building tree 41 of 100\n", 303 | "building tree 42 of 100\n", 304 | "building tree 43 of 100\n", 305 | "building tree 44 of 100\n", 306 | "building tree 45 of 100\n", 307 | "building tree 46 of 100\n", 308 | "building tree 47 of 100\n", 309 | "building tree 48 of 100\n", 310 | "building tree 49 of 100\n", 311 | "building tree 50 of 100\n", 312 | "building tree 51 of 100\n", 313 | "building tree 52 of 100\n", 314 | "building tree 53 of 100\n", 315 | "building tree 54 of 100\n", 316 | "building tree 55 of 100\n", 317 | "building tree 56 of 100\n", 318 | "building tree 57 of 100\n", 319 | "building tree 58 of 100\n", 320 | "building tree 59 of 100\n", 321 | "building tree 60 of 100\n", 322 | "building tree 61 of 100\n", 323 | "building tree 62 of 100\n", 324 | "building tree 63 of 100\n", 325 | "building tree 64 of 100\n", 326 | "building tree 65 of 100\n", 327 | "building tree 66 of 100\n", 328 | "building tree 67 of 100\n", 329 | "building tree 68 of 100\n", 330 | "building tree 69 of 100\n", 331 | "building tree 70 of 100\n", 332 | "building tree 71 of 100\n", 333 | "building tree 72 of 100\n", 334 | "building tree 73 of 100\n", 335 | "building tree 74 of 100\n", 336 | "building tree 75 of 100\n", 337 | 
"building tree 76 of 100\n", 338 | "building tree 77 of 100\n", 339 | "building tree 78 of 100\n", 340 | "building tree 79 of 100\n", 341 | "building tree 80 of 100\n", 342 | "building tree 81 of 100\n", 343 | "building tree 82 of 100\n", 344 | "building tree 83 of 100\n", 345 | "building tree 84 of 100\n", 346 | "building tree 85 of 100\n", 347 | "building tree 86 of 100\n", 348 | "building tree 87 of 100\n", 349 | "building tree 88 of 100\n", 350 | "building tree 89 of 100\n", 351 | "building tree 90 of 100\n", 352 | "building tree 91 of 100\n", 353 | "building tree 92 of 100\n", 354 | "building tree 93 of 100\n", 355 | "building tree 94 of 100\n", 356 | "building tree 95 of 100\n", 357 | "building tree 96 of 100\n", 358 | "building tree 97 of 100\n", 359 | "building tree 98 of 100\n", 360 | "building tree 99 of 100\n", 361 | "building tree 100 of 100\n" 362 | ] 363 | }, 364 | { 365 | "name": "stderr", 366 | "output_type": "stream", 367 | "text": [ 368 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 25.2min finished\n", 369 | "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n", 370 | "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 0.2s remaining: 0.0s\n", 371 | "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 0.4s remaining: 0.0s\n" 372 | ] 373 | }, 374 | { 375 | "name": "stdout", 376 | "output_type": "stream", 377 | "text": [ 378 | "accuracy 0.5303582853486885\n" 379 | ] 380 | }, 381 | { 382 | "name": "stderr", 383 | "output_type": "stream", 384 | "text": [ 385 | "[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 16.8s finished\n" 386 | ] 387 | } 388 | ], 389 | "source": [ 390 | "from sklearn.ensemble import RandomForestClassifier\n", 391 | "\n", 392 | "model = RandomForestClassifier(random_state=0, verbose=3)\n", 393 | "model = model.fit(X_train, y_train)\n", 394 | "\n", 395 | "y_prediction = model.predict(X_test)\n", 396 | "print(\"accuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## Pre-processing" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "As a pre-processing step, we convert the sentence into word tokens. The word tokens are then mapped to a (numerical) word index. The final step involves 'padding' the list of indices with zeros to ensure every row has the same length." 
411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 8, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "from collections import defaultdict\n", 420 | "\n", 421 | "word_to_index = defaultdict(int)\n", 422 | "\n", 423 | "for i, item in enumerate(count.get_feature_names_out()):\n", 424 | " word_to_index[item] = i+1" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 9, 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [ 433 | "sequencer = count.build_analyzer()" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 10, 439 | "metadata": {}, 440 | "outputs": [], 441 | "source": [ 442 | "def sentence_to_indices(sentence):\n", 443 | " return [word_to_index[word] for word in sequencer(sentence)]" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 11, 449 | "metadata": {}, 450 | "outputs": [], 451 | "source": [ 452 | "X_train_seq = list(map(sentence_to_indices, df_train['Phrase']))\n", 453 | "X_test_seq = list(map(sentence_to_indices, df_test['Phrase']))" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 12, 459 | "metadata": { 460 | "scrolled": true, 461 | "tags": [] 462 | }, 463 | "outputs": [ 464 | { 465 | "name": "stderr", 466 | "output_type": "stream", 467 | "text": [ 468 | "2023-03-24 10:21:40.803721: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 469 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" 470 | ] 471 | } 472 | ], 473 | "source": [ 474 | "from tensorflow.keras.preprocessing import sequence\n", 475 | "\n", 476 | "X_train_pad = sequence.pad_sequences(X_train_seq, maxlen=48)\n", 477 | "X_test_pad = sequence.pad_sequences(X_test_seq, maxlen=48)" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 13, 483 | "metadata": {}, 484 | "outputs": [ 485 | { 486 | "data": { 487 | "text/plain": [ 488 | "'A series of escapades demonstrating the adage that what is good for the goose is also good for the gander , some of which occasionally amuses but none of which amounts to much of a story .'" 489 | ] 490 | }, 491 | "execution_count": 13, 492 | "metadata": {}, 493 | "output_type": "execute_result" 494 | } 495 | ], 496 | "source": [ 497 | "df_train['Phrase'].values[0]" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 14, 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "['series',\n", 509 | " 'of',\n", 510 | " 'escapades',\n", 511 | " 'demonstrating',\n", 512 | " 'the',\n", 513 | " 'adage',\n", 514 | " 'that',\n", 515 | " 'what',\n", 516 | " 'is',\n", 517 | " 'good',\n", 518 | " 'for',\n", 519 | " 'the',\n", 520 | " 'goose',\n", 521 | " 'is',\n", 522 | " 'also',\n", 523 | " 'good',\n", 524 | " 'for',\n", 525 | " 'the',\n", 526 | " 'gander',\n", 527 | " 'some',\n", 528 | " 'of',\n", 529 | " 'which',\n", 530 | " 'occasionally',\n", 531 | " 'amuses',\n", 532 | " 'but',\n", 533 | " 'none',\n", 534 | " 'of',\n", 535 | " 'which',\n", 536 | " 'amounts',\n", 537 | " 'to',\n", 538 | " 'much',\n", 539 | " 'of',\n", 540 | " 'story']" 541 | ] 542 | }, 543 | "execution_count": 14, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [ 549 | "sequencer(df_train['Phrase'].values[0])" 550 | ] 551 | }, 552 | { 553 | "cell_type": 
"code", 554 | "execution_count": 15, 555 | "metadata": {}, 556 | "outputs": [ 557 | { 558 | "data": { 559 | "text/plain": [ 560 | "[10531,\n", 561 | " 8224,\n", 562 | " 4076,\n", 563 | " 3100,\n", 564 | " 12023,\n", 565 | " 252,\n", 566 | " 12021,\n", 567 | " 13226,\n", 568 | " 6445,\n", 569 | " 5188,\n", 570 | " 4750,\n", 571 | " 12023,\n", 572 | " 5204,\n", 573 | " 6445,\n", 574 | " 462,\n", 575 | " 5188,\n", 576 | " 4750,\n", 577 | " 12023,\n", 578 | " 4991,\n", 579 | " 11053,\n", 580 | " 8224,\n", 581 | " 13242,\n", 582 | " 8201,\n", 583 | " 529,\n", 584 | " 1682,\n", 585 | " 8094,\n", 586 | " 8224,\n", 587 | " 13242,\n", 588 | " 520,\n", 589 | " 12182,\n", 590 | " 7845,\n", 591 | " 8224,\n", 592 | " 11444]" 593 | ] 594 | }, 595 | "execution_count": 15, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "X_train_seq[0]" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 16, 607 | "metadata": {}, 608 | "outputs": [ 609 | { 610 | "data": { 611 | "text/plain": [ 612 | "array([ 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", 613 | " 0, 0, 0, 0, 0, 0, 10531, 8224, 4076,\n", 614 | " 3100, 12023, 252, 12021, 13226, 6445, 5188, 4750, 12023,\n", 615 | " 5204, 6445, 462, 5188, 4750, 12023, 4991, 11053, 8224,\n", 616 | " 13242, 8201, 529, 1682, 8094, 8224, 13242, 520, 12182,\n", 617 | " 7845, 8224, 11444], dtype=int32)" 618 | ] 619 | }, 620 | "execution_count": 16, 621 | "metadata": {}, 622 | "output_type": "execute_result" 623 | } 624 | ], 625 | "source": [ 626 | "X_train_pad[0]" 627 | ] 628 | }, 629 | { 630 | "cell_type": "markdown", 631 | "metadata": {}, 632 | "source": [ 633 | "## Long Short-Term Memory" 634 | ] 635 | }, 636 | { 637 | "cell_type": "markdown", 638 | "metadata": {}, 639 | "source": [ 640 | "To preserve the sequential nature of the sentence, we would train our Recurrent Neural Network (RNN) by feeding each word index one by one. However, it gets expensive to keep track of long-term dependencies, for example between \"series\" and \"story\" in the first sentence. Gradient contributions in deep networks have a tendency of vanishing to zero - this effect is referred to as the 'vanishing gradient' problem.\n", 641 | "\n", 642 | "Long Short-Term Memory networks (LSTMs) was introduced to get around this problem with a gating mechanism. These gates limit how much the existing state is 'affected' by previous states. How much each gate lets through is itself a parameter that gets trained. 
Chris Olah has an excellent blog post that explains how LSTMs work:\n", 643 | "\n", 644 | "http://colah.github.io/posts/2015-08-Understanding-LSTMs/" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": 17, 650 | "metadata": {}, 651 | "outputs": [ 652 | { 653 | "name": "stdout", 654 | "output_type": "stream", 655 | "text": [ 656 | "Epoch 1/2\n", 657 | "3900/3900 [==============================] - 181s 46ms/step - loss: 0.3237 - accuracy: 0.6178\n", 658 | "Epoch 2/2\n", 659 | "3900/3900 [==============================] - 181s 46ms/step - loss: 0.2687 - accuracy: 0.6900\n", 660 | "\n", 661 | "time taken 362.1953592300415 seconds\n" 662 | ] 663 | } 664 | ], 665 | "source": [ 666 | "# https://github.com/fchollet/keras/blob/master/examples/imdb_lstm.py\n", 667 | "\n", 668 | "from keras.models import Sequential\n", 669 | "from keras.layers import Dense, Activation, Embedding, LSTM\n", 670 | "from keras.losses import binary_crossentropy\n", 671 | "from keras.optimizers import Adam\n", 672 | "\n", 673 | "start = time()\n", 674 | "\n", 675 | "model = Sequential()\n", 676 | "model.add(Embedding(len(word_to_index)+1, 128))\n", 677 | "model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))\n", 678 | "model.add(Dense(5, activation='sigmoid'))\n", 679 | "\n", 680 | "model.compile(loss=binary_crossentropy, optimizer=Adam(), metrics=['accuracy'])\n", 681 | "\n", 682 | "model.fit(X_train_pad, y_train_onehot, epochs=2)\n", 683 | "\n", 684 | "print('\\ntime taken %s seconds' % str(time() - start))" 685 | ] 686 | }, 687 | { 688 | "cell_type": "code", 689 | "execution_count": 18, 690 | "metadata": {}, 691 | "outputs": [ 692 | { 693 | "name": "stdout", 694 | "output_type": "stream", 695 | "text": [ 696 | "977/977 [==============================] - 9s 9ms/step\n", 697 | "\n", 698 | "accuracy 0.6054702495201536\n" 699 | ] 700 | } 701 | ], 702 | "source": [ 703 | "y_prediction = np.argmax(model.predict(X_test_pad), axis=-1)\n", 704 | "print(\"\\naccuracy\", np.sum(y_prediction == y_test) / float(len(y_test)))" 705 | ] 706 | }, 707 | { 708 | "cell_type": "markdown", 709 | "metadata": {}, 710 | "source": [ 711 | "Intuitively, preserving the sentence structure would improve performance (for example, in distinguishing between \"good\" and \"not good\"). It is rewarding to see that this is indeed the case. The LSTM is one of the more complicated neural network architectures, but its highly impressive recent applications make it a very worthwhile topic of study." 712 | ] 713 | } 714 | ], 715 | "metadata": { 716 | "kernelspec": { 717 | "display_name": "Python 3 (ipykernel)", 718 | "language": "python", 719 | "name": "python3" 720 | }, 721 | "language_info": { 722 | "codemirror_mode": { 723 | "name": "ipython", 724 | "version": 3 725 | }, 726 | "file_extension": ".py", 727 | "mimetype": "text/x-python", 728 | "name": "python", 729 | "nbconvert_exporter": "python", 730 | "pygments_lexer": "ipython3", 731 | "version": "3.11.1" 732 | } 733 | }, 734 | "nbformat": 4, 735 | "nbformat_minor": 4 736 | } 737 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Neural Networks in a Nutshell 2 | 3 | ## Installation Notes 4 | This tutorial requires *jupyterlab*, *tensorflow*, *keras*, *pandas* and *scikit-learn*. These can be installed with *pip* by typing the following in Terminal: 5 | 6 | pip install jupyterlab tensorflow keras pandas scikit-learn 7 | 8 | We will be reviewing the materials with Jupyter notebooks. 
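Before launching the notebooks, it can help to confirm that the core libraries import cleanly. A minimal sanity check, assuming a standard Python 3 environment (the version string printed will vary with your install), is:

    python -c "import tensorflow, keras, sklearn, pandas; print(tensorflow.__version__)"

If this prints a version number without errors, the dependencies are in place.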
You should be able to type 9 | 10 | jupyter-lab 11 | 12 | in your terminal window and see the notebook panel load in your web browser. 13 | 14 | 15 | ## Presentation Format 16 | 17 | This tutorial is designed to get the audience training neural networks by the end of a 1-hour session. In particular, it covers areas where neural networks really shine - CNNs and RNNs. These techniques are applied to the Kaggle Titanic, MNIST and Rotten Tomatoes datasets. 18 | 19 | - [Section 1-0 - Prelude](https://github.com/savarin/neural-networks/blob/master/1-0_Prelude.ipynb) 20 | - [Section 1-1 - Basic NN, Titanic](https://github.com/savarin/neural-networks/blob/master/1-1_Basic_NN-Titanic.ipynb) 21 | - [Section 2-1 - Basic NN, MNIST](https://github.com/savarin/neural-networks/blob/master/2-1_Basic_NN-MNIST.ipynb) 22 | - [Section 2-2 - Regularized NN](https://github.com/savarin/neural-networks/blob/master/2-2_Regularized_NN.ipynb) 23 | - [Section 2-3 - CNN](https://github.com/savarin/neural-networks/blob/master/2-3_CNN.ipynb) 24 | - [Section 3-3 - RNN](https://github.com/savarin/neural-networks/blob/master/3-3_RNN.ipynb) 25 | - [Appendix - SGD](https://github.com/savarin/neural-networks/blob/master/Appendix-SGD.ipynb) 26 | -------------------------------------------------------------------------------- /data/titanic.csv: -------------------------------------------------------------------------------- 1 | Survived,Class,Sex,Age,Fare 2 | 0,3,1,22.0,7.25 3 | 1,1,0,38.0,71.2833 4 | 1,3,0,26.0,7.925 5 | 1,1,0,35.0,53.1 6 | 0,3,1,35.0,8.05 7 | 0,3,1,29.6991176471,8.4583 8 | 0,1,1,54.0,51.8625 9 | 0,3,1,2.0,21.075 10 | 1,3,0,27.0,11.1333 11 | 1,2,0,14.0,30.0708 12 | 1,3,0,4.0,16.7 13 | 1,1,0,58.0,26.55 14 | 0,3,1,20.0,8.05 15 | 0,3,1,39.0,31.275 16 | 0,3,0,14.0,7.8542 17 | 1,2,0,55.0,16.0 18 | 0,3,1,2.0,29.125 19 | 1,2,1,29.6991176471,13.0 20 | 0,3,0,31.0,18.0 21 | 1,3,0,29.6991176471,7.225 22 | 0,2,1,35.0,26.0 23 | 1,2,1,34.0,13.0 24 | 1,3,0,15.0,8.0292 25 | 1,1,1,28.0,35.5 26 | 0,3,0,8.0,21.075 27 | 1,3,0,38.0,31.3875 28 | 0,3,1,29.6991176471,7.225 29 | 0,1,1,19.0,263.0 30 | 1,3,0,29.6991176471,7.8792 31 | 0,3,1,29.6991176471,7.8958 32 | 0,1,1,40.0,27.7208 33 | 1,1,0,29.6991176471,146.5208 34 | 1,3,0,29.6991176471,7.75 35 | 0,2,1,66.0,10.5 36 | 0,1,1,28.0,82.1708 37 | 0,1,1,42.0,52.0 38 | 1,3,1,29.6991176471,7.2292 39 | 0,3,1,21.0,8.05 40 | 0,3,0,18.0,18.0 41 | 1,3,0,14.0,11.2417 42 | 0,3,0,40.0,9.475 43 | 0,2,0,27.0,21.0 44 | 0,3,1,29.6991176471,7.8958 45 | 1,2,0,3.0,41.5792 46 | 1,3,0,19.0,7.8792 47 | 0,3,1,29.6991176471,8.05 48 | 0,3,1,29.6991176471,15.5 49 | 1,3,0,29.6991176471,7.75 50 | 0,3,1,29.6991176471,21.6792 51 | 0,3,0,18.0,17.8 52 | 0,3,1,7.0,39.6875 53 | 0,3,1,21.0,7.8 54 | 1,1,0,49.0,76.7292 55 | 1,2,0,29.0,26.0 56 | 0,1,1,65.0,61.9792 57 | 1,1,1,29.6991176471,35.5 58 | 1,2,0,21.0,10.5 59 | 0,3,1,28.5,7.2292 60 | 1,2,0,5.0,27.75 61 | 0,3,1,11.0,46.9 62 | 0,3,1,22.0,7.2292 63 | 1,1,0,38.0,80.0 64 | 0,1,1,45.0,83.475 65 | 0,3,1,4.0,27.9 66 | 0,1,1,29.6991176471,27.7208 67 | 1,3,1,29.6991176471,15.2458 68 | 1,2,0,29.0,10.5 69 | 0,3,1,19.0,8.1583 70 | 1,3,0,17.0,7.925 71 | 0,3,1,26.0,8.6625 72 | 0,2,1,32.0,10.5 73 | 0,3,0,16.0,46.9 74 | 0,2,1,21.0,73.5 75 | 0,3,1,26.0,14.4542 76 | 1,3,1,32.0,56.4958 77 | 0,3,1,25.0,7.65 78 | 0,3,1,29.6991176471,7.8958 79 | 0,3,1,29.6991176471,8.05 80 | 1,2,1,0.83,29.0 81 | 1,3,0,30.0,12.475 82 | 0,3,1,22.0,9.0 83 | 1,3,1,29.0,9.5 84 | 1,3,0,29.6991176471,7.7875 85 | 0,1,1,28.0,47.1 86 | 1,2,0,17.0,10.5 87 | 1,3,0,33.0,15.85 88 | 0,3,1,16.0,34.375 89 | 
0,3,1,29.6991176471,8.05 90 | 1,1,0,23.0,263.0 91 | 0,3,1,24.0,8.05 92 | 0,3,1,29.0,8.05 93 | 0,3,1,20.0,7.8542 94 | 0,1,1,46.0,61.175 95 | 0,3,1,26.0,20.575 96 | 0,3,1,59.0,7.25 97 | 0,3,1,29.6991176471,8.05 98 | 0,1,1,71.0,34.6542 99 | 1,1,1,23.0,63.3583 100 | 1,2,0,34.0,23.0 101 | 0,2,1,34.0,26.0 102 | 0,3,0,28.0,7.8958 103 | 0,3,1,29.6991176471,7.8958 104 | 0,1,1,21.0,77.2875 105 | 0,3,1,33.0,8.6542 106 | 0,3,1,37.0,7.925 107 | 0,3,1,28.0,7.8958 108 | 1,3,0,21.0,7.65 109 | 1,3,1,29.6991176471,7.775 110 | 0,3,1,38.0,7.8958 111 | 1,3,0,29.6991176471,24.15 112 | 0,1,1,47.0,52.0 113 | 0,3,0,14.5,14.4542 114 | 0,3,1,22.0,8.05 115 | 0,3,0,20.0,9.825 116 | 0,3,0,17.0,14.4583 117 | 0,3,1,21.0,7.925 118 | 0,3,1,70.5,7.75 119 | 0,2,1,29.0,21.0 120 | 0,1,1,24.0,247.5208 121 | 0,3,0,2.0,31.275 122 | 0,2,1,21.0,73.5 123 | 0,3,1,29.6991176471,8.05 124 | 0,2,1,32.5,30.0708 125 | 1,2,0,32.5,13.0 126 | 0,1,1,54.0,77.2875 127 | 1,3,1,12.0,11.2417 128 | 0,3,1,29.6991176471,7.75 129 | 1,3,1,24.0,7.1417 130 | 1,3,0,29.6991176471,22.3583 131 | 0,3,1,45.0,6.975 132 | 0,3,1,33.0,7.8958 133 | 0,3,1,20.0,7.05 134 | 0,3,0,47.0,14.5 135 | 1,2,0,29.0,26.0 136 | 0,2,1,25.0,13.0 137 | 0,2,1,23.0,15.0458 138 | 1,1,0,19.0,26.2833 139 | 0,1,1,37.0,53.1 140 | 0,3,1,16.0,9.2167 141 | 0,1,1,24.0,79.2 142 | 0,3,0,29.6991176471,15.2458 143 | 1,3,0,22.0,7.75 144 | 1,3,0,24.0,15.85 145 | 0,3,1,19.0,6.75 146 | 0,2,1,18.0,11.5 147 | 0,2,1,19.0,36.75 148 | 1,3,1,27.0,7.7958 149 | 0,3,0,9.0,34.375 150 | 0,2,1,36.5,26.0 151 | 0,2,1,42.0,13.0 152 | 0,2,1,51.0,12.525 153 | 1,1,0,22.0,66.6 154 | 0,3,1,55.5,8.05 155 | 0,3,1,40.5,14.5 156 | 0,3,1,29.6991176471,7.3125 157 | 0,1,1,51.0,61.3792 158 | 1,3,0,16.0,7.7333 159 | 0,3,1,30.0,8.05 160 | 0,3,1,29.6991176471,8.6625 161 | 0,3,1,29.6991176471,69.55 162 | 0,3,1,44.0,16.1 163 | 1,2,0,40.0,15.75 164 | 0,3,1,26.0,7.775 165 | 0,3,1,17.0,8.6625 166 | 0,3,1,1.0,39.6875 167 | 1,3,1,9.0,20.525 168 | 1,1,0,29.6991176471,55.0 169 | 0,3,0,45.0,27.9 170 | 0,1,1,29.6991176471,25.925 171 | 0,3,1,28.0,56.4958 172 | 0,1,1,61.0,33.5 173 | 0,3,1,4.0,29.125 174 | 1,3,0,1.0,11.1333 175 | 0,3,1,21.0,7.925 176 | 0,1,1,56.0,30.6958 177 | 0,3,1,18.0,7.8542 178 | 0,3,1,29.6991176471,25.4667 179 | 0,1,0,50.0,28.7125 180 | 0,2,1,30.0,13.0 181 | 0,3,1,36.0,0.0 182 | 0,3,0,29.6991176471,69.55 183 | 0,2,1,29.6991176471,15.05 184 | 0,3,1,9.0,31.3875 185 | 1,2,1,1.0,39.0 186 | 1,3,0,4.0,22.025 187 | 0,1,1,29.6991176471,50.0 188 | 1,3,0,29.6991176471,15.5 189 | 1,1,1,45.0,26.55 190 | 0,3,1,40.0,15.5 191 | 0,3,1,36.0,7.8958 192 | 1,2,0,32.0,13.0 193 | 0,2,1,19.0,13.0 194 | 1,3,0,19.0,7.8542 195 | 1,2,1,3.0,26.0 196 | 1,1,0,44.0,27.7208 197 | 1,1,0,58.0,146.5208 198 | 0,3,1,29.6991176471,7.75 199 | 0,3,1,42.0,8.4042 200 | 1,3,0,29.6991176471,7.75 201 | 0,2,0,24.0,13.0 202 | 0,3,1,28.0,9.5 203 | 0,3,1,29.6991176471,69.55 204 | 0,3,1,34.0,6.4958 205 | 0,3,1,45.5,7.225 206 | 1,3,1,18.0,8.05 207 | 0,3,0,2.0,10.4625 208 | 0,3,1,32.0,15.85 209 | 1,3,1,26.0,18.7875 210 | 1,3,0,16.0,7.75 211 | 1,1,1,40.0,31.0 212 | 0,3,1,24.0,7.05 213 | 1,2,0,35.0,21.0 214 | 0,3,1,22.0,7.25 215 | 0,2,1,30.0,13.0 216 | 0,3,1,29.6991176471,7.75 217 | 1,1,0,31.0,113.275 218 | 1,3,0,27.0,7.925 219 | 0,2,1,42.0,27.0 220 | 1,1,0,32.0,76.2917 221 | 0,2,1,30.0,10.5 222 | 1,3,1,16.0,8.05 223 | 0,2,1,27.0,13.0 224 | 0,3,1,51.0,8.05 225 | 0,3,1,29.6991176471,7.8958 226 | 1,1,1,38.0,90.0 227 | 0,3,1,22.0,9.35 228 | 1,2,1,19.0,10.5 229 | 0,3,1,20.5,7.25 230 | 0,2,1,18.0,13.0 231 | 0,3,0,29.6991176471,25.4667 232 | 1,1,0,35.0,83.475 233 | 0,3,1,29.0,7.775 
234 | 0,2,1,59.0,13.5 235 | 1,3,0,5.0,31.3875 236 | 0,2,1,24.0,10.5 237 | 0,3,0,29.6991176471,7.55 238 | 0,2,1,44.0,26.0 239 | 1,2,0,8.0,26.25 240 | 0,2,1,19.0,10.5 241 | 0,2,1,33.0,12.275 242 | 0,3,0,29.6991176471,14.4542 243 | 1,3,0,29.6991176471,15.5 244 | 0,2,1,29.0,10.5 245 | 0,3,1,22.0,7.125 246 | 0,3,1,30.0,7.225 247 | 0,1,1,44.0,90.0 248 | 0,3,0,25.0,7.775 249 | 1,2,0,24.0,14.5 250 | 1,1,1,37.0,52.5542 251 | 0,2,1,54.0,26.0 252 | 0,3,1,29.6991176471,7.25 253 | 0,3,0,29.0,10.4625 254 | 0,1,1,62.0,26.55 255 | 0,3,1,30.0,16.1 256 | 0,3,0,41.0,20.2125 257 | 1,3,0,29.0,15.2458 258 | 1,1,0,29.6991176471,79.2 259 | 1,1,0,30.0,86.5 260 | 1,1,0,35.0,512.3292 261 | 1,2,0,50.0,26.0 262 | 0,3,1,29.6991176471,7.75 263 | 1,3,1,3.0,31.3875 264 | 0,1,1,52.0,79.65 265 | 0,1,1,40.0,0.0 266 | 0,3,0,29.6991176471,7.75 267 | 0,2,1,36.0,10.5 268 | 0,3,1,16.0,39.6875 269 | 1,3,1,25.0,7.775 270 | 1,1,0,58.0,153.4625 271 | 1,1,0,35.0,135.6333 272 | 0,1,1,29.6991176471,31.0 273 | 1,3,1,25.0,0.0 274 | 1,2,0,41.0,19.5 275 | 0,1,1,37.0,29.7 276 | 1,3,0,29.6991176471,7.75 277 | 1,1,0,63.0,77.9583 278 | 0,3,0,45.0,7.75 279 | 0,2,1,29.6991176471,0.0 280 | 0,3,1,7.0,29.125 281 | 1,3,0,35.0,20.25 282 | 0,3,1,65.0,7.75 283 | 0,3,1,28.0,7.8542 284 | 0,3,1,16.0,9.5 285 | 1,3,1,19.0,8.05 286 | 0,1,1,29.6991176471,26.0 287 | 0,3,1,33.0,8.6625 288 | 1,3,1,30.0,9.5 289 | 0,3,1,22.0,7.8958 290 | 1,2,1,42.0,13.0 291 | 1,3,0,22.0,7.75 292 | 1,1,0,26.0,78.85 293 | 1,1,0,19.0,91.0792 294 | 0,2,1,36.0,12.875 295 | 0,3,0,24.0,8.85 296 | 0,3,1,24.0,7.8958 297 | 0,1,1,29.6991176471,27.7208 298 | 0,3,1,23.5,7.2292 299 | 0,1,0,2.0,151.55 300 | 1,1,1,29.6991176471,30.5 301 | 1,1,0,50.0,247.5208 302 | 1,3,0,29.6991176471,7.75 303 | 1,3,1,29.6991176471,23.25 304 | 0,3,1,19.0,0.0 305 | 1,2,0,29.6991176471,12.35 306 | 0,3,1,29.6991176471,8.05 307 | 1,1,1,0.92,151.55 308 | 1,1,0,29.6991176471,110.8833 309 | 1,1,0,17.0,108.9 310 | 0,2,1,30.0,24.0 311 | 1,1,0,30.0,56.9292 312 | 1,1,0,24.0,83.1583 313 | 1,1,0,18.0,262.375 314 | 0,2,0,26.0,26.0 315 | 0,3,1,28.0,7.8958 316 | 0,2,1,43.0,26.25 317 | 1,3,0,26.0,7.8542 318 | 1,2,0,24.0,26.0 319 | 0,2,1,54.0,14.0 320 | 1,1,0,31.0,164.8667 321 | 1,1,0,40.0,134.5 322 | 0,3,1,22.0,7.25 323 | 0,3,1,27.0,7.8958 324 | 1,2,0,30.0,12.35 325 | 1,2,0,22.0,29.0 326 | 0,3,1,29.6991176471,69.55 327 | 1,1,0,36.0,135.6333 328 | 0,3,1,61.0,6.2375 329 | 1,2,0,36.0,13.0 330 | 1,3,0,31.0,20.525 331 | 1,1,0,16.0,57.9792 332 | 1,3,0,29.6991176471,23.25 333 | 0,1,1,45.5,28.5 334 | 0,1,1,38.0,153.4625 335 | 0,3,1,16.0,18.0 336 | 1,1,0,29.6991176471,133.65 337 | 0,3,1,29.6991176471,7.8958 338 | 0,1,1,29.0,66.6 339 | 1,1,0,41.0,134.5 340 | 1,3,1,45.0,8.05 341 | 0,1,1,45.0,35.5 342 | 1,2,1,2.0,26.0 343 | 1,1,0,24.0,263.0 344 | 0,2,1,28.0,13.0 345 | 0,2,1,25.0,13.0 346 | 0,2,1,36.0,13.0 347 | 1,2,0,24.0,13.0 348 | 1,2,0,40.0,13.0 349 | 1,3,0,29.6991176471,16.1 350 | 1,3,1,3.0,15.9 351 | 0,3,1,42.0,8.6625 352 | 0,3,1,23.0,9.225 353 | 0,1,1,29.6991176471,35.0 354 | 0,3,1,15.0,7.2292 355 | 0,3,1,25.0,17.8 356 | 0,3,1,29.6991176471,7.225 357 | 0,3,1,28.0,9.5 358 | 1,1,0,22.0,55.0 359 | 0,2,0,38.0,13.0 360 | 1,3,0,29.6991176471,7.8792 361 | 1,3,0,29.6991176471,7.8792 362 | 0,3,1,40.0,27.9 363 | 0,2,1,29.0,27.7208 364 | 0,3,0,45.0,14.4542 365 | 0,3,1,35.0,7.05 366 | 0,3,1,29.6991176471,15.5 367 | 0,3,1,30.0,7.25 368 | 1,1,0,60.0,75.25 369 | 1,3,0,29.6991176471,7.2292 370 | 1,3,0,29.6991176471,7.75 371 | 1,1,0,24.0,69.3 372 | 1,1,1,25.0,55.4417 373 | 0,3,1,18.0,6.4958 374 | 0,3,1,19.0,8.05 375 | 0,1,1,22.0,135.6333 376 | 
0,3,0,3.0,21.075 377 | 1,1,0,29.6991176471,82.1708 378 | 1,3,0,22.0,7.25 379 | 0,1,1,27.0,211.5 380 | 0,3,1,20.0,4.0125 381 | 0,3,1,19.0,7.775 382 | 1,1,0,42.0,227.525 383 | 1,3,0,1.0,15.7417 384 | 0,3,1,32.0,7.925 385 | 1,1,0,35.0,52.0 386 | 0,3,1,29.6991176471,7.8958 387 | 0,2,1,18.0,73.5 388 | 0,3,1,1.0,46.9 389 | 1,2,0,36.0,13.0 390 | 0,3,1,29.6991176471,7.7292 391 | 1,2,0,17.0,12.0 392 | 1,1,1,36.0,120.0 393 | 1,3,1,21.0,7.7958 394 | 0,3,1,28.0,7.925 395 | 1,1,0,23.0,113.275 396 | 1,3,0,24.0,16.7 397 | 0,3,1,22.0,7.7958 398 | 0,3,0,31.0,7.8542 399 | 0,2,1,46.0,26.0 400 | 0,2,1,23.0,10.5 401 | 1,2,0,28.0,12.65 402 | 1,3,1,39.0,7.925 403 | 0,3,1,26.0,8.05 404 | 0,3,0,21.0,9.825 405 | 0,3,1,28.0,15.85 406 | 0,3,0,20.0,8.6625 407 | 0,2,1,34.0,21.0 408 | 0,3,1,51.0,7.75 409 | 1,2,1,3.0,18.75 410 | 0,3,1,21.0,7.775 411 | 0,3,0,29.6991176471,25.4667 412 | 0,3,1,29.6991176471,7.8958 413 | 0,3,1,29.6991176471,6.8583 414 | 1,1,0,33.0,90.0 415 | 0,2,1,29.6991176471,0.0 416 | 1,3,1,44.0,7.925 417 | 0,3,0,29.6991176471,8.05 418 | 1,2,0,34.0,32.5 419 | 1,2,0,18.0,13.0 420 | 0,2,1,30.0,13.0 421 | 0,3,0,10.0,24.15 422 | 0,3,1,29.6991176471,7.8958 423 | 0,3,1,21.0,7.7333 424 | 0,3,1,29.0,7.875 425 | 0,3,0,28.0,14.4 426 | 0,3,1,18.0,20.2125 427 | 0,3,1,29.6991176471,7.25 428 | 1,2,0,28.0,26.0 429 | 1,2,0,19.0,26.0 430 | 0,3,1,29.6991176471,7.75 431 | 1,3,1,32.0,8.05 432 | 1,1,1,28.0,26.55 433 | 1,3,0,29.6991176471,16.1 434 | 1,2,0,42.0,26.0 435 | 0,3,1,17.0,7.125 436 | 0,1,1,50.0,55.9 437 | 1,1,0,14.0,120.0 438 | 0,3,0,21.0,34.375 439 | 1,2,0,24.0,18.75 440 | 0,1,1,64.0,263.0 441 | 0,2,1,31.0,10.5 442 | 1,2,0,45.0,26.25 443 | 0,3,1,20.0,9.5 444 | 0,3,1,25.0,7.775 445 | 1,2,0,28.0,13.0 446 | 1,3,1,29.6991176471,8.1125 447 | 1,1,1,4.0,81.8583 448 | 1,2,0,13.0,19.5 449 | 1,1,1,34.0,26.55 450 | 1,3,0,5.0,19.2583 451 | 1,1,1,52.0,30.5 452 | 0,2,1,36.0,27.75 453 | 0,3,1,29.6991176471,19.9667 454 | 0,1,1,30.0,27.75 455 | 1,1,1,49.0,89.1042 456 | 0,3,1,29.6991176471,8.05 457 | 1,3,1,29.0,7.8958 458 | 0,1,1,65.0,26.55 459 | 1,1,0,29.6991176471,51.8625 460 | 1,2,0,50.0,10.5 461 | 0,3,1,29.6991176471,7.75 462 | 1,1,1,48.0,26.55 463 | 0,3,1,34.0,8.05 464 | 0,1,1,47.0,38.5 465 | 0,2,1,48.0,13.0 466 | 0,3,1,29.6991176471,8.05 467 | 0,3,1,38.0,7.05 468 | 0,2,1,29.6991176471,0.0 469 | 0,1,1,56.0,26.55 470 | 0,3,1,29.6991176471,7.725 471 | 1,3,0,0.75,19.2583 472 | 0,3,1,29.6991176471,7.25 473 | 0,3,1,38.0,8.6625 474 | 1,2,0,33.0,27.75 475 | 1,2,0,23.0,13.7917 476 | 0,3,0,22.0,9.8375 477 | 0,1,1,29.6991176471,52.0 478 | 0,2,1,34.0,21.0 479 | 0,3,1,29.0,7.0458 480 | 0,3,1,22.0,7.5208 481 | 1,3,0,2.0,12.2875 482 | 0,3,1,9.0,46.9 483 | 0,2,1,29.6991176471,0.0 484 | 0,3,1,50.0,8.05 485 | 1,3,0,63.0,9.5875 486 | 1,1,1,25.0,91.0792 487 | 0,3,0,29.6991176471,25.4667 488 | 1,1,0,35.0,90.0 489 | 0,1,1,58.0,29.7 490 | 0,3,1,30.0,8.05 491 | 1,3,1,9.0,15.9 492 | 0,3,1,29.6991176471,19.9667 493 | 0,3,1,21.0,7.25 494 | 0,1,1,55.0,30.5 495 | 0,1,1,71.0,49.5042 496 | 0,3,1,21.0,8.05 497 | 0,3,1,29.6991176471,14.4583 498 | 1,1,0,54.0,78.2667 499 | 0,3,1,29.6991176471,15.1 500 | 0,1,0,25.0,151.55 501 | 0,3,1,24.0,7.7958 502 | 0,3,1,17.0,8.6625 503 | 0,3,0,21.0,7.75 504 | 0,3,0,29.6991176471,7.6292 505 | 0,3,0,37.0,9.5875 506 | 1,1,0,16.0,86.5 507 | 0,1,1,18.0,108.9 508 | 1,2,0,33.0,26.0 509 | 1,1,1,29.6991176471,26.55 510 | 0,3,1,28.0,22.525 511 | 1,3,1,26.0,56.4958 512 | 1,3,1,29.0,7.75 513 | 0,3,1,29.6991176471,8.05 514 | 1,1,1,36.0,26.2875 515 | 1,1,0,54.0,59.4 516 | 0,3,1,24.0,7.4958 517 | 0,1,1,47.0,34.0208 518 | 1,2,0,34.0,10.5 519 
| 0,3,1,29.6991176471,24.15 520 | 1,2,0,36.0,26.0 521 | 0,3,1,32.0,7.8958 522 | 1,1,0,30.0,93.5 523 | 0,3,1,22.0,7.8958 524 | 0,3,1,29.6991176471,7.225 525 | 1,1,0,44.0,57.9792 526 | 0,3,1,29.6991176471,7.2292 527 | 0,3,1,40.5,7.75 528 | 1,2,0,50.0,10.5 529 | 0,1,1,29.6991176471,221.7792 530 | 0,3,1,39.0,7.925 531 | 0,2,1,23.0,11.5 532 | 1,2,0,2.0,26.0 533 | 0,3,1,29.6991176471,7.2292 534 | 0,3,1,17.0,7.2292 535 | 1,3,0,29.6991176471,22.3583 536 | 0,3,0,30.0,8.6625 537 | 1,2,0,7.0,26.25 538 | 0,1,1,45.0,26.55 539 | 1,1,0,30.0,106.425 540 | 0,3,1,29.6991176471,14.5 541 | 1,1,0,22.0,49.5 542 | 1,1,0,36.0,71.0 543 | 0,3,0,9.0,31.275 544 | 0,3,0,11.0,31.275 545 | 1,2,1,32.0,26.0 546 | 0,1,1,50.0,106.425 547 | 0,1,1,64.0,26.0 548 | 1,2,0,19.0,26.0 549 | 1,2,1,29.6991176471,13.8625 550 | 0,3,1,33.0,20.525 551 | 1,2,1,8.0,36.75 552 | 1,1,1,17.0,110.8833 553 | 0,2,1,27.0,26.0 554 | 0,3,1,29.6991176471,7.8292 555 | 1,3,1,22.0,7.225 556 | 1,3,0,22.0,7.775 557 | 0,1,1,62.0,26.55 558 | 1,1,0,48.0,39.6 559 | 0,1,1,29.6991176471,227.525 560 | 1,1,0,39.0,79.65 561 | 1,3,0,36.0,17.4 562 | 0,3,1,29.6991176471,7.75 563 | 0,3,1,40.0,7.8958 564 | 0,2,1,28.0,13.5 565 | 0,3,1,29.6991176471,8.05 566 | 0,3,0,29.6991176471,8.05 567 | 0,3,1,24.0,24.15 568 | 0,3,1,19.0,7.8958 569 | 0,3,0,29.0,21.075 570 | 0,3,1,29.6991176471,7.2292 571 | 1,3,1,32.0,7.8542 572 | 1,2,1,62.0,10.5 573 | 1,1,0,53.0,51.4792 574 | 1,1,1,36.0,26.3875 575 | 1,3,0,29.6991176471,7.75 576 | 0,3,1,16.0,8.05 577 | 0,3,1,19.0,14.5 578 | 1,2,0,34.0,13.0 579 | 1,1,0,39.0,55.9 580 | 0,3,0,29.6991176471,14.4583 581 | 1,3,1,32.0,7.925 582 | 1,2,0,25.0,30.0 583 | 1,1,0,39.0,110.8833 584 | 0,2,1,54.0,26.0 585 | 0,1,1,36.0,40.125 586 | 0,3,1,29.6991176471,8.7125 587 | 1,1,0,18.0,79.65 588 | 0,2,1,47.0,15.0 589 | 1,1,1,60.0,79.2 590 | 0,3,1,22.0,8.05 591 | 0,3,1,29.6991176471,8.05 592 | 0,3,1,35.0,7.125 593 | 1,1,0,52.0,78.2667 594 | 0,3,1,47.0,7.25 595 | 0,3,0,29.6991176471,7.75 596 | 0,2,1,37.0,26.0 597 | 0,3,1,36.0,24.15 598 | 1,2,0,29.6991176471,33.0 599 | 0,3,1,49.0,0.0 600 | 0,3,1,29.6991176471,7.225 601 | 1,1,1,49.0,56.9292 602 | 1,2,0,24.0,27.0 603 | 0,3,1,29.6991176471,7.8958 604 | 0,1,1,29.6991176471,42.4 605 | 0,3,1,44.0,8.05 606 | 1,1,1,35.0,26.55 607 | 0,3,1,36.0,15.55 608 | 0,3,1,30.0,7.8958 609 | 1,1,1,27.0,30.5 610 | 1,2,0,22.0,41.5792 611 | 1,1,0,40.0,153.4625 612 | 0,3,0,39.0,31.275 613 | 0,3,1,29.6991176471,7.05 614 | 1,3,0,29.6991176471,15.5 615 | 0,3,1,29.6991176471,7.75 616 | 0,3,1,35.0,8.05 617 | 1,2,0,24.0,65.0 618 | 0,3,1,34.0,14.4 619 | 0,3,0,26.0,16.1 620 | 1,2,0,4.0,39.0 621 | 0,2,1,26.0,10.5 622 | 0,3,1,27.0,14.4542 623 | 1,1,1,42.0,52.5542 624 | 1,3,1,20.0,15.7417 625 | 0,3,1,21.0,7.8542 626 | 0,3,1,21.0,16.1 627 | 0,1,1,61.0,32.3208 628 | 0,2,1,57.0,12.35 629 | 1,1,0,21.0,77.9583 630 | 0,3,1,26.0,7.8958 631 | 0,3,1,29.6991176471,7.7333 632 | 1,1,1,80.0,30.0 633 | 0,3,1,51.0,7.0542 634 | 1,1,1,32.0,30.5 635 | 0,1,1,29.6991176471,0.0 636 | 0,3,0,9.0,27.9 637 | 1,2,0,28.0,13.0 638 | 0,3,1,32.0,7.925 639 | 0,2,1,31.0,26.25 640 | 0,3,0,41.0,39.6875 641 | 0,3,1,29.6991176471,16.1 642 | 0,3,1,20.0,7.8542 643 | 1,1,0,24.0,69.3 644 | 0,3,0,2.0,27.9 645 | 1,3,1,29.6991176471,56.4958 646 | 1,3,0,0.75,19.2583 647 | 1,1,1,48.0,76.7292 648 | 0,3,1,19.0,7.8958 649 | 1,1,1,56.0,35.5 650 | 0,3,1,29.6991176471,7.55 651 | 1,3,0,23.0,7.55 652 | 0,3,1,29.6991176471,7.8958 653 | 1,2,0,18.0,23.0 654 | 0,3,1,21.0,8.4333 655 | 1,3,0,29.6991176471,7.8292 656 | 0,3,0,18.0,6.75 657 | 0,2,1,24.0,73.5 658 | 0,3,1,29.6991176471,7.8958 659 | 0,3,0,32.0,15.5 
660 | 0,2,1,23.0,13.0 661 | 0,1,1,58.0,113.275 662 | 1,1,1,50.0,133.65 663 | 0,3,1,40.0,7.225 664 | 0,1,1,47.0,25.5875 665 | 0,3,1,36.0,7.4958 666 | 1,3,1,20.0,7.925 667 | 0,2,1,32.0,73.5 668 | 0,2,1,25.0,13.0 669 | 0,3,1,29.6991176471,7.775 670 | 0,3,1,43.0,8.05 671 | 1,1,0,29.6991176471,52.0 672 | 1,2,0,40.0,39.0 673 | 0,1,1,31.0,52.0 674 | 0,2,1,70.0,10.5 675 | 1,2,1,31.0,13.0 676 | 0,2,1,29.6991176471,0.0 677 | 0,3,1,18.0,7.775 678 | 0,3,1,24.5,8.05 679 | 1,3,0,18.0,9.8417 680 | 0,3,0,43.0,46.9 681 | 1,1,1,36.0,512.3292 682 | 0,3,0,29.6991176471,8.1375 683 | 1,1,1,27.0,76.7292 684 | 0,3,1,20.0,9.225 685 | 0,3,1,14.0,46.9 686 | 0,2,1,60.0,39.0 687 | 0,2,1,25.0,41.5792 688 | 0,3,1,14.0,39.6875 689 | 0,3,1,19.0,10.1708 690 | 0,3,1,18.0,7.7958 691 | 1,1,0,15.0,211.3375 692 | 1,1,1,31.0,57.0 693 | 1,3,0,4.0,13.4167 694 | 1,3,1,29.6991176471,56.4958 695 | 0,3,1,25.0,7.225 696 | 0,1,1,60.0,26.55 697 | 0,2,1,52.0,13.5 698 | 0,3,1,44.0,8.05 699 | 1,3,0,29.6991176471,7.7333 700 | 0,1,1,49.0,110.8833 701 | 0,3,1,42.0,7.65 702 | 1,1,0,18.0,227.525 703 | 1,1,1,35.0,26.2875 704 | 0,3,0,18.0,14.4542 705 | 0,3,1,25.0,7.7417 706 | 0,3,1,26.0,7.8542 707 | 0,2,1,39.0,26.0 708 | 1,2,0,45.0,13.5 709 | 1,1,1,42.0,26.2875 710 | 1,1,0,22.0,151.55 711 | 1,3,1,29.6991176471,15.2458 712 | 1,1,0,24.0,49.5042 713 | 0,1,1,29.6991176471,26.55 714 | 1,1,1,48.0,52.0 715 | 0,3,1,29.0,9.4833 716 | 0,2,1,52.0,13.0 717 | 0,3,1,19.0,7.65 718 | 1,1,0,38.0,227.525 719 | 1,2,0,27.0,10.5 720 | 0,3,1,29.6991176471,15.5 721 | 0,3,1,33.0,7.775 722 | 1,2,0,6.0,33.0 723 | 0,3,1,17.0,7.0542 724 | 0,2,1,34.0,13.0 725 | 0,2,1,50.0,13.0 726 | 1,1,1,27.0,53.1 727 | 0,3,1,20.0,8.6625 728 | 1,2,0,30.0,21.0 729 | 1,3,0,29.6991176471,7.7375 730 | 0,2,1,25.0,26.0 731 | 0,3,0,25.0,7.925 732 | 1,1,0,29.0,211.3375 733 | 0,3,1,11.0,18.7875 734 | 0,2,1,29.6991176471,0.0 735 | 0,2,1,23.0,13.0 736 | 0,2,1,23.0,13.0 737 | 0,3,1,28.5,16.1 738 | 0,3,0,48.0,34.375 739 | 1,1,1,35.0,512.3292 740 | 0,3,1,29.6991176471,7.8958 741 | 0,3,1,29.6991176471,7.8958 742 | 1,1,1,29.6991176471,30.0 743 | 0,1,1,36.0,78.85 744 | 1,1,0,21.0,262.375 745 | 0,3,1,24.0,16.1 746 | 1,3,1,31.0,7.925 747 | 0,1,1,70.0,71.0 748 | 0,3,1,16.0,20.25 749 | 1,2,0,30.0,13.0 750 | 0,1,1,19.0,53.1 751 | 0,3,1,31.0,7.75 752 | 1,2,0,4.0,23.0 753 | 1,3,1,6.0,12.475 754 | 0,3,1,33.0,9.5 755 | 0,3,1,23.0,7.8958 756 | 1,2,0,48.0,65.0 757 | 1,2,1,0.67,14.5 758 | 0,3,1,28.0,7.7958 759 | 0,2,1,18.0,11.5 760 | 0,3,1,34.0,8.05 761 | 1,1,0,33.0,86.5 762 | 0,3,1,29.6991176471,14.5 763 | 0,3,1,41.0,7.125 764 | 1,3,1,20.0,7.2292 765 | 1,1,0,36.0,120.0 766 | 0,3,1,16.0,7.775 767 | 1,1,0,51.0,77.9583 768 | 0,1,1,29.6991176471,39.6 769 | 0,3,0,30.5,7.75 770 | 0,3,1,29.6991176471,24.15 771 | 0,3,1,32.0,8.3625 772 | 0,3,1,24.0,9.5 773 | 0,3,1,48.0,7.8542 774 | 0,2,0,57.0,10.5 775 | 0,3,1,29.6991176471,7.225 776 | 1,2,0,54.0,23.0 777 | 0,3,1,18.0,7.75 778 | 0,3,1,29.6991176471,7.75 779 | 1,3,0,5.0,12.475 780 | 0,3,1,29.6991176471,7.7375 781 | 1,1,0,43.0,211.3375 782 | 1,3,0,13.0,7.2292 783 | 1,1,0,17.0,57.0 784 | 0,1,1,29.0,30.0 785 | 0,3,1,29.6991176471,23.45 786 | 0,3,1,25.0,7.05 787 | 0,3,1,25.0,7.25 788 | 1,3,0,18.0,7.4958 789 | 0,3,1,8.0,29.125 790 | 1,3,1,1.0,20.575 791 | 0,1,1,46.0,79.2 792 | 0,3,1,29.6991176471,7.75 793 | 0,2,1,16.0,26.0 794 | 0,3,0,29.6991176471,69.55 795 | 0,1,1,29.6991176471,30.6958 796 | 0,3,1,25.0,7.8958 797 | 0,2,1,39.0,13.0 798 | 1,1,0,49.0,25.9292 799 | 1,3,0,31.0,8.6833 800 | 0,3,1,30.0,7.2292 801 | 0,3,0,30.0,24.15 802 | 0,2,1,34.0,13.0 803 | 1,2,0,31.0,26.25 804 | 
1,1,1,11.0,120.0 805 | 1,3,1,0.42,8.5167 806 | 1,3,1,27.0,6.975 807 | 0,3,1,31.0,7.775 808 | 0,1,1,39.0,0.0 809 | 0,3,0,18.0,7.775 810 | 0,2,1,39.0,13.0 811 | 1,1,0,33.0,53.1 812 | 0,3,1,26.0,7.8875 813 | 0,3,1,39.0,24.15 814 | 0,2,1,35.0,10.5 815 | 0,3,0,6.0,31.275 816 | 0,3,1,30.5,8.05 817 | 0,1,1,29.6991176471,0.0 818 | 0,3,0,23.0,7.925 819 | 0,2,1,31.0,37.0042 820 | 0,3,1,43.0,6.45 821 | 0,3,1,10.0,27.9 822 | 1,1,0,52.0,93.5 823 | 1,3,1,27.0,8.6625 824 | 0,1,1,38.0,0.0 825 | 1,3,0,27.0,12.475 826 | 0,3,1,2.0,39.6875 827 | 0,3,1,29.6991176471,6.95 828 | 0,3,1,29.6991176471,56.4958 829 | 1,2,1,1.0,37.0042 830 | 1,3,1,29.6991176471,7.75 831 | 1,1,0,62.0,80.0 832 | 1,3,0,15.0,14.4542 833 | 1,2,1,0.83,18.75 834 | 0,3,1,29.6991176471,7.2292 835 | 0,3,1,23.0,7.8542 836 | 0,3,1,18.0,8.3 837 | 1,1,0,39.0,83.1583 838 | 0,3,1,21.0,8.6625 839 | 0,3,1,29.6991176471,8.05 840 | 1,3,1,32.0,56.4958 841 | 1,1,1,29.6991176471,29.7 842 | 0,3,1,20.0,7.925 843 | 0,2,1,16.0,10.5 844 | 1,1,0,30.0,31.0 845 | 0,3,1,34.5,6.4375 846 | 0,3,1,17.0,8.6625 847 | 0,3,1,42.0,7.55 848 | 0,3,1,29.6991176471,69.55 849 | 0,3,1,35.0,7.8958 850 | 0,2,1,28.0,33.0 851 | 1,1,0,29.6991176471,89.1042 852 | 0,3,1,4.0,31.275 853 | 0,3,1,74.0,7.775 854 | 0,3,0,9.0,15.2458 855 | 1,1,0,16.0,39.4 856 | 0,2,0,44.0,26.0 857 | 1,3,0,18.0,9.35 858 | 1,1,0,45.0,164.8667 859 | 1,1,1,51.0,26.55 860 | 1,3,0,24.0,19.2583 861 | 0,3,1,29.6991176471,7.2292 862 | 0,3,1,41.0,14.1083 863 | 0,2,1,21.0,11.5 864 | 1,1,0,48.0,25.9292 865 | 0,3,0,29.6991176471,69.55 866 | 0,2,1,24.0,13.0 867 | 1,2,0,42.0,13.0 868 | 1,2,0,27.0,13.8583 869 | 0,1,1,31.0,50.4958 870 | 0,3,1,29.6991176471,9.5 871 | 1,3,1,4.0,11.1333 872 | 0,3,1,26.0,7.8958 873 | 1,1,0,47.0,52.5542 874 | 0,1,1,33.0,5.0 875 | 0,3,1,47.0,9.0 876 | 1,2,0,28.0,24.0 877 | 1,3,0,15.0,7.225 878 | 0,3,1,20.0,9.8458 879 | 0,3,1,19.0,7.8958 880 | 0,3,1,29.6991176471,7.8958 881 | 1,1,0,56.0,83.1583 882 | 1,2,0,25.0,26.0 883 | 0,3,1,33.0,7.8958 884 | 0,3,0,22.0,10.5167 885 | 0,2,1,28.0,10.5 886 | 0,3,1,25.0,7.05 887 | 0,3,0,39.0,29.125 888 | 0,2,1,27.0,13.0 889 | 1,1,0,19.0,30.0 890 | 0,3,0,29.6991176471,23.45 891 | 1,1,1,26.0,30.0 892 | 0,3,1,32.0,7.75 893 | -------------------------------------------------------------------------------- /presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/savarin/neural-networks/8d3ddf363ac0aac8b020a785e8a78e761f5904d8/presentation.pdf --------------------------------------------------------------------------------