├── LICENSE ├── README.md ├── nnfs-visuals-START.ipynb └── nnfs-visuals.ipynb /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Harrison 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # neural-net-internals-visualized 2 | Visualizing some of the internals of a neural network during training and inference. 3 | 4 | Video tutorial for the code: https://www.youtube.com/watch?v=ChfEO8l-fas 5 | -------------------------------------------------------------------------------- /nnfs-visuals-START.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from zipfile import ZipFile\n", 10 | "import os\n", 11 | "import urllib\n", 12 | "import urllib.request\n", 13 | "\n", 14 | "FILE = 'fashion_mnist_images.zip'\n", 15 | "FOLDER = 'fashion_mnist_images'\n", 16 | "URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'\n", 17 | "\n", 18 | "\n", 19 | "if not os.path.isfile(FILE):\n", 20 | " print(f'Downloading {URL} and saving as {FILE}...')\n", 21 | " urllib.request.urlretrieve(URL, FILE)\n", 22 | "print('Unzipping images...')\n", 23 | "\n", 24 | "with ZipFile(FILE) as zip_images:\n", 25 | " zip_images.extractall(FOLDER)\n", 26 | "print('Done!')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "'''\n", 36 | "The following code is from the Neural Networks from Scratch book by Harrison Kinsley and Daniel Kukiela.\n", 37 | "\n", 38 | "https://nnfs.io\n", 39 | "'''\n", 40 | "\n", 41 | "import numpy as np\n", 42 | "import os\n", 43 | "import cv2\n", 44 | "from tqdm import tqdm\n", 45 | "import pickle\n", 46 | "import copy\n", 47 | "import pickle\n", 48 | "\n", 49 | "# We'll be saving all the data from this model's training. 
\n", 50 | "train_dict = {}\n", 51 | "\n", 52 | "# Dense layer\n", 53 | "class Layer_Dense:\n", 54 | "\n", 55 | " # Layer initialization\n", 56 | " def __init__(self, n_inputs, n_neurons,\n", 57 | " weight_regularizer_l1=0, weight_regularizer_l2=0,\n", 58 | " bias_regularizer_l1=0, bias_regularizer_l2=0):\n", 59 | " # Initialize weights and biases\n", 60 | " self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)\n", 61 | " self.biases = np.zeros((1, n_neurons))\n", 62 | " # Set regularization strength\n", 63 | " self.weight_regularizer_l1 = weight_regularizer_l1\n", 64 | " self.weight_regularizer_l2 = weight_regularizer_l2\n", 65 | " self.bias_regularizer_l1 = bias_regularizer_l1\n", 66 | " self.bias_regularizer_l2 = bias_regularizer_l2\n", 67 | "\n", 68 | " # Forward pass\n", 69 | " def forward(self, inputs, training):\n", 70 | " # Remember input values\n", 71 | " self.inputs = inputs\n", 72 | " # Calculate output values from inputs, weights and biases\n", 73 | " self.output = np.dot(inputs, self.weights) + self.biases\n", 74 | "\n", 75 | " # Backward pass\n", 76 | " def backward(self, dvalues):\n", 77 | " # Gradients on parameters\n", 78 | " self.dweights = np.dot(self.inputs.T, dvalues)\n", 79 | " self.dbiases = np.sum(dvalues, axis=0, keepdims=True)\n", 80 | "\n", 81 | "\n", 82 | " # Gradients on regularization\n", 83 | " # L1 on weights\n", 84 | " if self.weight_regularizer_l1 > 0:\n", 85 | " dL1 = np.ones_like(self.weights)\n", 86 | " dL1[self.weights < 0] = -1\n", 87 | " self.dweights += self.weight_regularizer_l1 * dL1\n", 88 | " # L2 on weights\n", 89 | " if self.weight_regularizer_l2 > 0:\n", 90 | " self.dweights += 2 * self.weight_regularizer_l2 * \\\n", 91 | " self.weights\n", 92 | " # L1 on biases\n", 93 | " if self.bias_regularizer_l1 > 0:\n", 94 | " dL1 = np.ones_like(self.biases)\n", 95 | " dL1[self.biases < 0] = -1\n", 96 | " self.dbiases += self.bias_regularizer_l1 * dL1\n", 97 | " # L2 on biases\n", 98 | " if self.bias_regularizer_l2 > 0:\n", 99 | " self.dbiases += 2 * self.bias_regularizer_l2 * \\\n", 100 | " self.biases\n", 101 | "\n", 102 | " # Gradient on values\n", 103 | " self.dinputs = np.dot(dvalues, self.weights.T)\n", 104 | "\n", 105 | "\n", 106 | "# Dropout\n", 107 | "class Layer_Dropout:\n", 108 | "\n", 109 | " # Init\n", 110 | " def __init__(self, rate):\n", 111 | " # Store rate, we invert it as for example for dropout\n", 112 | " # of 0.1 we need success rate of 0.9\n", 113 | " self.rate = 1 - rate\n", 114 | "\n", 115 | " # Forward pass\n", 116 | " def forward(self, inputs, training):\n", 117 | " # Save input values\n", 118 | " self.inputs = inputs\n", 119 | "\n", 120 | " # If not in the training mode - return values\n", 121 | " if not training:\n", 122 | " self.output = inputs.copy()\n", 123 | " return\n", 124 | "\n", 125 | " # Generate and save scaled mask\n", 126 | " self.binary_mask = np.random.binomial(1, self.rate,\n", 127 | " size=inputs.shape) / self.rate\n", 128 | " # Apply mask to output values\n", 129 | " self.output = inputs * self.binary_mask\n", 130 | "\n", 131 | "\n", 132 | " # Backward pass\n", 133 | " def backward(self, dvalues):\n", 134 | " # Gradient on values\n", 135 | " self.dinputs = dvalues * self.binary_mask\n", 136 | "\n", 137 | "\n", 138 | "# Input \"layer\"\n", 139 | "class Layer_Input:\n", 140 | "\n", 141 | " # Forward pass\n", 142 | " def forward(self, inputs, training):\n", 143 | " self.output = inputs\n", 144 | "\n", 145 | "\n", 146 | "# ReLU activation\n", 147 | "class Activation_ReLU:\n", 148 | "\n", 149 | " # Forward 
pass\n", 150 | " def forward(self, inputs, training):\n", 151 | " # Remember input values\n", 152 | " self.inputs = inputs\n", 153 | " # Calculate output values from inputs\n", 154 | " self.output = np.maximum(0, inputs)\n", 155 | "\n", 156 | " # Backward pass\n", 157 | " def backward(self, dvalues):\n", 158 | " # Since we need to modify original variable,\n", 159 | " # let's make a copy of values first\n", 160 | " self.dinputs = dvalues.copy()\n", 161 | "\n", 162 | " # Zero gradient where input values were negative\n", 163 | " self.dinputs[self.inputs <= 0] = 0\n", 164 | "\n", 165 | " # Calculate predictions for outputs\n", 166 | " def predictions(self, outputs):\n", 167 | " return outputs\n", 168 | "\n", 169 | "\n", 170 | "# Softmax activation\n", 171 | "class Activation_Softmax:\n", 172 | "\n", 173 | " # Forward pass\n", 174 | " def forward(self, inputs, training):\n", 175 | " # Remember input values\n", 176 | " self.inputs = inputs\n", 177 | "\n", 178 | " # Get unnormalized probabilities\n", 179 | " exp_values = np.exp(inputs - np.max(inputs, axis=1,\n", 180 | " keepdims=True))\n", 181 | "\n", 182 | " # Normalize them for each sample\n", 183 | " probabilities = exp_values / np.sum(exp_values, axis=1,\n", 184 | " keepdims=True)\n", 185 | "\n", 186 | " self.output = probabilities\n", 187 | "\n", 188 | " # Backward pass\n", 189 | " def backward(self, dvalues):\n", 190 | "\n", 191 | " # Create uninitialized array\n", 192 | " self.dinputs = np.empty_like(dvalues)\n", 193 | "\n", 194 | " # Enumerate outputs and gradients\n", 195 | " for index, (single_output, single_dvalues) in \\\n", 196 | " enumerate(zip(self.output, dvalues)):\n", 197 | " # Flatten output array\n", 198 | " single_output = single_output.reshape(-1, 1)\n", 199 | " # Calculate Jacobian matrix of the output\n", 200 | " jacobian_matrix = np.diagflat(single_output) - \\\n", 201 | " np.dot(single_output, single_output.T)\n", 202 | " # Calculate sample-wise gradient\n", 203 | " # and add it to the array of sample gradients\n", 204 | " self.dinputs[index] = np.dot(jacobian_matrix,\n", 205 | " single_dvalues)\n", 206 | "\n", 207 | " # Calculate predictions for outputs\n", 208 | " def predictions(self, outputs):\n", 209 | " return np.argmax(outputs, axis=1)\n", 210 | "\n", 211 | "\n", 212 | "\n", 213 | "# Adam optimizer\n", 214 | "class Optimizer_Adam:\n", 215 | "\n", 216 | " # Initialize optimizer - set settings\n", 217 | " def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7,\n", 218 | " beta_1=0.9, beta_2=0.999):\n", 219 | " self.learning_rate = learning_rate\n", 220 | " self.current_learning_rate = learning_rate\n", 221 | " self.decay = decay\n", 222 | " self.iterations = 0\n", 223 | " self.epsilon = epsilon\n", 224 | " self.beta_1 = beta_1\n", 225 | " self.beta_2 = beta_2\n", 226 | "\n", 227 | "\n", 228 | " # Call once before any parameter updates\n", 229 | " def pre_update_params(self):\n", 230 | " if self.decay:\n", 231 | " self.current_learning_rate = self.learning_rate * \\\n", 232 | " (1. / (1. 
+ self.decay * self.iterations))\n", 233 | "\n", 234 | " # Update parameters\n", 235 | " def update_params(self, layer):\n", 236 | "\n", 237 | " # If layer does not contain cache arrays,\n", 238 | " # create them filled with zeros\n", 239 | " if not hasattr(layer, 'weight_cache'):\n", 240 | " layer.weight_momentums = np.zeros_like(layer.weights)\n", 241 | " layer.weight_cache = np.zeros_like(layer.weights)\n", 242 | " layer.bias_momentums = np.zeros_like(layer.biases)\n", 243 | " layer.bias_cache = np.zeros_like(layer.biases)\n", 244 | "\n", 245 | " # Update momentum with current gradients\n", 246 | " layer.weight_momentums = self.beta_1 * \\\n", 247 | " layer.weight_momentums + \\\n", 248 | " (1 - self.beta_1) * layer.dweights\n", 249 | " layer.bias_momentums = self.beta_1 * \\\n", 250 | " layer.bias_momentums + \\\n", 251 | " (1 - self.beta_1) * layer.dbiases\n", 252 | " # Get corrected momentum\n", 253 | " # self.iteration is 0 at first pass\n", 254 | " # and we need to start with 1 here\n", 255 | " weight_momentums_corrected = layer.weight_momentums / \\\n", 256 | " (1 - self.beta_1 ** (self.iterations + 1))\n", 257 | " bias_momentums_corrected = layer.bias_momentums / \\\n", 258 | " (1 - self.beta_1 ** (self.iterations + 1))\n", 259 | " # Update cache with squared current gradients\n", 260 | " layer.weight_cache = self.beta_2 * layer.weight_cache + \\\n", 261 | " (1 - self.beta_2) * layer.dweights**2\n", 262 | " layer.bias_cache = self.beta_2 * layer.bias_cache + \\\n", 263 | " (1 - self.beta_2) * layer.dbiases**2\n", 264 | " # Get corrected cache\n", 265 | " weight_cache_corrected = layer.weight_cache / \\\n", 266 | " (1 - self.beta_2 ** (self.iterations + 1))\n", 267 | " bias_cache_corrected = layer.bias_cache / \\\n", 268 | " (1 - self.beta_2 ** (self.iterations + 1))\n", 269 | "\n", 270 | " # Vanilla SGD parameter update + normalization\n", 271 | " # with square rooted cache\n", 272 | " layer.weights += -self.current_learning_rate * \\\n", 273 | " weight_momentums_corrected / \\\n", 274 | " (np.sqrt(weight_cache_corrected) +\n", 275 | " self.epsilon)\n", 276 | "\n", 277 | " layer.biases += -self.current_learning_rate * \\\n", 278 | " bias_momentums_corrected / \\\n", 279 | " (np.sqrt(bias_cache_corrected) +\n", 280 | " self.epsilon)\n", 281 | "\n", 282 | " # Call once after any parameter updates\n", 283 | " def post_update_params(self):\n", 284 | " self.iterations += 1\n", 285 | "\n", 286 | "\n", 287 | "# Common loss class\n", 288 | "class Loss:\n", 289 | "\n", 290 | " # Regularization loss calculation\n", 291 | " def regularization_loss(self):\n", 292 | "\n", 293 | " # 0 by default\n", 294 | " regularization_loss = 0\n", 295 | "\n", 296 | " # Calculate regularization loss\n", 297 | " # iterate all trainable layers\n", 298 | " for layer in self.trainable_layers:\n", 299 | "\n", 300 | " # L1 regularization - weights\n", 301 | " # calculate only when factor greater than 0\n", 302 | " if layer.weight_regularizer_l1 > 0:\n", 303 | " regularization_loss += layer.weight_regularizer_l1 * \\\n", 304 | " np.sum(np.abs(layer.weights))\n", 305 | "\n", 306 | " # L2 regularization - weights\n", 307 | " if layer.weight_regularizer_l2 > 0:\n", 308 | " regularization_loss += layer.weight_regularizer_l2 * \\\n", 309 | " np.sum(layer.weights * \\\n", 310 | " layer.weights)\n", 311 | "\n", 312 | " # L1 regularization - biases\n", 313 | " # calculate only when factor greater than 0\n", 314 | " if layer.bias_regularizer_l1 > 0:\n", 315 | " regularization_loss += layer.bias_regularizer_l1 * \\\n", 316 
| " np.sum(np.abs(layer.biases))\n", 317 | "\n", 318 | " # L2 regularization - biases\n", 319 | " if layer.bias_regularizer_l2 > 0:\n", 320 | " regularization_loss += layer.bias_regularizer_l2 * \\\n", 321 | " np.sum(layer.biases * \\\n", 322 | " layer.biases)\n", 323 | "\n", 324 | " return regularization_loss\n", 325 | "\n", 326 | "\n", 327 | " # Set/remember trainable layers\n", 328 | " def remember_trainable_layers(self, trainable_layers):\n", 329 | " self.trainable_layers = trainable_layers\n", 330 | "\n", 331 | " # Calculates the data and regularization losses\n", 332 | " # given model output and ground truth values\n", 333 | " def calculate(self, output, y, *, include_regularization=False):\n", 334 | "\n", 335 | " # Calculate sample losses\n", 336 | " sample_losses = self.forward(output, y)\n", 337 | "\n", 338 | " # Calculate mean loss\n", 339 | " data_loss = np.mean(sample_losses)\n", 340 | "\n", 341 | " # Add accumulated sum of losses and sample count\n", 342 | " self.accumulated_sum += np.sum(sample_losses)\n", 343 | " self.accumulated_count += len(sample_losses)\n", 344 | "\n", 345 | " # If just data loss - return it\n", 346 | " if not include_regularization:\n", 347 | " return data_loss\n", 348 | "\n", 349 | " # Return the data and regularization losses\n", 350 | " return data_loss, self.regularization_loss()\n", 351 | "\n", 352 | " # Calculates accumulated loss\n", 353 | " def calculate_accumulated(self, *, include_regularization=False):\n", 354 | "\n", 355 | " # Calculate mean loss\n", 356 | " data_loss = self.accumulated_sum / self.accumulated_count\n", 357 | "\n", 358 | " # If just data loss - return it\n", 359 | " if not include_regularization:\n", 360 | " return data_loss\n", 361 | "\n", 362 | " # Return the data and regularization losses\n", 363 | " return data_loss, self.regularization_loss()\n", 364 | "\n", 365 | " # Reset variables for accumulated loss\n", 366 | " def new_pass(self):\n", 367 | " self.accumulated_sum = 0\n", 368 | " self.accumulated_count = 0\n", 369 | "\n", 370 | "\n", 371 | "\n", 372 | "# Cross-entropy loss\n", 373 | "class Loss_CategoricalCrossentropy(Loss):\n", 374 | "\n", 375 | " # Forward pass\n", 376 | " def forward(self, y_pred, y_true):\n", 377 | "\n", 378 | " # Number of samples in a batch\n", 379 | " samples = len(y_pred)\n", 380 | "\n", 381 | " # Clip data to prevent division by 0\n", 382 | " # Clip both sides to not drag mean towards any value\n", 383 | " y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)\n", 384 | "\n", 385 | " # Probabilities for target values -\n", 386 | " # only if categorical labels\n", 387 | " if len(y_true.shape) == 1:\n", 388 | " correct_confidences = y_pred_clipped[\n", 389 | " range(samples),\n", 390 | " y_true\n", 391 | " ]\n", 392 | "\n", 393 | " # Mask values - only for one-hot encoded labels\n", 394 | " elif len(y_true.shape) == 2:\n", 395 | " correct_confidences = np.sum(\n", 396 | " y_pred_clipped * y_true,\n", 397 | " axis=1\n", 398 | " )\n", 399 | "\n", 400 | " # Losses\n", 401 | " negative_log_likelihoods = -np.log(correct_confidences)\n", 402 | " return negative_log_likelihoods\n", 403 | "\n", 404 | " # Backward pass\n", 405 | " def backward(self, dvalues, y_true):\n", 406 | "\n", 407 | " # Number of samples\n", 408 | " samples = len(dvalues)\n", 409 | " # Number of labels in every sample\n", 410 | " # We'll use the first sample to count them\n", 411 | " labels = len(dvalues[0])\n", 412 | "\n", 413 | " # If labels are sparse, turn them into one-hot vector\n", 414 | " if len(y_true.shape) == 1:\n", 415 | " 
y_true = np.eye(labels)[y_true]\n", 416 | "\n", 417 | " # Calculate gradient\n", 418 | " self.dinputs = -y_true / dvalues\n", 419 | " # Normalize gradient\n", 420 | " self.dinputs = self.dinputs / samples\n", 421 | "\n", 422 | "# Softmax classifier - combined Softmax activation\n", 423 | "# and cross-entropy loss for faster backward step\n", 424 | "class Activation_Softmax_Loss_CategoricalCrossentropy():\n", 425 | "\n", 426 | " # Backward pass\n", 427 | " def backward(self, dvalues, y_true):\n", 428 | "\n", 429 | " # Number of samples\n", 430 | " samples = len(dvalues)\n", 431 | "\n", 432 | " # If labels are one-hot encoded,\n", 433 | " # turn them into discrete values\n", 434 | " if len(y_true.shape) == 2:\n", 435 | " y_true = np.argmax(y_true, axis=1)\n", 436 | "\n", 437 | " # Copy so we can safely modify\n", 438 | " self.dinputs = dvalues.copy()\n", 439 | " # Calculate gradient\n", 440 | " self.dinputs[range(samples), y_true] -= 1\n", 441 | " # Normalize gradient\n", 442 | " self.dinputs = self.dinputs / samples\n", 443 | "\n", 444 | "\n", 445 | "\n", 446 | "# Common accuracy class\n", 447 | "class Accuracy:\n", 448 | "\n", 449 | " # Calculates an accuracy\n", 450 | " # given predictions and ground truth values\n", 451 | " def calculate(self, predictions, y):\n", 452 | "\n", 453 | " # Get comparison results\n", 454 | " comparisons = self.compare(predictions, y)\n", 455 | "\n", 456 | " # Calculate an accuracy\n", 457 | " accuracy = np.mean(comparisons)\n", 458 | "\n", 459 | " # Add accumulated sum of matching values and sample count\n", 460 | " self.accumulated_sum += np.sum(comparisons)\n", 461 | " self.accumulated_count += len(comparisons)\n", 462 | "\n", 463 | " # Return accuracy\n", 464 | " return accuracy\n", 465 | "\n", 466 | " # Calculates accumulated accuracy\n", 467 | " def calculate_accumulated(self):\n", 468 | "\n", 469 | " # Calculate an accuracy\n", 470 | " accuracy = self.accumulated_sum / self.accumulated_count\n", 471 | "\n", 472 | " # Return the data and regularization losses\n", 473 | " return accuracy\n", 474 | "\n", 475 | " # Reset variables for accumulated accuracy\n", 476 | " def new_pass(self):\n", 477 | " self.accumulated_sum = 0\n", 478 | " self.accumulated_count = 0\n", 479 | "\n", 480 | "\n", 481 | "# Accuracy calculation for classification model\n", 482 | "class Accuracy_Categorical(Accuracy):\n", 483 | "\n", 484 | " def __init__(self, *, binary=False):\n", 485 | " # Binary mode?\n", 486 | " self.binary = binary\n", 487 | "\n", 488 | " # No initialization is needed\n", 489 | " def init(self, y):\n", 490 | " pass\n", 491 | "\n", 492 | " # Compares predictions to the ground truth values\n", 493 | " def compare(self, predictions, y):\n", 494 | " if not self.binary and len(y.shape) == 2:\n", 495 | " y = np.argmax(y, axis=1)\n", 496 | " return predictions == y\n", 497 | "\n", 498 | "\n", 499 | "# Model class\n", 500 | "class Model:\n", 501 | "\n", 502 | " def __init__(self):\n", 503 | " # Create a list of network objects\n", 504 | " self.layers = []\n", 505 | " # Softmax classifier's output object\n", 506 | " self.softmax_classifier_output = None\n", 507 | "\n", 508 | " # Add objects to the model\n", 509 | " def add(self, layer):\n", 510 | " self.layers.append(layer)\n", 511 | "\n", 512 | "\n", 513 | " # Set loss, optimizer and accuracy\n", 514 | " def set(self, *, loss=None, optimizer=None, accuracy=None):\n", 515 | "\n", 516 | " if loss is not None:\n", 517 | " self.loss = loss\n", 518 | "\n", 519 | " if optimizer is not None:\n", 520 | " self.optimizer = 
optimizer\n", 521 | "\n", 522 | " if accuracy is not None:\n", 523 | " self.accuracy = accuracy\n", 524 | "\n", 525 | " # Finalize the model\n", 526 | " def finalize(self):\n", 527 | "\n", 528 | " # Create and set the input layer\n", 529 | " self.input_layer = Layer_Input()\n", 530 | "\n", 531 | " # Count all the objects\n", 532 | " layer_count = len(self.layers)\n", 533 | "\n", 534 | " # Initialize a list containing trainable layers:\n", 535 | " self.trainable_layers = []\n", 536 | "\n", 537 | " # Iterate the objects\n", 538 | " for i in range(layer_count):\n", 539 | "\n", 540 | " # If it's the first layer,\n", 541 | " # the previous layer object is the input layer\n", 542 | " if i == 0:\n", 543 | " self.layers[i].prev = self.input_layer\n", 544 | " self.layers[i].next = self.layers[i+1]\n", 545 | "\n", 546 | " # All layers except for the first and the last\n", 547 | " elif i < layer_count - 1:\n", 548 | " self.layers[i].prev = self.layers[i-1]\n", 549 | " self.layers[i].next = self.layers[i+1]\n", 550 | "\n", 551 | " # The last layer - the next object is the loss\n", 552 | " # Also let's save aside the reference to the last object\n", 553 | " # whose output is the model's output\n", 554 | " else:\n", 555 | " self.layers[i].prev = self.layers[i-1]\n", 556 | " self.layers[i].next = self.loss\n", 557 | " self.output_layer_activation = self.layers[i]\n", 558 | "\n", 559 | "\n", 560 | " # If layer contains an attribute called \"weights\",\n", 561 | " # it's a trainable layer -\n", 562 | " # add it to the list of trainable layers\n", 563 | " # We don't need to check for biases -\n", 564 | " # checking for weights is enough\n", 565 | " if hasattr(self.layers[i], 'weights'):\n", 566 | " self.trainable_layers.append(self.layers[i])\n", 567 | "\n", 568 | " # Update loss object with trainable layers\n", 569 | " if self.loss is not None:\n", 570 | " self.loss.remember_trainable_layers(\n", 571 | " self.trainable_layers\n", 572 | " )\n", 573 | "\n", 574 | " # If output activation is Softmax and\n", 575 | " # loss function is Categorical Cross-Entropy\n", 576 | " # create an object of combined activation\n", 577 | " # and loss function containing\n", 578 | " # faster gradient calculation\n", 579 | " if isinstance(self.layers[-1], Activation_Softmax) and \\\n", 580 | " isinstance(self.loss, Loss_CategoricalCrossentropy):\n", 581 | " # Create an object of combined activation\n", 582 | " # and loss functions\n", 583 | " self.softmax_classifier_output = \\\n", 584 | " Activation_Softmax_Loss_CategoricalCrossentropy()\n", 585 | "\n", 586 | " # Train the model\n", 587 | " def train(self, X, y, *, epochs=1, batch_size=None,\n", 588 | " print_every=1, validation_data=None):\n", 589 | "\n", 590 | " # Initialize accuracy object\n", 591 | " self.accuracy.init(y)\n", 592 | "\n", 593 | " # Default value if batch size is not being set\n", 594 | " train_steps = 1\n", 595 | "\n", 596 | " # Calculate number of steps\n", 597 | " if batch_size is not None:\n", 598 | " train_steps = len(X) // batch_size\n", 599 | " # Dividing rounds down. 
If there are some remaining\n", 600 | " # data but not a full batch, this won't include it\n", 601 | " # Add `1` to include this not full batch\n", 602 | " if train_steps * batch_size < len(X):\n", 603 | " train_steps += 1\n", 604 | "\n", 605 | " \n", 606 | " # Main training loop\n", 607 | " for epoch in range(1, epochs+1):\n", 608 | " train_dict[epoch] = {} # add this\n", 609 | " \n", 610 | " # Print epoch number\n", 611 | " print(f'epoch: {epoch}')\n", 612 | "\n", 613 | " # Reset accumulated values in loss and accuracy objects\n", 614 | " self.loss.new_pass()\n", 615 | " self.accuracy.new_pass()\n", 616 | "\n", 617 | " \n", 618 | " # Iterate over steps\n", 619 | " for step in range(train_steps):\n", 620 | " train_dict[epoch][step] = {} # add this\n", 621 | " # If batch size is not set -\n", 622 | " # train using one step and full dataset\n", 623 | " if batch_size is None:\n", 624 | " batch_X = X\n", 625 | " batch_y = y\n", 626 | "\n", 627 | " # Otherwise slice a batch\n", 628 | " else:\n", 629 | " batch_X = X[step*batch_size:(step+1)*batch_size]\n", 630 | " batch_y = y[step*batch_size:(step+1)*batch_size]\n", 631 | "\n", 632 | " # Perform the forward pass\n", 633 | " output = self.forward(batch_X, training=True)\n", 634 | "\n", 635 | " # Calculate loss\n", 636 | " data_loss, regularization_loss = \\\n", 637 | " self.loss.calculate(output, batch_y,\n", 638 | " include_regularization=True)\n", 639 | " loss = data_loss + regularization_loss\n", 640 | "\n", 641 | " # Get predictions and calculate an accuracy\n", 642 | " predictions = self.output_layer_activation.predictions(\n", 643 | " output)\n", 644 | " accuracy = self.accuracy.calculate(predictions,\n", 645 | " batch_y)\n", 646 | "\n", 647 | " # Perform backward pass\n", 648 | " self.backward(output, batch_y)\n", 649 | "\n", 650 | " # Optimize (update parameters)\n", 651 | " ########################################################\n", 652 | " ### THIS IS WHERE WE SAVE ALL THE DATA FROM TRAINING ###\n", 653 | " ########################################################\n", 654 | " self.optimizer.pre_update_params()\n", 655 | " for n, layer in enumerate(self.trainable_layers): # added enum/n\n", 656 | " self.optimizer.update_params(layer)\n", 657 | " train_dict[epoch][step][n] = {}\n", 658 | " train_dict[epoch][step][n][\"weights\"] = layer.weights.copy()\n", 659 | " train_dict[epoch][step][n][\"biases\"] = layer.biases.copy()\n", 660 | " train_dict[epoch][step][n][\"dweights\"] = layer.dweights.copy()\n", 661 | " train_dict[epoch][step][n][\"dbiases\"] = layer.dbiases.copy()\n", 662 | " train_dict[epoch][step][n][\"weight_momentums\"] = layer.weight_momentums.copy()\n", 663 | " train_dict[epoch][step][n][\"bias_momentums\"] = layer.bias_momentums.copy()\n", 664 | "\n", 665 | " self.optimizer.post_update_params()\n", 666 | "\n", 667 | " # Print a summary\n", 668 | " if not step % print_every or step == train_steps - 1:\n", 669 | " print(f'step: {step}, ' +\n", 670 | " f'acc: {accuracy:.3f}, ' +\n", 671 | " f'loss: {loss:.3f} (' +\n", 672 | " f'data_loss: {data_loss:.3f}, ' +\n", 673 | " f'reg_loss: {regularization_loss:.3f}), ' +\n", 674 | " f'lr: {self.optimizer.current_learning_rate}')\n", 675 | "\n", 676 | " # Get and print epoch loss and accuracy\n", 677 | " epoch_data_loss, epoch_regularization_loss = \\\n", 678 | " self.loss.calculate_accumulated(\n", 679 | " include_regularization=True)\n", 680 | " epoch_loss = epoch_data_loss + epoch_regularization_loss\n", 681 | " epoch_accuracy = self.accuracy.calculate_accumulated()\n", 682 | 
"\n", 683 | " print(f'training, ' +\n", 684 | " f'acc: {epoch_accuracy:.3f}, ' +\n", 685 | " f'loss: {epoch_loss:.3f} (' +\n", 686 | " f'data_loss: {epoch_data_loss:.3f}, ' +\n", 687 | " f'reg_loss: {epoch_regularization_loss:.3f}), ' +\n", 688 | " f'lr: {self.optimizer.current_learning_rate}')\n", 689 | "\n", 690 | " # If there is the validation data\n", 691 | " if validation_data is not None:\n", 692 | "\n", 693 | " # Evaluate the model:\n", 694 | " self.evaluate(*validation_data,\n", 695 | " batch_size=batch_size)\n", 696 | "\n", 697 | " # Evaluates the model using passed-in dataset\n", 698 | " def evaluate(self, X_val, y_val, *, batch_size=None):\n", 699 | "\n", 700 | " # Default value if batch size is not being set\n", 701 | " validation_steps = 1\n", 702 | "\n", 703 | " # Calculate number of steps\n", 704 | " if batch_size is not None:\n", 705 | " validation_steps = len(X_val) // batch_size\n", 706 | " # Dividing rounds down. If there are some remaining\n", 707 | " # data but not a full batch, this won't include it\n", 708 | " # Add `1` to include this not full batch\n", 709 | " if validation_steps * batch_size < len(X_val):\n", 710 | " validation_steps += 1\n", 711 | "\n", 712 | " # Reset accumulated values in loss\n", 713 | " # and accuracy objects\n", 714 | " self.loss.new_pass()\n", 715 | " self.accuracy.new_pass()\n", 716 | "\n", 717 | "\n", 718 | " # Iterate over steps\n", 719 | " for step in range(validation_steps):\n", 720 | "\n", 721 | " # If batch size is not set -\n", 722 | " # train using one step and full dataset\n", 723 | " if batch_size is None:\n", 724 | " batch_X = X_val\n", 725 | " batch_y = y_val\n", 726 | "\n", 727 | " # Otherwise slice a batch\n", 728 | " else:\n", 729 | " batch_X = X_val[\n", 730 | " step*batch_size:(step+1)*batch_size\n", 731 | " ]\n", 732 | " batch_y = y_val[\n", 733 | " step*batch_size:(step+1)*batch_size\n", 734 | " ]\n", 735 | "\n", 736 | " # Perform the forward pass\n", 737 | " output = self.forward(batch_X, training=False)\n", 738 | "\n", 739 | " # Calculate the loss\n", 740 | " self.loss.calculate(output, batch_y)\n", 741 | "\n", 742 | " # Get predictions and calculate an accuracy\n", 743 | " predictions = self.output_layer_activation.predictions(\n", 744 | " output)\n", 745 | " self.accuracy.calculate(predictions, batch_y)\n", 746 | "\n", 747 | " # Get and print validation loss and accuracy\n", 748 | " validation_loss = self.loss.calculate_accumulated()\n", 749 | " validation_accuracy = self.accuracy.calculate_accumulated()\n", 750 | "\n", 751 | " # Print a summary\n", 752 | " print(f'validation, ' +\n", 753 | " f'acc: {validation_accuracy:.3f}, ' +\n", 754 | " f'loss: {validation_loss:.3f}')\n", 755 | "\n", 756 | " # Predicts on the samples\n", 757 | " def predict(self, X, *, batch_size=None):\n", 758 | "\n", 759 | " # Default value if batch size is not being set\n", 760 | " prediction_steps = 1\n", 761 | "\n", 762 | " # Calculate number of steps\n", 763 | " if batch_size is not None:\n", 764 | " prediction_steps = len(X) // batch_size\n", 765 | "\n", 766 | " # Dividing rounds down. 
If there are some remaining\n", 767 | " # data but not a full batch, this won't include it\n", 768 | " # Add `1` to include this not full batch\n", 769 | " if prediction_steps * batch_size < len(X):\n", 770 | " prediction_steps += 1\n", 771 | "\n", 772 | " # Model outputs\n", 773 | " output = []\n", 774 | "\n", 775 | " # Iterate over steps\n", 776 | " for step in range(prediction_steps):\n", 777 | "\n", 778 | " # If batch size is not set -\n", 779 | " # train using one step and full dataset\n", 780 | " if batch_size is None:\n", 781 | " batch_X = X\n", 782 | "\n", 783 | " # Otherwise slice a batch\n", 784 | " else:\n", 785 | " batch_X = X[step*batch_size:(step+1)*batch_size]\n", 786 | "\n", 787 | " # Perform the forward pass\n", 788 | " batch_output = self.forward(batch_X, training=False)\n", 789 | "\n", 790 | " # Append batch prediction to the list of predictions\n", 791 | " output.append(batch_output)\n", 792 | "\n", 793 | " # Stack and return results\n", 794 | " return np.vstack(output)\n", 795 | "\n", 796 | " # Performs forward pass\n", 797 | " def forward(self, X, training):\n", 798 | "\n", 799 | " # Call forward method on the input layer\n", 800 | " # this will set the output property that\n", 801 | " # the first layer in \"prev\" object is expecting\n", 802 | " self.input_layer.forward(X, training)\n", 803 | "\n", 804 | " # Call forward method of every object in a chain\n", 805 | " # Pass output of the previous object as a parameter\n", 806 | " for layer in self.layers:\n", 807 | " layer.forward(layer.prev.output, training)\n", 808 | "\n", 809 | " # \"layer\" is now the last object from the list,\n", 810 | " # return its output\n", 811 | " return layer.output\n", 812 | "\n", 813 | "\n", 814 | " # Performs backward pass\n", 815 | " def backward(self, output, y):\n", 816 | "\n", 817 | " # If softmax classifier\n", 818 | " if self.softmax_classifier_output is not None:\n", 819 | " # First call backward method\n", 820 | " # on the combined activation/loss\n", 821 | " # this will set dinputs property\n", 822 | " self.softmax_classifier_output.backward(output, y)\n", 823 | "\n", 824 | " # Since we'll not call backward method of the last layer\n", 825 | " # which is Softmax activation\n", 826 | " # as we used combined activation/loss\n", 827 | " # object, let's set dinputs in this object\n", 828 | " self.layers[-1].dinputs = \\\n", 829 | " self.softmax_classifier_output.dinputs\n", 830 | "\n", 831 | " # Call backward method going through\n", 832 | " # all the objects but last\n", 833 | " # in reversed order passing dinputs as a parameter\n", 834 | " for layer in reversed(self.layers[:-1]):\n", 835 | " layer.backward(layer.next.dinputs)\n", 836 | "\n", 837 | " return\n", 838 | "\n", 839 | " # First call backward method on the loss\n", 840 | " # this will set dinputs property that the last\n", 841 | " # layer will try to access shortly\n", 842 | " self.loss.backward(output, y)\n", 843 | "\n", 844 | " # Call backward method going through all the objects\n", 845 | " # in reversed order passing dinputs as a parameter\n", 846 | " for layer in reversed(self.layers):\n", 847 | " layer.backward(layer.next.dinputs)\n", 848 | "\n", 849 | " # Retrieves and returns parameters of trainable layers\n", 850 | " def get_parameters(self):\n", 851 | "\n", 852 | " # Create a list for parameters\n", 853 | " parameters = []\n", 854 | "\n", 855 | " # Iterable trainable layers and get their parameters\n", 856 | " for layer in self.trainable_layers:\n", 857 | " parameters.append(layer.get_parameters())\n", 858 | 
"\n", 859 | " # Return a list\n", 860 | " return parameters\n", 861 | "\n", 862 | "\n", 863 | " # Updates the model with new parameters\n", 864 | " def set_parameters(self, parameters):\n", 865 | "\n", 866 | " # Iterate over the parameters and layers\n", 867 | " # and update each layers with each set of the parameters\n", 868 | " for parameter_set, layer in zip(parameters,\n", 869 | " self.trainable_layers):\n", 870 | " layer.set_parameters(*parameter_set)\n", 871 | "\n", 872 | " # Saves the parameters to a file\n", 873 | " def save_parameters(self, path):\n", 874 | "\n", 875 | " # Open a file in the binary-write mode\n", 876 | " # and save parameters into it\n", 877 | " with open(path, 'wb') as f:\n", 878 | " pickle.dump(self.get_parameters(), f)\n", 879 | "\n", 880 | " # Loads the weights and updates a model instance with them\n", 881 | " def load_parameters(self, path):\n", 882 | "\n", 883 | " # Open file in the binary-read mode,\n", 884 | " # load weights and update trainable layers\n", 885 | " with open(path, 'rb') as f:\n", 886 | " self.set_parameters(pickle.load(f))\n", 887 | "\n", 888 | " # Saves the model\n", 889 | " def save(self, path):\n", 890 | "\n", 891 | " # Make a deep copy of current model instance\n", 892 | " model = copy.deepcopy(self)\n", 893 | "\n", 894 | " # Reset accumulated values in loss and accuracy objects\n", 895 | " model.loss.new_pass()\n", 896 | " model.accuracy.new_pass()\n", 897 | "\n", 898 | " # Remove data from the input layer\n", 899 | " # and gradients from the loss object\n", 900 | " model.input_layer.__dict__.pop('output', None)\n", 901 | " model.loss.__dict__.pop('dinputs', None)\n", 902 | "\n", 903 | " # For each layer remove inputs, output and dinputs properties\n", 904 | " for layer in model.layers:\n", 905 | " for property in ['inputs', 'output', 'dinputs',\n", 906 | " 'dweights', 'dbiases']:\n", 907 | " layer.__dict__.pop(property, None)\n", 908 | "\n", 909 | " # Open a file in the binary-write mode and save the model\n", 910 | " with open(path, 'wb') as f:\n", 911 | " pickle.dump(model, f)\n", 912 | "\n", 913 | "\n", 914 | " # Loads and returns a model\n", 915 | " @staticmethod\n", 916 | " def load(path):\n", 917 | "\n", 918 | " # Open file in the binary-read mode, load a model\n", 919 | " with open(path, 'rb') as f:\n", 920 | " model = pickle.load(f)\n", 921 | "\n", 922 | " # Return a model\n", 923 | " return model\n", 924 | "\n", 925 | "\n", 926 | "# Loads a MNIST dataset\n", 927 | "def load_mnist_dataset(dataset, path):\n", 928 | "\n", 929 | " # Scan all the directories and create a list of labels\n", 930 | " labels = os.listdir(os.path.join(path, dataset))\n", 931 | "\n", 932 | " # Create lists for samples and labels\n", 933 | " X = []\n", 934 | " y = []\n", 935 | "\n", 936 | "\n", 937 | " # For each label folder\n", 938 | " for label in labels:\n", 939 | " print(label)\n", 940 | " # And for each image in given folder\n", 941 | " for file in tqdm(os.listdir(os.path.join(path, dataset, label))):\n", 942 | " # Read the image\n", 943 | " image = cv2.imread(\n", 944 | " os.path.join(path, dataset, label, file),\n", 945 | " cv2.IMREAD_UNCHANGED)\n", 946 | "\n", 947 | " # And append it and a label to the lists\n", 948 | " X.append(image)\n", 949 | " y.append(label)\n", 950 | "\n", 951 | " # Convert the data to proper numpy arrays and return\n", 952 | " return np.array(X), np.array(y).astype('uint8')\n", 953 | "\n", 954 | "\n", 955 | "# MNIST dataset (train + test)\n", 956 | "def create_data_mnist(path):\n", 957 | "\n", 958 | " # Load both sets 
separately\n", 959 | " X, y = load_mnist_dataset('train', path)\n", 960 | " X_test, y_test = load_mnist_dataset('test', path)\n", 961 | "\n", 962 | " # And return all the data\n", 963 | " return X, y, X_test, y_test\n", 964 | "\n", 965 | "\n", 966 | "# Create dataset\n", 967 | "X, y, X_test, y_test = create_data_mnist('fashion_mnist_images')\n", 968 | "# Shuffle the training dataset\n", 969 | "keys = np.array(range(X.shape[0]))\n", 970 | "np.random.shuffle(keys)\n", 971 | "X = X[keys]\n", 972 | "y = y[keys]\n", 973 | "\n", 974 | "# Scale and reshape samples\n", 975 | "X = (X.reshape(X.shape[0], -1).astype(np.float32) - 127.5) / 127.5\n", 976 | "X_test = (X_test.reshape(X_test.shape[0], -1).astype(np.float32) -\n", 977 | " 127.5) / 127.5\n" 978 | ] 979 | }, 980 | { 981 | "cell_type": "code", 982 | "execution_count": null, 983 | "metadata": {}, 984 | "outputs": [], 985 | "source": [ 986 | "# Instantiate the model\n", 987 | "model = Model()\n", 988 | "\n", 989 | "# Add layers\n", 990 | "model.add(Layer_Dense(X.shape[1], 32))\n", 991 | "model.add(Activation_ReLU())\n", 992 | "model.add(Layer_Dense(32, 32))\n", 993 | "model.add(Activation_ReLU())\n", 994 | "model.add(Layer_Dense(32, 10))\n", 995 | "model.add(Activation_Softmax())\n", 996 | "\n", 997 | "# Set loss, optimizer and accuracy objects\n", 998 | "model.set(\n", 999 | " loss=Loss_CategoricalCrossentropy(),\n", 1000 | " optimizer=Optimizer_Adam(decay=1e-3),\n", 1001 | " accuracy=Accuracy_Categorical()\n", 1002 | ")\n", 1003 | "\n", 1004 | "# Finalize the model\n", 1005 | "model.finalize()\n", 1006 | "\n", 1007 | "# Train the model\n", 1008 | "model.train(X, y, validation_data=(X_test, y_test),\n", 1009 | " epochs=5, batch_size=128, print_every=100)\n", 1010 | "\n", 1011 | "model.save(\"fashion_mnist.model\")\n", 1012 | "\n", 1013 | "\n", 1014 | "print(\"train_dict\", train_dict)\n", 1015 | "\n", 1016 | "# save train_dict with pickle\n", 1017 | "with open(\"train_dict.pkl\", \"wb\") as f:\n", 1018 | " pickle.dump(train_dict, f)" 1019 | ] 1020 | } 1021 | ], 1022 | "metadata": { 1023 | "kernelspec": { 1024 | "display_name": "Python 3", 1025 | "language": "python", 1026 | "name": "python3" 1027 | }, 1028 | "language_info": { 1029 | "codemirror_mode": { 1030 | "name": "ipython", 1031 | "version": 3 1032 | }, 1033 | "file_extension": ".py", 1034 | "mimetype": "text/x-python", 1035 | "name": "python", 1036 | "nbconvert_exporter": "python", 1037 | "pygments_lexer": "ipython3", 1038 | "version": "3.10.13" 1039 | } 1040 | }, 1041 | "nbformat": 4, 1042 | "nbformat_minor": 2 1043 | } 1044 | --------------------------------------------------------------------------------