├── Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow ├── Week 1 │ ├── C1W1_Assignment.ipynb │ └── utils.py ├── Week 2 │ ├── C1W2_Assignment.ipynb │ └── utils.py ├── Week 3 │ ├── C1W3_Assignment.ipynb │ └── utils.py └── Week 4 │ ├── C1W4_Assignment.ipynb │ ├── VGG.png │ └── utils.py ├── Course 2 - Custom and Distributed Training with TensorFlow ├── Week 1 │ └── C2W1_Assignment.ipynb ├── Week 2 │ └── C2W2_Assignment.ipynb ├── Week 3 │ └── C2W3_Assignment.ipynb └── Week 4 │ └── C2W4_Assignment.ipynb ├── Course 3 - Advanced Computer Vision with TensorFlow ├── Week 1 │ └── C3W1_Assignment.ipynb ├── Week 2 │ └── C3W2_Assignment.ipynb ├── Week 3 │ └── C3W3_Assignment.ipynb └── Week 4 │ └── C3W4_Assignment.ipynb ├── Course 4 - Generative Deep Learning with TensorFlow ├── Week 1 │ └── C4W1_Assignment.ipynb ├── Week 2 │ └── C4W2_Assignment.ipynb ├── Week 3 │ └── C4W3_Assignment.ipynb └── Week 4 │ ├── C4W4_Assignment.ipynb │ └── hands.zip └── README.md /Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 1/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from tensorflow.keras.models import Model 4 | from tensorflow.keras.layers import Dense, Input 5 | from sklearn.model_selection import train_test_split 6 | 7 | def test_loop(test_cases): 8 | 9 | success = 0 10 | fails = 0 11 | 12 | for test_case in test_cases: 13 | try: 14 | assert test_case["result"] == test_case["expected"] 15 | success += 1 16 | 17 | except: 18 | fails += 1 19 | print(f'{test_case["name"]}: {test_case["error_message"]}\nExpected: {test_case["expected"]}\nResult: {test_case["result"]}\nPlease open utils.py if you want to see the unit test here.\n') 20 | 21 | if fails == 0: 22 | print("\033[92m All public tests passed") 23 | 24 | else: 25 | print('\033[92m', success," Tests passed") 26 | print('\033[91m', fails, " Tests failed") 27 | raise Exception(test_case["error_message"]) 28 | 29 | def test_white_df(white_df): 30 | 31 | test_cases = [ 32 | { 33 | "name": "type_check", 34 | "result": type(white_df.is_red[0]), 35 | "expected": np.int64, 36 | "error_message": f'white_df.is_red has an incorrect type.' 37 | }, 38 | { 39 | "name": "output_check", 40 | "result": white_df.is_red[0], 41 | "expected": 0, 42 | "error_message": "white_df.is_red is not set correctly" 43 | }, 44 | { 45 | "name": "len_check", 46 | "result": len(white_df), 47 | "expected": 3961, 48 | "error_message": "Number of rows is incorrect. Please drop duplicates." 49 | } 50 | ] 51 | 52 | test_loop(test_cases) 53 | 54 | def test_red_df(red_df): 55 | 56 | test_cases = [ 57 | { 58 | "name": "type_check", 59 | "result": type(red_df.is_red[0]), 60 | "expected": np.int64, 61 | "error_message": f'red_df.is_red has an incorrect type.' 62 | }, 63 | { 64 | "name": "output_check", 65 | "result": red_df.is_red[0], 66 | "expected": 1, 67 | "error_message": "red_df.is_red is not set correctly" 68 | }, 69 | { 70 | "name": "len_check", 71 | "result": len(red_df), 72 | "expected": 1359, 73 | "error_message": "Number of rows is incorrect. Please drop duplicates." 74 | } 75 | ] 76 | 77 | test_loop(test_cases) 78 | 79 | def test_df_drop(df): 80 | 81 | test_cases = [ 82 | { 83 | "name": "df.alcohol[0]_check", 84 | "result": df.alcohol[0], 85 | "expected": 9.4, 86 | "error_message": f'Value is not as expected. Please check quality interval.' 
87 | }, 88 | { 89 | "name": "df.alcohol[100]_check", 90 | "result": df.alcohol[100], 91 | "expected": 10.9, 92 | "error_message": f'Value is not as expected. Please check quality interval.' 93 | } 94 | ] 95 | 96 | test_loop(test_cases) 97 | 98 | def test_data_sizes(train_size, test_size, val_size): 99 | 100 | test_cases = [ 101 | { 102 | "name": "train_test_size_check", 103 | "result": train_size > test_size, 104 | "expected": True, 105 | "error_message": f'train.size is too small. Please check implementation.' 106 | }, 107 | { 108 | "name": "train_val_size_check", 109 | "result": train_size > val_size, 110 | "expected": True, 111 | "error_message": f'train.size is too small. Please check implementation.' 112 | }, 113 | { 114 | "name": "test_val_size_check", 115 | "result": test_size > val_size, 116 | "expected": True, 117 | "error_message": f'test.size is too small. Please check implementation.' 118 | } 119 | ] 120 | 121 | test_loop(test_cases) 122 | 123 | def test_format_output(df, train_Y, val_Y, test_Y): 124 | 125 | train, test = train_test_split(df, test_size=0.2, random_state=1) 126 | train, val = train_test_split(train, test_size=0.2, random_state=1) 127 | 128 | test_cases = [ 129 | { 130 | "name": "train_Y[0]_check", 131 | "result": np.all(train_Y[0] == np.array(train.quality)), 132 | "expected": True, 133 | "error_message": f'train_Y[0] is not equal to train.quality. Please check implementation.' 134 | }, 135 | { 136 | "name": "train_Y[1]_check", 137 | "result": np.all(train_Y[1] == np.array(train.is_red)), 138 | "expected": True, 139 | "error_message": f'train_Y[1] is not equal to train.is_red. Please check implementation.' 140 | }, 141 | { 142 | "name": "val_Y[0]_check", 143 | "result": np.all(val_Y[0] == np.array(val.quality)), 144 | "expected": True, 145 | "error_message": f'train_Y[0] is not equal to val.quality. Please check implementation.' 146 | }, 147 | { 148 | "name": "val_Y[1]_check", 149 | "result": np.all(val_Y[1] == np.array(val.is_red)), 150 | "expected": True, 151 | "error_message": f'train_Y[1] is not equal to val.is_red. Please check implementation.' 152 | }, 153 | { 154 | "name": "test_Y[0]_check", 155 | "result": np.all(test_Y[0] == np.array(test.quality)), 156 | "expected": True, 157 | "error_message": f'test_Y[0] is not equal to test.quality. Please check implementation.' 158 | }, 159 | { 160 | "name": "test_Y[1]_check", 161 | "result": np.all(test_Y[1] == np.array(test.is_red)), 162 | "expected": True, 163 | "error_message": f'test_Y[1] is not equal to test.is_red. Please check implementation.' 164 | } 165 | ] 166 | 167 | test_loop(test_cases) 168 | 169 | def test_norm(norm_train_X, norm_val_X, norm_test_X, train, val, test): 170 | 171 | from pandas.core.frame import DataFrame 172 | 173 | test_cases = [ 174 | { 175 | "name": "norm_train_X_type_check", 176 | "result": type(norm_train_X), 177 | "expected": DataFrame, 178 | "error_message": f'norm_train_X has an incorrect type.' 179 | }, 180 | { 181 | "name": "norm_val_X_type_check", 182 | "result": type(norm_val_X), 183 | "expected": DataFrame, 184 | "error_message": f'norm_val_X has an incorrect type.' 185 | }, 186 | { 187 | "name": "norm_test_X_type_check", 188 | "result": type(norm_test_X), 189 | "expected": DataFrame, 190 | "error_message": f'norm_test_X has an incorrect type.' 191 | }, 192 | { 193 | "name": "norm_train_X_length_check", 194 | "result": len(norm_train_X), 195 | "expected": len(train), 196 | "error_message": f'norm_train_X has an incorrect length.' 
197 | }, 198 | { 199 | "name": "norm_val_X_length_check", 200 | "result": len(norm_val_X), 201 | "expected": len(val), 202 | "error_message": f'norm_val_X has an incorrect length.' 203 | }, 204 | { 205 | "name": "norm_test_X_length_check", 206 | "result": len(norm_test_X), 207 | "expected": len(test), 208 | "error_message": f'norm_test_X has an incorrect length.' 209 | }, 210 | ] 211 | 212 | test_loop(test_cases) 213 | 214 | def test_base_model(base_model): 215 | 216 | test_inputs = tf.keras.layers.Input(shape=(11,)) 217 | test_output = base_model(test_inputs) 218 | test_model = Model(inputs=test_inputs, outputs=test_output) 219 | 220 | test_cases = [ 221 | { 222 | "name": "return_type_check", 223 | "result": type(test_output), 224 | "expected": tf.Tensor, 225 | "error_message": 'Return type is incorrect. Please check implementation.' 226 | }, 227 | { 228 | "name": "return_shape_check", 229 | "result": str(test_output.shape), 230 | "expected": '(None, 128)', 231 | "error_message": 'Return shape is incorrect. Please check implementation.' 232 | }, 233 | { 234 | "name": "tensor_dtype_check", 235 | "result": str(test_output.dtype), 236 | "expected": "", 237 | "error_message": 'model dtype is incorrect. Please check implementation.' 238 | }, 239 | { 240 | "name": "base_model_num_layers_check", 241 | "result": len(test_model.layers), 242 | "expected": 3, 243 | "error_message": 'There are too many layers. Please check implementation.' 244 | }, 245 | { 246 | "name": "base_model_layer1_check", 247 | "result": type(test_model.layers[-2]), 248 | "expected": Dense, 249 | "error_message": 'First layer type is incorrect. Please check implementation.' 250 | }, 251 | { 252 | "name": "base_model_layer2_check", 253 | "result": type(test_model.layers[-1]), 254 | "expected": Dense, 255 | "error_message": 'Output layer type is incorrect. Please check implementation.' 256 | }, 257 | ] 258 | 259 | test_loop(test_cases) 260 | 261 | def test_final_model(final_model): 262 | 263 | test_inputs = tf.keras.layers.Input(shape=(11,)) 264 | test_output = final_model(test_inputs) 265 | 266 | test_cases = [ 267 | { 268 | "name": "return_type_check", 269 | "result": type(test_output), 270 | "expected": tf.keras.Model, 271 | "error_message": 'Return type is incorrect. Please check implementation.' 272 | }, 273 | { 274 | "name": "layer_3_activation_check", 275 | "result": test_output.layers[4].activation, 276 | "expected": tf.keras.activations.sigmoid, 277 | "error_message": 'wine_quality layer has an incorrect activation. Please check implementation.' 278 | }, 279 | ] 280 | 281 | test_loop(test_cases) 282 | 283 | def test_model_compile(model): 284 | 285 | from tensorflow.python.keras.metrics import MeanMetricWrapper 286 | 287 | test_cases = [ 288 | { 289 | "name": "metrics_0_check", 290 | "result": type(model.metrics[0]), 291 | "expected": tf.keras.metrics.RootMeanSquaredError, 292 | "error_message": 'wine quality metrics is incorrect. Please check implementation.' 293 | }, 294 | { 295 | "name": "metrics_1_check", 296 | "result": (model.metrics[1].name == 'wine_type_accuracy') or 297 | (model.metrics[1].name == 'wine_type_binary_accuracy'), 298 | "expected": True, 299 | "error_message": f'wine type metrics: {model.metrics[1].name} is incorrect. Please check implementation.' 
300 | }, 301 | { 302 | "name": "wine_type_loss_check", 303 | "result": (model.loss['wine_type'] == 'binary_crossentropy') or 304 | (model.loss['wine_type'].name == 'binary_crossentropy') or 305 | (str(model.loss['wine_type']).split()[1] == 'binary_crossentropy'), 306 | "expected": True, 307 | "error_message": f'wine type loss: {model.loss["wine_type"]} is incorrect. Please check implementation.' 308 | }, 309 | { 310 | "name": "wine_quality_loss_check", 311 | "result": (model.loss['wine_quality'] in ['mse', 'mean_squared_error']) or 312 | (str(model.loss['wine_quality']).split()[1] == 'mean_squared_error') or 313 | (model.loss['wine_quality'].name == 'mean_squared_error'), 314 | "expected": True, 315 | "error_message": f'wine quality loss: {model.loss["wine_type"]} is incorrect. Please check implementation.' 316 | }, 317 | ] 318 | 319 | test_loop(test_cases) 320 | 321 | def test_history(history): 322 | 323 | vars_history = vars(history) 324 | 325 | test_cases = [ 326 | { 327 | "name": "type_check", 328 | "result": type(history), 329 | "expected": tf.keras.callbacks.History, 330 | "error_message": 'history type is incorrect. Please check model.fit().' 331 | }, 332 | { 333 | "name": "params_samples_check", 334 | "result": vars_history['params']['samples'], 335 | "expected": 3155, 336 | "error_message": 'Training samples is incorrect. Please check arguments to model.fit().' 337 | }, 338 | { 339 | "name": "params_do_validation_check", 340 | "result": vars_history['params']['do_validation'], 341 | "expected": True, 342 | "error_message": 'No validation data is present. Please check arguments to model.fit().' 343 | }, 344 | ] 345 | 346 | test_loop(test_cases) 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | -------------------------------------------------------------------------------- /Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 2/C1W2_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# W2 Assignment: Creating a Custom Loss Function" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This short exercise will require you to write a simple linear regression neural network that is trained on two arrays: $xs$ (inputs) and $ys$ (labels), where the relationship between each corresponding element is $y=2x-1$.\n", 15 | "\n", 16 | "\n", 17 | "$xs = [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]$\n", 18 | "\n", 19 | "$ys = [-3.0, -1.0, 1.0, 3.0, 5.0, 7.0]$\n", 20 | "\n", 21 | "\n", 22 | "You will need to implement a custom loss function that returns the root mean square error (RMSE) of $y_{true} - y_{pred}$. Let's begin!" 
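As a quick sanity check on the formula, the RMSE of the test vectors that utils.py later feeds to my_rmse can be computed directly with NumPy. A minimal sketch (plain NumPy, outside the graded cells, with illustrative variable names):

# RMSE of the vectors used by utils.test_my_rmse; should match the expected value 1.7795130420052185
import numpy as np

y_true = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0])
y_pred = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0])

rmse = np.sqrt(np.mean(np.square(y_true - y_pred)))
print(rmse)  # ~1.7795 = sqrt(mean([4, 1, 0, 1, 4, 9])) = sqrt(19/6)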
23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 1, 28 | "metadata": { 29 | "colab": {}, 30 | "colab_type": "code", 31 | "id": "0pajvrhrInPa" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import tensorflow as tf\n", 36 | "import numpy as np\n", 37 | "from tensorflow import keras\n", 38 | "from tensorflow.keras import backend as K\n", 39 | "\n", 40 | "import utils" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# inputs\n", 50 | "xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)\n", 51 | "\n", 52 | "# labels. relationship with the inputs above is y=2x-1.\n", 53 | "ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": {}, 59 | "source": [ 60 | "### Define the custom loss function (TODO)\n", 61 | "Define the custom loss function below called `my_rmse()` that returns the RMSE between the target (`y_true`) and prediction (`y_pred`). \n", 62 | "\n", 63 | "You will return $\\sqrt{error}$, where $error$ = $mean((y_{true} - y_{pred})^2)$\n", 64 | "- error: the difference between the true label and predicted label.\n", 65 | "- sqr_error: the square of the error.\n", 66 | "- mean_sqr_error: the mean of the square of the error\n", 67 | "- sqrt_mean_sqr_error: the square root of hte mean of the square of the error (the root mean squared error).\n", 68 | "- Please use `K.mean`, `K.square`, and `K.sqrt`\n", 69 | "- The steps are broken down into separate lines of code for clarity. Feel free to combine them, and just remember to return the root mean squared error." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 3, 75 | "metadata": { 76 | "colab": {}, 77 | "colab_type": "code", 78 | "deletable": false, 79 | "id": "bXNGIkq2Azmf", 80 | "nbgrader": { 81 | "cell_type": "code", 82 | "checksum": "8301324615aba1e02e1f756b4bf1b092", 83 | "grade": false, 84 | "grade_id": "cell-31648b482908e493", 85 | "locked": false, 86 | "schema_version": 3, 87 | "solution": true, 88 | "task": false 89 | } 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "# Please uncomment all lines in this cell and replace those marked with `# YOUR CODE HERE`.\n", 94 | "# You can select all lines in this code cell with Ctrl+A (Windows/Linux) or Cmd+A (Mac), then press Ctrl+/ (Windows/Linux) or Cmd+/ (Mac) to uncomment.\n", 95 | "\n", 96 | "\n", 97 | "\n", 98 | "def my_rmse(y_true, y_pred):\n", 99 | " error = y_true - y_pred\n", 100 | " sqr_error = K.square(error)\n", 101 | " mean_sqr_error = K.mean(sqr_error)\n", 102 | " sqrt_mean_sqr_error = K.sqrt(mean_sqr_error)\n", 103 | " return sqrt_mean_sqr_error" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 4, 109 | "metadata": { 110 | "deletable": false, 111 | "editable": false, 112 | "nbgrader": { 113 | "cell_type": "code", 114 | "checksum": "afa4ace3428496820b8b6fb542ca5117", 115 | "grade": true, 116 | "grade_id": "cell-578f76b36f8ee858", 117 | "locked": true, 118 | "points": 1, 119 | "schema_version": 3, 120 | "solution": false, 121 | "task": false 122 | } 123 | }, 124 | "outputs": [ 125 | { 126 | "name": "stdout", 127 | "output_type": "stream", 128 | "text": [ 129 | "\u001b[92m All public tests passed\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "utils.test_my_rmse(my_rmse)\n" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### Define a model using the custom loss function 
(TODO)\n", 142 | "Similar to the ungraded labs, you will define a simple model and pass the function you just coded as the loss.\n", 143 | "- When compiling the model, you'll choose the `sgd` optimizer and set the `loss` parameter to the custom loss function that you just defined.\n", 144 | "- For grading purposes, please leave the other parameter values as is." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 6, 150 | "metadata": { 151 | "colab": { 152 | "base_uri": "https://localhost:8080/", 153 | "height": 34 154 | }, 155 | "colab_type": "code", 156 | "deletable": false, 157 | "id": "2eY7fw0EHwda", 158 | "nbgrader": { 159 | "cell_type": "code", 160 | "checksum": "8af71f8408d04ff7abaf41eb3414c8f6", 161 | "grade": false, 162 | "grade_id": "cell-5a29bb71c93124fc", 163 | "locked": false, 164 | "schema_version": 3, 165 | "solution": true, 166 | "task": false 167 | }, 168 | "outputId": "a3ea92e4-050e-463d-82c9-9b149554ae41" 169 | }, 170 | "outputs": [ 171 | { 172 | "name": "stdout", 173 | "output_type": "stream", 174 | "text": [ 175 | "[[19.076647]]\n" 176 | ] 177 | } 178 | ], 179 | "source": [ 180 | "# Please uncomment all lines in this cell and replace those marked with `# YOUR CODE HERE`.\n", 181 | "# You can select all lines in this code cell with Ctrl+A (Windows/Linux) or Cmd+A (Mac), then press Ctrl+/ (Windows/Linux) or Cmd+/ (Mac) to uncomment.\n", 182 | "\n", 183 | "\n", 184 | "\n", 185 | "# define the model architecture\n", 186 | "model = tf.keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])\n", 187 | "\n", 188 | "# use the function you just coded as the loss\n", 189 | "model.compile(optimizer='sgd', loss=my_rmse)\n", 190 | " \n", 191 | "# train the model \n", 192 | "model.fit(xs, ys, epochs=500,verbose=0)\n", 193 | " \n", 194 | "# test with a sample input\n", 195 | "print(model.predict([10.0]))" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 7, 201 | "metadata": { 202 | "deletable": false, 203 | "editable": false, 204 | "nbgrader": { 205 | "cell_type": "code", 206 | "checksum": "b8da4dc42fa87a1722251adddae9516c", 207 | "grade": true, 208 | "grade_id": "cell-e46bc4e00375b387", 209 | "locked": true, 210 | "points": 1, 211 | "schema_version": 3, 212 | "solution": false, 213 | "task": false 214 | } 215 | }, 216 | "outputs": [ 217 | { 218 | "name": "stdout", 219 | "output_type": "stream", 220 | "text": [ 221 | "\u001b[92m All public tests passed\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "utils.test_model_loss(model.loss)\n" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [] 235 | } 236 | ], 237 | "metadata": { 238 | "colab": { 239 | "include_colab_link": true, 240 | "name": "exercise-answer.ipynb", 241 | "provenance": [] 242 | }, 243 | "kernelspec": { 244 | "display_name": "Python 3", 245 | "language": "python", 246 | "name": "python3" 247 | }, 248 | "language_info": { 249 | "codemirror_mode": { 250 | "name": "ipython", 251 | "version": 3 252 | }, 253 | "file_extension": ".py", 254 | "mimetype": "text/x-python", 255 | "name": "python", 256 | "nbconvert_exporter": "python", 257 | "pygments_lexer": "ipython3", 258 | "version": "3.7.6" 259 | } 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 4 263 | } 264 | -------------------------------------------------------------------------------- /Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 2/utils.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.model_selection import train_test_split 3 | from tensorflow.python.framework.ops import EagerTensor 4 | from numpy import int64 5 | 6 | def test_loop(test_cases): 7 | 8 | success = 0 9 | fails = 0 10 | 11 | for test_case in test_cases: 12 | try: 13 | assert test_case["result"] == test_case["expected"] 14 | success += 1 15 | 16 | except: 17 | fails += 1 18 | print(f'{test_case["name"]}: {test_case["error_message"]}\nExpected: {test_case["expected"]}\nResult: {test_case["result"]}\n') 19 | 20 | if fails == 0: 21 | print("\033[92m All public tests passed") 22 | 23 | else: 24 | print('\033[92m', success," Tests passed") 25 | print('\033[91m', fails, " Tests failed") 26 | raise Exception(test_case["error_message"]) 27 | 28 | 29 | def test_my_rmse(my_rmse): 30 | 31 | test_y_true = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float) 32 | test_y_pred = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float) 33 | 34 | expected = 1.7795130420052185 35 | 36 | result = my_rmse(test_y_true, test_y_pred) 37 | 38 | test_cases = [ 39 | { 40 | "name": "type_check", 41 | "result": type(result), 42 | "expected": EagerTensor, 43 | "error_message": f'output has an incorrect type.' 44 | }, 45 | { 46 | "name": "output_check", 47 | "result": result, 48 | "expected": expected, 49 | "error_message": "Output is incorrect. Please check the equation." 50 | } 51 | ] 52 | 53 | test_loop(test_cases) 54 | 55 | def test_model_loss(model_loss): 56 | 57 | test_y_true = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float) 58 | test_y_pred = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float) 59 | 60 | expected = 1.7795130420052185 61 | 62 | result = model_loss(test_y_true, test_y_pred) 63 | 64 | test_cases = [ 65 | { 66 | "name": "type_check", 67 | "result": type(result), 68 | "expected": EagerTensor, 69 | "error_message": f'output has an incorrect type.' 70 | }, 71 | { 72 | "name": "output_check", 73 | "result": result, 74 | "expected": expected, 75 | "error_message": "Output is incorrect. Please check the equation." 76 | } 77 | ] 78 | 79 | test_loop(test_cases) -------------------------------------------------------------------------------- /Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 3/C1W3_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Week 3 Assignment: Implement a Quadratic Layer" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "In this week's programming exercise, you will build a custom quadratic layer which computes $y = ax^2 + bx + c$. Similar to the ungraded lab, this layer will be plugged into a model that will be trained on the MNIST dataset. Let's get started!" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Imports" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import tensorflow as tf\n", 31 | "from tensorflow.keras.layers import Layer\n", 32 | "\n", 33 | "import utils" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "### Define the quadratic layer (TODO)\n", 41 | "Implement a simple quadratic layer. It has 3 state variables: $a$, $b$ and $c$. 
The computation returned is $ax^2 + bx + c$. Make sure it can also accept an activation function.\n", 42 | "\n", 43 | "#### `__init__`\n", 44 | "- call `super(my_fun, self)` to access the base class of `my_fun`, and call the `__init__()` function to initialize that base class. In this case, `my_fun` is `SimpleQuadratic` and its base class is `Layer`.\n", 45 | "- self.units: set this using one of the function parameters.\n", 46 | "- self.activation: The function parameter `activation` will be passed in as a string. To get the tensorflow object associated with the string, please use `tf.keras.activations.get()` \n", 47 | "\n", 48 | "\n", 49 | "#### `build`\n", 50 | "The following are suggested steps for writing your code. If you prefer to use fewer lines to implement it, feel free to do so. Either way, you'll want to set `self.a`, `self.b` and `self.c`.\n", 51 | "\n", 52 | "- a_init: set this to tensorflow's `random_normal_initializer()`\n", 53 | "- a_init_val: Use the `random_normal_initializer()` that you just created and invoke it, setting the `shape` and `dtype`.\n", 54 | " - The `shape` of `a` should have its row dimension equal to the last dimension of `input_shape`, and its column dimension equal to the number of units in the layer. \n", 55 | " - This is because you'll be matrix multiplying x^2 * a, so the dimensions should be compatible.\n", 56 | " - set the dtype to 'float32'\n", 57 | "- self.a: create a tensor using tf.Variable, setting the initial_value and set trainable to True.\n", 58 | "\n", 59 | "- b_init, b_init_val, and self.b: these will be set in the same way that you implemented a_init, a_init_val and self.a\n", 60 | "- c_init: set this to `tf.zeros_initializer`.\n", 61 | "- c_init_val: Set this by calling the tf.zeros_initializer that you just instantiated, and set the `shape` and `dtype`\n", 62 | " - shape: This will be a vector equal to the number of units. This expects a tuple, and remember that a tuple `(9,)` includes a comma.\n", 63 | " - dtype: set to 'float32'.\n", 64 | "- self.c: create a tensor using tf.Variable, and set the parameters `initial_value` and `trainable`.\n", 65 | "\n", 66 | "#### `call`\n", 67 | "The following section performs the multiplication x^2*a + x*b + c. The steps are broken down for clarity, but you can also perform this calculation in fewer lines if you prefer.\n", 68 | "- x_squared: use tf.math.square()\n", 69 | "- x_squared_times_a: use tf.matmul(). 
\n", 70 | " - If you see an error saying `InvalidArgumentError: Matrix size-incompatible`, please check the order of the matrix multiplication to make sure that the matrix dimensions line up.\n", 71 | "- x_times_b: use tf.matmul().\n", 72 | "- x2a_plus_xb_plus_c: add the three terms together.\n", 73 | "- activated_x2a_plus_xb_plus_c: apply the class's `activation` to the sum of the three terms.\n" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 6, 79 | "metadata": { 80 | "colab": {}, 81 | "colab_type": "code", 82 | "deletable": false, 83 | "id": "Ga20PttZFXm4", 84 | "nbgrader": { 85 | "cell_type": "code", 86 | "checksum": "0df055c519bde80c488c22be89fdb8ef", 87 | "grade": false, 88 | "grade_id": "cell-c302ddc177c098f8", 89 | "locked": false, 90 | "schema_version": 3, 91 | "solution": true, 92 | "task": false 93 | } 94 | }, 95 | "outputs": [], 96 | "source": [ 97 | "# Please uncomment all lines in this cell and replace those marked with `# YOUR CODE HERE`.\n", 98 | "# You can select all lines in this code cell with Ctrl+A (Windows/Linux) or Cmd+A (Mac), then press Ctrl+/ (Windows/Linux) or Cmd+/ (Mac) to uncomment.\n", 99 | "\n", 100 | "\n", 101 | "\n", 102 | "class SimpleQuadratic(Layer):\n", 103 | "\n", 104 | " def __init__(self, units=32, activation=None):\n", 105 | " '''Initializes the class and sets up the internal variables'''\n", 106 | " # YOUR CODE HERE\n", 107 | " super(SimpleQuadratic, self).__init__()\n", 108 | " self.units = units\n", 109 | " self.activation = tf.keras.activations.get(activation)\n", 110 | " \n", 111 | " def build(self, input_shape):\n", 112 | " '''Create the state of the layer (weights)'''\n", 113 | " # a and b should be initialized with random normal, c (or the bias) with zeros.\n", 114 | " # remember to set these as trainable.\n", 115 | " # YOUR CODE HERE\n", 116 | " a_init = tf.random_normal_initializer()\n", 117 | " b_init = tf.random_normal_initializer()\n", 118 | " c_init = tf.zeros_initializer()\n", 119 | " \n", 120 | " self.a = tf.Variable(name = \"kernel\", initial_value = a_init(shape= (input_shape[-1], self.units), \n", 121 | " dtype= \"float32\"), trainable = True)\n", 122 | " \n", 123 | " self.b = tf.Variable(name = \"kernel\", initial_value = b_init(shape= (input_shape[-1], self.units), \n", 124 | " dtype= \"float32\"), trainable = True)\n", 125 | " \n", 126 | " self.c = tf.Variable(name = \"bias\", initial_value = c_init(shape= (self.units,), \n", 127 | " dtype= \"float32\"), trainable = True)\n", 128 | " \n", 129 | " def call(self, inputs):\n", 130 | " '''Defines the computation from inputs to outputs'''\n", 131 | " # YOUR CODE HERE\n", 132 | " result = tf.matmul(tf.math.square(inputs), self.a) + tf.matmul(inputs, self.b) + self.c\n", 133 | " return self.activation(result)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "Test your implementation" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 7, 146 | "metadata": { 147 | "deletable": false, 148 | "editable": false, 149 | "nbgrader": { 150 | "cell_type": "code", 151 | "checksum": "0965bec4878a263cf06b286cd0fe3b2a", 152 | "grade": true, 153 | "grade_id": "cell-c3ebc4cccbb7f454", 154 | "locked": true, 155 | "points": 1, 156 | "schema_version": 3, 157 | "solution": false, 158 | "task": false 159 | } 160 | }, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "\u001b[92m All public tests passed\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 
171 | "utils.test_simple_quadratic(SimpleQuadratic)\n" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "Train your model with the `SimpleQuadratic` layer that you just implemented." 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 8, 184 | "metadata": { 185 | "colab": {}, 186 | "colab_type": "code", 187 | "id": "14tl1CluExjJ" 188 | }, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz\n", 195 | "11493376/11490434 [==============================] - 0s 0us/step\n", 196 | "Train on 60000 samples\n", 197 | "Epoch 1/5\n", 198 | "60000/60000 [==============================] - 12s 199us/sample - loss: 0.2683 - accuracy: 0.9220\n", 199 | "Epoch 2/5\n", 200 | "60000/60000 [==============================] - 12s 193us/sample - loss: 0.1347 - accuracy: 0.9592\n", 201 | "Epoch 3/5\n", 202 | "60000/60000 [==============================] - 11s 192us/sample - loss: 0.1005 - accuracy: 0.9688\n", 203 | "Epoch 4/5\n", 204 | "60000/60000 [==============================] - 12s 192us/sample - loss: 0.0806 - accuracy: 0.9746\n", 205 | "Epoch 5/5\n", 206 | "60000/60000 [==============================] - 11s 192us/sample - loss: 0.0711 - accuracy: 0.9774\n", 207 | "10000/10000 [==============================] - 1s 62us/sample - loss: 0.0749 - accuracy: 0.9775\n" 208 | ] 209 | }, 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "[0.07491098121992545, 0.9775]" 214 | ] 215 | }, 216 | "execution_count": 8, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "# THIS CODE SHOULD RUN WITHOUT MODIFICATION\n", 223 | "# AND SHOULD RETURN TRAINING/TESTING ACCURACY at 97%+\n", 224 | "\n", 225 | "mnist = tf.keras.datasets.mnist\n", 226 | "\n", 227 | "(x_train, y_train),(x_test, y_test) = mnist.load_data()\n", 228 | "x_train, x_test = x_train / 255.0, x_test / 255.0\n", 229 | "\n", 230 | "model = tf.keras.models.Sequential([\n", 231 | " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", 232 | " SimpleQuadratic(128, activation='relu'),\n", 233 | " tf.keras.layers.Dropout(0.2),\n", 234 | " tf.keras.layers.Dense(10, activation='softmax')\n", 235 | "])\n", 236 | "\n", 237 | "model.compile(optimizer='adam',\n", 238 | " loss='sparse_categorical_crossentropy',\n", 239 | " metrics=['accuracy'])\n", 240 | "\n", 241 | "model.fit(x_train, y_train, epochs=5)\n", 242 | "model.evaluate(x_test, y_test)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [] 251 | } 252 | ], 253 | "metadata": { 254 | "colab": { 255 | "authorship_tag": "ABX9TyMTFXTWT0EUVuqg6u/LBbJK", 256 | "collapsed_sections": [], 257 | "include_colab_link": true, 258 | "name": "QuadraticLayer_Answer.ipynb", 259 | "provenance": [] 260 | }, 261 | "kernelspec": { 262 | "display_name": "Python 3", 263 | "language": "python", 264 | "name": "python3" 265 | }, 266 | "language_info": { 267 | "codemirror_mode": { 268 | "name": "ipython", 269 | "version": 3 270 | }, 271 | "file_extension": ".py", 272 | "mimetype": "text/x-python", 273 | "name": "python", 274 | "nbconvert_exporter": "python", 275 | "pygments_lexer": "ipython3", 276 | "version": "3.7.6" 277 | } 278 | }, 279 | "nbformat": 4, 280 | "nbformat_minor": 4 281 | } 282 | -------------------------------------------------------------------------------- /Course 1 - 
Custom Models, Layers, and Loss Functions with TensorFlow/Week 3/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.ops.resource_variable_ops import ResourceVariable 3 | import numpy as np 4 | 5 | def test_loop(test_cases): 6 | 7 | success = 0 8 | fails = 0 9 | 10 | for test_case in test_cases: 11 | try: 12 | assert test_case["result"] == test_case["expected"] 13 | success += 1 14 | 15 | except: 16 | fails += 1 17 | print(f'{test_case["name"]}: {test_case["error_message"]}\nExpected: {test_case["expected"]}\nResult: {test_case["result"]}\n') 18 | 19 | if fails == 0: 20 | print("\033[92m All public tests passed") 21 | 22 | else: 23 | print('\033[92m', success," Tests passed") 24 | print('\033[91m', fails, " Tests failed") 25 | raise Exception(test_case["error_message"]) 26 | 27 | 28 | def test_simple_quadratic(SimpleQuadratic): 29 | 30 | expected_units = 128 31 | expected_activation_function = tf.keras.activations.relu 32 | expected_activation_string = 'relu' 33 | shape_0 = 8 34 | shape_1 = 2 35 | 36 | test_layer = SimpleQuadratic(units=expected_units, activation=expected_activation_string) 37 | 38 | test_layer.build((shape_0, shape_1)) 39 | 40 | test_inputs = tf.random.uniform((shape_0, shape_1)) 41 | 42 | test_call_value = test_layer.call(test_inputs) 43 | 44 | a_type = type(test_layer.a) 45 | b_type = type(test_layer.b) 46 | c_type = type(test_layer.c) 47 | 48 | test_layer_forced_weights = SimpleQuadratic(units=1, activation=None) 49 | test_layer_forced_weights.a = tf.constant([2.0], dtype='float32', shape=(1,1)) 50 | test_layer_forced_weights.b = tf.constant([2.0], dtype='float32', shape=(1,1)) 51 | test_layer_forced_weights.c = tf.constant([2.0], dtype='float32', shape=(1,1)) 52 | test_layer_forced_weights_inputs = tf.constant([4.0], dtype='float32', shape=(1,1)) 53 | test_layer_forced_weights_expected_output = 42.0 54 | 55 | test_cases = [ 56 | { 57 | "name": "units_check", 58 | "result": test_layer.units, 59 | "expected": expected_units, 60 | "error_message": f'Incorrect number of units.' 61 | }, 62 | { 63 | "name": "activations_check", 64 | "result": test_layer.activation, 65 | "expected": tf.keras.activations.relu, 66 | "error_message": "Got different activation function." 67 | }, 68 | { 69 | "name": "a_type_check", 70 | "result": a_type, 71 | "expected": ResourceVariable, 72 | "error_message": f'State variable a is of different type. Expected ResourceVariable but got {a_type}' 73 | }, 74 | { 75 | "name": "b_type_check", 76 | "result": b_type, 77 | "expected": ResourceVariable, 78 | "error_message": f'State variable b is of different type. Expected ResourceVariable but got {b_type}' 79 | }, 80 | { 81 | "name": "c_type_check", 82 | "result": c_type, 83 | "expected": ResourceVariable, 84 | "error_message": f'State variable c is of different type. Expected ResourceVariable but got {c_type}' 85 | }, 86 | { 87 | "name": "a_initializer_check", 88 | "result": test_layer.a.numpy().sum() != 0, 89 | "expected": True, 90 | "error_message": f'State variable a is not initialized randomly. Please check initializer used.' 91 | }, 92 | { 93 | "name": "b_initializer_check", 94 | "result": test_layer.b.numpy().sum() != 0, 95 | "expected": True, 96 | "error_message": f'State variable b is not initialized randomly. Please check initializer used.' 
97 | }, 98 | { 99 | "name": "c_initializer_check", 100 | "result": test_layer.c.numpy().sum() == 0, 101 | "expected": True, 102 | "error_message": f'State variable c is not initialized to zeroes. Please check initializer used.' 103 | }, 104 | { 105 | "name": "output_check", 106 | "result": test_layer_forced_weights.call(test_layer_forced_weights_inputs).numpy()[0][0], 107 | "expected": test_layer_forced_weights_expected_output, 108 | "error_message": f'Expected output is incorrect. Please check operations in the call() method.' 109 | } 110 | ] 111 | 112 | test_loop(test_cases) 113 | -------------------------------------------------------------------------------- /Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 4/C1W4_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "GC7zSrbOWiz0" 8 | }, 9 | "source": [ 10 | "# Week 4 Assignment: Create a VGG network" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "In this exercise, you will build a class that implements a [VGG network](https://towardsdatascience.com/vgg-neural-networks-the-next-step-after-alexnet-3f91fa9ffe2c) and then train it to classify images of cats and dogs. The model will look something like this:\n", 18 | "\n", 19 | "\n", 20 | "\n", 21 | "It is primarily made up of a series of Conv2D layers followed by a softmax activated layers to classify the image. As you can see, this will be a handful and the code will look huge if you specify each layer individually. As shown in the lectures, you can instead use model subclassing to build complex architectures. You can encapsulate repeating parts of a network then reuse that code when building the final model. You will get to practice that in this exercise. Let's get started!" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 1, 27 | "metadata": { 28 | "colab": {}, 29 | "colab_type": "code", 30 | "id": "Z01I5nj0NAOu" 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import tensorflow as tf\n", 35 | "import tensorflow_datasets as tfds\n", 36 | "import utils" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Create named-variables dynamically\n", 44 | "\n", 45 | "In this assignment, you will see the use of the Python function `vars()`. This will allow you to use a for loop to define and set multiple variables with a similar name, such as var1, var2, var3. \n", 46 | "\n", 47 | "Please go through the following examples to get familiar with `vars()`, as you will use it when building the VGG model.\n", 48 | "- You'll start by defining a class `MyClass`\n", 49 | "- It contains one variable `var1`. \n", 50 | "- Create an object of type `MyClass`." 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 2, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# Define a small class MyClass\n", 60 | "class MyClass:\n", 61 | " def __init__(self):\n", 62 | " # One class variable 'a' is set to 1\n", 63 | " self.var1 = 1\n", 64 | "\n", 65 | "# Create an object of type MyClass()\n", 66 | "my_obj = MyClass()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Python classes have an attribute called `__dict__`.\n", 74 | "- `__dict__` is a Python dictionary that contains the object's instance variables and values as key value pairs." 
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "{'var1': 1}" 86 | ] 87 | }, 88 | "execution_count": 3, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "my_obj.__dict__" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "If you call `vars()` and pass in an object, it will call the object's `__dict__` attribute, which is a Python dictionary containing the object's instance variables and their values as ke" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 4, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "{'var1': 1}" 113 | ] 114 | }, 115 | "execution_count": 4, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "vars(my_obj)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "You may be familiar with adding new variable like this:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 5, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "{'var1': 1, 'var2': 2}" 140 | ] 141 | }, 142 | "execution_count": 5, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "# Add a new instance variable and give it a value\n", 149 | "my_obj.var2 = 2\n", 150 | "\n", 151 | "# Calls vars() again to see the object's instance variables\n", 152 | "vars(my_obj)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "Here is another way that you can add an instance variable to an object, using `vars()`.\n", 160 | "- Retrieve the Python dictionary `__dict__` of the object using vars(my_obj).\n", 161 | "- Modify this `__dict__` dictionary using square bracket notation and passing in the variable's name as a string: `['var3'] = 3`" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 6, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "{'var1': 1, 'var2': 2, 'var3': 3}" 173 | ] 174 | }, 175 | "execution_count": 6, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "# Call vars, passing in the object. Then access the __dict__ dictionary using square brackets\n", 182 | "vars(my_obj)['var3'] = 3\n", 183 | "\n", 184 | "# Call vars() to see the object's instance variables\n", 185 | "vars(my_obj)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "#### Why this is helpful!\n", 193 | "You may be wondering why you would need another way to access an object's instance variables. \n", 194 | "- Notice that when using `vars()`, you can now pass in the name of the variable `var3` as a string.\n", 195 | "- What if you plan to use several variables that are similarly named (`var4`, `var5` ... `var9`) and wanted a convenient way to access them by incrementing a number?\n", 196 | "\n", 197 | "Try this!" 
198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 7, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "data": { 207 | "text/plain": [ 208 | "{'var1': 1,\n", 209 | " 'var2': 2,\n", 210 | " 'var3': 3,\n", 211 | " 'var4': 0,\n", 212 | " 'var5': 0,\n", 213 | " 'var6': 0,\n", 214 | " 'var7': 0,\n", 215 | " 'var8': 0,\n", 216 | " 'var9': 0}" 217 | ] 218 | }, 219 | "execution_count": 7, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "# Use a for loop to increment the index 'i'\n", 226 | "for i in range(4,10):\n", 227 | " # Format a string that is var\n", 228 | " vars(my_obj)[f'var{i}'] = 0\n", 229 | " \n", 230 | "# View the object's instance variables!\n", 231 | "vars(my_obj)" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "There are couple equivalent ways in Python to format a string. Here are two of those ways:\n", 239 | "- f-string: f\"var{i}\"\n", 240 | "- .format: \"var{}\".format(i)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 8, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "name": "stdout", 250 | "output_type": "stream", 251 | "text": [ 252 | "var1\n", 253 | "var2\n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "# Format a string using f-string notation\n", 259 | "i=1\n", 260 | "print(f\"var{i}\")\n", 261 | "\n", 262 | "# Format a string using .format notation\n", 263 | "i=2\n", 264 | "print(\"var{}\".format(i))" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "You can access the variables of a class inside the class definition using `vars(self)`" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 9, 277 | "metadata": {}, 278 | "outputs": [ 279 | { 280 | "data": { 281 | "text/plain": [ 282 | "{'var1': 1}" 283 | ] 284 | }, 285 | "execution_count": 9, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "# Define a small class MyClass\n", 292 | "class MyClass:\n", 293 | " def __init__(self):\n", 294 | " # Use vars(self) to access the class's dictionary of variables\n", 295 | " vars(self)['var1'] = 1\n", 296 | "\n", 297 | "# Create an object of type MyClass()\n", 298 | "my_obj = MyClass()\n", 299 | "vars(my_obj)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "You'll see this in the upcoming code. Now you'll start building the VGG network!" 
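As an aside on the vars() pattern above: for ordinary objects whose attributes live in __dict__, the built-in setattr()/getattr() pair is an equivalent way to create and read attributes by formatted name. A small sketch, not part of the graded notebook, reusing my_obj from the cells above:

# Equivalent to vars(my_obj)[f'var{i}'] = 0 for ordinary objects
for i in range(4, 10):
    setattr(my_obj, f'var{i}', 0)

# Attributes can be read back dynamically as well
print(getattr(my_obj, 'var4'))  # 0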
307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "colab_type": "text", 313 | "id": "k1T1UMw5YAkp" 314 | }, 315 | "source": [ 316 | "## Create a generic VGG block (TODO)\n", 317 | "\n", 318 | "The VGG Network has blocks of layers, where each block has a varied number of layers.\n", 319 | "- In order to create blocks of layers that have a customizable number of conv2D layers, you'll define a class `Block`, which can generate a customizable block of layers \n", 320 | "\n", 321 | "\n", 322 | "### `__init__`\n", 323 | "In the constructor `__init__`, store the conv2D parameters and also define the number of conv2D layers using the parameters passed into `__init__`.\n", 324 | "- Store the filters, kernel_size, and repetitions as class variables so that they can be used later in the `call` function.\n", 325 | "- Using a for loop, define a number of Conv2D [Conv2D](https://keras.io/api/layers/convolution_layers/convolution2d/) layers, based on the number of `repetitions` desired for this block.\n", 326 | " - You can define each conv2D layer using `vars` and string formatting to create conv2D_0, conv2D_1, conv2D_3 etc.\n", 327 | " - Set these four parameters of Conv2D:\n", 328 | " - filters\n", 329 | " - kernel_size\n", 330 | " - activation: set this to 'relu'\n", 331 | " - padding: set this to 'same' (default pading is 'valid').\n", 332 | " \n", 333 | "- Define the [MaxPool2D](https://keras.io/api/layers/pooling_layers/max_pooling2d/) layer that follows these Conv2D layers. \n", 334 | " - Set the following parameters for MaxPool2D:\n", 335 | " - pool_size: this will be a tuple with two values.\n", 336 | " - strides: this will also be a tuple with two values.\n", 337 | "\n", 338 | "### `call`\n", 339 | "In `call`, you will connect the layers together.\n", 340 | "- The 0-th conv2D layer, `conv2D_0`, immediately follows the `inputs`.\n", 341 | "- For conv2D layers 1,2 and onward, you can use a for loop to connect conv2D_1 to conv2D_0, and connect conv2D_2 to conv2D_1, and so on.\n", 342 | "- After connecting all of the conv2D_i layers, add connect the max_pool layer and return the max_pool layer." 
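To make the expected behaviour concrete before the implementation below: once Block is written, applying it to a small random batch should halve the spatial dimensions (via the default 2x2 max pool) and produce `filters` output channels. A rough sketch, assuming the Block class described above and an arbitrary 28x28x3 input:

# Shape check for a finished Block (outside the graded cells)
import tensorflow as tf

block = Block(filters=64, kernel_size=3, repetitions=2)  # pool_size=2, strides=2 by default
out = block(tf.random.uniform((2, 28, 28, 3)))
print(out.shape)  # (2, 14, 14, 64): two 'same'-padded 3x3 convs, then 2x2 max pooling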
343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 19, 348 | "metadata": { 349 | "colab": {}, 350 | "colab_type": "code", 351 | "deletable": false, 352 | "id": "WGJGaxVjM00W", 353 | "nbgrader": { 354 | "cell_type": "code", 355 | "checksum": "7f19295d8925e1d2e60eefd42a6b4dd8", 356 | "grade": false, 357 | "grade_id": "cell-1449db9892707876", 358 | "locked": false, 359 | "schema_version": 3, 360 | "solution": true, 361 | "task": false 362 | } 363 | }, 364 | "outputs": [], 365 | "source": [ 366 | "# Please uncomment all lines in this cell and replace those marked with `# YOUR CODE HERE`.\n", 367 | "# You can select all lines in this code cell with Ctrl+A (Windows/Linux) or Cmd+A (Mac), then press Ctrl+/ (Windows/Linux) or Cmd+/ (Mac) to uncomment.\n", 368 | "\n", 369 | "\n", 370 | "\n", 371 | "class Block(tf.keras.Model):\n", 372 | " def __init__(self, filters, kernel_size, repetitions, pool_size=2, strides=2):\n", 373 | " super(Block, self).__init__()\n", 374 | " self.filters = filters\n", 375 | " self.kernel_size = kernel_size\n", 376 | " self.repetitions = repetitions\n", 377 | " \n", 378 | " # Define a conv2D_0, conv2D_1, etc based on the number of repetitions\n", 379 | " for i in range(self.repetitions):\n", 380 | " \n", 381 | " # Define a Conv2D layer, specifying filters, kernel_size, activation and padding.\n", 382 | " vars(self)[f'conv2D_{i}'] = tf.keras.layers.Conv2D(self.filters, self.kernel_size,\n", 383 | " activation= 'relu', padding= 'same')\n", 384 | " \n", 385 | " # Define the max pool layer that will be added after the Conv2D blocks\n", 386 | " self.max_pool = tf.keras.layers.MaxPool2D(pool_size, strides=strides)\n", 387 | " \n", 388 | " def call(self, inputs):\n", 389 | " # access the class's conv2D_0 layer\n", 390 | " conv2D_0 = vars(self)['conv2D_0']\n", 391 | " \n", 392 | " # Connect the conv2D_0 layer to inputs\n", 393 | " x = conv2D_0(inputs)\n", 394 | "\n", 395 | " # for the remaining conv2D_i layers from 1 to `repetitions` they will be connected to the previous layer\n", 396 | " for i in range(1,self.repetitions):\n", 397 | " # access conv2D_i by formatting the integer `i`. 
(hint: check how these were saved using `vars()` earlier)\n", 398 | " conv2D_i = vars(self)[f'conv2D_{i}']\n", 399 | " \n", 400 | " # Use the conv2D_i and connect it to the previous layer\n", 401 | " x = conv2D_i(x)\n", 402 | "\n", 403 | " # Finally, add the max_pool layer\n", 404 | " max_pool = self.max_pool(x)\n", 405 | " \n", 406 | " return max_pool" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 20, 412 | "metadata": { 413 | "deletable": false, 414 | "editable": false, 415 | "nbgrader": { 416 | "cell_type": "code", 417 | "checksum": "4027611c9615b1f518a95d76a81bc8d1", 418 | "grade": true, 419 | "grade_id": "cell-2911e521bce8793b", 420 | "locked": true, 421 | "points": 1, 422 | "schema_version": 3, 423 | "solution": false, 424 | "task": false 425 | } 426 | }, 427 | "outputs": [ 428 | { 429 | "name": "stdout", 430 | "output_type": "stream", 431 | "text": [ 432 | "\u001b[92m All public tests passed\n" 433 | ] 434 | } 435 | ], 436 | "source": [ 437 | "utils.test_block_class(Block)\n" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": { 443 | "colab_type": "text", 444 | "id": "peM2GP6uYT0U" 445 | }, 446 | "source": [ 447 | "## Create the Custom VGG network (TODO)\n", 448 | "This model stack has a series of VGG blocks, which can be created using the `Block` class that you defined earlier.\n", 449 | "\n", 450 | "### `__init__`\n", 451 | "- Recall that the `__init__` constructor of `Block` takes several function parameters, \n", 452 | " - filters, kernel_size, repetitions: you'll set these.\n", 453 | " - kernel_size and strides: you can use the default values.\n", 454 | "- For blocks a through e, build the blocks according to the following specifications:\n", 455 | "- block_a: 64 filters, kernel_size 3, repetitions 2\n", 456 | "- block_b: 128 filters, kernel_size 3, repetitions 2\n", 457 | "- block_c: 256 filters, kernel_size 3, repetitions 3\n", 458 | "- block_d: 512 filters, kernel_size 3, repetitions 3\n", 459 | "- block_e: 512 filters, kernel_size 3, repetitions 3\n", 460 | "\n", 461 | "After block 'e', add the following layers:\n", 462 | "- flatten: use [Flatten](https://keras.io/api/layers/reshaping_layers/flatten/).\n", 463 | "- fc: create a fully connected layer using [Dense](https://keras.io/api/layers/core_layers/dense/). Give this 256 units, and a `'relu'` activation.\n", 464 | "- classifier: create the classifier using a Dense layer. The number of units equals the number of classes. For multi-class classification, use a `'softmax'` activation.\n", 465 | "\n", 466 | "### `call`\n", 467 | "Connect these layers together using the functional API syntax:\n", 468 | "- inputs\n", 469 | "- block_a\n", 470 | "- block_b\n", 471 | "- block_c\n", 472 | "- block_d\n", 473 | "- block_e\n", 474 | "- flatten\n", 475 | "- fc\n", 476 | "- classifier\n", 477 | "\n", 478 | "Return the classifier layer." 
479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": 21, 484 | "metadata": { 485 | "colab": {}, 486 | "colab_type": "code", 487 | "deletable": false, 488 | "id": "yD-paeGiNGvz", 489 | "nbgrader": { 490 | "cell_type": "code", 491 | "checksum": "523346a38f53bc31e080114e98e8eca6", 492 | "grade": false, 493 | "grade_id": "cell-d9e90af0898eb47f", 494 | "locked": false, 495 | "schema_version": 3, 496 | "solution": true, 497 | "task": false 498 | } 499 | }, 500 | "outputs": [], 501 | "source": [ 502 | "# Please uncomment all lines in this cell and replace those marked with `# YOUR CODE HERE`.\n", 503 | "# You can select all lines in this code cell with Ctrl+A (Windows/Linux) or Cmd+A (Mac), then press Ctrl+/ (Windows/Linux) or Cmd+/ (Mac) to uncomment.\n", 504 | "\n", 505 | "\n", 506 | "\n", 507 | "class MyVGG(tf.keras.Model):\n", 508 | "\n", 509 | " def __init__(self, num_classes):\n", 510 | " super(MyVGG, self).__init__()\n", 511 | "\n", 512 | " # Creating blocks of VGG with the following \n", 513 | " # (filters, kernel_size, repetitions) configurations\n", 514 | " self.block_a = Block(filters=64, kernel_size=3, repetitions=2)\n", 515 | " self.block_b = Block(filters=128, kernel_size=3, repetitions=2)\n", 516 | " self.block_c = Block(filters=256, kernel_size=3, repetitions=3)\n", 517 | " self.block_d = Block(filters=512, kernel_size=3, repetitions=3)\n", 518 | " self.block_e = Block(filters=512, kernel_size=3, repetitions=3)\n", 519 | "\n", 520 | " # Classification head\n", 521 | " # Define a Flatten layer\n", 522 | " self.flatten = tf.keras.layers.Flatten()\n", 523 | " # Create a Dense layer with 256 units and ReLU as the activation function\n", 524 | " self.fc = tf.keras.layers.Dense(256, activation='relu')\n", 525 | " # Finally add the softmax classifier using a Dense layer\n", 526 | " self.classifier =tf.keras.layers.Dense(num_classes, activation='softmax')\n", 527 | "\n", 528 | " def call(self, inputs):\n", 529 | " # Chain all the layers one after the other\n", 530 | " x = self.block_a(inputs)\n", 531 | " x = self.block_b(x)\n", 532 | " x = self.block_c(x)\n", 533 | " x = self.block_d(x)\n", 534 | " x = self.block_e(x)\n", 535 | " x = self.flatten(x)\n", 536 | " x = self.fc(x)\n", 537 | " x = self.classifier(x)\n", 538 | " return x" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 22, 544 | "metadata": { 545 | "deletable": false, 546 | "editable": false, 547 | "nbgrader": { 548 | "cell_type": "code", 549 | "checksum": "79d77a2aa7ee7f82d707558cf5206868", 550 | "grade": true, 551 | "grade_id": "cell-559ac19437f4f2b2", 552 | "locked": true, 553 | "points": 1, 554 | "schema_version": 3, 555 | "solution": false, 556 | "task": false 557 | } 558 | }, 559 | "outputs": [ 560 | { 561 | "name": "stdout", 562 | "output_type": "stream", 563 | "text": [ 564 | "\u001b[92m All public tests passed\n" 565 | ] 566 | } 567 | ], 568 | "source": [ 569 | "utils.test_myvgg_class(MyVGG, Block)" 570 | ] 571 | }, 572 | { 573 | "cell_type": "markdown", 574 | "metadata": {}, 575 | "source": [ 576 | "### Load data and train the VGG network (Optional)\n", 577 | "You can now load the dataset and proceed to train your VGG network. \n", 578 | "- This will take a few minutes to complete and is **not required to complete the assignment**.\n", 579 | "- You can submit your work before starting the training." 
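Before committing to the full (slow) training run, it can be worth confirming that the custom network builds end to end. A minimal sketch, assuming the MyVGG class defined above and the 224x224 RGB input size produced by the preprocess() step below:

# Optional sanity check: run one dummy batch through the model, then inspect it
import tensorflow as tf

vgg_check = MyVGG(num_classes=2)
_ = vgg_check(tf.zeros((1, 224, 224, 3)))  # builds the weights
vgg_check.summary()                        # subclassed-model summary (output shapes show as 'multiple')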
580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": null, 585 | "metadata": { 586 | "colab": {}, 587 | "colab_type": "code", 588 | "id": "MaF763OKNJxU" 589 | }, 590 | "outputs": [ 591 | { 592 | "name": "stdout", 593 | "output_type": "stream", 594 | "text": [ 595 | "Epoch 1/10\n", 596 | " 3/Unknown - 55s 18s/step - loss: 0.6866 - accuracy: 0.5729" 597 | ] 598 | } 599 | ], 600 | "source": [ 601 | "dataset = tfds.load('cats_vs_dogs', split=tfds.Split.TRAIN, data_dir='data/')\n", 602 | "\n", 603 | "# Initialize VGG with the number of classes \n", 604 | "vgg = MyVGG(num_classes=2)\n", 605 | "\n", 606 | "# Compile with losses and metrics\n", 607 | "vgg.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n", 608 | "\n", 609 | "# Define preprocessing function\n", 610 | "def preprocess(features):\n", 611 | " # Resize and normalize\n", 612 | " image = tf.image.resize(features['image'], (224, 224))\n", 613 | " return tf.cast(image, tf.float32) / 255., features['label']\n", 614 | "\n", 615 | "# Apply transformations to dataset\n", 616 | "dataset = dataset.map(preprocess).batch(32)\n", 617 | "\n", 618 | "# Train the custom VGG model\n", 619 | "vgg.fit(dataset, epochs=10)" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": null, 625 | "metadata": {}, 626 | "outputs": [], 627 | "source": [] 628 | } 629 | ], 630 | "metadata": { 631 | "colab": { 632 | "collapsed_sections": [], 633 | "include_colab_link": true, 634 | "name": "ExerciseAnswer.ipynb", 635 | "provenance": [] 636 | }, 637 | "kernelspec": { 638 | "display_name": "Python 3", 639 | "language": "python", 640 | "name": "python3" 641 | }, 642 | "language_info": { 643 | "codemirror_mode": { 644 | "name": "ipython", 645 | "version": 3 646 | }, 647 | "file_extension": ".py", 648 | "mimetype": "text/x-python", 649 | "name": "python", 650 | "nbconvert_exporter": "python", 651 | "pygments_lexer": "ipython3", 652 | "version": "3.7.6" 653 | } 654 | }, 655 | "nbformat": 4, 656 | "nbformat_minor": 4 657 | } 658 | -------------------------------------------------------------------------------- /Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 4/VGG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/239e7490dd1bfcc9dd1f59a5e976833c4205006c/Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 4/VGG.png -------------------------------------------------------------------------------- /Course 1 - Custom Models, Layers, and Loss Functions with TensorFlow/Week 4/utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import layers 3 | 4 | def test_loop(test_cases): 5 | 6 | success = 0 7 | fails = 0 8 | 9 | for test_case in test_cases: 10 | try: 11 | assert test_case["result"] == test_case["expected"] 12 | success += 1 13 | 14 | except: 15 | fails += 1 16 | print(f'{test_case["name"]}: {test_case["error_message"]}\nExpected: {test_case["expected"]}\nResult: {test_case["result"]}\n') 17 | 18 | if fails == 0: 19 | print("\033[92m All public tests passed") 20 | 21 | else: 22 | print('\033[92m', success," Tests passed") 23 | print('\033[91m', fails, " Tests failed") 24 | raise Exception(test_case["error_message"]) 25 | 26 | 27 | def test_block_class(Block): 28 | 29 | filters = 64 30 | kernel_size = 3 31 | padding = 'same' 32 | 
pool_size = 3 33 | repetitions = 2 34 | test_block = Block(filters, kernel_size, repetitions, pool_size) 35 | test_block(tf.random.uniform(shape=[2, 3, 4, 5])) 36 | 37 | vars_test_block = vars(test_block) 38 | 39 | test_cases = [ 40 | { 41 | "name": "max_pool_type_check", 42 | "result": type(test_block.max_pool), 43 | "expected": layers.MaxPooling2D, 44 | "error_message": f'Incorrect layer type for self.maxpool' 45 | }, 46 | { 47 | "name": "max_pool_size_check", 48 | "result": vars_test_block['max_pool'].pool_size, 49 | "expected": (pool_size, pool_size), 50 | "error_message": f'max pool size incorrect. check parameters.' 51 | }, 52 | { 53 | "name": "max_pool_size_check", 54 | "result": vars_test_block['max_pool'].strides, 55 | "expected": (2,2), 56 | "error_message": f'max pool strides incorrect. check parameters.' 57 | }, 58 | { 59 | "name": "conv2D_0_type_check", 60 | "result": type(vars_test_block['conv2D_0']), 61 | "expected": layers.Conv2D, 62 | "error_message": f'Incorrect layer type for block_0' 63 | }, 64 | { 65 | "name": "conv2D_1_type_check", 66 | "result": type(vars_test_block['conv2D_1']), 67 | "expected": layers.Conv2D, 68 | "error_message": f'Incorrect layer type for block_0' 69 | }, 70 | { 71 | "name": "conv2D_0_filters_check", 72 | "result": vars_test_block['conv2D_0'].filters, 73 | "expected": filters, 74 | "error_message": f'Incorrect filters for Conv2D layer. Please check parameters.' 75 | }, 76 | { 77 | "name": "conv2D_0_kernel_size_check", 78 | "result": vars_test_block['conv2D_0'].kernel_size, 79 | "expected": (kernel_size, kernel_size), 80 | "error_message": f'Incorrect kernel_size for Conv2D layer. Please check parameters.' 81 | }, 82 | { 83 | "name": "conv2D_0_activation_check", 84 | "result": vars_test_block['conv2D_0'].activation, 85 | "expected": tf.keras.activations.relu, 86 | "error_message": f'Incorrect activation for Conv2D layer. Please check parameters.' 87 | }, 88 | { 89 | "name": "conv2D_0_padding_check", 90 | "result": vars_test_block['conv2D_0'].padding, 91 | "expected": padding, 92 | "error_message": f'Incorrect padding for Conv2D layer. Please check parameters.' 93 | }, 94 | 95 | ] 96 | 97 | test_loop(test_cases) 98 | 99 | def test_myvgg_class(MyVGG, Block): 100 | test_vgg = MyVGG(num_classes=2) 101 | test_vgg_layers = test_vgg.layers 102 | 103 | def get_block_params(block): 104 | return (block.filters, block.kernel_size, block.repetitions) 105 | 106 | test_cases = [ 107 | { 108 | "name": "block_a_type_check", 109 | "result": type(test_vgg.block_a), 110 | "expected": Block, 111 | "error_message": "self.block_a has an incorrect type. Please check declaration." 112 | }, 113 | { 114 | "name": "block_b_type_check", 115 | "result": type(test_vgg.block_b), 116 | "expected": Block, 117 | "error_message": "self.block_b has an incorrect type. Please check declaration." 118 | }, 119 | { 120 | "name": "block_c_type_check", 121 | "result": type(test_vgg.block_c), 122 | "expected": Block, 123 | "error_message": "self.block_c has an incorrect type. Please check declaration." 124 | }, 125 | { 126 | "name": "block_d_type_check", 127 | "result": type(test_vgg.block_d), 128 | "expected": Block, 129 | "error_message": "self.block_d has an incorrect type. Please check declaration." 130 | }, 131 | { 132 | "name": "block_e_type_check", 133 | "result": type(test_vgg.block_e), 134 | "expected": Block, 135 | "error_message": "self.block_e has an incorrect type. Please check declaration." 
136 | }, 137 | { 138 | "name": "block_a_param_check", 139 | "result": get_block_params(test_vgg.block_a), 140 | "expected": (64, 3, 2), 141 | "error_message": "self.block_a has incorrect parameters. Please check hints in the code comments." 142 | }, 143 | { 144 | "name": "block_b_param_check", 145 | "result": get_block_params(test_vgg.block_b), 146 | "expected": (128, 3, 2), 147 | "error_message": "self.block_b has incorrect parameters. Please check hints in the code comments." 148 | }, 149 | { 150 | "name": "block_c_param_check", 151 | "result": get_block_params(test_vgg.block_c), 152 | "expected": (256, 3, 3), 153 | "error_message": "self.block_c has incorrect parameters. Please check hints in the code comments." 154 | }, 155 | { 156 | "name": "block_d_param_check", 157 | "result": get_block_params(test_vgg.block_d), 158 | "expected": (512, 3, 3), 159 | "error_message": "self.block_d has incorrect parameters. Please check hints in the code comments." 160 | }, 161 | { 162 | "name": "block_e_param_check", 163 | "result": get_block_params(test_vgg.block_e), 164 | "expected": (512, 3, 3), 165 | "error_message": "self.block_e has incorrect parameters. Please check hints in the code comments." 166 | }, 167 | { 168 | "name": "flatten_type_check", 169 | "result": type(test_vgg.flatten), 170 | "expected": layers.Flatten, 171 | "error_message": "self.flatten has an incorrect type. Please check declaration." 172 | }, 173 | { 174 | "name": "fc_type_check", 175 | "result": type(test_vgg.fc), 176 | "expected": layers.Dense, 177 | "error_message": "self.fc has an incorrect type. Please check declaration." 178 | }, 179 | { 180 | "name": "fc_units_check", 181 | "result": test_vgg.fc.units, 182 | "expected": 256, 183 | "error_message": "self.fc has an incorrect number of units. Please check declaration." 184 | }, 185 | { 186 | "name": "fc_activation_check", 187 | "result": test_vgg.fc.activation, 188 | "expected": tf.keras.activations.relu, 189 | "error_message": "self.fc has an incorrect activation. Please check declaration." 190 | }, 191 | { 192 | "name": "classifier_type_check", 193 | "result": type(test_vgg.classifier), 194 | "expected": layers.Dense, 195 | "error_message": "self.classifier has an incorrect type. Please check declaration." 196 | }, 197 | { 198 | "name": "fc_units_check", 199 | "result": test_vgg.classifier.units, 200 | "expected": 2, 201 | "error_message": "self.classifier has an incorrect number of units. Please check declaration." 202 | }, 203 | { 204 | "name": "fc_activation_check", 205 | "result": test_vgg.classifier.activation, 206 | "expected": tf.keras.activations.softmax, 207 | "error_message": "self.classifier has an incorrect activation. Please check declaration." 208 | }, 209 | { 210 | "name": "layer_0_check", 211 | "result": type(test_vgg_layers[0]), 212 | "expected": Block, 213 | "error_message": "Layer 0 of myVGG is incorrect. Please check its call() method." 214 | }, 215 | { 216 | "name": "layer_1_check", 217 | "result": type(test_vgg_layers[1]), 218 | "expected": Block, 219 | "error_message": "Layer 1 of myVGG is incorrect. Please check its call() method." 220 | }, 221 | { 222 | "name": "layer_2_check", 223 | "result": type(test_vgg_layers[2]), 224 | "expected": Block, 225 | "error_message": "Layer 2 of myVGG is incorrect. Please check its call() method." 226 | }, 227 | { 228 | "name": "layer_3_check", 229 | "result": type(test_vgg_layers[3]), 230 | "expected": Block, 231 | "error_message": "Layer 3 of myVGG is incorrect. Please check its call() method." 
232 | }, 233 | { 234 | "name": "layer_4_check", 235 | "result": type(test_vgg_layers[4]), 236 | "expected": Block, 237 | "error_message": "Layer 4 of myVGG is incorrect. Please check its call() method." 238 | }, 239 | { 240 | "name": "layer_5_check", 241 | "result": type(test_vgg_layers[5]), 242 | "expected": layers.Flatten, 243 | "error_message": "Layer 5 of myVGG is incorrect. Please check its call() method." 244 | }, 245 | { 246 | "name": "layer_6_check", 247 | "result": type(test_vgg_layers[6]), 248 | "expected": layers.Dense, 249 | "error_message": "Layer 6 of myVGG is incorrect. Please check its call() method." 250 | }, 251 | { 252 | "name": "layer_7_check", 253 | "result": type(test_vgg_layers[7]), 254 | "expected": layers.Dense, 255 | "error_message": "Layer 7 of myVGG is incorrect. Please check its call() method." 256 | }, 257 | 258 | ] 259 | 260 | test_loop(test_cases) 261 | -------------------------------------------------------------------------------- /Course 2 - Custom and Distributed Training with TensorFlow/Week 1/C2W1_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic Tensor operations and GradientTape.\n", 8 | "\n", 9 | "In this graded assignment, you will perform different tensor operations as well as use [GradientTape](https://www.tensorflow.org/api_docs/python/tf/GradientTape). These are important building blocks for the next parts of this course so it's important to master the basics. Let's begin!" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "colab": {}, 17 | "colab_type": "code", 18 | "id": "jqev488WJ9-R" 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import tensorflow as tf\n", 23 | "import numpy as np" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Exercise 1 - [tf.constant]((https://www.tensorflow.org/api_docs/python/tf/constant))\n", 31 | "\n", 32 | "Creates a constant tensor from a tensor-like object. " 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": { 39 | "colab": {}, 40 | "colab_type": "code", 41 | "id": "MYdVyiSoLPgO" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "# Convert NumPy array to Tensor using `tf.constant`\n", 46 | "def tf_constant(array):\n", 47 | " \"\"\"\n", 48 | " Args:\n", 49 | " array (numpy.ndarray): tensor-like array.\n", 50 | "\n", 51 | " Returns:\n", 52 | " tensorflow.python.framework.ops.EagerTensor: tensor.\n", 53 | " \"\"\"\n", 54 | " ### START CODE HERE ###\n", 55 | " tf_constant_array = tf.constant(array)\n", 56 | " ### END CODE HERE ###\n", 57 | " return tf_constant_array" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "tmp_array = np.arange(1,10)\n", 78 | "x = tf_constant(tmp_array)\n", 79 | "x\n", 80 | "\n", 81 | "# Expected output:\n", 82 | "# " 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Note that for future docstrings, the type `EagerTensor` will be used as a shortened version of `tensorflow.python.framework.ops.EagerTensor`." 
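As a quick illustration of the point above (separate from the graded exercises), `tf.constant` keeps the shape and dtype of the NumPy input, and `.numpy()` converts the resulting EagerTensor back:

```python
import numpy as np
import tensorflow as tf

arr = np.arange(1, 10)     # NumPy array; dtype is int64 on most platforms
t = tf.constant(arr)       # EagerTensor with the same shape and dtype
print(t.shape, t.dtype)    # (9,) <dtype: 'int64'>
print(t.numpy())           # [1 2 3 4 5 6 7 8 9], back to a NumPy array
```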
90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## Exercise 2 - [tf.square](https://www.tensorflow.org/api_docs/python/tf/math/square)\n", 97 | "\n", 98 | "Computes the square of a tensor element-wise." 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 4, 104 | "metadata": { 105 | "colab": {}, 106 | "colab_type": "code", 107 | "id": "W6BTwNJCLjV8" 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "# Square the input tensor\n", 112 | "def tf_square(array):\n", 113 | " \"\"\"\n", 114 | " Args:\n", 115 | " array (numpy.ndarray): tensor-like array.\n", 116 | "\n", 117 | " Returns:\n", 118 | " EagerTensor: tensor.\n", 119 | " \"\"\"\n", 120 | " # make sure it's a tensor\n", 121 | " array = tf.constant(array)\n", 122 | " \n", 123 | " ### START CODE HERE ###\n", 124 | " tf_squared_array = tf.square(array)\n", 125 | " ### END CODE HERE ###\n", 126 | " return tf_squared_array" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "" 138 | ] 139 | }, 140 | "execution_count": 5, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "tmp_array = tf.constant(np.arange(1, 10))\n", 147 | "x = tf_square(tmp_array)\n", 148 | "x\n", 149 | "\n", 150 | "# Expected output:\n", 151 | "# " 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "## Exercise 3 - [tf.reshape](https://www.tensorflow.org/api_docs/python/tf/reshape)\n", 159 | "\n", 160 | "Reshapes a tensor." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 6, 166 | "metadata": { 167 | "colab": {}, 168 | "colab_type": "code", 169 | "id": "7nzBSX8-L0Xt" 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "# Reshape tensor into a 3 x 3 matrix\n", 174 | "def tf_reshape(array, shape):\n", 175 | " \"\"\"\n", 176 | " Args:\n", 177 | " array (EagerTensor): tensor to reshape.\n", 178 | " shape (tuple): desired shape.\n", 179 | "\n", 180 | " Returns:\n", 181 | " EagerTensor: reshaped tensor.\n", 182 | " \"\"\"\n", 183 | " # make sure it's a tensor\n", 184 | " array = tf.constant(array)\n", 185 | " ### START CODE HERE ###\n", 186 | " tf_reshaped_array = tf.reshape(array, shape)\n", 187 | " ### END CODE HERE ###\n", 188 | " return tf_reshaped_array" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 7, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "data": { 198 | "text/plain": [ 199 | "" 203 | ] 204 | }, 205 | "execution_count": 7, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "# Check your function\n", 212 | "tmp_array = np.array([1,2,3,4,5,6,7,8,9])\n", 213 | "# Check that your function reshapes a vector into a matrix\n", 214 | "x = tf_reshape(tmp_array, (3, 3))\n", 215 | "x\n", 216 | "\n", 217 | "# Expected output:\n", 218 | "# " 271 | ] 272 | }, 273 | "execution_count": 9, 274 | "metadata": {}, 275 | "output_type": "execute_result" 276 | } 277 | ], 278 | "source": [ 279 | "# Check your function\n", 280 | "tmp_array = [1,2,3,4]\n", 281 | "x = tf_cast(tmp_array, tf.float32)\n", 282 | "x\n", 283 | "\n", 284 | "# Expected output:\n", 285 | "# " 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "## Exercise 5 - [tf.multiply](https://www.tensorflow.org/api_docs/python/tf/multiply)\n", 293 | "\n", 294 | "Returns 
an element-wise x * y." 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 10, 300 | "metadata": { 301 | "colab": {}, 302 | "colab_type": "code", 303 | "id": "ivepGtD5MKP5" 304 | }, 305 | "outputs": [], 306 | "source": [ 307 | "# Multiply tensor1 and tensor2\n", 308 | "def tf_multiply(tensor1, tensor2):\n", 309 | " \"\"\"\n", 310 | " Args:\n", 311 | " tensor1 (EagerTensor): a tensor.\n", 312 | " tensor2 (EagerTensor): another tensor.\n", 313 | "\n", 314 | " Returns:\n", 315 | " EagerTensor: resulting tensor.\n", 316 | " \"\"\"\n", 317 | " # make sure these are tensors\n", 318 | " tensor1 = tf.constant(tensor1)\n", 319 | " tensor2 = tf.constant(tensor2)\n", 320 | " \n", 321 | " ### START CODE HERE ###\n", 322 | " product = tf.multiply(tensor1, tensor2)\n", 323 | " ### END CODE HERE ###\n", 324 | " return product\n" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 11, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/plain": [ 335 | "" 338 | ] 339 | }, 340 | "execution_count": 11, 341 | "metadata": {}, 342 | "output_type": "execute_result" 343 | } 344 | ], 345 | "source": [ 346 | "# Check your function\n", 347 | "tmp_1 = tf.constant(np.array([[1,2],[3,4]]))\n", 348 | "tmp_2 = tf.constant(np.array(2))\n", 349 | "result = tf_multiply(tmp_1, tmp_2)\n", 350 | "result\n", 351 | "\n", 352 | "# Expected output:\n", 353 | "# " 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "## Exercise 6 - [tf.add](https://www.tensorflow.org/api_docs/python/tf/add)\n", 363 | "\n", 364 | "Returns x + y element-wise." 365 | ] 366 | }, 367 | { 368 | "cell_type": "code", 369 | "execution_count": 12, 370 | "metadata": { 371 | "colab": {}, 372 | "colab_type": "code", 373 | "id": "BVlntdYnMboh" 374 | }, 375 | "outputs": [], 376 | "source": [ 377 | "# Add tensor1 and tensor2\n", 378 | "def tf_add(tensor1, tensor2):\n", 379 | " \"\"\"\n", 380 | " Args:\n", 381 | " tensor1 (EagerTensor): a tensor.\n", 382 | " tensor2 (EagerTensor): another tensor.\n", 383 | "\n", 384 | " Returns:\n", 385 | " EagerTensor: resulting tensor.\n", 386 | " \"\"\"\n", 387 | " # make sure these are tensors\n", 388 | " tensor1 = tf.constant(tensor1)\n", 389 | " tensor2 = tf.constant(tensor2)\n", 390 | " \n", 391 | " ### START CODE HERE ###\n", 392 | " total = tf.add(tensor1, tensor2)\n", 393 | " ### END CODE HERE ###\n", 394 | " return total" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 13, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "" 406 | ] 407 | }, 408 | "execution_count": 13, 409 | "metadata": {}, 410 | "output_type": "execute_result" 411 | } 412 | ], 413 | "source": [ 414 | "# Check your function\n", 415 | "tmp_1 = tf.constant(np.array([1, 2, 3]))\n", 416 | "tmp_2 = tf.constant(np.array([4, 5, 6]))\n", 417 | "tf_add(tmp_1, tmp_2)\n", 418 | "\n", 419 | "# Expected output:\n", 420 | "# " 421 | ] 422 | }, 423 | { 424 | "cell_type": "markdown", 425 | "metadata": { 426 | "colab_type": "text", 427 | "id": "9EN0W15EWNjD" 428 | }, 429 | "source": [ 430 | "## Exercise 7 - Gradient Tape\n", 431 | "\n", 432 | "Implement the function `tf_gradient_tape` by replacing the instances of `None` in the code below. The instructions are given in the code comments.\n", 433 | "\n", 434 | "You can review the [docs](https://www.tensorflow.org/api_docs/python/tf/GradientTape) or revisit the lectures to complete this task." 
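As a refresher before the exercise (this snippet is separate from the graded code), the basic `tf.GradientTape` pattern is: watch the input, compute the result inside the `with` block, then ask the tape for the gradient.

```python
import tensorflow as tf

x = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(x)      # constants are not watched automatically (tf.Variables are)
    y = x ** 2         # y = x^2
dy_dx = tape.gradient(y, x)
print(dy_dx.numpy())   # 6.0, since dy/dx = 2x = 6 at x = 3
```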
435 | ] 436 | }, 437 | { 438 | "cell_type": "code", 439 | "execution_count": 14, 440 | "metadata": { 441 | "colab": {}, 442 | "colab_type": "code", 443 | "id": "p3K94BWZM6nW" 444 | }, 445 | "outputs": [], 446 | "source": [ 447 | "def tf_gradient_tape(x):\n", 448 | " \"\"\"\n", 449 | " Args:\n", 450 | " x (EagerTensor): a tensor.\n", 451 | "\n", 452 | " Returns:\n", 453 | " EagerTensor: Derivative of z with respect to the input tensor x.\n", 454 | " \"\"\"\n", 455 | " with tf.GradientTape() as t:\n", 456 | " \n", 457 | " ### START CODE HERE ###\n", 458 | " # Record the actions performed on tensor x with `watch`\n", 459 | " t.watch(x) \n", 460 | "\n", 461 | " # Define a polynomial of form 3x^3 - 2x^2 + x\n", 462 | " y = 3*x**3 - 2*x**2 + x\n", 463 | "\n", 464 | " # Obtain the sum of the elements in variable y\n", 465 | " z = tf.reduce_sum(y)\n", 466 | " \n", 467 | " # Get the derivative of z with respect to the original input tensor x\n", 468 | " dz_dx = t.gradient(z, x)\n", 469 | " ### END CODE HERE\n", 470 | " \n", 471 | " return dz_dx" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 15, 477 | "metadata": {}, 478 | "outputs": [ 479 | { 480 | "data": { 481 | "text/plain": [ 482 | "29.0" 483 | ] 484 | }, 485 | "execution_count": 15, 486 | "metadata": {}, 487 | "output_type": "execute_result" 488 | } 489 | ], 490 | "source": [ 491 | "# Check your function\n", 492 | "tmp_x = tf.constant(2.0)\n", 493 | "dz_dx = tf_gradient_tape(tmp_x)\n", 494 | "result = dz_dx.numpy()\n", 495 | "result\n", 496 | "\n", 497 | "# Expected output:\n", 498 | "# 29.0" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "metadata": {}, 504 | "source": [ 505 | "**Congratulations on finishing this week's assignment!**\n", 506 | "\n", 507 | "**Keep it up!**" 508 | ] 509 | } 510 | ], 511 | "metadata": { 512 | "coursera": { 513 | "schema_names": [ 514 | "TF3C2W1-1", 515 | "TF3C2W1-2", 516 | "TF3C2W1-3", 517 | "TF3C2W1-4", 518 | "TF3C2W1-5", 519 | "TF3C2W1-6", 520 | "TF3C2W1-7" 521 | ] 522 | }, 523 | "kernelspec": { 524 | "display_name": "Python 3", 525 | "language": "python", 526 | "name": "python3" 527 | }, 528 | "language_info": { 529 | "codemirror_mode": { 530 | "name": "ipython", 531 | "version": 3 532 | }, 533 | "file_extension": ".py", 534 | "mimetype": "text/x-python", 535 | "name": "python", 536 | "nbconvert_exporter": "python", 537 | "pygments_lexer": "ipython3", 538 | "version": "3.7.6" 539 | } 540 | }, 541 | "nbformat": 4, 542 | "nbformat_minor": 4 543 | } 544 | -------------------------------------------------------------------------------- /Course 3 - Advanced Computer Vision with TensorFlow/Week 4/C3W4_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "colab": { 7 | "name": "C3W4_Assignment.ipynb", 8 | "private_outputs": true, 9 | "provenance": [], 10 | "collapsed_sections": [], 11 | "toc_visible": true 12 | }, 13 | "kernelspec": { 14 | "display_name": "Python 3", 15 | "language": "python", 16 | "name": "python3" 17 | }, 18 | "language_info": { 19 | "codemirror_mode": { 20 | "name": "ipython", 21 | "version": 3 22 | }, 23 | "file_extension": ".py", 24 | "mimetype": "text/x-python", 25 | "name": "python", 26 | "nbconvert_exporter": "python", 27 | "pygments_lexer": "ipython3", 28 | "version": "3.7.4" 29 | } 30 | }, 31 | "cells": [ 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "id": "vNQiSujBfjWj" 36 | }, 37 | 
"source": [ 38 | "# **Week 4 Assignment: Saliency Maps**\n", 39 | "\n", 40 | "Welcome to the final programming exercise of this course! For this week, your task is to adapt the [Cats vs Dogs](https://www.tensorflow.org/datasets/catalog/cats_vs_dogs) Class Activation Map ungraded lab (the second ungraded lab of this week) and make it generate saliency maps instead.\n", 41 | "\n", 42 | "As discussed in the lectures, a saliency map shows the pixels which greatly impacts the classification of an image. \n", 43 | "- This is done by getting the gradient of the loss with respect to changes in the pixel values, then plotting the results. \n", 44 | "- From there, you can see if your model is looking at the correct features when classifying an image. \n", 45 | " - For example, if you're building a dog breed classifier, you should be wary if your saliency map shows strong pixels outside the dog itself (e.g. sky, grass, dog house, etc...).\n", 46 | "\n", 47 | "In this assignment you will be given prompts but less starter code to fill in in. \n", 48 | "- It's good practice for you to try and write as much of this code as you can from memory and from searching the web.\n", 49 | "- **Whenever you feel stuck**, please refer back to the labs of this week to see how to write the code. In particular, look at:\n", 50 | " - **Ungraded Lab 2: Cats vs Dogs CAM**\n", 51 | " - **Ungraded Lab 3: Saliency**\n", 52 | "\n", 53 | "\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "id": "wDHISSfBq40T" 60 | }, 61 | "source": [ 62 | "### Download test files and weights\n", 63 | "\n", 64 | "Let's begin by first downloading files we will be using for this lab." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "metadata": { 70 | "id": "Laatr1c6lr1w" 71 | }, 72 | "source": [ 73 | "# Download the same test files from the Cats vs Dogs ungraded lab\n", 74 | "!wget -O cat1.jpg https://storage.googleapis.com/laurencemoroney-blog.appspot.com/MLColabImages/cat1.jpg\n", 75 | "!wget -O cat2.jpg https://storage.googleapis.com/laurencemoroney-blog.appspot.com/MLColabImages/cat2.jpg\n", 76 | "!wget -O catanddog.jpg https://storage.googleapis.com/laurencemoroney-blog.appspot.com/MLColabImages/catanddog.jpg\n", 77 | "!wget -O dog1.jpg https://storage.googleapis.com/laurencemoroney-blog.appspot.com/MLColabImages/dog1.jpg\n", 78 | "!wget -O dog2.jpg https://storage.googleapis.com/laurencemoroney-blog.appspot.com/MLColabImages/dog2.jpg\n", 79 | "\n", 80 | "# Download prepared weights\n", 81 | "!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1kipXTxesGJKGY1B8uSPRvxROgOH90fih' -O 0_epochs.h5\n", 82 | "!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1oiV6tjy5k7h9OHGTQaf0Ohn3FmF-uOs1' -O 15_epochs.h5\n" 83 | ], 84 | "execution_count": null, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": { 90 | "id": "g24L3lKwqb3E" 91 | }, 92 | "source": [ 93 | "### Import the required packages\n", 94 | "\n", 95 | "Please import:\n", 96 | "\n", 97 | " * Tensorflow\n", 98 | " * Tensorflow Datasets\n", 99 | " * Numpy\n", 100 | " * Matplotlib's PyPlot\n", 101 | " * Keras plot_model utility\n", 102 | " * Keras Models API classes you will be using\n", 103 | " * Keras layers you will be using\n", 104 | " * OpenCV (cv2)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "metadata": { 110 | "id": "X86LKLvpBO2S" 111 | }, 112 | "source": [ 113 | "# YOUR CODE HERE\r\n", 114 | "import tensorflow as tf\r\n", 115 | "import tensorflow_datasets as 
tfds\r\n", 116 | "import numpy as np\r\n", 117 | "import matplotlib.pyplot as plt\r\n", 118 | "from tensorflow.keras.utils import plot_model\r\n", 119 | "from tensorflow.keras import models\r\n", 120 | "from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, GlobalAveragePooling2D \r\n", 121 | "import cv2" 122 | ], 123 | "execution_count": null, 124 | "outputs": [] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": { 129 | "id": "th4dA3I8-9Ue" 130 | }, 131 | "source": [ 132 | "### Download and prepare the dataset.\n", 133 | "\n" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": { 139 | "id": "y1hujOK9rDyU" 140 | }, 141 | "source": [ 142 | "#### Load Cats vs Dogs \n", 143 | "\n", 144 | "* Required: Use Tensorflow Datasets to fetch the `cats_vs_dogs` dataset. \n", 145 | " * Use the first 80% of the *train* split of the said dataset to create your training set.\n", 146 | " * Set the `as_supervised` flag to create `(image, label)` pairs.\n", 147 | " \n", 148 | "* Optional: You can create validation and test sets from the remaining 20% of the *train* split of `cats_vs_dogs` (i.e. you already used 80% for the train set). This is if you intend to train the model beyond what is required for submission." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "metadata": { 154 | "id": "7w5HNdoHBQv_" 155 | }, 156 | "source": [ 157 | "# Load the data and create the train set (optional: val and test sets)\n", 158 | "\n", 159 | "# YOUR CODE HERE\n", 160 | "train_data = tfds.load('cats_vs_dogs', split='train[:80%]', as_supervised=True)" 161 | ], 162 | "execution_count": null, 163 | "outputs": [] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": { 168 | "id": "tXp0mV5Rbo76" 169 | }, 170 | "source": [ 171 | "#### Create preprocessing function\n", 172 | "\n", 173 | "Define a function that takes in an image and label. This will:\n", 174 | " * cast the image to float32\n", 175 | " * normalize the pixel values to [0, 1]\n", 176 | " * resize the image to 300 x 300\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "metadata": { 182 | "id": "pRkrL2aK2_UZ" 183 | }, 184 | "source": [ 185 | "def augmentimages(image, label):\n", 186 | " # YOUR CODE HERE\n", 187 | " image = tf.cast(image, tf.float32)\n", 188 | " image = image/255\n", 189 | " image = tf.image.resize(image, (300, 300))\n", 190 | " return image, label" 191 | ], 192 | "execution_count": null, 193 | "outputs": [] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": { 198 | "id": "pzvF61GV32k_" 199 | }, 200 | "source": [ 201 | "#### Preprocess the training set\n", 202 | "\n", 203 | "Use the `map()` and pass in the method that you just defined to preprocess the training set.\n" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "metadata": { 209 | "id": "vpNEfDKM353a" 210 | }, 211 | "source": [ 212 | "augmented_training_data = train_data.map(augmentimages)" 213 | ], 214 | "execution_count": null, 215 | "outputs": [] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "id": "Y4nFaMIMbrvA" 221 | }, 222 | "source": [ 223 | "#### Create batches of the training set. \n", 224 | "\n", 225 | "This is already provided for you. Normally, you will want to shuffle the training set. 
But for predictability in the grading, we will simply create the batches.\n", 226 | "\n", 227 | "```Python\n", 228 | "# Shuffle the data if you're working on your own personal project \n", 229 | "train_batches = augmented_training_data.shuffle(1024).batch(32)\n", 230 | "```" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "metadata": { 236 | "id": "POhDDPBY3vnL" 237 | }, 238 | "source": [ 239 | "train_batches = augmented_training_data.batch(32)" 240 | ], 241 | "execution_count": null, 242 | "outputs": [] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": { 247 | "id": "za5HxgT1_Cw6" 248 | }, 249 | "source": [ 250 | "### Build the Cats vs Dogs classifier \n", 251 | "\n", 252 | "You'll define a model that is nearly the same as the one in the Cats vs. Dogs CAM lab.\n", 253 | "* Please preserve the architecture of the model in the Cats vs Dogs CAM lab (this week's second lab) except for the final `Dense` layer.\n", 254 | "* You should modify the Cats vs Dogs model at the last dense layer to output 2 neurons instead of 1. \n", 255 | " - This is because you will adapt the `do_salience()` function from the lab and that works with one-hot encoded labels. \n", 256 | " - You can do this by changing the `units` argument of the output Dense layer from 1 to 2, with one for each of the classes (i.e. cats and dogs).\n", 257 | " - You should choose an activation that outputs a probability for each of the 2 classes (i.e. categories), where the sum of the probabilities adds up to 1." 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "metadata": { 263 | "id": "IoyCA80GBSlG" 264 | }, 265 | "source": [ 266 | "# YOUR CODE HERE\n", 267 | "model = tf.keras.models.Sequential()\n", 268 | "model.add(Conv2D(16,input_shape=(300,300,3),kernel_size=(3,3),activation='relu',padding='same'))\n", 269 | "model.add(MaxPooling2D(pool_size=(2,2)))\n", 270 | "\n", 271 | "model.add(Conv2D(32,kernel_size=(3,3),activation='relu',padding='same'))\n", 272 | "model.add(MaxPooling2D(pool_size=(2,2)))\n", 273 | "\n", 274 | "model.add(Conv2D(64,kernel_size=(3,3),activation='relu',padding='same'))\n", 275 | "model.add(MaxPooling2D(pool_size=(2,2)))\n", 276 | "\n", 277 | "model.add(Conv2D(128,kernel_size=(3,3),activation='relu',padding='same'))\n", 278 | "model.add(GlobalAveragePooling2D())\n", 279 | "model.add(Dense(2,activation='softmax'))\n", 280 | "model.summary()" 281 | ], 282 | "execution_count": null, 283 | "outputs": [] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": { 288 | "id": "ktnATyllHXC4" 289 | }, 290 | "source": [ 291 | "**Expected Output:**\n", 292 | "\n", 293 | "```txt\n", 294 | "Model: \"sequential\"\n", 295 | "_________________________________________________________________\n", 296 | "Layer (type) Output Shape Param # \n", 297 | "=================================================================\n", 298 | "conv2d (Conv2D) (None, 300, 300, 16) 448 \n", 299 | "_________________________________________________________________\n", 300 | "max_pooling2d (MaxPooling2D) (None, 150, 150, 16) 0 \n", 301 | "_________________________________________________________________\n", 302 | "conv2d_1 (Conv2D) (None, 150, 150, 32) 4640 \n", 303 | "_________________________________________________________________\n", 304 | "max_pooling2d_1 (MaxPooling2 (None, 75, 75, 32) 0 \n", 305 | "_________________________________________________________________\n", 306 | "conv2d_2 (Conv2D) (None, 75, 75, 64) 18496 \n", 307 | "_________________________________________________________________\n", 308 | 
"max_pooling2d_2 (MaxPooling2 (None, 37, 37, 64) 0 \n", 309 | "_________________________________________________________________\n", 310 | "conv2d_3 (Conv2D) (None, 37, 37, 128) 73856 \n", 311 | "_________________________________________________________________\n", 312 | "global_average_pooling2d (Gl (None, 128) 0 \n", 313 | "_________________________________________________________________\n", 314 | "dense (Dense) (None, 2) 258 \n", 315 | "=================================================================\n", 316 | "Total params: 97,698\n", 317 | "Trainable params: 97,698\n", 318 | "Non-trainable params: 0\n", 319 | "_________________________________________________________________\n", 320 | "```" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": { 326 | "id": "J6nou82P_b5d" 327 | }, 328 | "source": [ 329 | "### Create a function to generate the saliency map\n", 330 | "\n", 331 | "Complete the `do_salience()` function below to save the **normalized_tensor** image. \n", 332 | "- The major steps are listed as comments below.\n", 333 | " - Each section may involve multiple lines of code.\n", 334 | "- Try your best to write the code from memory or by performing web searches.\n", 335 | " - Whenever you get stuck, you can review the \"saliency\" lab (the third lab of this week) to help remind you of what code to write" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "metadata": { 341 | "id": "sKbvh3bl9vnG" 342 | }, 343 | "source": [ 344 | "def do_salience(image, model, label, prefix):\n", 345 | " '''\n", 346 | " Generates the saliency map of a given image.\n", 347 | "\n", 348 | " Args:\n", 349 | " image (file) -- picture that the model will classify\n", 350 | " model (keras Model) -- your cats and dogs classifier\n", 351 | " label (int) -- ground truth label of the image\n", 352 | " prefix (string) -- prefix to add to the filename of the saliency map\n", 353 | " '''\n", 354 | "\n", 355 | " # Read the image and convert channel order from BGR to RGB\n", 356 | " # YOUR CODE HERE\n", 357 | " img = cv2.imread(image)\n", 358 | " img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n", 359 | "\n", 360 | " # Resize the image to 300 x 300 and normalize pixel values to the range [0, 1]\n", 361 | " # YOUR CODE HERE\n", 362 | " img = cv2.resize(img, (300, 300))\n", 363 | " img = img/255.0\n", 364 | " # Add an additional dimension (for the batch), and save this in a new variable\n", 365 | " # YOUR CODE HERE\n", 366 | " expand_img = np.expand_dims(img, axis= 0)\n", 367 | "\n", 368 | " # Declare the number of classes\n", 369 | " # YOUR CODE HERE\n", 370 | " num_classes = 2\n", 371 | " \n", 372 | " # Define the expected output array by one-hot encoding the label\n", 373 | " # The length of the array is equal to the number of classes\n", 374 | " # YOUR CODE HERE\n", 375 | " expected_output = tf.one_hot([label] * expand_img.shape[0], num_classes)\n", 376 | "\n", 377 | " # Witin the GradientTape block:\n", 378 | " # Cast the image as a tf.float32\n", 379 | " # Use the tape to watch the float32 image\n", 380 | " # Get the model's prediction by passing in the float32 image\n", 381 | " # Compute an appropriate loss\n", 382 | " # between the expected output and model predictions.\n", 383 | " # you may want to print the predictions to see if the probabilities adds up to 1\n", 384 | " # YOUR CODE HERE\n", 385 | " with tf.GradientTape() as tape : \n", 386 | " inputs = tf.cast(expand_img, tf.float32)\n", 387 | "\n", 388 | " tape.watch(inputs) \n", 389 | "\n", 390 | " predictions = model(inputs) 
\n", 391 | "\n", 392 | " loss = tf.keras.losses.categorical_crossentropy(\n", 393 | " expected_output, predictions\n", 394 | " ) \n", 395 | " print(predictions)\n", 396 | "\n", 397 | " # get the gradients of the loss with respect to the model's input image\n", 398 | " # YOUR CODE HERE\n", 399 | " gradients = tape.gradient(loss, inputs)\n", 400 | " \n", 401 | " # generate the grayscale tensor\n", 402 | " # YOUR CODE HERE\n", 403 | " grayscale_tensor = tf.reduce_sum(tf.abs(gradients), axis=-1) \n", 404 | "\n", 405 | " # normalize the pixel values to be in the range [0, 255].\n", 406 | " # the max value in the grayscale tensor will be pushed to 255.\n", 407 | " # the min value will be pushed to 0.\n", 408 | " # Use the formula: 255 * (x - min) / (max - min)\n", 409 | " # Use tf.reduce_max, tf.reduce_min\n", 410 | " # Cast the tensor as a tf.uint8\n", 411 | " # YOUR CODE HERE\n", 412 | " normalized_tensor = tf.cast(\n", 413 | " 255\n", 414 | " * (grayscale_tensor - tf.reduce_min(grayscale_tensor))\n", 415 | " / (tf.reduce_max(grayscale_tensor) - tf.reduce_min(grayscale_tensor)),\n", 416 | " tf.uint8,\n", 417 | " ) \n", 418 | " \n", 419 | " # Remove dimensions that are size 1\n", 420 | " # YOUR CODE HERE\n", 421 | " normalized_tensor = tf.squeeze(normalized_tensor)\n", 422 | " \n", 423 | " # plot the normalized tensor\n", 424 | " # Set the figure size to 8 by 8\n", 425 | " # do not display the axis\n", 426 | " # use the 'gray' colormap\n", 427 | " # This code is provided for you.\n", 428 | " plt.figure(figsize=(8, 8))\n", 429 | " plt.axis('off')\n", 430 | " plt.imshow(normalized_tensor, cmap='gray')\n", 431 | " plt.show()\n", 432 | "\n", 433 | " # optional: superimpose the saliency map with the original image, then display it.\n", 434 | " # we encourage you to do this to visualize your results better\n", 435 | " # YOUR CODE HERE\n", 436 | "\n", 437 | "\n", 438 | " # save the normalized tensor image to a file. this is already provided for you.\n", 439 | " salient_image_name = prefix + image\n", 440 | " normalized_tensor = tf.expand_dims(normalized_tensor, -1)\n", 441 | " normalized_tensor = tf.io.encode_jpeg(normalized_tensor, quality=100, format='grayscale')\n", 442 | " writer = tf.io.write_file(salient_image_name, normalized_tensor)" 443 | ], 444 | "execution_count": null, 445 | "outputs": [] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "metadata": { 450 | "id": "li1idRy-parp" 451 | }, 452 | "source": [ 453 | "### Generate saliency maps with untrained model\n", 454 | "\n", 455 | "As a sanity check, you will load initialized (i.e. untrained) weights and use the function you just implemented. \n", 456 | "- This will check if you built the model correctly and are able to create a saliency map. \n", 457 | "\n", 458 | "If an error pops up when loading the weights or the function does not run, please check your implementation for bugs.\n", 459 | "- You can check the ungraded labs of this week.\n", 460 | "\n", 461 | "Please apply your `do_salience()` function on the following image files:\n", 462 | "\n", 463 | "* `cat1.jpg`\n", 464 | "* `cat2.jpg`\n", 465 | "* `catanddog.jpg`\n", 466 | "* `dog1.jpg`\n", 467 | "* `dog2.jpg`\n", 468 | "\n", 469 | "Cats will have the label `0` while dogs will have the label `1`. \n", 470 | "- For the catanddog, please use `0`. \n", 471 | "- For the prefix of the salience images that will be generated, please use the prefix `epoch0_salient`." 
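The optional superimposing step inside `do_salience()` is left open above. One possible sketch (the helper name and the alpha value are illustrative choices, not part of the assignment) is to overlay the grayscale map on the resized image with matplotlib:

```python
import matplotlib.pyplot as plt

def overlay_saliency(image, saliency, alpha=0.7):
    '''Overlays a grayscale saliency map on an RGB image of the same size.'''
    plt.figure(figsize=(8, 8))
    plt.axis('off')
    plt.imshow(image)                               # resized image scaled to [0, 1]
    plt.imshow(saliency, cmap='gray', alpha=alpha)  # saliency map drawn on top
    plt.show()

# e.g. inside do_salience(), after the squeeze: overlay_saliency(img, normalized_tensor)
```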
472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "metadata": { 477 | "id": "k39fF4n8fgG0" 478 | }, 479 | "source": [ 480 | "# load initial weights\n", 481 | "model.load_weights('0_epochs.h5')\n", 482 | "\n", 483 | "# generate the saliency maps for the 5 test images\n", 484 | "# YOUR CODE HERE\n", 485 | "do_salience('cat1.jpg', model, 0, 'epoch0_salient') \n", 486 | "do_salience('cat2.jpg', model, 0, 'epoch0_salient') \n", 487 | "do_salience('catanddog.jpg', model, 0, 'epoch0_salient') \n", 488 | "do_salience('dog1.jpg', model, 1, 'epoch0_salient') \n", 489 | "do_salience('dog2.jpg', model, 1, 'epoch0_salient')" 490 | ], 491 | "execution_count": null, 492 | "outputs": [] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": { 497 | "id": "8kcdyut5E2Tk" 498 | }, 499 | "source": [ 500 | "With untrained weights, you will see something like this in the output. \n", 501 | "- You will see strong pixels outside the cat that the model uses that when classifying the image. \n", 502 | "- After training that these will slowly start to localize to features inside the pet.\n", 503 | "\n", 504 | "saliency\n" 505 | ] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": { 510 | "id": "-ZhZgd0x_JvN" 511 | }, 512 | "source": [ 513 | "### Configure the model for training\n", 514 | "\n", 515 | "Use `model.compile()` to define the loss, metrics and optimizer. \n", 516 | "\n", 517 | "* Choose a loss function for the model to use when training. \n", 518 | " - For `model.compile()` the ground truth labels from the training set are passed to the model as **integers** (i.e. 0 or 1) as opposed to one-hot encoded vectors.\n", 519 | " - The model predictions are class probabilities. \n", 520 | " - You can browse the [tf.keras.losses](https://www.tensorflow.org/api_docs/python/tf/keras/losses) and determine which one is best used for this case. \n", 521 | " - Remember that you can pass the function as a string (e.g. `loss = 'loss_function_a'`). \n", 522 | "\n", 523 | "* For metrics, you can measure `accuracy`. \n", 524 | "* For the optimizer, please use [RMSProp](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/RMSprop).\n", 525 | " - Please use the default learning rate of `0.001`." 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "metadata": { 531 | "id": "DkyWZ5KdBo-z" 532 | }, 533 | "source": [ 534 | "# YOUR CODE HERE\r\n", 535 | "optimizer = tf.keras.optimizers.RMSprop(0.001)\r\n", 536 | "\r\n", 537 | "model.compile(loss='sparse_categorical_crossentropy', \r\n", 538 | " optimizer=optimizer, \r\n", 539 | " metrics=['accuracy'])" 540 | ], 541 | "execution_count": null, 542 | "outputs": [] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": { 547 | "id": "otIoJJw7_ZFN" 548 | }, 549 | "source": [ 550 | "### Train your model\n", 551 | "\n", 552 | "Please pass in the training batches and train your model for just **3** epochs. \n", 553 | "- **Note:** Please do not exceed 3 epochs because the grader will expect 3 epochs when grading your output.\n", 554 | " - After submitting your zipped folder for grading, feel free to continue training to improve your model.\n", 555 | "\n", 556 | "We have loaded pre-trained weights for 15 epochs so you can get a better output when you visualize the saliency maps." 
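A quick aside on the loss choice above (separate from the graded code): the labels coming from `train_batches` are integers, so `sparse_categorical_crossentropy` fits there, while `do_salience()` one-hot encodes its label and therefore uses `categorical_crossentropy`. With made-up probabilities, the two losses agree when the labels refer to the same class:

```python
import tensorflow as tf

probs   = tf.constant([[0.9, 0.1]])                                      # model output for one image
sparse  = tf.keras.losses.sparse_categorical_crossentropy([0], probs)    # integer label
one_hot = tf.keras.losses.categorical_crossentropy([[1.0, 0.0]], probs)  # one-hot label
print(sparse.numpy(), one_hot.numpy())   # both ~0.105, i.e. -log(0.9)
```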
557 | ] 558 | }, 559 | { 560 | "cell_type": "code", 561 | "metadata": { 562 | "id": "5YSNp7k7BqfL" 563 | }, 564 | "source": [ 565 | "# load pre-trained weights\n", 566 | "model.load_weights('15_epochs.h5')\n", 567 | "\n", 568 | "# train the model for just 3 epochs\n", 569 | "# YOUR CODE HERE \n", 570 | "\n", 571 | "EPOCHS = 3\n", 572 | "model.fit(train_batches ,\n", 573 | " epochs=EPOCHS \n", 574 | " )" 575 | ], 576 | "execution_count": null, 577 | "outputs": [] 578 | }, 579 | { 580 | "cell_type": "markdown", 581 | "metadata": { 582 | "id": "2tTqtLN3tQJx" 583 | }, 584 | "source": [ 585 | "### Generate saliency maps at 18 epochs\n", 586 | "\n", 587 | "You will now use your `do_salience()` function again on the same test images. Please use the same parameters as before but this time, use the prefix `salient`." 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "metadata": { 593 | "id": "bXFtabyVhIKN" 594 | }, 595 | "source": [ 596 | "# YOUR CODE HERE\r\n", 597 | "do_salience('cat1.jpg', model, 0, 'salient') \r\n", 598 | "do_salience('cat2.jpg', model, 0, 'salient') \r\n", 599 | "do_salience('catanddog.jpg', model, 0, 'salient') \r\n", 600 | "do_salience('dog1.jpg', model, 1, 'salient') \r\n", 601 | "do_salience('dog2.jpg', model, 1, 'salient')" 602 | ], 603 | "execution_count": null, 604 | "outputs": [] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "metadata": { 609 | "id": "wGTFcfEgM6aV" 610 | }, 611 | "source": [ 612 | "You should see that the strong pixels are now very less than the ones you generated earlier. Moreover, most of them are now found on features within the pet." 613 | ] 614 | }, 615 | { 616 | "cell_type": "markdown", 617 | "metadata": { 618 | "id": "rPtx-u4u_jL5" 619 | }, 620 | "source": [ 621 | "### Zip the images for grading\n", 622 | "\n", 623 | "Please run the cell below to zip the normalized tensor images you generated at 18 epochs. If you get an error, please check that you have files named:\n", 624 | "\n", 625 | "* salientcat1.jpg\n", 626 | "* salientcat2.jpg\n", 627 | "* salientcatanddog.jpg\n", 628 | "* salientdog1.jpg\n", 629 | "* salientdog2.jpg\n", 630 | "\n", 631 | "Afterwards, please download the **images.zip** from the Files bar on the left." 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "metadata": { 637 | "id": "b-MhcA8Uh8H_" 638 | }, 639 | "source": [ 640 | "from zipfile import ZipFile\n", 641 | "\n", 642 | "!rm images.zip\n", 643 | "\n", 644 | "filenames = ['cat1.jpg', 'cat2.jpg', 'catanddog.jpg', 'dog1.jpg', 'dog2.jpg']\n", 645 | "\n", 646 | "# writing files to a zipfile \n", 647 | "with ZipFile('images.zip','w') as zip:\n", 648 | " for file in filenames:\n", 649 | " zip.write('salient' + file)\n", 650 | "\n", 651 | "print(\"images.zip generated!\")" 652 | ], 653 | "execution_count": null, 654 | "outputs": [] 655 | }, 656 | { 657 | "cell_type": "markdown", 658 | "metadata": { 659 | "id": "SMOgx-N55A6p" 660 | }, 661 | "source": [ 662 | "### Optional: Saliency Maps at 95 epochs\n", 663 | "\n", 664 | "We have pre-trained weights generated at 95 epochs and you can see the difference between the maps you generated at 18 epochs." 
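If you want to see the improvement directly, a small side-by-side viewer can help. The helper below is illustrative only and assumes the `salient*` and `epoch95_salient*` files exist, so run it after the next cell:

```python
import matplotlib.pyplot as plt

def compare_maps(image_name):
    '''Shows the 18-epoch and 95-epoch saliency maps for one image side by side.'''
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    for ax, prefix, title in zip(axes,
                                 ['salient', 'epoch95_salient'],
                                 ['18 epochs', '95 epochs']):
        ax.imshow(plt.imread(prefix + image_name), cmap='gray')
        ax.set_title(title)
        ax.axis('off')
    plt.show()

# compare_maps('cat1.jpg')   # run once the epoch95_salient* images are generated
```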
665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "metadata": { 670 | "id": "elUfhSmMvJZh" 671 | }, 672 | "source": [ 673 | "!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=14vFpBJsL_TNQeugX8vUTv8dYZxn__fQY' -O 95_epochs.h5\n", 674 | "\n", 675 | "model.load_weights('95_epochs.h5')\n", 676 | "\n", 677 | "do_salience('cat1.jpg', model, 0, \"epoch95_salient\")\n", 678 | "do_salience('cat2.jpg', model, 0, \"epoch95_salient\")\n", 679 | "do_salience('catanddog.jpg', model, 0, \"epoch95_salient\")\n", 680 | "do_salience('dog1.jpg', model, 1, \"epoch95_salient\")\n", 681 | "do_salience('dog2.jpg', model, 1, \"epoch95_salient\")" 682 | ], 683 | "execution_count": null, 684 | "outputs": [] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": { 689 | "id": "HuKLdQhvAaTd" 690 | }, 691 | "source": [ 692 | "**Congratulations on completing this week's assignment! Please go back to the Coursera classroom and upload the zipped folder to be graded.**" 693 | ] 694 | } 695 | ] 696 | } -------------------------------------------------------------------------------- /Course 4 - Generative Deep Learning with TensorFlow/Week 1/C4W1_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.4" 21 | }, 22 | "colab": { 23 | "name": "C4W1_Assignment.ipynb", 24 | "private_outputs": true, 25 | "provenance": [], 26 | "collapsed_sections": [] 27 | }, 28 | "accelerator": "GPU" 29 | }, 30 | "cells": [ 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "hJyXk97ZnTSF" 35 | }, 36 | "source": [ 37 | "# Week 1 Assignment: Neural Style Transfer\n", 38 | "\n", 39 | "Welcome to the first programming assignment of this course! Here, you will be implementing neural style transfer using the [Inception](https://arxiv.org/abs/1512.00567v3) model as your feature extractor. This is very similar to the Neural Style Transfer ungraded lab so if you get stuck, remember to review the said notebook for tips." 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "id": "QKa5uifDKII3" 46 | }, 47 | "source": [ 48 | "***Important:*** *This colab notebook has read-only access so you won't be able to save your changes. 
If you want to save your work periodically, please click `File -> Save a Copy in Drive` to create a copy in your account, then work from there.* " 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "id": "eqxUicSPUOP6" 55 | }, 56 | "source": [ 57 | "## Imports" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "NyftRTSMuwue" 64 | }, 65 | "source": [ 66 | "try:\n", 67 | " # %tensorflow_version only exists in Colab.\n", 68 | " %tensorflow_version 2.x\n", 69 | "except Exception:\n", 70 | " pass\n", 71 | "\n", 72 | "import tensorflow as tf\n", 73 | "\n", 74 | "import matplotlib.pyplot as plt\n", 75 | "import numpy as np\n", 76 | "from keras import backend as K\n", 77 | "\n", 78 | "from imageio import mimsave\n", 79 | "from IPython.display import display as display_fn\n", 80 | "from IPython.display import Image, clear_output" 81 | ], 82 | "execution_count": null, 83 | "outputs": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "id": "5Rd1FKJ9KOr5" 89 | }, 90 | "source": [ 91 | "## Utilities\n", 92 | "\n", 93 | "As before, we've provided some utility functions below to help in loading, visualizing, and preprocessing the images." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "qCMK4u6poA1k" 100 | }, 101 | "source": [ 102 | "def tensor_to_image(tensor):\n", 103 | " '''converts a tensor to an image'''\n", 104 | " tensor_shape = tf.shape(tensor)\n", 105 | " number_elem_shape = tf.shape(tensor_shape)\n", 106 | " if number_elem_shape > 3:\n", 107 | " assert tensor_shape[0] == 1\n", 108 | " tensor = tensor[0]\n", 109 | " return tf.keras.preprocessing.image.array_to_img(tensor) \n", 110 | "\n", 111 | "\n", 112 | "def load_img(path_to_img):\n", 113 | " '''loads an image as a tensor and scales it to 512 pixels'''\n", 114 | " max_dim = 512\n", 115 | " image = tf.io.read_file(path_to_img)\n", 116 | " image = tf.image.decode_jpeg(image)\n", 117 | " image = tf.image.convert_image_dtype(image, tf.float32)\n", 118 | "\n", 119 | " shape = tf.shape(image)[:-1]\n", 120 | " shape = tf.cast(tf.shape(image)[:-1], tf.float32)\n", 121 | " long_dim = max(shape)\n", 122 | " scale = max_dim / long_dim\n", 123 | "\n", 124 | " new_shape = tf.cast(shape * scale, tf.int32)\n", 125 | "\n", 126 | " image = tf.image.resize(image, new_shape)\n", 127 | " image = image[tf.newaxis, :]\n", 128 | " image = tf.image.convert_image_dtype(image, tf.uint8)\n", 129 | "\n", 130 | " return image\n", 131 | "\n", 132 | "\n", 133 | "def load_images(content_path, style_path):\n", 134 | " '''loads the content and path images as tensors'''\n", 135 | " content_image = load_img(\"{}\".format(content_path))\n", 136 | " style_image = load_img(\"{}\".format(style_path))\n", 137 | "\n", 138 | " return content_image, style_image\n", 139 | "\n", 140 | "\n", 141 | "def imshow(image, title=None):\n", 142 | " '''displays an image with a corresponding title'''\n", 143 | " if len(image.shape) > 3:\n", 144 | " image = tf.squeeze(image, axis=0)\n", 145 | "\n", 146 | " plt.imshow(image)\n", 147 | " if title:\n", 148 | " plt.title(title)\n", 149 | " \n", 150 | " \n", 151 | "def show_images_with_objects(images, titles=[]):\n", 152 | " '''displays a row of images with corresponding titles'''\n", 153 | " if len(images) != len(titles):\n", 154 | " return\n", 155 | "\n", 156 | " plt.figure(figsize=(20, 12))\n", 157 | " for idx, (image, title) in enumerate(zip(images, titles)):\n", 158 | " plt.subplot(1, len(images), idx + 1)\n", 159 | " plt.xticks([])\n", 160 | " 
plt.yticks([])\n", 161 | " imshow(image, title)\n", 162 | "\n", 163 | "\n", 164 | "def clip_image_values(image, min_value=0.0, max_value=255.0):\n", 165 | " '''clips the image pixel values by the given min and max'''\n", 166 | " return tf.clip_by_value(image, clip_value_min=min_value, clip_value_max=max_value)\n", 167 | "\n", 168 | "\n", 169 | "def preprocess_image(image):\n", 170 | " '''preprocesses a given image to use with Inception model'''\n", 171 | " image = tf.cast(image, dtype=tf.float32)\n", 172 | " image = (image / 127.5) - 1.0\n", 173 | "\n", 174 | " return image" 175 | ], 176 | "execution_count": null, 177 | "outputs": [] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": { 182 | "id": "0U9It5Ii2Oof" 183 | }, 184 | "source": [ 185 | "## Download Images" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": { 191 | "id": "oeXebYusyHwC" 192 | }, 193 | "source": [ 194 | "You will fetch the two images you will use for the content and style image." 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "metadata": { 200 | "id": "wqc0OJHwyFAk" 201 | }, 202 | "source": [ 203 | "content_path = tf.keras.utils.get_file('content_image.jpg','https://storage.googleapis.com/laurencemoroney-blog.appspot.com/MLColabImages/dog1.jpg')\n", 204 | "style_path = tf.keras.utils.get_file('style_image.jpg','https://storage.googleapis.com/download.tensorflow.org/example_images/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg')" 205 | ], 206 | "execution_count": null, 207 | "outputs": [] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "metadata": { 212 | "id": "vE2TBEBntSjm" 213 | }, 214 | "source": [ 215 | "# display the content and style image\n", 216 | "content_image, style_image = load_images(content_path, style_path)\n", 217 | "show_images_with_objects([content_image, style_image], \n", 218 | " titles=[f'content image: {content_path}',\n", 219 | " f'style image: {style_path}'])" 220 | ], 221 | "execution_count": null, 222 | "outputs": [] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": { 227 | "id": "Jt3i3RRrJiOX" 228 | }, 229 | "source": [ 230 | "## Build the feature extractor" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "id": "vwvREWQ1nTSV" 237 | }, 238 | "source": [ 239 | "Next, you will inspect the layers of the Inception model." 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "metadata": { 245 | "id": "psmTncz8nTSV" 246 | }, 247 | "source": [ 248 | "# clear session to make layer naming consistent when re-running this cell\n", 249 | "K.clear_session()\n", 250 | "\n", 251 | "# download the inception model and inspect the layers\n", 252 | "tmp_inception = tf.keras.applications.InceptionV3()\n", 253 | "tmp_inception.summary()\n", 254 | "\n", 255 | "# delete temporary model\n", 256 | "del tmp_inception" 257 | ], 258 | "execution_count": null, 259 | "outputs": [] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": { 264 | "id": "Yk6qYGEynTSW" 265 | }, 266 | "source": [ 267 | "As you can see, it's a very deep network and compared to VGG-19, it's harder to choose which layers to choose to extract features from. \n", 268 | "\n", 269 | "- Notice that the Conv2D layers are named from `conv2d`, `conv2d_1` ... 
`conv2d_93`, for a total of 94 conv2d layers.\n", 270 | " - So the second conv2D layer is named `conv2d_1`.\n", 271 | "- For the purpose of grading, please choose the following\n", 272 | " - For the content layer: choose the Conv2D layer indexed at `88`.\n", 273 | " - For the style layers, please choose the first `five` conv2D layers near the input end of the model.\n", 274 | " - Note the numbering as mentioned in these instructions." 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": { 280 | "id": "Wt-tASys0eJv" 281 | }, 282 | "source": [ 283 | "Choose intermediate layers from the network to represent the style and content of the image:\n" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "metadata": { 289 | "id": "ArfX_6iA0WAX" 290 | }, 291 | "source": [ 292 | "\n", 293 | "### START CODE HERE ###\n", 294 | "# choose the content layer and put in a list\n", 295 | "content_layers = [\"conv2d_88\"]\n", 296 | "\n", 297 | "# choose the five style layers of interest\n", 298 | "style_layers = [\"conv2d\", \n", 299 | " \"conv2d_1\",\n", 300 | " \"conv2d_2\",\n", 301 | " \"conv2d_3\",\n", 302 | " \"conv2d_4\"]\n", 303 | " \n", 304 | "# combine the content and style layers into one list\n", 305 | "content_and_style_layers = style_layers + content_layers\n", 306 | "### END CODE HERE ###\n", 307 | "\n", 308 | "# count the number of content layers and style layers.\n", 309 | "# you will use these counts later in the assignment\n", 310 | "NUM_CONTENT_LAYERS = len(content_layers)\n", 311 | "NUM_STYLE_LAYERS = len(style_layers)" 312 | ], 313 | "execution_count": null, 314 | "outputs": [] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": { 319 | "id": "MGo9tQtlTtfQ" 320 | }, 321 | "source": [ 322 | "You can now setup your model to output the selected layers.\n" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "metadata": { 328 | "id": "nfec6MuMAbPx" 329 | }, 330 | "source": [ 331 | "def inception_model(layer_names):\n", 332 | " \"\"\" Creates a inception model that returns a list of intermediate output values.\n", 333 | " args:\n", 334 | " layer_names: a list of strings, representing the names of the desired content and style layers\n", 335 | " \n", 336 | " returns:\n", 337 | " A model that takes the regular inception v3 input and outputs just the content and style layers.\n", 338 | " \n", 339 | " \"\"\"\n", 340 | " \n", 341 | "### START CODE HERE ###\n", 342 | " # Load InceptionV3 with the imagenet weights and **without** the fully-connected layer at the top of the network\n", 343 | " inception = tf.keras.applications.inception_v3.InceptionV3(include_top= False)\n", 344 | "\n", 345 | " # Freeze the weights of the model's layers (make them not trainable)\n", 346 | " inception.trainable = False\n", 347 | " \n", 348 | " # Create a list of layer objects that are specified by layer_names\n", 349 | " output_layers = [inception.get_layer(name).output for name in layer_names]\n", 350 | "\n", 351 | " # Create the model that outputs the content and style layers\n", 352 | " model = tf.keras.models.Model(inputs = inception.input, outputs = output_layers)\n", 353 | " \n", 354 | " # return the model\n", 355 | " return model\n", 356 | "\n", 357 | "### END CODE HERE ###\n" 358 | ], 359 | "execution_count": null, 360 | "outputs": [] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": { 365 | "id": "IwJJkA0enTSX" 366 | }, 367 | "source": [ 368 | "Create an instance of the content and style model using the function that you just defined" 369 | ] 370 | }, 
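Once the instance exists (it is created in the next cell), a quick sanity check can confirm that the extractor returns one tensor per requested layer, style layers first. The snippet below is illustrative only; it assumes the notebook's imports and the `inception` instance from the next cell.

```python
# Run after `inception` is created below (illustrative, not part of the graded cells)
sample = tf.random.uniform((1, 299, 299, 3), minval=-1.0, maxval=1.0)  # stand-in for a preprocessed image
outputs = inception(sample)
print(len(outputs))   # 6 -> 5 style layers followed by 1 content layer
for name, out in zip(content_and_style_layers, outputs):
    print(name, out.shape)
```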
371 | { 372 | "cell_type": "code", 373 | "metadata": { 374 | "id": "6AYqTPpOnTSX" 375 | }, 376 | "source": [ 377 | "K.clear_session()\n", 378 | "\n", 379 | "### START CODE HERE ###\n", 380 | "inception = inception_model(content_and_style_layers)\n", 381 | "### END CODE HERE ###" 382 | ], 383 | "execution_count": null, 384 | "outputs": [] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": { 389 | "id": "jbaIvZf5wWn_" 390 | }, 391 | "source": [ 392 | "## Calculate style loss\n", 393 | "\n", 394 | "The style loss is the average of the squared differences between the features and targets." 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "metadata": { 400 | "id": "nv8hZU0oKIm_" 401 | }, 402 | "source": [ 403 | "def get_style_loss(features, targets):\n", 404 | " \"\"\"Expects two images of dimension h, w, c\n", 405 | " \n", 406 | " Args:\n", 407 | " features: tensor with shape: (height, width, channels)\n", 408 | " targets: tensor with shape: (height, width, channels)\n", 409 | "\n", 410 | " Returns:\n", 411 | " style loss (scalar)\n", 412 | " \"\"\"\n", 413 | " ### START CODE HERE ###\n", 414 | " \n", 415 | " # Calculate the style loss\n", 416 | " style_loss = tf.reduce_mean(tf.square(targets - features))\n", 417 | " \n", 418 | " ### END CODE HERE ###\n", 419 | " return style_loss" 420 | ], 421 | "execution_count": null, 422 | "outputs": [] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": { 427 | "id": "QDDiPF6YnTSY" 428 | }, 429 | "source": [ 430 | "## Calculate content loss\n", 431 | "\n", 432 | "Calculate the sum of the squared error between the features and targets, then multiply by a scaling factor (0.5)." 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "metadata": { 438 | "id": "et8M1lOgKL8o" 439 | }, 440 | "source": [ 441 | "def get_content_loss(features, targets):\n", 442 | " \"\"\"Expects two images of dimension h, w, c\n", 443 | " \n", 444 | " Args:\n", 445 | " features: tensor with shape: (height, width, channels)\n", 446 | " targets: tensor with shape: (height, width, channels)\n", 447 | " \n", 448 | " Returns:\n", 449 | " content loss (scalar)\n", 450 | " \"\"\"\n", 451 | " # get the sum of the squared error multiplied by a scaling factor\n", 452 | " content_loss = 0.5*tf.reduce_sum(tf.square(features - targets))\n", 453 | " \n", 454 | " return content_loss" 455 | ], 456 | "execution_count": null, 457 | "outputs": [] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": { 462 | "id": "2ygKYFw1nTSY" 463 | }, 464 | "source": [ 465 | "## Calculate the gram matrix\n", 466 | "\n", 467 | "Use `tf.linalg.einsum` to calculate the gram matrix for an input tensor.\n", 468 | "- In addition, calculate the scaling factor `num_locations` and divide the gram matrix calculation by `num_locations`.\n", 469 | "\n", 470 | "$$ \\text{num locations} = height \\times width $$" 471 | ] 472 | }, 473 | { 474 | "cell_type": "code", 475 | "metadata": { 476 | "id": "HAy1iGPdoEpZ" 477 | }, 478 | "source": [ 479 | "def gram_matrix(input_tensor):\n", 480 | " \"\"\" Calculates the gram matrix and divides by the number of locations\n", 481 | " Args:\n", 482 | " input_tensor: tensor of shape (batch, height, width, channels)\n", 483 | " \n", 484 | " Returns:\n", 485 | " scaled_gram: gram matrix divided by the number of locations\n", 486 | " \"\"\"\n", 487 | "\n", 488 | " # calculate the gram matrix of the input tensor\n", 489 | " gram = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor) \n", 490 | "\n", 491 | " # get the height and width of 
the input tensor\n", 492 | " input_shape = tf.shape(input_tensor) \n", 493 | " height = input_shape[1] \n", 494 | " width = input_shape[2] \n", 495 | "\n", 496 | " # get the number of locations (height times width), and cast it as a tf.float32\n", 497 | " num_locations = tf.cast(height * width, tf.float32)\n", 498 | "\n", 499 | " # scale the gram matrix by dividing by the number of locations\n", 500 | " scaled_gram = gram / num_locations\n", 501 | " \n", 502 | " return scaled_gram" 503 | ], 504 | "execution_count": null, 505 | "outputs": [] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": { 510 | "id": "3TYRWE0JnTSZ" 511 | }, 512 | "source": [ 513 | "## Get the style image features\n", 514 | "\n", 515 | "Given the style image as input, you'll get the style features of the inception model that you just created using `inception_model()`.\n", 516 | "- You'll first preprocess the image using the given `preprocess_image` function.\n", 517 | "- You'll then get the outputs of the model.\n", 518 | "- From the outputs, just get the style feature layers and not the content feature layer.\n", 519 | "\n", 520 | "You can run the following code to check the order of the layers in your inception model:" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "metadata": { 526 | "id": "YkVlPUMWnTSZ" 527 | }, 528 | "source": [ 529 | "tmp_layer_list = [layer.output for layer in inception.layers]\n", 530 | "tmp_layer_list" 531 | ], 532 | "execution_count": null, 533 | "outputs": [] 534 | }, 535 | { 536 | "cell_type": "markdown", 537 | "metadata": { 538 | "id": "D3IATpyxnTSZ" 539 | }, 540 | "source": [ 541 | "- For each style layer, calculate the gram matrix. Store these results in a list and return it." 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "metadata": { 547 | "id": "YzTK5qzG_MKh" 548 | }, 549 | "source": [ 550 | "def get_style_image_features(image): \n", 551 | " \"\"\" Get the style image features\n", 552 | " \n", 553 | " Args:\n", 554 | " image: an input image\n", 555 | " \n", 556 | " Returns:\n", 557 | " gram_style_features: the style features as gram matrices\n", 558 | " \"\"\"\n", 559 | " ### START CODE HERE ###\n", 560 | " # preprocess the image using the given preprocessing function\n", 561 | " preprocessed_style_image = preprocess_image(image)\n", 562 | "\n", 563 | " # get the outputs from the inception model that you created using inception_model()\n", 564 | " outputs = inception(preprocessed_style_image)\n", 565 | "\n", 566 | " # Get just the style feature layers (exclude the content layer)\n", 567 | " style_outputs = outputs[:NUM_STYLE_LAYERS]\n", 568 | "\n", 569 | " # for each style layer, calculate the gram matrix for that layer and store these results in a list\n", 570 | " gram_style_features = [gram_matrix(style_layer) for style_layer in style_outputs]\n", 571 | " ### END CODE HERE ###\n", 572 | " return gram_style_features" 573 | ], 574 | "execution_count": null, 575 | "outputs": [] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": { 580 | "id": "No7Yox0bnTSa" 581 | }, 582 | "source": [ 583 | "## Get content image features\n", 584 | "\n", 585 | "You will get the content features of the content image.\n", 586 | "- You can follow a similar process as you did with `get_style_image_features`.\n", 587 | "- For the content image, you will not calculate the gram matrix of these style features." 
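Since the split between style and content features hinges on the gram matrix, it may help to convince yourself of what `gram_matrix()` computes. The einsum `'bijc,bijd->bcd'` is simply the channel-by-channel dot product summed over all spatial locations. A small, optional check against an explicit reshape-and-matmul version (assuming the `gram_matrix()` cell above has been run):

```python
# Optional check: the einsum gram matrix equals F^T F (per batch) divided by
# the number of spatial locations, where F has shape (H*W, C).
import tensorflow as tf

x = tf.random.uniform((1, 4, 5, 3))                      # (batch, H, W, C)
flat = tf.reshape(x, (1, 4 * 5, 3))                      # (batch, H*W, C)
gram_manual = tf.matmul(flat, flat, transpose_a=True)    # (batch, C, C)
gram_manual = gram_manual / tf.cast(4 * 5, tf.float32)   # divide by num_locations

# should print a value close to zero (floating point error only)
print(tf.reduce_max(tf.abs(gram_manual - gram_matrix(x))).numpy())
```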
588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "metadata": { 593 | "id": "Y7rq02U9_a6L" 594 | }, 595 | "source": [ 596 | "def get_content_image_features(image):\n", 597 | " \"\"\" Get the content image features\n", 598 | " \n", 599 | " Args:\n", 600 | " image: an input image\n", 601 | " \n", 602 | " Returns:\n", 603 | " content_outputs: the content features of the image\n", 604 | " \"\"\"\n", 605 | "\n", 606 | " ### START CODE HERE ###\n", 607 | " # preprocess the image\n", 608 | " preprocessed_content_image = preprocess_image(image)\n", 609 | " \n", 610 | " # get the outputs from the inception model\n", 611 | " outputs = inception(preprocessed_content_image)\n", 612 | "\n", 613 | " # get the content layer of the outputs\n", 614 | " content_outputs = outputs[NUM_STYLE_LAYERS:]\n", 615 | "\n", 616 | " ### END CODE HERE ###\n", 617 | " return content_outputs" 618 | ], 619 | "execution_count": null, 620 | "outputs": [] 621 | }, 622 | { 623 | "cell_type": "markdown", 624 | "metadata": { 625 | "id": "p5gcMSfLnTSa" 626 | }, 627 | "source": [ 628 | "## Calculate the total loss\n", 629 | "\n", 630 | "Please define the total loss using the helper functions you just defined. As a refresher, the total loss is given by $L_{total} = \\beta L_{style} + \\alpha L_{content}$, where $\\beta$ and $\\alpha$ are the style and content weights, respectively.\n" 631 | ] 632 | }, 633 | { 634 | "cell_type": "code", 635 | "metadata": { 636 | "id": "q20XhIHnotQA" 637 | }, 638 | "source": [ 639 | "def get_style_content_loss(style_targets, style_outputs, content_targets, \n", 640 | " content_outputs, style_weight, content_weight):\n", 641 | " \"\"\" Combine the style and content loss\n", 642 | " \n", 643 | " Args:\n", 644 | " style_targets: style features of the style image\n", 645 | " style_outputs: style features of the generated image\n", 646 | " content_targets: content features of the content image\n", 647 | " content_outputs: content features of the generated image\n", 648 | " style_weight: weight given to the style loss\n", 649 | " content_weight: weight given to the content loss\n", 650 | "\n", 651 | " Returns:\n", 652 | " total_loss: the combined style and content loss\n", 653 | "\n", 654 | " \"\"\"\n", 655 | " \n", 656 | " # Sum of the style losses\n", 657 | " style_loss = tf.add_n([ get_style_loss(style_output, style_target)\n", 658 | " for style_output, style_target in zip(style_outputs, style_targets)])\n", 659 | " \n", 660 | " # Sum up the content losses\n", 661 | " content_loss = tf.add_n([get_content_loss(content_output, content_target)\n", 662 | " for content_output, content_target in zip(content_outputs, content_targets)])\n", 663 | "\n", 664 | " ### START CODE HERE ###\n", 665 | " # scale the style loss by multiplying by the style weight and dividing by the number of style layers\n", 666 | " style_loss = style_loss * style_weight / NUM_STYLE_LAYERS\n", 667 | "\n", 668 | " # scale the content loss by multiplying by the content weight and dividing by the number of content layers\n", 669 | " content_loss = content_loss * content_weight / NUM_CONTENT_LAYERS\n", 670 | " \n", 671 | " # sum up the style and content losses\n", 672 | " total_loss = style_loss + content_loss\n", 673 | " ### END CODE HERE ###\n", 674 | " # return the total loss\n", 675 | " return total_loss" 676 | ], 677 | "execution_count": null, 678 | "outputs": [] 679 | }, 680 | { 681 | "cell_type": "markdown", 682 | "metadata": { 683 | "id": "W6lE_zt8nTSb" 684 | }, 685 | "source": [ 686 | "## Calculate gradients\n", 687 | "\n", 
688 | "Please use `tf.GradientTape()` to get the gradients of the loss with respect to the input image. Take note that you will *not* need a regularization parameter in this exercise so we only provided the style and content weights as arguments." 689 | ] 690 | }, 691 | { 692 | "cell_type": "code", 693 | "metadata": { 694 | "id": "mp2g2tI58RI0" 695 | }, 696 | "source": [ 697 | "\n", 698 | "\n", 699 | "def calculate_gradients(image, style_targets, content_targets, \n", 700 | " style_weight, content_weight):\n", 701 | " \"\"\" Calculate the gradients of the loss with respect to the generated image\n", 702 | " Args:\n", 703 | " image: generated image\n", 704 | " style_targets: style features of the style image\n", 705 | " content_targets: content features of the content image\n", 706 | " style_weight: weight given to the style loss\n", 707 | " content_weight: weight given to the content loss\n", 708 | " \n", 709 | " Returns:\n", 710 | " gradients: gradients of the loss with respect to the input image\n", 711 | " \"\"\"\n", 712 | "\n", 713 | " ### START CODE HERE ###\n", 714 | " with tf.GradientTape() as tape:\n", 715 | " \n", 716 | " # get the style image features\n", 717 | " style_features = get_style_image_features(image) \n", 718 | " \n", 719 | " # get the content image features\n", 720 | " content_features = get_content_image_features(image) \n", 721 | " \n", 722 | " # get the style and content loss\n", 723 | " loss = get_style_content_loss(style_targets, style_features, content_targets, \n", 724 | " content_features, style_weight, content_weight) \n", 725 | "\n", 726 | " # calculate gradients of loss with respect to the image\n", 727 | " gradients = tape.gradient(loss, image) \n", 728 | "\n", 729 | " ### END CODE HERE ###\n", 730 | "\n", 731 | " return gradients" 732 | ], 733 | "execution_count": null, 734 | "outputs": [] 735 | }, 736 | { 737 | "cell_type": "markdown", 738 | "metadata": { 739 | "id": "I4drTvUNnTSb" 740 | }, 741 | "source": [ 742 | "## Update the image with the style\n", 743 | "\n", 744 | "Please define the helper function to apply the gradients to the generated/stylized image." 
745 | ] 746 | }, 747 | { 748 | "cell_type": "code", 749 | "metadata": { 750 | "id": "e-MPRxuGp-5A" 751 | }, 752 | "source": [ 753 | "def update_image_with_style(image, style_targets, content_targets, style_weight, \n", 754 | " content_weight, optimizer):\n", 755 | " \"\"\"\n", 756 | " Args:\n", 757 | " image: generated image\n", 758 | " style_targets: style features of the style image\n", 759 | " content_targets: content features of the content image\n", 760 | " style_weight: weight given to the style loss\n", 761 | " content_weight: weight given to the content loss\n", 762 | " optimizer: optimizer for updating the input image\n", 763 | " \"\"\"\n", 764 | "\n", 765 | " ### START CODE HERE ###\n", 766 | " # Calculate gradients using the function that you just defined.\n", 767 | " gradients =calculate_gradients(image, style_targets, content_targets, style_weight, \n", 768 | " content_weight)\n", 769 | "\n", 770 | " # apply the gradients to the given image\n", 771 | " optimizer.apply_gradients([(gradients, image)])\n", 772 | "\n", 773 | " ### END CODE HERE ###\n", 774 | " # Clip the image using the given clip_image_values() function\n", 775 | " image.assign(clip_image_values(image, min_value=0.0, max_value=255.0))" 776 | ], 777 | "execution_count": null, 778 | "outputs": [] 779 | }, 780 | { 781 | "cell_type": "markdown", 782 | "metadata": { 783 | "id": "foTOpNNw2Wp2" 784 | }, 785 | "source": [ 786 | "## Generate the stylized image\n", 787 | "\n", 788 | "Please complete the function below to implement neural style transfer between your content and style images." 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "metadata": { 794 | "id": "U0Btr_j9M1gu", 795 | "lines_to_next_cell": 2 796 | }, 797 | "source": [ 798 | "def fit_style_transfer(style_image, content_image, style_weight=1e-2, content_weight=1e-4, \n", 799 | " optimizer='adam', epochs=1, steps_per_epoch=1):\n", 800 | " \"\"\" Performs neural style transfer.\n", 801 | " Args:\n", 802 | " style_image: image to get style features from\n", 803 | " content_image: image to stylize \n", 804 | " style_targets: style features of the style image\n", 805 | " content_targets: content features of the content image\n", 806 | " style_weight: weight given to the style loss\n", 807 | " content_weight: weight given to the content loss\n", 808 | " optimizer: optimizer for updating the input image\n", 809 | " epochs: number of epochs\n", 810 | " steps_per_epoch = steps per epoch\n", 811 | " \n", 812 | " Returns:\n", 813 | " generated_image: generated image at final epoch\n", 814 | " images: collection of generated images per epoch \n", 815 | " \"\"\"\n", 816 | "\n", 817 | " images = []\n", 818 | " step = 0\n", 819 | "\n", 820 | " # get the style image features \n", 821 | " style_targets = get_style_image_features(style_image)\n", 822 | " \n", 823 | " # get the content image features\n", 824 | " content_targets = get_content_image_features(content_image)\n", 825 | "\n", 826 | " # initialize the generated image for updates\n", 827 | " generated_image = tf.cast(content_image, dtype=tf.float32)\n", 828 | " generated_image = tf.Variable(generated_image) \n", 829 | " \n", 830 | " # collect the image updates starting from the content image\n", 831 | " images.append(content_image)\n", 832 | " \n", 833 | " for n in range(epochs):\n", 834 | " for m in range(steps_per_epoch):\n", 835 | " step += 1\n", 836 | " \n", 837 | " ### START CODE HERE ###\n", 838 | " # Update the image with the style using the function that you defined\n", 839 | " 
update_image_with_style(generated_image, style_targets, content_targets, \n", 840 | " style_weight, content_weight, optimizer)\n", 841 | " \n", 842 | " ### END CODE HERE\n", 843 | "\n", 844 | " print(\".\", end='')\n", 845 | " if (m + 1) % 10 == 0:\n", 846 | " images.append(generated_image)\n", 847 | " \n", 848 | " # display the current stylized image\n", 849 | " clear_output(wait=True)\n", 850 | " display_image = tensor_to_image(generated_image)\n", 851 | " display_fn(display_image)\n", 852 | "\n", 853 | " # append to the image collection for visualization later\n", 854 | " images.append(generated_image)\n", 855 | " print(\"Train step: {}\".format(step))\n", 856 | " \n", 857 | " # convert to uint8 (expected dtype for images with pixels in the range [0,255])\n", 858 | " generated_image = tf.cast(generated_image, dtype=tf.uint8)\n", 859 | " \n", 860 | " return generated_image, images" 861 | ], 862 | "execution_count": null, 863 | "outputs": [] 864 | }, 865 | { 866 | "cell_type": "markdown", 867 | "metadata": { 868 | "id": "rFNfb_bpY6Qe" 869 | }, 870 | "source": [ 871 | "With all the helper functions defined, you can now run the main loop and generate the stylized image. This will take a few minutes to run." 872 | ] 873 | }, 874 | { 875 | "cell_type": "code", 876 | "metadata": { 877 | "id": "MtUgMzp8tHs6" 878 | }, 879 | "source": [ 880 | "# PLEASE DO NOT CHANGE THE SETTINGS HERE\n", 881 | "\n", 882 | "# define style and content weight\n", 883 | "style_weight = 1\n", 884 | "content_weight = 1e-32 \n", 885 | "\n", 886 | "# define optimizer. learning rate decreases per epoch.\n", 887 | "adam = tf.optimizers.Adam(\n", 888 | " tf.keras.optimizers.schedules.ExponentialDecay(\n", 889 | " initial_learning_rate=80.0, decay_steps=100, decay_rate=0.80\n", 890 | " )\n", 891 | ")\n", 892 | "\n", 893 | "# start the neural style transfer\n", 894 | "stylized_image, display_images = fit_style_transfer(style_image=style_image, content_image=content_image, \n", 895 | " style_weight=style_weight, content_weight=content_weight,\n", 896 | " optimizer=adam, epochs=10, steps_per_epoch=100)" 897 | ], 898 | "execution_count": null, 899 | "outputs": [] 900 | }, 901 | { 902 | "cell_type": "markdown", 903 | "metadata": { 904 | "id": "TlittQSqo-b_" 905 | }, 906 | "source": [ 907 | "When the loop completes, please right click the image you generated and download it for grading in the classroom.\n", 908 | "\n", 909 | "**Congratulations! 
You just completed the assignment on Neural Style Transfer!**\n" 910 | ] 911 | } 912 | ] 913 | } -------------------------------------------------------------------------------- /Course 4 - Generative Deep Learning with TensorFlow/Week 2/C4W2_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "accelerator": "GPU", 6 | "kernelspec": { 7 | "display_name": "Python 3", 8 | "language": "python", 9 | "name": "python3" 10 | }, 11 | "language_info": { 12 | "codemirror_mode": { 13 | "name": "ipython", 14 | "version": 3 15 | }, 16 | "file_extension": ".py", 17 | "mimetype": "text/x-python", 18 | "name": "python", 19 | "nbconvert_exporter": "python", 20 | "pygments_lexer": "ipython3", 21 | "version": "3.7.4" 22 | }, 23 | "colab": { 24 | "name": "C4W2_Assignment.ipynb", 25 | "private_outputs": true, 26 | "provenance": [], 27 | "collapsed_sections": [], 28 | "toc_visible": true 29 | } 30 | }, 31 | "cells": [ 32 | { 33 | "cell_type": "markdown", 34 | "metadata": { 35 | "id": "L6S2HVAkSt0p" 36 | }, 37 | "source": [ 38 | "# Week 2 Assignment: CIFAR-10 Autoencoder\n", 39 | "\n", 40 | "For this week, you will create a convolutional autoencoder for the [CIFAR10](https://www.tensorflow.org/datasets/catalog/cifar10) dataset. You are free to choose the architecture of your autoencoder provided that the output image has the same dimensions as the input image.\n", 41 | "\n", 42 | "After training, your model should meet loss and accuracy requirements when evaluated with the test dataset. You will then download the model and upload it in the classroom for grading. \n", 43 | "\n", 44 | "Let's begin!" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": { 50 | "id": "6r4iPr2jyisR" 51 | }, 52 | "source": [ 53 | "***Important:*** *This colab notebook has read-only access so you won't be able to save your changes. If you want to save your work periodically, please click `File -> Save a Copy in Drive` to create a copy in your account, then work from there.* " 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "id": "g1mzy2J8_nc1" 60 | }, 61 | "source": [ 62 | "## Imports" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "metadata": { 68 | "id": "3EXwoz-KHtWO" 69 | }, 70 | "source": [ 71 | "try:\n", 72 | " # %tensorflow_version only exists in Colab.\n", 73 | " %tensorflow_version 2.x\n", 74 | "except Exception:\n", 75 | " pass\n", 76 | "\n", 77 | "import tensorflow as tf\n", 78 | "import tensorflow_datasets as tfds\n", 79 | "\n", 80 | "from keras.models import Sequential" 81 | ], 82 | "execution_count": null, 83 | "outputs": [] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": { 88 | "id": "n2Gs6Lyc_pd0" 89 | }, 90 | "source": [ 91 | "## Load and prepare the dataset\n", 92 | "\n", 93 | "The [CIFAR 10](https://www.tensorflow.org/datasets/catalog/cifar10) dataset already has train and test splits and you can use those in this exercise. Here are the general steps:\n", 94 | "\n", 95 | "* Load the train/test split from TFDS. Set `as_supervised` to `True` so it will be convenient to use the preprocessing function we provided.\n", 96 | "* Normalize the pixel values to the range [0,1], then return `image, image` pairs for training instead of `image, label`. This is because you will check if the output image is successfully regenerated after going through your autoencoder.\n", 97 | "* Shuffle and batch the train set. 
Batch the test set (no need to shuffle).\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "t9F7YsCNIKSA" 104 | }, 105 | "source": [ 106 | "# preprocessing function\n", 107 | "def map_image(image, label):\n", 108 | " image = tf.cast(image, dtype=tf.float32)\n", 109 | " image = image / 255.0\n", 110 | "\n", 111 | " return image, image # dataset label is not used. replaced with the same image input.\n", 112 | "\n", 113 | "# parameters\n", 114 | "BATCH_SIZE = 128\n", 115 | "SHUFFLE_BUFFER_SIZE = 1024\n", 116 | "\n", 117 | "\n", 118 | "### START CODE HERE (Replace instances of `None` with your code) ###\n", 119 | "\n", 120 | "# use tfds.load() to fetch the 'train' split of CIFAR-10\n", 121 | "train_dataset = tfds.load('cifar10', as_supervised = True, split = 'train')\n", 122 | "\n", 123 | "# preprocess the dataset with the `map_image()` function above\n", 124 | "train_dataset = train_dataset.map(map_image) \n", 125 | "\n", 126 | "# shuffle and batch the dataset\n", 127 | "train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)\n", 128 | "\n", 129 | "\n", 130 | "# use tfds.load() to fetch the 'test' split of CIFAR-10\n", 131 | "test_dataset = tfds.load('cifar10', as_supervised = True, split = 'test')\n", 132 | "\n", 133 | "# preprocess the dataset with the `map_image()` function above\n", 134 | "test_dataset = test_dataset.map(map_image) \n", 135 | "\n", 136 | "# batch the dataset\n", 137 | "test_dataset = test_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)\n", 138 | "\n", 139 | "### END CODE HERE ###" 140 | ], 141 | "execution_count": null, 142 | "outputs": [] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": { 147 | "id": "rPyOgGJs_t98" 148 | }, 149 | "source": [ 150 | "## Build the Model\n", 151 | "\n", 152 | "Create the autoencoder model. As shown in the lectures, you will want to downsample the image in the encoder layers then upsample it in the decoder path. Note that the output layer should be the same dimensions as the original image. Your input images will have the shape `(32, 32, 3)`. If you deviate from this, your model may not be recognized by the grader and may fail. \n", 153 | "\n", 154 | "We included a few hints to use the Sequential API below but feel free to remove it and use the Functional API just like in the ungraded labs if you're more comfortable with it. Another reason to use the latter is if you want to visualize the encoder output. As shown in the ungraded labs, it will be easier to indicate multiple outputs with the Functional API. That is not required for this assignment though so you can just stack layers sequentially if you want a simpler solution." 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "metadata": { 160 | "id": "Wr-Bok3lRgA3" 161 | }, 162 | "source": [ 163 | "# suggested layers to use. 
feel free to add or remove as you see fit.\n", 164 | "from keras.layers import Conv2D, UpSampling2D\n", 165 | "\n", 166 | "# use the Sequential API (you can remove if you want to use the Functional API)\n", 167 | "model = Sequential()\n", 168 | "\n", 169 | "### START CODE HERE ###\n", 170 | "# use `model.add()` to add layers (if using the Sequential API)\n", 171 | "model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu', input_shape=(32, 32, 3)))\n", 172 | "model.add(tf.keras.layers.BatchNormalization()) \n", 173 | "model.add(Conv2D(32, kernel_size=3, strides=2, padding='same', activation='relu')) \n", 174 | "model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu')) \n", 175 | "model.add(tf.keras.layers.BatchNormalization()) \n", 176 | "model.add(UpSampling2D())\n", 177 | "model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', activation='relu')) \n", 178 | "model.add(tf.keras.layers.BatchNormalization()) \n", 179 | "model.add(Conv2D(3, kernel_size=1, strides=1, padding='same', activation='sigmoid')) \n", 180 | "\n", 181 | "model.compile(optimizer='adam', metrics=['accuracy'], loss='mean_squared_error')\n", 182 | "### END CODE HERE ###\n", 183 | "\n", 184 | "model.summary()" 185 | ], 186 | "execution_count": null, 187 | "outputs": [] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "id": "jRWTAijKEVUC" 193 | }, 194 | "source": [ 195 | "## Configure training parameters\n", 196 | "\n", 197 | "We have already provided the optimizer, metrics, and loss in the code below." 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "metadata": { 203 | "id": "iHIeD9eDETSk" 204 | }, 205 | "source": [ 206 | "# Please do not change the model.compile() parameters\n", 207 | "model.compile(optimizer='adam', metrics=['accuracy'], loss='mean_squared_error')" 208 | ], 209 | "execution_count": null, 210 | "outputs": [] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "id": "tLQPhm1W_8dC" 216 | }, 217 | "source": [ 218 | "## Training\n", 219 | "\n", 220 | "You can now use [model.fit()](https://keras.io/api/models/model_training_apis/#fit-method) to train your model. You will pass in the `train_dataset` and you are free to configure the other parameters. As with any training, you should see the loss generally going down and the accuracy going up with each epoch. If not, please revisit the previous sections to find possible bugs." 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "metadata": { 226 | "id": "AMBimOnsRvg0" 227 | }, 228 | "source": [ 229 | "# parameters\n", 230 | "train_steps = len(train_dataset) // BATCH_SIZE \n", 231 | "val_steps = len(test_dataset) // BATCH_SIZE\n", 232 | "\n", 233 | "### START CODE HERE ###\n", 234 | "model.fit(train_dataset, validation_data= test_dataset, epochs= 10)\n", 235 | "### END CODE HERE ###" 236 | ], 237 | "execution_count": null, 238 | "outputs": [] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": { 243 | "id": "PT2l1c-SAaF4" 244 | }, 245 | "source": [ 246 | "## Model evaluation\n", 247 | "\n", 248 | "You can use this code to test your model locally before uploading to the grader. 
To pass, your model needs to satisfy these two requirements:\n", 249 | "\n", 250 | "* loss must be less than 0.01 \n", 251 | "* accuracy must be greater than 0.6" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "metadata": { 257 | "id": "vFncgqahSQhA" 258 | }, 259 | "source": [ 260 | "result = model.evaluate(test_dataset, steps=10)" 261 | ], 262 | "execution_count": null, 263 | "outputs": [] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "metadata": { 268 | "id": "di6VOHGwIsVM" 269 | }, 270 | "source": [ 271 | "If you did some visualization like in the ungraded labs, then you might see something like the gallery below. This part is not required." 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": { 277 | "id": "wmpI4skkIA5L" 278 | }, 279 | "source": [ 280 | "" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "id": "uaRSkQPNAPT0" 287 | }, 288 | "source": [ 289 | "## Save your model\n", 290 | "\n", 291 | "Once you are satisfied with the results, you can now save your model. Please download it from the Files window on the left and go back to the Submission portal in Coursera for grading." 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "metadata": { 297 | "id": "pLFpLP-c7rDR" 298 | }, 299 | "source": [ 300 | "model.save('mymodel.h5')" 301 | ], 302 | "execution_count": null, 303 | "outputs": [] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": { 308 | "id": "QArMiXJTDxDe" 309 | }, 310 | "source": [ 311 | "**Congratulations on completing this week's assignment!**" 312 | ] 313 | } 314 | ] 315 | } -------------------------------------------------------------------------------- /Course 4 - Generative Deep Learning with TensorFlow/Week 3/C4W3_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "jupytext": { 6 | "encoding": "# -*- coding: utf-8 -*-" 7 | }, 8 | "kernelspec": { 9 | "display_name": "Python 3", 10 | "language": "python", 11 | "name": "python3" 12 | }, 13 | "colab": { 14 | "name": "C4W3_Assignment.ipynb", 15 | "private_outputs": true, 16 | "provenance": [], 17 | "collapsed_sections": [] 18 | }, 19 | "accelerator": "GPU" 20 | }, 21 | "cells": [ 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "id": "prTKL3d2kGZE" 26 | }, 27 | "source": [ 28 | "# Week 3: Variational Autoencoders on Anime Faces\n", 29 | "\n", 30 | "For this exercise, you will train a Variational Autoencoder (VAE) using the [anime faces dataset by MckInsey666](https://github.com/bchao1/Anime-Face-Dataset). \n", 31 | "\n", 32 | "You will train the model using the techniques discussed in class. At the end, you should save your model and download it from Colab so that it can be submitted to the autograder for grading." 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": { 38 | "id": "nms7__aqDj48" 39 | }, 40 | "source": [ 41 | "***Important:*** *This colab notebook has read-only access so you won't be able to save your changes. 
If you want to save your work periodically, please click `File -> Save a Copy in Drive` to create a copy in your account, then work from there.* " 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": { 47 | "id": "2Qxq9uZAk3Lh" 48 | }, 49 | "source": [ 50 | "## Imports" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "metadata": { 56 | "id": "MooRFGEeI1zb" 57 | }, 58 | "source": [ 59 | "import tensorflow as tf\n", 60 | "import tensorflow_datasets as tfds\n", 61 | "\n", 62 | "import matplotlib.pyplot as plt\n", 63 | "import numpy as np\n", 64 | "\n", 65 | "import os\n", 66 | "import zipfile\n", 67 | "import urllib.request\n", 68 | "import random\n", 69 | "from IPython import display" 70 | ], 71 | "execution_count": null, 72 | "outputs": [] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "id": "wL9rq-0uk7nS" 78 | }, 79 | "source": [ 80 | "## Parameters" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "metadata": { 86 | "id": "pjhN6GgfmUfx" 87 | }, 88 | "source": [ 89 | "# set a random seed\n", 90 | "np.random.seed(51)\n", 91 | "\n", 92 | "# parameters for building the model and training\n", 93 | "BATCH_SIZE=2000\n", 94 | "LATENT_DIM=512\n", 95 | "IMAGE_SIZE=64" 96 | ], 97 | "execution_count": null, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": { 103 | "id": "hXTdjxmolDBo" 104 | }, 105 | "source": [ 106 | "## Download the Dataset\n", 107 | "\n", 108 | "You will download the Anime Faces dataset and save it to a local directory." 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "metadata": { 114 | "id": "qxKW6Q88KHcL" 115 | }, 116 | "source": [ 117 | "# make the data directory\n", 118 | "try:\n", 119 | " os.mkdir('/tmp/anime')\n", 120 | "except OSError:\n", 121 | " pass\n", 122 | "\n", 123 | "# download the zipped dataset to the data directory\n", 124 | "data_url = \"https://storage.googleapis.com/laurencemoroney-blog.appspot.com/Resources/anime-faces.zip\"\n", 125 | "data_file_name = \"animefaces.zip\"\n", 126 | "download_dir = '/tmp/anime/'\n", 127 | "urllib.request.urlretrieve(data_url, data_file_name)\n", 128 | "\n", 129 | "# extract the zip file\n", 130 | "zip_ref = zipfile.ZipFile(data_file_name, 'r')\n", 131 | "zip_ref.extractall(download_dir)\n", 132 | "zip_ref.close()" 133 | ], 134 | "execution_count": null, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "id": "kD6WCIlclWaA" 141 | }, 142 | "source": [ 143 | "## Prepare the Dataset" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": { 149 | "id": "kbaVpD18ggOX" 150 | }, 151 | "source": [ 152 | "Next is preparing the data for training and validation. We've provided you some utilities below." 
153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "metadata": { 158 | "id": "NTlx97U_JDPB" 159 | }, 160 | "source": [ 161 | "# Data Preparation Utilities\n", 162 | "\n", 163 | "def get_dataset_slice_paths(image_dir):\n", 164 | " '''returns a list of paths to the image files'''\n", 165 | " image_file_list = os.listdir(image_dir)\n", 166 | " image_paths = [os.path.join(image_dir, fname) for fname in image_file_list]\n", 167 | "\n", 168 | " return image_paths\n", 169 | "\n", 170 | "\n", 171 | "def map_image(image_filename):\n", 172 | " '''preprocesses the images'''\n", 173 | " img_raw = tf.io.read_file(image_filename)\n", 174 | " image = tf.image.decode_jpeg(img_raw)\n", 175 | "\n", 176 | " image = tf.cast(image, dtype=tf.float32)\n", 177 | " image = tf.image.resize(image, (IMAGE_SIZE, IMAGE_SIZE))\n", 178 | " image = image / 255.0 \n", 179 | " image = tf.reshape(image, shape=(IMAGE_SIZE, IMAGE_SIZE, 3,))\n", 180 | "\n", 181 | " return image" 182 | ], 183 | "execution_count": null, 184 | "outputs": [] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": { 189 | "id": "0uFon6vdhMhi" 190 | }, 191 | "source": [ 192 | "You will use the functions above to generate the train and validation sets." 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "metadata": { 198 | "id": "iGoCJ6DPJHL8" 199 | }, 200 | "source": [ 201 | "# get the list containing the image paths\n", 202 | "paths = get_dataset_slice_paths(\"/tmp/anime/images/\")\n", 203 | "\n", 204 | "# shuffle the paths\n", 205 | "random.shuffle(paths)\n", 206 | "\n", 207 | "# split the paths list into to training (80%) and validation sets(20%).\n", 208 | "paths_len = len(paths)\n", 209 | "train_paths_len = int(paths_len * 0.8)\n", 210 | "\n", 211 | "train_paths = paths[:train_paths_len]\n", 212 | "val_paths = paths[train_paths_len:]\n", 213 | "\n", 214 | "# load the training image paths into tensors, create batches and shuffle\n", 215 | "training_dataset = tf.data.Dataset.from_tensor_slices((train_paths))\n", 216 | "training_dataset = training_dataset.map(map_image)\n", 217 | "training_dataset = training_dataset.shuffle(1000).batch(BATCH_SIZE)\n", 218 | "\n", 219 | "# load the validation image paths into tensors and create batches\n", 220 | "validation_dataset = tf.data.Dataset.from_tensor_slices((val_paths))\n", 221 | "validation_dataset = validation_dataset.map(map_image)\n", 222 | "validation_dataset = validation_dataset.batch(BATCH_SIZE)\n", 223 | "\n", 224 | "\n", 225 | "print(f'number of batches in the training set: {len(training_dataset)}')\n", 226 | "print(f'number of batches in the validation set: {len(validation_dataset)}')" 227 | ], 228 | "execution_count": null, 229 | "outputs": [] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": { 234 | "id": "72ZRga9vlonx" 235 | }, 236 | "source": [ 237 | "## Display Utilities\n", 238 | "\n", 239 | "We've also provided some utilities to help in visualizing the data." 
240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "metadata": { 245 | "id": "jC1cpLViJLIu" 246 | }, 247 | "source": [ 248 | "def display_faces(dataset, size=9):\n", 249 | " '''Takes a sample from a dataset batch and plots it in a grid.'''\n", 250 | " dataset = dataset.unbatch().take(size)\n", 251 | " n_cols = 3\n", 252 | " n_rows = size//n_cols + 1\n", 253 | " plt.figure(figsize=(5, 5))\n", 254 | " i = 0\n", 255 | " for image in dataset:\n", 256 | " i += 1\n", 257 | " disp_img = np.reshape(image, (64,64,3))\n", 258 | " plt.subplot(n_rows, n_cols, i)\n", 259 | " plt.xticks([])\n", 260 | " plt.yticks([])\n", 261 | " plt.imshow(disp_img)\n", 262 | "\n", 263 | "\n", 264 | "def display_one_row(disp_images, offset, shape=(28, 28)):\n", 265 | " '''Displays a row of images.'''\n", 266 | " for idx, image in enumerate(disp_images):\n", 267 | " plt.subplot(3, 10, offset + idx + 1)\n", 268 | " plt.xticks([])\n", 269 | " plt.yticks([])\n", 270 | " image = np.reshape(image, shape)\n", 271 | " plt.imshow(image)\n", 272 | "\n", 273 | "\n", 274 | "def display_results(disp_input_images, disp_predicted):\n", 275 | " '''Displays input and predicted images.'''\n", 276 | " plt.figure(figsize=(15, 5))\n", 277 | " display_one_row(disp_input_images, 0, shape=(IMAGE_SIZE,IMAGE_SIZE,3))\n", 278 | " display_one_row(disp_predicted, 20, shape=(IMAGE_SIZE,IMAGE_SIZE,3))\n" 279 | ], 280 | "execution_count": null, 281 | "outputs": [] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "id": "2brROh6qLJbs" 287 | }, 288 | "source": [ 289 | "Let's see some of the anime faces from the validation dataset." 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "metadata": { 295 | "id": "5eZsrZtqJOzv" 296 | }, 297 | "source": [ 298 | "display_faces(validation_dataset, size=12)" 299 | ], 300 | "execution_count": null, 301 | "outputs": [] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": { 306 | "id": "LSBtdCVim9aC" 307 | }, 308 | "source": [ 309 | "## Build the Model" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": { 315 | "id": "WQvzWaNqLrB1" 316 | }, 317 | "source": [ 318 | "You will be building your VAE in the following sections. Recall that this will follow and encoder-decoder architecture and can be summarized by the figure below.\n", 319 | "\n", 320 | "" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": { 326 | "id": "KHNxIUUS9ng9" 327 | }, 328 | "source": [ 329 | "### Sampling Class\n", 330 | "\n", 331 | "You will start with the custom layer to provide the Gaussian noise input along with the mean (mu) and standard deviation (sigma) of the encoder's output. 
Recall the equation to combine these:\n", 332 | "\n", 333 | "$$z = \\mu + e^{0.5\\sigma} * \\epsilon $$\n", 334 | "\n", 335 | "where $\\mu$ = mean, $\\sigma$ = standard deviation, and $\\epsilon$ = random sample" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "metadata": { 341 | "id": "L-3qk6ZBm0Fl" 342 | }, 343 | "source": [ 344 | "class Sampling(tf.keras.layers.Layer):\n", 345 | " def call(self, inputs):\n", 346 | " \"\"\"Generates a random sample and combines with the encoder output\n", 347 | " \n", 348 | " Args:\n", 349 | " inputs -- output tensor from the encoder\n", 350 | "\n", 351 | " Returns:\n", 352 | " `inputs` tensors combined with a random sample\n", 353 | " \"\"\"\n", 354 | " ### START CODE HERE ###\n", 355 | " mu, sigma = inputs\n", 356 | " batch = tf.shape(mu)[0]\n", 357 | " dim = tf.shape(mu)[1]\n", 358 | " epsilon = tf.keras.backend.random_normal(shape=(batch, dim))\n", 359 | " z = mu + tf.exp(0.5 * sigma) * epsilon\n", 360 | " ### END CODE HERE ###\n", 361 | " return z" 362 | ], 363 | "execution_count": null, 364 | "outputs": [] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": { 369 | "id": "tZjCSa7Y-Gvk" 370 | }, 371 | "source": [ 372 | "### Encoder Layers\n", 373 | "\n", 374 | "Next, please use the Functional API to stack the encoder layers and output `mu`, `sigma` and the shape of the features before flattening. We expect you to use 3 convolutional layers (instead of 2 in the ungraded lab) but feel free to revise as you see fit. Another hint is to use `1024` units in the Dense layer before you get mu and sigma (we used `20` for it in the ungraded lab).\n", 375 | "\n", 376 | "*Note: If you did Week 4 before Week 3, please do not use LeakyReLU activations yet for this particular assignment. The grader for Week 3 does not support LeakyReLU yet. 
This will be updated but for now, you can use `relu` and `sigmoid` just like in the ungraded lab.*" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "metadata": { 382 | "id": "7VSVYjDim4Dk" 383 | }, 384 | "source": [ 385 | "def encoder_layers(inputs, latent_dim):\n", 386 | " \"\"\"Defines the encoder's layers.\n", 387 | " Args:\n", 388 | " inputs -- batch from the dataset\n", 389 | " latent_dim -- dimensionality of the latent space\n", 390 | "\n", 391 | " Returns:\n", 392 | " mu -- learned mean\n", 393 | " sigma -- learned standard deviation\n", 394 | " batch_3.shape -- shape of the features before flattening\n", 395 | " \"\"\"\n", 396 | " ### START CODE HERE ###\n", 397 | " x = tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=2, padding=\"same\", activation='relu', name=\"encode_conv1\")(inputs)\n", 398 | " x = tf.keras.layers.BatchNormalization()(x)\n", 399 | "\n", 400 | " x = tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=2, padding='same', activation='relu', name=\"encode_conv2\")(x)\n", 401 | " x = tf.keras.layers.BatchNormalization()(x)\n", 402 | "\n", 403 | " x = tf.keras.layers.Conv2D(filters=128, kernel_size=3, strides=2, padding='same', activation='relu', name=\"encode_conv3\")(x)\n", 404 | " batch_3 = tf.keras.layers.BatchNormalization()(x) \n", 405 | "\n", 406 | " x = tf.keras.layers.Flatten(name=\"encode_flatten\")(batch_3)\n", 407 | " x = tf.keras.layers.Dense(1024, activation='relu', name=\"encode_dense\")(x)\n", 408 | "\n", 409 | " x = tf.keras.layers.BatchNormalization()(x)\n", 410 | " mu = tf.keras.layers.Dense(latent_dim, name='latent_mu')(x)\n", 411 | " sigma = tf.keras.layers.Dense(latent_dim, name ='latent_sigma')(x)\n", 412 | " ### END CODE HERE ###\n", 413 | "\n", 414 | " # revise `batch_3.shape` here if you opted not to use 3 Conv2D layers\n", 415 | " return mu, sigma, batch_3.shape" 416 | ], 417 | "execution_count": null, 418 | "outputs": [] 419 | }, 420 | { 421 | "cell_type": "markdown", 422 | "metadata": { 423 | "id": "ZOy7wPPY-g-N" 424 | }, 425 | "source": [ 426 | "### Encoder Model\n", 427 | "\n", 428 | "You will feed the output from the above function to the `Sampling layer` you defined earlier. That will have the latent representations that can be fed to the decoder network later. Please complete the function below to build the encoder network with the `Sampling` layer." 
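As a quick check on the shapes before wiring in the `Sampling` layer: three stride-2 convolutions reduce a 64x64 input to 32x32, then 16x16, then 8x8, so with 128 filters in the last block `batch_3.shape` should be `(None, 8, 8, 128)`, and `mu` and `sigma` should each have `LATENT_DIM` units. An optional probe (assuming the cells above have been run) could be:

```python
# Optional shape probe for encoder_layers(); assumes LATENT_DIM is defined above.
import tensorflow as tf

probe_inputs = tf.keras.layers.Input(shape=(64, 64, 3))
mu, sigma, conv_shape = encoder_layers(probe_inputs, latent_dim=LATENT_DIM)

print(conv_shape)   # expected: (None, 8, 8, 128)
print(mu.shape)     # expected: (None, 512)
print(sigma.shape)  # expected: (None, 512)

# discard the probe layers before building the real encoder model
tf.keras.backend.clear_session()
```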
429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "metadata": { 434 | "id": "w8Y-wLFym60N" 435 | }, 436 | "source": [ 437 | "def encoder_model(latent_dim, input_shape):\n", 438 | " \"\"\"Defines the encoder model with the Sampling layer\n", 439 | " Args:\n", 440 | " latent_dim -- dimensionality of the latent space\n", 441 | " input_shape -- shape of the dataset batch\n", 442 | "\n", 443 | " Returns:\n", 444 | " model -- the encoder model\n", 445 | " conv_shape -- shape of the features before flattening\n", 446 | " \"\"\"\n", 447 | " ### START CODE HERE ###\n", 448 | " inputs = tf.keras.layers.Input(shape=input_shape)\n", 449 | " mu, sigma, conv_shape = encoder_layers(inputs, latent_dim=LATENT_DIM)\n", 450 | " z = Sampling()((mu, sigma))\n", 451 | " model = tf.keras.Model(inputs, outputs=[mu, sigma, z])\n", 452 | " ### END CODE HERE ###\n", 453 | " model.summary()\n", 454 | " return model, conv_shape" 455 | ], 456 | "execution_count": null, 457 | "outputs": [] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": { 462 | "id": "A9ENB-6a-0R5" 463 | }, 464 | "source": [ 465 | "### Decoder Layers\n", 466 | "\n", 467 | "Next, you will define the decoder layers. This will expand the latent representations back to the original image dimensions. After training your VAE model, you can use this decoder model to generate new data by feeding random inputs." 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "metadata": { 473 | "id": "qlTjAzgsm9Vn" 474 | }, 475 | "source": [ 476 | "def decoder_layers(inputs, conv_shape):\n", 477 | " \"\"\"Defines the decoder layers.\n", 478 | " Args:\n", 479 | " inputs -- output of the encoder \n", 480 | " conv_shape -- shape of the features before flattening\n", 481 | "\n", 482 | " Returns:\n", 483 | " tensor containing the decoded output\n", 484 | " \"\"\"\n", 485 | " ### START CODE HERE ###\n", 486 | " units = conv_shape[1] * conv_shape[2] * conv_shape[3]\n", 487 | " x = tf.keras.layers.Dense(units, activation = 'relu', name=\"decode_dense1\")(inputs)\n", 488 | " x = tf.keras.layers.BatchNormalization()(x)\n", 489 | " x = tf.keras.layers.Reshape((conv_shape[1], conv_shape[2], conv_shape[3]), name=\"decode_reshape\")(x)\n", 490 | "\n", 491 | " x = tf.keras.layers.Conv2DTranspose(filters=128, kernel_size=3, strides=2, padding='same', activation='relu', name=\"decode_conv2d_1\")(x)\n", 492 | " x = tf.keras.layers.BatchNormalization()(x)\n", 493 | " \n", 494 | " x = tf.keras.layers.Conv2DTranspose(filters=64, kernel_size=3, strides=2, padding='same', activation='relu', name=\"decode_conv2d_12\")(x)\n", 495 | " x = tf.keras.layers.BatchNormalization()(x)\n", 496 | " \n", 497 | " x = tf.keras.layers.Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='same', activation='relu', name=\"decode_conv2d_2\")(x)\n", 498 | " x = tf.keras.layers.BatchNormalization()(x) \n", 499 | "\n", 500 | " x = tf.keras.layers.Conv2DTranspose(filters=3, kernel_size=3, strides=1, padding='same', activation='sigmoid', name=\"decode_final\")(x)\n", 501 | " ### END CODE HERE ###\n", 502 | " return x" 503 | ], 504 | "execution_count": null, 505 | "outputs": [] 506 | }, 507 | { 508 | "cell_type": "markdown", 509 | "metadata": { 510 | "id": "cfLLz84r_MlN" 511 | }, 512 | "source": [ 513 | "### Decoder Model\n", 514 | "\n", 515 | "Please complete the function below to output the decoder model." 
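The decoder mirrors the encoder: the dense layer re-creates the 8 x 8 x 128 feature map, and the three stride-2 `Conv2DTranspose` layers upsample 8x8 to 16x16, 32x32, and finally 64x64, so the output matches the `(64, 64, 3)` input. Once both models are built, an optional round-trip check (a sketch only, assuming the completed functions above and below) could be:

```python
# Optional round-trip shape check; assumes encoder_model(), decoder_model(),
# and LATENT_DIM from the surrounding cells.
import tensorflow as tf

enc, conv_shape = encoder_model(latent_dim=LATENT_DIM, input_shape=(64, 64, 3))
dec = decoder_model(latent_dim=LATENT_DIM, conv_shape=conv_shape)

dummy_batch = tf.random.uniform((2, 64, 64, 3))
mu, sigma, z = enc(dummy_batch)
print(dec(z).shape)  # expected: (2, 64, 64, 3)
```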
516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "metadata": { 521 | "id": "sUgTyqNFm_jR" 522 | }, 523 | "source": [ 524 | "def decoder_model(latent_dim, conv_shape):\n", 525 | " \"\"\"Defines the decoder model.\n", 526 | " Args:\n", 527 | " latent_dim -- dimensionality of the latent space\n", 528 | " conv_shape -- shape of the features before flattening\n", 529 | "\n", 530 | " Returns:\n", 531 | " model -- the decoder model\n", 532 | " \"\"\"\n", 533 | " ### START CODE HERE ###\n", 534 | " inputs = tf.keras.layers.Input(shape=(latent_dim,))\n", 535 | " outputs = decoder_layers(inputs, conv_shape)\n", 536 | " model = tf.keras.Model(inputs, outputs)\n", 537 | " ### END CODE HERE ###\n", 538 | " model.summary()\n", 539 | " return model" 540 | ], 541 | "execution_count": null, 542 | "outputs": [] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": { 547 | "id": "ps0yuE1d_cQc" 548 | }, 549 | "source": [ 550 | "### Kullback–Leibler Divergence\n", 551 | "\n", 552 | "Next, you will define the function to compute the [Kullback–Leibler Divergence](https://arxiv.org/abs/2002.07514) loss. This will be used to improve the generative capability of the model. This code is already given.\n" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "metadata": { 558 | "id": "tngFmDDwnDn-" 559 | }, 560 | "source": [ 561 | "def kl_reconstruction_loss(inputs, outputs, mu, sigma):\n", 562 | " \"\"\" Computes the Kullback-Leibler Divergence (KLD)\n", 563 | " Args:\n", 564 | " inputs -- batch from the dataset\n", 565 | " outputs -- output of the Sampling layer\n", 566 | " mu -- mean\n", 567 | " sigma -- standard deviation\n", 568 | "\n", 569 | " Returns:\n", 570 | " KLD loss\n", 571 | " \"\"\"\n", 572 | " kl_loss = 1 + sigma - tf.square(mu) - tf.math.exp(sigma)\n", 573 | " return tf.reduce_mean(kl_loss) * -0.5" 574 | ], 575 | "execution_count": null, 576 | "outputs": [] 577 | }, 578 | { 579 | "cell_type": "markdown", 580 | "metadata": { 581 | "id": "Pi1I431I_og7" 582 | }, 583 | "source": [ 584 | "### Putting it all together\n", 585 | "\n", 586 | "Please define the whole VAE model. Remember to use `model.add_loss()` to add the KL reconstruction loss. This will be accessed and added to the loss later in the training loop." 
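`model.add_loss()` registers an extra loss tensor on the model; anything added this way shows up in the model's `losses` list, which is why the training loop later adds `sum(vae.losses)` to the reconstruction loss. A tiny illustration of the mechanism, independent of the VAE and using a made-up penalty, might look like this:

```python
# Illustration of model.add_loss(): the registered tensor is exposed through
# model.losses and can be summed into a custom training loop. The L2 penalty
# here is invented purely for demonstration.
import tensorflow as tf

inputs = tf.keras.Input(shape=(4,))
outputs = tf.keras.layers.Dense(2)(inputs)
demo = tf.keras.Model(inputs, outputs)
demo.add_loss(1e-3 * tf.reduce_sum(tf.square(outputs)))  # extra loss term

with tf.GradientTape() as tape:
    preds = demo(tf.ones((1, 4)))
    total_loss = tf.reduce_mean(tf.square(preds)) + sum(demo.losses)
print(total_loss)
```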
587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "metadata": { 592 | "id": "cuPHg28JnGCp" 593 | }, 594 | "source": [ 595 | "def vae_model(encoder, decoder, input_shape):\n", 596 | " \"\"\"Defines the VAE model\n", 597 | " Args:\n", 598 | " encoder -- the encoder model\n", 599 | " decoder -- the decoder model\n", 600 | " input_shape -- shape of the dataset batch\n", 601 | "\n", 602 | " Returns:\n", 603 | " the complete VAE model\n", 604 | " \"\"\"\n", 605 | " ### START CODE HERE ###\n", 606 | " inputs = tf.keras.layers.Input(shape=input_shape)\n", 607 | "\n", 608 | " # get mu, sigma, and z from the encoder output\n", 609 | " mu, sigma, z = encoder(inputs)\n", 610 | " \n", 611 | " # get reconstructed output from the decoder\n", 612 | " reconstructed = decoder(z)\n", 613 | "\n", 614 | " # define the inputs and outputs of the VAE\n", 615 | " model = tf.keras.Model(inputs=inputs, outputs=reconstructed)\n", 616 | "\n", 617 | " # add the KL loss\n", 618 | " loss = kl_reconstruction_loss(inputs, z, mu, sigma)\n", 619 | " model.add_loss(loss)\n", 620 | " ### END CODE HERE ###\n", 621 | " return model" 622 | ], 623 | "execution_count": null, 624 | "outputs": [] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": { 629 | "id": "P_lbWSKbALf-" 630 | }, 631 | "source": [ 632 | "Next, please define a helper function to return the encoder, decoder, and vae models you just defined.\n" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "metadata": { 638 | "id": "hnPo0Pr3nIts", 639 | "lines_to_next_cell": 2 640 | }, 641 | "source": [ 642 | "def get_models(input_shape, latent_dim):\n", 643 | " \"\"\"Returns the encoder, decoder, and vae models\"\"\"\n", 644 | " ### START CODE HERE ###\n", 645 | " encoder, conv_shape = encoder_model(latent_dim=latent_dim, input_shape=input_shape)\n", 646 | " decoder = decoder_model(latent_dim=latent_dim, conv_shape=conv_shape)\n", 647 | " vae = vae_model(encoder, decoder, input_shape=input_shape)\n", 648 | " ### END CODE HERE ###\n", 649 | " return encoder, decoder, vae" 650 | ], 651 | "execution_count": null, 652 | "outputs": [] 653 | }, 654 | { 655 | "cell_type": "markdown", 656 | "metadata": { 657 | "id": "wJsdzZTPVgOn" 658 | }, 659 | "source": [ 660 | "Let's use the function above to get the models we need in the training loop.\n" 661 | ] 662 | }, 663 | { 664 | "cell_type": "code", 665 | "metadata": { 666 | "id": "IHdr3CUznL5Z" 667 | }, 668 | "source": [ 669 | "encoder, decoder, vae = get_models(input_shape=(64,64,3,), latent_dim=LATENT_DIM)" 670 | ], 671 | "execution_count": null, 672 | "outputs": [] 673 | }, 674 | { 675 | "cell_type": "markdown", 676 | "metadata": { 677 | "id": "N6IwN5vlAb5w" 678 | }, 679 | "source": [ 680 | "## Train the Model\n", 681 | "\n", 682 | "You will now configure the model for training. 
We defined some losses, the optimizer, and the loss metric below but you can experiment with others if you like.\n" 683 | ] 684 | }, 685 | { 686 | "cell_type": "code", 687 | "metadata": { 688 | "id": "dHPwSmZFnQ_2" 689 | }, 690 | "source": [ 691 | "optimizer = tf.keras.optimizers.Adam(learning_rate=0.002)\n", 692 | "loss_metric = tf.keras.metrics.Mean()\n", 693 | "mse_loss = tf.keras.losses.MeanSquaredError()\n", 694 | "bce_loss = tf.keras.losses.BinaryCrossentropy()" 695 | ], 696 | "execution_count": null, 697 | "outputs": [] 698 | }, 699 | { 700 | "cell_type": "markdown", 701 | "metadata": { 702 | "id": "AWRzFxYkAvXH" 703 | }, 704 | "source": [ 705 | "You will generate 16 images in a 4x4 grid to show\n", 706 | "progress of image generation. We've defined a utility function for that below." 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "metadata": { 712 | "id": "DGe445j0nTmf" 713 | }, 714 | "source": [ 715 | "def generate_and_save_images(model, epoch, step, test_input):\n", 716 | " \"\"\"Helper function to plot our 16 images\n", 717 | "\n", 718 | " Args:\n", 719 | "\n", 720 | " model -- the decoder model\n", 721 | " epoch -- current epoch number during training\n", 722 | " step -- current step number during training\n", 723 | " test_input -- random tensor with shape (16, LATENT_DIM)\n", 724 | " \"\"\"\n", 725 | " predictions = model.predict(test_input)\n", 726 | "\n", 727 | " fig = plt.figure(figsize=(4,4))\n", 728 | "\n", 729 | " for i in range(predictions.shape[0]):\n", 730 | " plt.subplot(4, 4, i+1)\n", 731 | " img = predictions[i, :, :, :] * 255\n", 732 | " img = img.astype('int32')\n", 733 | " plt.imshow(img)\n", 734 | " plt.axis('off')\n", 735 | "\n", 736 | " # tight_layout minimizes the overlap between 2 sub-plots\n", 737 | " fig.suptitle(\"epoch: {}, step: {}\".format(epoch, step))\n", 738 | " plt.savefig('image_at_epoch_{:04d}_step{:04d}.png'.format(epoch, step))\n", 739 | " plt.show()" 740 | ], 741 | "execution_count": null, 742 | "outputs": [] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": { 747 | "id": "ZgJfazr6A_py" 748 | }, 749 | "source": [ 750 | "You can now start the training loop. You are asked to select the number of epochs and to complete the subection on updating the weights. The general steps are:\n", 751 | "\n", 752 | "* feed a training batch to the VAE model\n", 753 | "* compute the reconstruction loss (hint: use the **mse_loss** defined above instead of `bce_loss` in the ungraded lab, then multiply by the flattened dimensions of the image (i.e. 64 x 64 x 3)\n", 754 | "* add the KLD regularization loss to the total loss (you can access the `losses` property of the `vae` model)\n", 755 | "* get the gradients\n", 756 | "* use the optimizer to update the weights\n", 757 | "\n", 758 | "\n", 759 | "When training your VAE, you might notice that there’s not a lot of variation in the faces. But don’t let that deter you! We’ll test based on how well it does in reconstructing the original faces, and not how well it does in creating new faces.\n", 760 | "\n", 761 | "The training will also take a long time (more than 30 minutes) and that is to be expected. If you used the mean loss metric suggested above, train the model until that is down to around 320 before submitting.\n" 762 | ] 763 | }, 764 | { 765 | "cell_type": "code", 766 | "metadata": { 767 | "id": "hvL1bHXJnajM" 768 | }, 769 | "source": [ 770 | "# Training loop. 
Display generated images each epoch\n", 771 | "\n", 772 | "### START CODE HERE ###\n", 773 | "epochs =200\n", 774 | "### END CODE HERE ###\n", 775 | "\n", 776 | "random_vector_for_generation = tf.random.normal(shape=[16, LATENT_DIM])\n", 777 | "generate_and_save_images(decoder, 0, 0, random_vector_for_generation)\n", 778 | "\n", 779 | "for epoch in range(epochs):\n", 780 | " print('Start of epoch %d' % (epoch,))\n", 781 | "\n", 782 | " # Iterate over the batches of the dataset.\n", 783 | " for step, x_batch_train in enumerate(training_dataset):\n", 784 | " with tf.GradientTape() as tape:\n", 785 | " ### START CODE HERE ### \n", 786 | " reconstructed = vae(x_batch_train)\n", 787 | " # Compute reconstruction loss\n", 788 | " flattened_inputs = tf.reshape(x_batch_train, shape=[-1])\n", 789 | " flattened_outputs = tf.reshape(reconstructed, shape=[-1])\n", 790 | " loss = mse_loss(flattened_inputs, flattened_outputs) * 64 * 64 * 3\n", 791 | " loss += sum(vae.losses) \n", 792 | "\n", 793 | " grads =tape.gradient(loss, vae.trainable_weights)\n", 794 | " optimizer.apply_gradients(zip(grads, vae.trainable_weights))\n", 795 | " ### END CODE HERE\n", 796 | " \n", 797 | " loss_metric(loss)\n", 798 | "\n", 799 | " if step % 10 == 0:\n", 800 | " display.clear_output(wait=False) \n", 801 | " generate_and_save_images(decoder, epoch, step, random_vector_for_generation)\n", 802 | " print('Epoch: %s step: %s mean loss = %s' % (epoch, step, loss_metric.result().numpy()))" 803 | ], 804 | "execution_count": null, 805 | "outputs": [] 806 | }, 807 | { 808 | "cell_type": "markdown", 809 | "metadata": { 810 | "id": "N5wfzGfABny6" 811 | }, 812 | "source": [ 813 | "# Plot Reconstructed Images\n" 814 | ] 815 | }, 816 | { 817 | "cell_type": "markdown", 818 | "metadata": { 819 | "id": "BnQQlWZHaj90" 820 | }, 821 | "source": [ 822 | "As mentioned, your model will be graded on how well it is able to reconstruct images (not generate new ones). You can get a glimpse of how it is doing with the code block below. It feeds in a batch from the test set and plots a row of input (top) and output (bottom) images. Don't worry if the outputs are a blurry. It will look something like below:\n", 823 | "\n", 824 | "" 825 | ] 826 | }, 827 | { 828 | "cell_type": "code", 829 | "metadata": { 830 | "id": "TfIbqTIKSXEe" 831 | }, 832 | "source": [ 833 | "test_dataset = validation_dataset.take(1)\n", 834 | "output_samples = []\n", 835 | "\n", 836 | "for input_image in tfds.as_numpy(test_dataset):\n", 837 | " output_samples = input_image\n", 838 | "\n", 839 | "idxs = np.random.choice(64, size=10)\n", 840 | "\n", 841 | "vae_predicted = vae.predict(test_dataset)\n", 842 | "display_results(output_samples[idxs], vae_predicted[idxs])" 843 | ], 844 | "execution_count": null, 845 | "outputs": [] 846 | }, 847 | { 848 | "cell_type": "markdown", 849 | "metadata": { 850 | "id": "9YKUOCA5BtAA" 851 | }, 852 | "source": [ 853 | "# Plot Generated Images\n" 854 | ] 855 | }, 856 | { 857 | "cell_type": "markdown", 858 | "metadata": { 859 | "id": "ylxL9z15ctsy" 860 | }, 861 | "source": [ 862 | "Using the default parameters, it can take a long time to train your model well enough to generate good fake anime faces. In case you decide to experiment, we provided the code block below to display an 8x8 gallery of fake data generated from your model. 
Here is a sample gallery generated after 50 epochs.\n", 863 | "\n", 864 | "" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "metadata": { 870 | "id": "zCpTybvGSS6L" 871 | }, 872 | "source": [ 873 | "def plot_images(rows, cols, images, title):\n", 874 | " '''Displays images in a grid.'''\n", 875 | " grid = np.zeros(shape=(rows*64, cols*64, 3))\n", 876 | " for row in range(rows):\n", 877 | " for col in range(cols):\n", 878 | " grid[row*64:(row+1)*64, col*64:(col+1)*64, :] = images[row*cols + col]\n", 879 | "\n", 880 | " plt.figure(figsize=(12,12)) \n", 881 | " plt.imshow(grid)\n", 882 | " plt.title(title)\n", 883 | " plt.show()\n", 884 | "\n", 885 | "# initialize random inputs\n", 886 | "test_vector_for_generation = tf.random.normal(shape=[64, LATENT_DIM])\n", 887 | "\n", 888 | "# get predictions from the decoder model\n", 889 | "predictions = decoder.predict(test_vector_for_generation)\n", 890 | "\n", 891 | "# plot the predictions\n", 892 | "plot_images(8, 8, predictions, 'Generated Images')" 893 | ], 894 | "execution_count": null, 895 | "outputs": [] 896 | }, 897 | { 898 | "cell_type": "markdown", 899 | "metadata": { 900 | "id": "J4IixoasCfoR" 901 | }, 902 | "source": [ 903 | "### Save the Model\n", 904 | "\n", 905 | "Once you're satisfied with the results, please save and download the model. Afterwards, please go back to the Coursera submission portal to upload your h5 file to the autograder." 906 | ] 907 | }, 908 | { 909 | "cell_type": "code", 910 | "metadata": { 911 | "id": "A9E8qwDAVMPs" 912 | }, 913 | "source": [ 914 | "vae.save(\"anime.h5\")" 915 | ], 916 | "execution_count": null, 917 | "outputs": [] 918 | } 919 | ] 920 | } -------------------------------------------------------------------------------- /Course 4 - Generative Deep Learning with TensorFlow/Week 4/C4W4_Assignment.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.4" 21 | }, 22 | "colab": { 23 | "name": "C4W4_Assignment.ipynb", 24 | "private_outputs": true, 25 | "provenance": [], 26 | "collapsed_sections": [] 27 | }, 28 | "accelerator": "GPU" 29 | }, 30 | "cells": [ 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "ROm8kovwJIxD" 35 | }, 36 | "source": [ 37 | "# Week 4 Assignment: GANs with Hands\n", 38 | "\n", 39 | "\n", 40 | "For the last programming assignment of this course, you will build a Generative Adversarial Network (GAN) that generates pictures of hands. These will be trained on a dataset of hand images doing sign language.\n", 41 | "\n", 42 | "The model you will build will be very similar to the DCGAN model that you saw in the second ungraded lab of this week. Feel free to review it in case you get stuck with any of the required steps." 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "id": "m6Oumw5-Jx1w" 49 | }, 50 | "source": [ 51 | "***Important:*** *This colab notebook has read-only access so you won't be able to save your changes. 
If you want to save your work periodically, please click `File -> Save a Copy in Drive` to create a copy in your account, then work from there.* " 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "id": "K0OwpFl8JIxP" 58 | }, 59 | "source": [ 60 | "## Imports" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "metadata": { 66 | "id": "k3nvoSP3Btzu" 67 | }, 68 | "source": [ 69 | "import tensorflow as tf\n", 70 | "import tensorflow.keras as keras\n", 71 | "\n", 72 | "import matplotlib.pyplot as plt\n", 73 | "import numpy as np\n", 74 | "\n", 75 | "import urllib.request\n", 76 | "import zipfile\n", 77 | "from IPython import display" 78 | ], 79 | "execution_count": null, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": { 85 | "id": "Yxy_M7xbQef-" 86 | }, 87 | "source": [ 88 | "## Utilities" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "metadata": { 94 | "id": "cg_4z8-glz6P" 95 | }, 96 | "source": [ 97 | "def plot_results(images, n_cols=None):\n", 98 | " '''visualizes fake images'''\n", 99 | " display.clear_output(wait=False) \n", 100 | " \n", 101 | " n_cols = n_cols or len(images)\n", 102 | " n_rows = (len(images) - 1) // n_cols + 1\n", 103 | " \n", 104 | " if images.shape[-1] == 1:\n", 105 | " images = np.squeeze(images, axis=-1)\n", 106 | " \n", 107 | " plt.figure(figsize=(n_cols, n_rows))\n", 108 | " \n", 109 | " for index, image in enumerate(images):\n", 110 | " plt.subplot(n_rows, n_cols, index + 1)\n", 111 | " plt.imshow(image, cmap=\"binary\")\n", 112 | " plt.axis(\"off\")" 113 | ], 114 | "execution_count": null, 115 | "outputs": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "id": "2iI8bUNSJIxR" 121 | }, 122 | "source": [ 123 | "## Get the training data\n", 124 | "\n", 125 | "You will download the dataset and extract it to a directory in your workspace. As mentioned, these are images of human hands performing sign language." 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "metadata": { 131 | "id": "uIx-60V_BEyo" 132 | }, 133 | "source": [ 134 | "# download the dataset\n", 135 | "training_url = \"https://storage.googleapis.com/laurencemoroney-blog.appspot.com/Resources/signs-training.zip\"\n", 136 | "training_file_name = \"signs-training.zip\"\n", 137 | "urllib.request.urlretrieve(training_url, training_file_name)\n", 138 | "\n", 139 | "# extract to local directory\n", 140 | "training_dir = \"/tmp\"\n", 141 | "zip_ref = zipfile.ZipFile(training_file_name, 'r')\n", 142 | "zip_ref.extractall(training_dir)\n", 143 | "zip_ref.close()" 144 | ], 145 | "execution_count": null, 146 | "outputs": [] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": { 151 | "id": "5iPZmV9RJIxR" 152 | }, 153 | "source": [ 154 | "## Preprocess the images\n", 155 | "\n", 156 | "Next, you will prepare the dataset in a format suitable for the model. You will read the files, convert them to tensors of floats, then normalize the pixel values."
157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "metadata": { 162 | "id": "4rf-e4f-d3H7" 163 | }, 164 | "source": [ 165 | "BATCH_SIZE = 32\n", 166 | "\n", 167 | "# mapping function for preprocessing the image files\n", 168 | "def map_images(file):\n", 169 | " '''converts the images to floats and normalizes the pixel values'''\n", 170 | " img = tf.io.decode_png(tf.io.read_file(file))\n", 171 | " img = tf.dtypes.cast(img, tf.float32)\n", 172 | " img = img / 255.0\n", 173 | " \n", 174 | " return img\n", 175 | "\n", 176 | "# create training batches\n", 177 | "filename_dataset = tf.data.Dataset.list_files(\"/tmp/signs-training/*.png\")\n", 178 | "image_dataset = filename_dataset.map(map_images).batch(BATCH_SIZE)" 179 | ], 180 | "execution_count": null, 181 | "outputs": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": { 186 | "id": "lz9NfgdTJIxS" 187 | }, 188 | "source": [ 189 | "## Build the generator\n", 190 | "\n", 191 | "You are free to experiment but here is the recommended architecture:\n", 192 | "- *Dense*: number of units should equal `7 * 7 * 128`, input_shape takes in a list containing the random normal dimensions.\n", 193 | " - `random_normal_dimensions` is a hyperparameter that defines how many random numbers in a vector you'll want to feed into the generator as a starting point for generating images.\n", 194 | "- *Reshape*: reshape the vector to a 7 x 7 x 128 tensor.\n", 195 | "- *BatchNormalization*\n", 196 | "- *Conv2DTranspose*: takes `64` units, kernel size is `5`, strides is `2`, padding is `SAME`, activation is `selu`.\n", 197 | "- *BatchNormalization*\n", 198 | "- *Conv2DTranspose*: `1` unit, kernel size is `5`, strides is `2`, padding is `SAME`, and activation is `tanh`." 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "metadata": { 204 | "id": "uagZDaF0CZON" 205 | }, 206 | "source": [ 207 | "# You'll pass the random_normal_dimensions to the first dense layer of the generator\n", 208 | "random_normal_dimensions = 32\n", 209 | "\n", 210 | "### START CODE HERE ###\n", 211 | "generator = keras.models.Sequential([\n", 212 | " keras.layers.Dense(7*7*128, input_shape= [random_normal_dimensions]),\n", 213 | " keras.layers.Reshape([7, 7, 128]),\n", 214 | " keras.layers.BatchNormalization(),\n", 215 | " keras.layers.Conv2DTranspose(filters= 64, kernel_size= 5, strides= 2, \n", 216 | " padding= \"SAME\", activation= \"selu\"),\n", 217 | " keras.layers.BatchNormalization(),\n", 218 | " keras.layers.Conv2DTranspose(filters= 1, kernel_size= 5, strides= 2, \n", 219 | " padding= \"SAME\", activation= \"tanh\")\n", 220 | "])\n", 221 | "### END CODE HERE ###" 222 | ], 223 | "execution_count": null, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "id": "8_lAy0bjJIxS" 230 | }, 231 | "source": [ 232 | "## Build the discriminator\n", 233 | "\n", 234 | "Here is the recommended architecture for the discriminator:\n", 235 | "- *Conv2D*: 64 units, kernel size of 5, strides of 2, padding is SAME, activation is a leaky relu with alpha of 0.2, input shape is 28 x 28 x 1\n", 236 | "- *Dropout*: rate is 0.4 (fraction of input units to drop)\n", 237 | "- *Conv2D*: 128 units, kernel size of 5, strides of 2, padding is SAME, activation is LeakyRelu with alpha of 0.2\n", 238 | "- *Dropout*: rate is 0.4.\n", 239 | "- *Flatten*\n", 240 | "- *Dense*: with 1 unit and a sigmoid activation" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "metadata": { 246 | "id": "siCh-qRtJIxT", 247 | "lines_to_next_cell": 2 248 | }, 
249 | "source": [ 250 | "### START CODE HERE ###\n", 251 | "discriminator = keras.models.Sequential([\n", 252 | " keras.layers.Conv2D(filters=64, kernel_size= 5, strides= 2, \n", 253 | " padding= \"SAME\", activation = keras.layers.LeakyReLU(0.2), input_shape= [28, 28, 1]),\n", 254 | " keras.layers.Dropout(0.4),\n", 255 | " keras.layers.Conv2D(filters= 128, kernel_size = 5, strides= 2,\n", 256 | " padding= \"SAME\", activation= keras.layers.LeakyReLU(0.2)),\n", 257 | " keras.layers.Dropout(0.4),\n", 258 | " keras.layers.Flatten(),\n", 259 | " keras.layers.Dense(1, \"sigmoid\")\n", 260 | "])\n", 261 | "### END CODE HERE ###" 262 | ], 263 | "execution_count": null, 264 | "outputs": [] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "id": "EKlTL1lhJIxT" 270 | }, 271 | "source": [ 272 | "## Compile the discriminator\n", 273 | "\n", 274 | "- Compile the discriminator with a binary_crossentropy loss and rmsprop optimizer.\n", 275 | "- Set the discriminator to not train on its weights (set its \"trainable\" field)." 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "metadata": { 281 | "id": "xh4EaHDlJIxT" 282 | }, 283 | "source": [ 284 | "### START CODE HERE ###\n", 285 | "discriminator.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")\n", 286 | "discriminator.trainable = False\n", 287 | "### END CODE HERE ###" 288 | ], 289 | "execution_count": null, 290 | "outputs": [] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": { 295 | "id": "3X25T2kUJIxT" 296 | }, 297 | "source": [ 298 | "## Build and compile the GAN model\n", 299 | "\n", 300 | "- Build the sequential model for the GAN, passing a list containing the generator and discriminator.\n", 301 | "- Compile the model with a binary cross entropy loss and rmsprop optimizer." 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "metadata": { 307 | "id": "SBclsOMsJIxU" 308 | }, 309 | "source": [ 310 | "### START CODE HERE ###\n", 311 | "gan = keras.models.Sequential([generator, discriminator])\n", 312 | "gan.compile(loss=\"binary_crossentropy\", optimizer=\"rmsprop\")\n", 313 | "### END CODE HERE ###" 314 | ], 315 | "execution_count": null, 316 | "outputs": [] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": { 321 | "id": "zX2CB0srJIxU" 322 | }, 323 | "source": [ 324 | "## Train the GAN\n", 325 | "\n", 326 | "Phase 1\n", 327 | "- real_batch_size: Get the batch size of the input batch (it's the zero-th dimension of the tensor)\n", 328 | "- noise: Generate the noise using `tf.random.normal`. The shape is batch size x random_normal_dimension\n", 329 | "- fake images: Use the generator that you just created. 
Pass in the noise and produce fake images.\n", 330 | "- mixed_images: concatenate the fake images with the real images.\n", 331 | " - Set the axis to 0.\n", 332 | "- discriminator_labels: Set to `0.` for fake images and `1.` for real images.\n", 333 | "- Set the discriminator as trainable.\n", 334 | "- Use the discriminator's `train_on_batch()` method to train on the mixed images and the discriminator labels.\n", 335 | "\n", 336 | "\n", 337 | "Phase 2\n", 338 | "- noise: generate random normal values with dimensions batch_size x random_normal_dimensions\n", 339 | " - Use `real_batch_size`.\n", 340 | "- Generator_labels: Set to `1.` to mark the fake images as real\n", 341 | " - The generator will generate fake images that are labeled as real images and attempt to fool the discriminator.\n", 342 | "- Set the discriminator to NOT be trainable.\n", 343 | "- Train the GAN on the noise and the generator labels." 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "metadata": { 349 | "id": "AuV97d_kCpb_" 350 | }, 351 | "source": [ 352 | "\n", 353 | "def train_gan(gan, dataset, random_normal_dimensions, n_epochs=50):\n", 354 | " \"\"\" Defines the two-phase training loop of the GAN\n", 355 | " Args:\n", 356 | " gan -- the GAN model which has the generator and discriminator\n", 357 | " dataset -- the training set of real images\n", 358 | " random_normal_dimensions -- dimensionality of the input to the generator\n", 359 | " n_epochs -- number of epochs\n", 360 | " \"\"\"\n", 361 | "\n", 362 | " # get the two sub networks from the GAN model\n", 363 | " generator, discriminator = gan.layers\n", 364 | " \n", 365 | " for epoch in range(n_epochs):\n", 366 | " print(\"Epoch {}/{}\".format(epoch + 1, n_epochs)) \n", 367 | " for real_images in dataset:\n", 368 | " \n", 369 | "### START CODE HERE ###\n", 370 | " # infer batch size from the current batch of real images\n", 371 | " real_batch_size = real_images.shape[0]\n", 372 | " \n", 373 | " # Train the discriminator - PHASE 1\n", 374 | " # Create the noise\n", 375 | " noise = tf.random.normal(shape=[real_batch_size, random_normal_dimensions])\n", 376 | " \n", 377 | " # Use the noise to generate fake images\n", 378 | " fake_images = generator(noise)\n", 379 | " \n", 380 | " # Create a list by concatenating the fake images with the real ones\n", 381 | " mixed_images =tf.concat([fake_images, real_images], axis=0)\n", 382 | " \n", 383 | " # Create the labels for the discriminator\n", 384 | " # 0 for the fake images\n", 385 | " # 1 for the real images\n", 386 | " discriminator_labels = tf.constant([[0.]] * real_batch_size + [[1.]] * real_batch_size)\n", 387 | " \n", 388 | " # Ensure that the discriminator is trainable\n", 389 | " discriminator.trainable = True\n", 390 | " \n", 391 | " # Use train_on_batch to train the discriminator with the mixed images and the discriminator labels\n", 392 | " discriminator.train_on_batch(mixed_images, discriminator_labels)\n", 393 | " \n", 394 | " # Train the generator - PHASE 2\n", 395 | " # create a batch of noise input to feed to the GAN\n", 396 | " noise = tf.random.normal(shape=[real_batch_size, random_normal_dimensions])\n", 397 | " \n", 398 | " # label all generated images to be \"real\"\n", 399 | " generator_labels = tf.constant([[1.]] * real_batch_size)\n", 400 | " \n", 401 | " # Freeze the discriminator\n", 402 | " discriminator.trainable = False\n", 403 | "\n", 404 | " # Train the GAN on the noise with the labels all set to be true\n", 405 | " gan.train_on_batch(noise, generator_labels)\n", 406 | " \n", 407 | 
"### END CODE HERE ###\n", 408 | " plot_results(fake_images, 16) \n", 409 | " plt.show()\n", 410 | " return fake_images" 411 | ], 412 | "execution_count": null, 413 | "outputs": [] 414 | }, 415 | { 416 | "cell_type": "markdown", 417 | "metadata": { 418 | "id": "OzbX3hwKJIxW" 419 | }, 420 | "source": [ 421 | "### Run the training\n", 422 | "\n", 423 | "For each epoch, a set of 31 images will be displayed onscreen. The longer you train, the better your output fake images will be. You will pick your best images to submit to the grader." 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "metadata": { 429 | "id": "wYx9rzdACt0A" 430 | }, 431 | "source": [ 432 | "# you can adjust the number of epochs\n", 433 | "EPOCHS = 120\n", 434 | "\n", 435 | "# run the training loop and collect images\n", 436 | "fake_images = train_gan(gan, image_dataset, random_normal_dimensions, EPOCHS)" 437 | ], 438 | "execution_count": null, 439 | "outputs": [] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": { 444 | "id": "uIAih3a1JIxX" 445 | }, 446 | "source": [ 447 | "## Choose your best images to submit for grading!\n", 448 | "\n", 449 | "Please visually inspect your 31 generated hand images. They are indexed from 0 to 30, from left to right on the first row on top, and then continuing from left to right on the second row below it.\n", 450 | "\n", 451 | "- Choose 16 images that you think look most like actual hands.\n", 452 | "- Use the `append_to_grading_images()` function, pass in `fake_images` and a list of the indices for the 16 images that you choose to submit for grading (e.g. `append_to_grading_images(fake_images, [1, 4, 5, 6, 8... until you have 16 elements])`)." 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "metadata": { 458 | "id": "4Qcxe1RK-piF" 459 | }, 460 | "source": [ 461 | "# helper function to collect the images\n", 462 | "def append_to_grading_images(images, indexes, l=[]):\n", 463 | " for index in indexes:\n", 464 | " if len(l) >= 16:\n", 465 | " print(\"The list is full\")\n", 466 | " break\n", 467 | " l.append(tf.squeeze(images[index:(index+1),...], axis=0))\n", 468 | " l = tf.convert_to_tensor(l)\n", 469 | " return l" 470 | ], 471 | "execution_count": null, 472 | "outputs": [] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": { 477 | "id": "RFg-wvIcS-Jv" 478 | }, 479 | "source": [ 480 | "Please fill in the empty list (2nd parameter) with 16 indices indicating the images you want to submit to the grader." 481 | ] 482 | }, 483 | { 484 | "cell_type": "code", 485 | "metadata": { 486 | "id": "InUSbfGI-0vk" 487 | }, 488 | "source": [ 489 | "grading_images = append_to_grading_images(fake_images, [ 0,1, 2, 3, 6, 8, 9, 11, 12, 15, 17, 18, 19, 20, 26, 28])" 490 | ], 491 | "execution_count": null, 492 | "outputs": [] 493 | }, 494 | { 495 | "cell_type": "markdown", 496 | "metadata": { 497 | "id": "BsTurLWKJIxY" 498 | }, 499 | "source": [ 500 | "## Zip your selected images for grading\n", 501 | "\n", 502 | "Please run the code below. This will save the images you chose to a zip file named `my-signs.zip`.\n", 503 | "\n", 504 | "- Please download this file from the Files explorer on the left.\n", 505 | "- Please return to the Coursera classroom and upload the zip file for grading." 
506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "metadata": { 511 | "id": "vL8W2OGBqFL_" 512 | }, 513 | "source": [ 514 | "from PIL import Image\n", 515 | "from zipfile import ZipFile\n", 516 | "\n", 517 | "denormalized_images = grading_images * 255\n", 518 | "denormalized_images = tf.dtypes.cast(denormalized_images, dtype = tf.uint8)\n", 519 | "\n", 520 | "file_paths = []\n", 521 | "\n", 522 | "for this_image in range(0,16):\n", 523 | " i = tf.reshape(denormalized_images[this_image], [28,28])\n", 524 | " im = Image.fromarray(i.numpy())\n", 525 | " im = im.convert(\"L\")\n", 526 | " filename = \"hand\" + str(this_image) + \".png\"\n", 527 | " file_paths.append(filename)\n", 528 | " im.save(filename)\n", 529 | "\n", 530 | "with ZipFile('hands.zip', 'w') as zip:\n", 531 | " for file in file_paths:\n", 532 | " zip.write(file)" 533 | ], 534 | "execution_count": null, 535 | "outputs": [] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": { 540 | "id": "Yp7jYkyXZsM9" 541 | }, 542 | "source": [ 543 | "**Congratulations on completing the final assignment of this course!**" 544 | ] 545 | } 546 | ] 547 | } -------------------------------------------------------------------------------- /Course 4 - Generative Deep Learning with TensorFlow/Week 4/hands.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/239e7490dd1bfcc9dd1f59a5e976833c4205006c/Course 4 - Generative Deep Learning with TensorFlow/Week 4/hands.zip -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow-Advanced-Techniques-Specialization 2 | Deeplearning.AI TensorFlow: Advanced Techniques Specialization Solution 3 | 4 | ## Course 1: Custom Models, Layers, and Loss Functions with TensorFlow 5 | 6 | * [Week 1 - Functional APIs](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%201%20-%20Custom%20Models%2C%20Layers%2C%20and%20Loss%20Functions%20with%20TensorFlow/Week%201) 7 | 8 | * [Week 2 - Custom Loss Functions](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%201%20-%20Custom%20Models%2C%20Layers%2C%20and%20Loss%20Functions%20with%20TensorFlow/Week%202) 9 | 10 | * [Week 3 - Custom Layers](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%201%20-%20Custom%20Models%2C%20Layers%2C%20and%20Loss%20Functions%20with%20TensorFlow/Week%203) 11 | 12 | * [Week 4 - Custom Models, Layers, and Loss Functions with TensorFlow](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%201%20-%20Custom%20Models%2C%20Layers%2C%20and%20Loss%20Functions%20with%20TensorFlow/Week%204) 13 | 14 | ## Course 2: Custom and Distributed Training with TensorFlow 15 | 16 | * [Week 1 - Differentiation and Gradients](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%202%20-%20Custom%20and%20Distributed%20Training%20with%20TensorFlow/Week%201) 17 | 18 | * [Week 2 - Custom Training](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%202%20-%20Custom%20and%20Distributed%20Training%20with%20TensorFlow/Week%202) 19 | 20 | * [Week 3 - Graph 
Mode](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%202%20-%20Custom%20and%20Distributed%20Training%20with%20TensorFlow/Week%203) 21 | 22 | * [Week 4 - Custom and Distributed Training with TensorFlow](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%202%20-%20Custom%20and%20Distributed%20Training%20with%20TensorFlow/Week%204) 23 | 24 | ## Course 3: Advanced Computer Vision with TensorFlow 25 | 26 | * [Week 1 - Introduction to Computer Vision](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%203%20-%20Advanced%20Computer%20Vision%20with%20TensorFlow/Week%201) 27 | 28 | * [Week 2 - Object Detection](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%203%20-%20Advanced%20Computer%20Vision%20with%20TensorFlow/Week%202) 29 | 30 | * [Week 3 - Image Segmentation](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%203%20-%20Advanced%20Computer%20Vision%20with%20TensorFlow/Week%203) 31 | 32 | * [Week 4 - Visualization and Interpretability](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%203%20-%20Advanced%20Computer%20Vision%20with%20TensorFlow/Week%204) 33 | 34 | ## Course 4: Generative Deep Learning with TensorFlow 35 | 36 | * [Week 1 - Style Transfer](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%204%20-%20Generative%20Deep%20Learning%20with%20TensorFlow/Week%201) 37 | 38 | * [Week 2 - AutoEncoders](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%204%20-%20Generative%20Deep%20Learning%20with%20TensorFlow/Week%202) 39 | 40 | * [Week 3 - Variational AutoEncoders](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%204%20-%20Generative%20Deep%20Learning%20with%20TensorFlow/Week%203) 41 | 42 | * [Week 4 - GANS](https://github.com/anhtuan85/TensorFlow-Advanced-Techniques-Specialization/tree/main/Course%204%20-%20Generative%20Deep%20Learning%20with%20TensorFlow/Week%204) 43 | --------------------------------------------------------------------------------