├── models └── .keep ├── .gitignore ├── floyd.yml ├── .floydignore ├── app.py ├── templates ├── layout.html └── serving_template.html ├── serving.py ├── README.md ├── support.py └── colornet.ipynb /models/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | __pycache__/ 3 | *.pyc 4 | *.h5 5 | .floydexpt 6 | -------------------------------------------------------------------------------- /floyd.yml: -------------------------------------------------------------------------------- 1 | env: tensorflow-1.8 2 | machine: cpu 3 | data: 4 | - source: floydhub/datasets/colornet/1 5 | destination: colornet 6 | -------------------------------------------------------------------------------- /.floydignore: -------------------------------------------------------------------------------- 1 | 2 | # Directories and files to ignore when uploading code to floyd 3 | 4 | .git 5 | .eggs 6 | eggs 7 | lib 8 | lib64 9 | parts 10 | sdist 11 | var 12 | *.pyc 13 | *.swp 14 | .DS_Store 15 | *.h5 16 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | from io import BytesIO 4 | 5 | from flask import Flask, make_response, request, render_template 6 | from werkzeug.exceptions import BadRequest 7 | from serving import ( 8 | load_model, 9 | evaluate_input, 10 | ) 11 | 12 | """ 13 | floyd run --cpu --data floydhub/datasets/colornet/1:colornet --mode serve --env tensorflow-1.7 14 | """ 15 | app = Flask(__name__) 16 | app.config['DEBUG'] = False 17 | load_model() 18 | 19 | 20 | @app.route('/', methods=['GET']) 21 | def index(): 22 | return render_template('serving_template.html') 23 | 24 | 25 | @app.route('/image', methods=["POST"]) 26 | def eval_image(): 27 | """"Preprocessing the data and evaluate the model""" 28 | # check if the post request has the file part 29 | input_file = request.files.get('file') 30 | if not input_file: 31 | return BadRequest("File not present in request") 32 | if input_file.filename == '': 33 | return BadRequest("File name is not present in request") 34 | if not input_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')): 35 | return BadRequest("Invalid file type") 36 | 37 | # # Save Image to process 38 | input_buffer = BytesIO() 39 | output_buffer = BytesIO() 40 | input_file.save(input_buffer) 41 | 42 | img = evaluate_input(input_buffer) 43 | img.save(output_buffer, format="JPEG") 44 | img_str = base64.b64encode(output_buffer.getvalue()) 45 | 46 | response = make_response(img_str) 47 | response.headers.set('Content-Type', 'image/jpeg') 48 | return response 49 | 50 | 51 | if __name__ == "__main__": 52 | app.run(host='0.0.0.0', threaded=False) 53 | -------------------------------------------------------------------------------- /templates/layout.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ColorNet 5 | 6 | 7 | 8 | 32 | 33 | 34 |
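For reference, a minimal client sketch for the `/image` endpoint defined in `app.py` above (the host, port, and file names here are assumptions, not part of the repo): the endpoint returns the colorized JPEG as a base64-encoded string, so the client decodes it before writing the file.

```python
# Hypothetical client for the /image endpoint in app.py.
# Assumes the Flask app is running on localhost:5000 and my_photo.jpg exists locally.
import base64
import requests

with open("my_photo.jpg", "rb") as f:
    resp = requests.post(
        "http://localhost:5000/image",
        files={"file": ("my_photo.jpg", f, "image/jpeg")},
    )
resp.raise_for_status()

# The response body is base64 text; decode it back into JPEG bytes before saving.
with open("colorized.jpg", "wb") as out:
    out.write(base64.b64decode(resp.content))
```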
35 | 36 | {% block body %}{% endblock %} 37 | 38 | 45 |
46 | 47 | 48 | -------------------------------------------------------------------------------- /serving.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.preprocessing import image 3 | from keras.preprocessing.image import array_to_img 4 | from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb 5 | from support import ( 6 | load_pretrained_model, 7 | create_inception_embedding, 8 | ) 9 | 10 | 11 | INCEPTION_PATH = ('/colornet/models/' 12 | 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5') 13 | MODEL_PATH = '/colornet/models/color_tensorflow_real_mode_300.h5' 14 | 15 | ALLOWED_EXTENSIONS = set(['jpg', 'png', 'jpeg']) 16 | 17 | 18 | # MODELS 19 | model = None 20 | inception = None 21 | 22 | 23 | def load_model(): 24 | """Load the model""" 25 | global model, inception 26 | (model, inception) = load_pretrained_model(INCEPTION_PATH, MODEL_PATH) 27 | 28 | 29 | def evaluate_input(input: str): 30 | global model 31 | (color_me, color_me_embed) = _data_preprocessing(input) 32 | output = model.predict([color_me, color_me_embed]) 33 | # Rescale the output from [-1,1] to [-128, 128] 34 | output = output * 128 35 | # Output colorizations 36 | for i in range(len(output)): 37 | cur = np.zeros((256, 256, 3)) 38 | # LAB representation 39 | cur[:, :, 0] = color_me[i][:, :, 0] 40 | cur[:, :, 1:] = output[i] 41 | img = array_to_img(lab2rgb(cur)) 42 | return img 43 | 44 | 45 | def _data_preprocessing(input_filepath): 46 | """From RGB image to L(grayscale)""" 47 | global inception 48 | 49 | img = image.load_img(input_filepath, target_size=(256, 256)) 50 | img = image.img_to_array(img) 51 | color_me = [img] 52 | 53 | ################# 54 | # Preprocessing # 55 | ################# 56 | # From RGB to B&W and embedding 57 | color_me = np.array(color_me, dtype=float) 58 | color_me_embed = create_inception_embedding(inception, gray2rgb(rgb2gray(1.0/255*color_me))) 59 | color_me = rgb2lab(1.0/255*color_me)[:, :, :, 0] 60 | color_me = color_me.reshape(color_me.shape+(1,)) 61 | return (color_me, color_me_embed) 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Colornet 2 | 3 | Today, colorization is done by hand in Photoshop, a picture can take up to one month to colorize. It requires extensive research. A face alone needs up to 20 layers of pink, green and blue shades to get it just right. But something changed this year when Amir Avni used neural networks to [troll the subreddit](http://www.whatimade.today/our-frst-reddit-bot-coloring-b-2/) [/r/Colorization](https://www.reddit.com/r/Colorization/) - a community where people colorize historical black and white images manually using Photoshop. They were astonished with Amir’s deep learning bot - what could take up to a month of manual labour could now be done in just a few seconds. 4 | 5 | ### Try it now 6 | 7 | [![Run on FloydHub](https://static.floydhub.com/button/button.svg)](https://floydhub.com/run?template=https://github.com/floydhub/colornet-template) 8 | 9 | Click this button to open a Workspace on FloydHub that will train this model. 10 | 11 | ### Colorizing Black&White photos 12 | 13 | Fascinated by Amir’s neural network, Emill reproduced it and documented the process in the famous blog post: [Colorizing B&W Photos with Neural Networks](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/). 
In this notebook we will reproduce Emil's work by using the Full Version of his experiments. 14 | 15 | ![colorization](https://blog.floydhub.com/content/images/2018/06/woman_results-1-min.png) 16 | *The middle picture is done with our neural network and the picture to the right is the original color photo - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)* 17 | 18 | We will: 19 | - Preprocess the image data for this CV task 20 | - Build and train the `colornet` model using Keras and Tensorflow 21 | - Evaluate our model on the test set 22 | - Run the model on your own black&white and colored pictures! 23 | 24 | 25 | ### Serve an interactive web page for your own model 26 | 27 | You can easily spin up a serve job on FloydHub to demo your model through an 28 | interactive web site. Just run the following command from workspace terminal or 29 | your local machine: 30 | 31 | ```bash 32 | floyd run --mode serve 33 | ``` 34 | 35 | You should be able to see the following page when visiting the FloydHub serve url: 36 | 37 | ![serve-example](https://static.floydhub.com/images/color-serve-example.png) 38 | -------------------------------------------------------------------------------- /templates/serving_template.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block body %} 3 | 4 | 26 | 27 | 78 | 79 | 80 |
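As a quick sanity check of the serving code, a hypothetical local smoke test for `serving.py` might look like the sketch below; it assumes the pretrained weights are available under `/colornet` exactly as `floyd.yml` mounts them, and `my_photo.jpg` stands in for any local image.

```python
# Hypothetical smoke test for serving.py; requires the weight files under /colornet
# (the same mount point configured in floyd.yml).
from serving import load_model, evaluate_input

load_model()                              # loads the Inception and colornet weights
img = evaluate_input("my_photo.jpg")      # any path accepted by keras' load_img
img.save("colorized.jpg", format="JPEG")
```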
 81 | ... 115 | (page markup lost in extraction; only the heading text "Colorizing Black & White Photo" survives from the template body)
116 | 117 | {% endblock %} 118 | -------------------------------------------------------------------------------- /support.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | import numpy as np 4 | 5 | from keras.applications.inception_resnet_v2 import InceptionResNetV2 6 | from keras.applications.inception_resnet_v2 import preprocess_input 7 | from keras.layers.core import RepeatVector 8 | from keras.preprocessing import image 9 | from keras.preprocessing.image import img_to_array, load_img 10 | from keras.models import Model 11 | from keras.layers import ( 12 | Conv2D, 13 | UpSampling2D, 14 | Input, 15 | Reshape, 16 | concatenate, 17 | ) 18 | 19 | from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb 20 | from skimage.transform import resize 21 | from skimage.io import imsave 22 | 23 | import matplotlib.pyplot as plt 24 | import tensorflow as tf 25 | 26 | 27 | # Create embedding 28 | def create_inception_embedding(inception, grayscaled_rgb): 29 | grayscaled_rgb_resized = [] 30 | for i in grayscaled_rgb: 31 | i = resize(i, (299, 299, 3), mode='constant', anti_aliasing=True) 32 | grayscaled_rgb_resized.append(i) 33 | grayscaled_rgb_resized = np.array(grayscaled_rgb_resized) 34 | grayscaled_rgb_resized = preprocess_input(grayscaled_rgb_resized) 35 | with inception.graph.as_default(): 36 | embed = inception.predict(grayscaled_rgb_resized) 37 | return embed 38 | 39 | 40 | def show_img(im, figsize=None, ax=None): 41 | if not ax: 42 | fig, ax = plt.subplots(figsize=figsize) 43 | ax.imshow(im) 44 | ax.get_xaxis().set_visible(False) 45 | ax.get_yaxis().set_visible(False) 46 | return ax 47 | 48 | 49 | def read_img(img_id, data_dir, train_or_test, size): 50 | """Read and resize image. 51 | # Arguments 52 | img_id: string 53 | train_or_test: string 'train' or 'test'. 54 | size: resize the original image. 55 | # Returns 56 | Image as numpy array. 
57 | """ 58 | img = image.load_img(os.path.join(data_dir, train_or_test, '%s.jpg' % img_id), target_size=size) 59 | img = image.img_to_array(img) 60 | return img 61 | 62 | 63 | def color_result(PATH, START, END, RESULT, model, inception): 64 | # Make predictions on validation images 65 | color_me = [] 66 | i = 0 67 | # Take file in range [START, END] inside the PATH folder 68 | for filename in os.listdir(PATH): 69 | if i > START and i < END: 70 | color_me.append(img_to_array(load_img(os.path.join(PATH, filename)))) 71 | i += 1 72 | 73 | ################# 74 | # Preprocessing # 75 | ################# 76 | # From RGB to B&W and embedding 77 | color_me = np.array(color_me, dtype=float) 78 | color_me_embed = create_inception_embedding(inception, gray2rgb(rgb2gray(1.0/255*color_me))) 79 | color_me = rgb2lab(1.0/255*color_me)[:, :, :, 0] 80 | color_me = color_me.reshape(color_me.shape+(1,)) 81 | 82 | # Test model 83 | output = model.predict([color_me, color_me_embed]) 84 | # Rescale the output from [-1,1] to [-128, 128] 85 | output = output * 128 86 | 87 | # Create the result directory if not extists 88 | if not os.path.exists('result'): 89 | os.makedirs('result') 90 | 91 | # Output colorizations 92 | for i in range(len(output)): 93 | cur = np.zeros((256, 256, 3)) 94 | # LAB representation 95 | cur[:, :, 0] = color_me[i][:, :, 0] 96 | cur[:, :, 1:] = output[i] 97 | # Save images as RGB 98 | imsave("result/img_"+str(i)+".png", lab2rgb(cur)) 99 | 100 | 101 | def prediction_from_url(url, model, inception): 102 | test_image_path = '/tmp/test.jpg' 103 | 104 | # Download the image 105 | response = requests.get(url) 106 | if response.status_code == 200: 107 | with open(test_image_path, 'wb') as f: 108 | f.write(response.content) 109 | 110 | color_me = [] 111 | color_me.append(read_img('test', '/', 'tmp', (256, 256))) 112 | 113 | ################# 114 | # Preprocessing # 115 | ################# 116 | # From RGB to B&W and embedding 117 | color_me = np.array(color_me, dtype=float) 118 | color_me_embed = create_inception_embedding(inception, gray2rgb(rgb2gray(1.0/255*color_me))) 119 | color_me = rgb2lab(1.0/255*color_me)[:, :, :, 0] 120 | color_me = color_me.reshape(color_me.shape+(1,)) 121 | 122 | # Test model 123 | output = model.predict([color_me, color_me_embed]) 124 | # Rescale the output from [-1,1] to [-128, 128] 125 | output = output * 128 126 | 127 | # Output colorizations 128 | for i in range(len(output)): 129 | cur = np.zeros((256, 256, 3)) 130 | # LAB representation 131 | cur[:, :, 0] = color_me[i][:, :, 0] 132 | cur[:, :, 1:] = output[i] 133 | 134 | # B&W 135 | fig = plt.figure(figsize=(9, 9)) 136 | ax1 = fig.add_subplot(1, 3, 1) 137 | ax1.axis('off') 138 | ax1.set_title('B&W') 139 | ax1.imshow(rgb2gray(read_img('test', '/', 'tmp', (256, 256))/255), cmap='gray') 140 | 141 | # Prediction 142 | ax2 = fig.add_subplot(1, 3, 2) 143 | ax2.axis('off') 144 | ax2.set_title('Prediction') 145 | ax2.imshow(lab2rgb(cur)) 146 | 147 | # Original 148 | ax3 = fig.add_subplot(1, 3, 3) 149 | ax3.axis('off') 150 | ax3.set_title('Original') 151 | ax3.imshow(read_img('test', '/', 'tmp', (256, 256))/255) 152 | 153 | 154 | def load_pretrained_model(inception_wpath, colornet_wpath): 155 | '''Load Emil's pretrained model''' 156 | print('Loading pretrained model... 
(it could take a while)') 157 | 158 | # Load weights of InceptionResNet model for embedding extraction 159 | inception = InceptionResNetV2(weights=None, include_top=True) 160 | inception.load_weights(inception_wpath) 161 | inception.graph = tf.get_default_graph() 162 | 163 | # The Model 164 | def conv_stack(data, filters, s): 165 | """Utility for building conv layer""" 166 | output = Conv2D(filters, (3, 3), strides=s, activation='relu', padding='same')(data) 167 | return output 168 | 169 | embed_input = Input(shape=(1000,)) 170 | 171 | # Encoder 172 | encoder_input = Input(shape=(256, 256, 1,)) 173 | encoder_output = conv_stack(encoder_input, 64, 2) 174 | encoder_output = conv_stack(encoder_output, 128, 1) 175 | encoder_output = conv_stack(encoder_output, 128, 2) 176 | encoder_output = conv_stack(encoder_output, 256, 1) 177 | encoder_output = conv_stack(encoder_output, 256, 2) 178 | encoder_output = conv_stack(encoder_output, 512, 1) 179 | encoder_output = conv_stack(encoder_output, 512, 1) 180 | encoder_output = conv_stack(encoder_output, 256, 1) 181 | 182 | # Fusion 183 | # y_mid: (None, 256, 28, 28) 184 | fusion_output = RepeatVector(32 * 32)(embed_input) 185 | fusion_output = Reshape(([32, 32, 1000]))(fusion_output) 186 | fusion_output = concatenate([encoder_output, fusion_output], axis=3) 187 | fusion_output = Conv2D(256, (1, 1), activation='relu')(fusion_output) 188 | 189 | # Decoder 190 | decoder_output = conv_stack(fusion_output, 128, 1) 191 | decoder_output = UpSampling2D((2, 2))(decoder_output) 192 | decoder_output = conv_stack(decoder_output, 64, 1) 193 | decoder_output = UpSampling2D((2, 2))(decoder_output) 194 | decoder_output = conv_stack(decoder_output, 32, 1) 195 | decoder_output = conv_stack(decoder_output, 16, 1) 196 | decoder_output = Conv2D(2, (2, 2), activation='tanh', padding='same')(decoder_output) 197 | decoder_output = UpSampling2D((2, 2))(decoder_output) 198 | 199 | model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output) 200 | 201 | # Load colornet weights 202 | model.load_weights(colornet_wpath) 203 | 204 | print('Model loaded!') 205 | return(model, inception) 206 | -------------------------------------------------------------------------------- /colornet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Colornet\n", 8 | "\n", 9 | "Today, colorization is done by hand in Photoshop, a picture can take up to one month to colorize. It requires extensive research. A face alone needs up to 20 layers of pink, green and blue shades to get it just right. But something changed this year when Amir Avni used neural networks to [troll the subreddit](http://www.whatimade.today/our-frst-reddit-bot-coloring-b-2/) [/r/Colorization](https://www.reddit.com/r/Colorization/) - a community where people colorize historical black and white images manually using Photoshop. They were astonished with Amir’s deep learning bot - what could take up to a month of manual labour could now be done in just a few seconds.\n", 10 | "\n", 11 | "### Colorizing Black&White photos\n", 12 | "\n", 13 | "Fascinated by Amir’s neural network, Emill reproduced it and documented the process in the famous blog post: [Colorizing B&W Photos with Neural Networks](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/). 
In this notebook we will reproduce Emil's work by using the Full Version of his experiments.\n", 14 | "\n", 15 | "![colorization](https://blog.floydhub.com/content/images/2018/06/woman_results-1-min.png)\n", 16 | "*The middle picture is done with our neural network and the picture to the right is the original color photo - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)*\n", 17 | "\n", 18 | "We will:\n", 19 | "- Preprocess the image data for this CV task\n", 20 | "- Build and train the `colornet` model using Keras and Tensorflow\n", 21 | "- Evaluate our model on the test set\n", 22 | "- Run the model on your own black&white and colored pictures!\n", 23 | "\n", 24 | "### Instructions\n", 25 | "\n", 26 | "- To execute a code cell, click on the cell and press `Shift + Enter` (shortcut for Run).\n", 27 | "- To learn more about Workspaces, check out the [Getting Started Notebook](get_started_workspace.ipynb).\n", 28 | "- **Tip**: *Feel free to try this Notebook with your own data and on your own super awesome colorization task.*\n", 29 | "\n", 30 | "Now, let's get started! 🚀" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Try it now!\n", 38 | "\n", 39 | "Test out the Emil's pretrained model. Run the code Cell below and enter a URL with your pic in the widget below. Have fun!🎉\n", 40 | "\n", 41 | "Here are some URLs for testing:\n", 42 | "\n", 43 | "- (man, colored) http://www.bolsamania.com/cine/wp-content/uploads/2017/03/26-2.jpg\n", 44 | "- (landscape, colored) https://cdn.pixabay.com/photo/2017/04/07/18/23/landscape-2211587_960_720.jpg\n", 45 | "- (lion, b&w) https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQTXYpHhz45gaDHPsNulPFotlc72i3MDv_1RoOcQjEQx3sX-dWj\n", 46 | "\n", 47 | "\n", 48 | "Note: \n", 49 | "- You can also consider to use URL of colored pictures, in this way you can fully test the colorization on new images.\n", 50 | "- The first prediction can take up to one minute." 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 1, 56 | "metadata": {}, 57 | "outputs": [ 58 | { 59 | "name": "stderr", 60 | "output_type": "stream", 61 | "text": [ 62 | "Using TensorFlow backend.\n" 63 | ] 64 | }, 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "Loading pretrained model... 
(it could take a while)\n", 70 | "Model loaded!\n" 71 | ] 72 | }, 73 | { 74 | "data": { 75 | "application/vnd.jupyter.widget-view+json": { 76 | "model_id": "9db845b0310c4d0f85a0d8787c3ba761", 77 | "version_major": 2, 78 | "version_minor": 0 79 | }, 80 | "text/plain": [ 81 | "interactive(children=(Text(value='', description='URL', placeholder='Insert URL of a pic'), Button(description…" 82 | ] 83 | }, 84 | "metadata": {}, 85 | "output_type": "display_data" 86 | } 87 | ], 88 | "source": [ 89 | "# Testing on url images\n", 90 | "from ipywidgets import interact_manual\n", 91 | "from ipywidgets import widgets\n", 92 | "from support import prediction_from_url, load_pretrained_model\n", 93 | "\n", 94 | "(model, inception) = load_pretrained_model('/floyd/input/colornet/models/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5',\n", 95 | " '/floyd/input/colornet/models/color_tensorflow_real_mode_300.h5')\n", 96 | "\n", 97 | "def get_prediction(URL):\n", 98 | " prediction_from_url(URL, model, inception)\n", 99 | "\n", 100 | "interact_manual(get_prediction, URL=widgets.Text(placeholder='Insert URL of a pic'));" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## Initial Setup\n", 108 | "\n", 109 | "Let's start by importing some packages" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 2, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "%load_ext autoreload\n", 119 | "%autoreload 2\n", 120 | "\n", 121 | "import tensorflow as tf\n", 122 | "import numpy as np\n", 123 | "\n", 124 | "import os\n", 125 | "import random\n", 126 | "import keras\n", 127 | "\n", 128 | "from keras.applications.inception_resnet_v2 import InceptionResNetV2\n", 129 | "from keras.applications.inception_resnet_v2 import preprocess_input\n", 130 | "\n", 131 | "from keras.preprocessing import image\n", 132 | "from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img\n", 133 | "\n", 134 | "from keras.models import Sequential, Model\n", 135 | "from keras.callbacks import TensorBoard \n", 136 | "\n", 137 | "from keras.engine import Layer\n", 138 | "from keras.layers import Conv2D, UpSampling2D, InputLayer, Conv2DTranspose, Input, Reshape, merge, concatenate, Activation, Dense, Dropout, Flatten\n", 139 | "from keras.layers.normalization import BatchNormalization\n", 140 | "from keras.layers.core import RepeatVector, Permute\n", 141 | "\n", 142 | "from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb\n", 143 | "from skimage.transform import resize\n", 144 | "from skimage.io import imsave" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "## Training Parameters\n", 152 | "\n", 153 | "We'll set the hyperparameters for training our model. 
If you understand what they mean, feel free to play around - otherwise, we recommend keeping the defaults for your first run 🙂" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 3, 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# Hyperparams if GPU is available\n", 163 | "if tf.test.is_gpu_available():\n", 164 | " # GPU\n", 165 | " BATCH_SIZE = 20 # Number of examples used in each iteration\n", 166 | " EPOCHS = 1000 # Number of passes through entire dataset\n", 167 | "# Hyperparams for CPU training\n", 168 | "else:\n", 169 | " # CPU\n", 170 | " BATCH_SIZE = 20\n", 171 | " EPOCHS = 250" 172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Data Loading\n", 179 | "\n", 180 | "Converting images into tensors and rescaling the pixel values from [0-255] to [0,1].\n", 181 | "\n", 182 | "The colornet dataset provides 3 datasets:\n", 183 | "- **ds-big** with 9600 images\n", 184 | "- **ds-medium** with 200 images (the pretrained models in the `/floyd/input/colornet/models` folder are trained on this one)\n", 185 | "- **ds-small** with 20 images (the one used by Emil in the **Full-Version** section of the Blog post)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 4, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "DS_PATH = '/floyd/input/colornet/ds-small' # ADD path/to/dataset\n", 195 | "\n", 196 | "# Get images\n", 197 | "X = []\n", 198 | "for filename in os.listdir(DS_PATH):\n", 199 | " if os.path.isfile(os.path.join(DS_PATH, filename)):\n", 200 | " X.append(img_to_array(load_img(os.path.join(DS_PATH, filename))))\n", 201 | " \n", 202 | "# Normalization => Converting pixel value from [0-255] to [0,1] \n", 203 | "X = np.array(X, dtype=float)\n", 204 | "Xtrain = 1.0/255*X" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## Data preprocessing\n", 212 | "\n", 213 | "We’ll use an algorithm to change the color channels, from RGB to Lab. L stands for lightness, and a and b for the color spectrums green–red and blue–yellow.\n", 214 | "As you can see below, a Lab encoded image has one layer for grayscale and have packed three color layers into two. This means that we can use the original grayscale image in our final prediction. Also, we only have to two channels to predict.\n", 215 | "\n", 216 | "\n", 217 | "![preprocessing](https://blog.floydhub.com/content/images/2018/06/woman_lab_color_space.png)\n", 218 | "\n", 219 | "*L/Greyscale to AB - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)*\n", 220 | "\n", 221 | "We have a grayscale layer for input, and we want to predict two color layers, the ab in Lab. 
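A small, illustrative sketch of the Lab split just described (the image here is a random stand-in, not part of the dataset): the L channel becomes the model input, and the two ab channels, scaled to roughly [-1, 1], become the prediction target.

```python
# Illustrative only: the Lab split described above, on a single dummy image.
import numpy as np
from skimage.color import rgb2lab, lab2rgb

rgb = np.random.rand(256, 256, 3)            # stand-in for a [0, 1]-scaled RGB image
lab = rgb2lab(rgb)                           # shape (256, 256, 3): L, a, b channels

L  = lab[:, :, 0:1]                          # grayscale input, roughly [0, 100]
ab = lab[:, :, 1:] / 128                     # training target, scaled to about [-1, 1]

# Reassemble a Lab image from L plus the rescaled ab channels and convert back to RGB.
recolored = lab2rgb(np.concatenate([L, ab * 128], axis=2))
print(L.shape, ab.shape, recolored.shape)    # (256, 256, 1) (256, 256, 2) (256, 256, 3)
```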
To create the final color image we’ll include the L/grayscale image we used for the input, thus, creating a Lab image.\n", 222 | "\n", 223 | "![Mapping from B&W to AB](https://blog.floydhub.com/content/images/2018/06/function_lab_color_grids.png)\n", 224 | "*More formally, we want to learn a mapping from the greyscale to AB - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)* " 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 5, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "from support import create_inception_embedding\n", 234 | "\n", 235 | "# Image transformer\n", 236 | "datagen = ImageDataGenerator(\n", 237 | " shear_range=0.1,\n", 238 | " zoom_range=0.1,\n", 239 | " rotation_range=10,\n", 240 | " horizontal_flip=True)\n", 241 | "\n", 242 | "def image_a_b_gen(batch_size):\n", 243 | " \"\"\"Wrapper on top of ImageDataGenerator which\n", 244 | " converts RGB images to B&W, extract the feature using Inception,\n", 245 | " and get the LAB from the original image. \n", 246 | " \n", 247 | " All this information will compose the current batch used \n", 248 | " during the training.\"\"\"\n", 249 | " for batch in datagen.flow(Xtrain, batch_size=batch_size):\n", 250 | " # RGB to B&W\n", 251 | " grayscaled_rgb = gray2rgb(rgb2gray(batch))\n", 252 | " # Feature Extraction\n", 253 | " embed = create_inception_embedding(inception, grayscaled_rgb)\n", 254 | " # RGB to LAB\n", 255 | " lab_batch = rgb2lab(batch)\n", 256 | " X_batch = lab_batch[:,:,:,0]\n", 257 | " X_batch = X_batch.reshape(X_batch.shape+(1,))\n", 258 | " # Convert LAB value from [-128, 128] to [-1, 1]\n", 259 | " Y_batch = lab_batch[:,:,:,1:] / 128\n", 260 | " # The new Batch (B&W, Embedding, LAB)\n", 261 | " yield ([X_batch, create_inception_embedding(inception, grayscaled_rgb)], Y_batch)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "## Model\n", 269 | "\n", 270 | "We will implement a model similar to Federico Baldassarre’s [Deep Koalarization: Image Colorization using CNNs and Inception-ResNet-v2](https://arxiv.org/abs/1712.03400). 
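A quick, hypothetical way to sanity-check the `image_a_b_gen` generator defined above is to pull a single batch and inspect the shapes; this assumes `Xtrain`, `BATCH_SIZE`, and the `inception` model (loaded in the following code cell) are already in scope, and that the training images are 256x256 as in the colornet dataset.

```python
# Pull one batch from the generator and confirm the shapes it yields.
[X_batch, embed_batch], Y_batch = next(image_a_b_gen(BATCH_SIZE))
print(X_batch.shape)      # (batch, 256, 256, 1)  L-channel input
print(embed_batch.shape)  # (batch, 1000)         Inception-ResNet-v2 embedding
print(Y_batch.shape)      # (batch, 256, 256, 2)  ab target scaled to [-1, 1]
```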
Here are 2 images for the same model:\n", 271 | "\n", 272 | "![colornet](https://raw.githubusercontent.com/baldassarreFe/deep-koalarization/master/assets/our_net.png)\n", 273 | "*Deep Koalarization - Image from [the paper](https://arxiv.org/abs/1712.03400)*\n", 274 | "\n", 275 | "![emill's colornet](https://blog.floydhub.com/content/images/2018/06/fusion_layer.png)\n", 276 | "\n", 277 | "*Colornet - Image from [the Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)*" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 6, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "#Load weights of InceptionResNet model for embedding extraction \n", 287 | "inception = InceptionResNetV2(weights=None, include_top=True)\n", 288 | "inception.load_weights('/floyd/input/colornet/models/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')\n", 289 | "inception.graph = tf.get_default_graph()" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 7, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "name": "stdout", 299 | "output_type": "stream", 300 | "text": [ 301 | "__________________________________________________________________________________________________\n", 302 | "Layer (type) Output Shape Param # Connected to \n", 303 | "==================================================================================================\n", 304 | "input_6 (InputLayer) (None, 256, 256, 1) 0 \n", 305 | "__________________________________________________________________________________________________\n", 306 | "conv2d_421 (Conv2D) (None, 128, 128, 64) 640 input_6[0][0] \n", 307 | "__________________________________________________________________________________________________\n", 308 | "conv2d_422 (Conv2D) (None, 128, 128, 128 73856 conv2d_421[0][0] \n", 309 | "__________________________________________________________________________________________________\n", 310 | "conv2d_423 (Conv2D) (None, 64, 64, 128) 147584 conv2d_422[0][0] \n", 311 | "__________________________________________________________________________________________________\n", 312 | "conv2d_424 (Conv2D) (None, 64, 64, 256) 295168 conv2d_423[0][0] \n", 313 | "__________________________________________________________________________________________________\n", 314 | "conv2d_425 (Conv2D) (None, 32, 32, 256) 590080 conv2d_424[0][0] \n", 315 | "__________________________________________________________________________________________________\n", 316 | "conv2d_426 (Conv2D) (None, 32, 32, 512) 1180160 conv2d_425[0][0] \n", 317 | "__________________________________________________________________________________________________\n", 318 | "input_5 (InputLayer) (None, 1000) 0 \n", 319 | "__________________________________________________________________________________________________\n", 320 | "conv2d_427 (Conv2D) (None, 32, 32, 512) 2359808 conv2d_426[0][0] \n", 321 | "__________________________________________________________________________________________________\n", 322 | "repeat_vector_2 (RepeatVector) (None, 1024, 1000) 0 input_5[0][0] \n", 323 | "__________________________________________________________________________________________________\n", 324 | "conv2d_428 (Conv2D) (None, 32, 32, 256) 1179904 conv2d_427[0][0] \n", 325 | "__________________________________________________________________________________________________\n", 326 | "reshape_2 (Reshape) (None, 32, 32, 1000) 0 repeat_vector_2[0][0] \n", 327 | 
"__________________________________________________________________________________________________\n", 328 | "concatenate_2 (Concatenate) (None, 32, 32, 1256) 0 conv2d_428[0][0] \n", 329 | " reshape_2[0][0] \n", 330 | "__________________________________________________________________________________________________\n", 331 | "conv2d_429 (Conv2D) (None, 32, 32, 256) 321792 concatenate_2[0][0] \n", 332 | "__________________________________________________________________________________________________\n", 333 | "conv2d_430 (Conv2D) (None, 32, 32, 128) 295040 conv2d_429[0][0] \n", 334 | "__________________________________________________________________________________________________\n", 335 | "up_sampling2d_4 (UpSampling2D) (None, 64, 64, 128) 0 conv2d_430[0][0] \n", 336 | "__________________________________________________________________________________________________\n", 337 | "conv2d_431 (Conv2D) (None, 64, 64, 64) 73792 up_sampling2d_4[0][0] \n", 338 | "__________________________________________________________________________________________________\n", 339 | "up_sampling2d_5 (UpSampling2D) (None, 128, 128, 64) 0 conv2d_431[0][0] \n", 340 | "__________________________________________________________________________________________________\n", 341 | "conv2d_432 (Conv2D) (None, 128, 128, 32) 18464 up_sampling2d_5[0][0] \n", 342 | "__________________________________________________________________________________________________\n", 343 | "conv2d_433 (Conv2D) (None, 128, 128, 16) 4624 conv2d_432[0][0] \n", 344 | "__________________________________________________________________________________________________\n", 345 | "conv2d_434 (Conv2D) (None, 128, 128, 2) 130 conv2d_433[0][0] \n", 346 | "__________________________________________________________________________________________________\n", 347 | "up_sampling2d_6 (UpSampling2D) (None, 256, 256, 2) 0 conv2d_434[0][0] \n", 348 | "==================================================================================================\n", 349 | "Total params: 6,541,042\n", 350 | "Trainable params: 6,541,042\n", 351 | "Non-trainable params: 0\n", 352 | "__________________________________________________________________________________________________\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "# The Model\n", 358 | "def conv_stack(data, filters, s):\n", 359 | " \"\"\"Utility for building conv layer\"\"\"\n", 360 | " output = Conv2D(filters, (3, 3), strides=s, activation='relu', padding='same')(data)\n", 361 | " return output\n", 362 | "\n", 363 | "embed_input = Input(shape=(1000,))\n", 364 | "\n", 365 | "#Encoder\n", 366 | "encoder_input = Input(shape=(256, 256, 1,))\n", 367 | "encoder_output = conv_stack(encoder_input, 64, 2)\n", 368 | "encoder_output = conv_stack(encoder_output, 128, 1)\n", 369 | "encoder_output = conv_stack(encoder_output, 128, 2)\n", 370 | "encoder_output = conv_stack(encoder_output, 256, 1)\n", 371 | "encoder_output = conv_stack(encoder_output, 256, 2)\n", 372 | "encoder_output = conv_stack(encoder_output, 512, 1)\n", 373 | "encoder_output = conv_stack(encoder_output, 512, 1)\n", 374 | "encoder_output = conv_stack(encoder_output, 256, 1)\n", 375 | "\n", 376 | "#Fusion\n", 377 | "# y_mid: (None, 256, 28, 28)\n", 378 | "fusion_output = RepeatVector(32 * 32)(embed_input) \n", 379 | "fusion_output = Reshape(([32, 32, 1000]))(fusion_output)\n", 380 | "fusion_output = concatenate([encoder_output, fusion_output], axis=3) \n", 381 | "fusion_output = Conv2D(256, (1, 1), activation='relu')(fusion_output) \n", 382 | "\n", 
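    "# Fusion: the 1000-d Inception embedding is repeated 32*32 times, reshaped to a\n",
    "# 32x32x1000 grid, and concatenated with the 32x32x256 encoder features to give a\n",
    "# 32x32x1256 tensor; the 1x1 convolution then projects it back to 256 channels.\n",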
383 | "\n", 384 | "\n", 385 | "#Decoder\n", 386 | "decoder_output = conv_stack(fusion_output, 128, 1)\n", 387 | "decoder_output = UpSampling2D((2, 2))(decoder_output)\n", 388 | "decoder_output = conv_stack(decoder_output, 64, 1)\n", 389 | "decoder_output = UpSampling2D((2, 2))(decoder_output)\n", 390 | "decoder_output = conv_stack(decoder_output, 32, 1)\n", 391 | "decoder_output = conv_stack(decoder_output, 16, 1)\n", 392 | "decoder_output = Conv2D(2, (2, 2), activation='tanh', padding='same')(decoder_output)\n", 393 | "decoder_output = UpSampling2D((2, 2))(decoder_output)\n", 394 | "\n", 395 | "model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)\n", 396 | "model.summary()" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "## Train & Evaluate\n", 404 | "\n", 405 | "If you left the default hyperpameters in the Notebook untouched, your training should take approximately: \n", 406 | "\n", 407 | "- On CPU machine: 4-5 hours for 250 epochs.\n", 408 | "- On GPU machine: 50 minutes for 1000 epochs.\n", 409 | "\n", 410 | "**Note**: In the dataset you can find different pretrained models that you can use for testing or as a starting point for fine tuning, e.g.: \n", 411 | "```python\n", 412 | "# model.load_weights('')\n", 413 | "model.load_weights('/floyd/input/colornet/models/color_tensorflow_real_mode_300.h5')\n", 414 | "```\n", 415 | "\n", 416 | "**Emil's advice**\n", 417 | "\n", 418 | "It's tricky to get good results. A lot of has to do with how many epochs you train it and which training data you use. *I'd recommend starting with 20-100 images* and **saving at regular intervals**. Once you get a feel for it, you can increase the number of images. Also, use a lot of validation images to understand where it's good and where it struggles.\n", 419 | "\n", 420 | "Analyzing the loss data can also be hard. Initially, I noticed that the batch normalization makes the pictures sepia looking. Then it needs additional training to create colors. The loss curve can be misleading because of this.\n", 421 | "\n", 422 | "For better results, I'd recommend adding a weighted classification, to favor vibrant colors. If I were to redo it today, I'd experiment with the pix2pixHD GAN structure: https://github.com/NVIDIA/pix2pixHD." 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "#Train model \n", 432 | "tensorboard = TensorBoard(log_dir=\"/floyd/home/run\")\n", 433 | "model.compile(optimizer='adam', loss='mse')\n", 434 | "model.fit_generator(image_a_b_gen(BATCH_SIZE), \n", 435 | " callbacks=[tensorboard], \n", 436 | " epochs=EPOCHS, steps_per_epoch=1, verbose=2)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": {}, 442 | "source": [ 443 | "### Eval\n", 444 | "\n", 445 | "We will use the images in the range [START, END] of the Train for evaluating our model as Emil did during his experiments." 
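Emil's advice above stresses saving at regular intervals; one minimal way to do that, not part of the original notebook, is to add a Keras `ModelCheckpoint` callback alongside the TensorBoard callback already used in the training cell.

```python
# Hypothetical addition: checkpoint the weights periodically during training.
from keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint(
    "models/color_tensorflow_ds_small_{epoch:03d}.h5",  # one file per saved epoch
    save_weights_only=True,
    period=50,                    # save every 50 epochs (Keras 2.x `period` argument)
)
model.fit_generator(image_a_b_gen(BATCH_SIZE),
                    callbacks=[tensorboard, checkpoint],
                    epochs=EPOCHS, steps_per_epoch=1, verbose=2)
```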
446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 9, 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "name": "stderr", 455 | "output_type": "stream", 456 | "text": [ 457 | "/usr/local/lib/python3.6/site-packages/skimage/util/dtype.py:130: UserWarning: Possible precision loss when converting from float64 to uint8\n", 458 | " .format(dtypeobj_in, dtypeobj_out))\n" 459 | ] 460 | } 461 | ], 462 | "source": [ 463 | "# Eval Colorization\n", 464 | "from support import color_result\n", 465 | "\n", 466 | "START = 0\n", 467 | "END = 100\n", 468 | "PATH = '/floyd/input/colornet/ds-big/Train/'\n", 469 | "RESULT = 'result'\n", 470 | "\n", 471 | "# It could take some minutes on CPU\n", 472 | "color_result(PATH, START, END, RESULT, model, inception)" 473 | ] 474 | }, 475 | { 476 | "cell_type": "code", 477 | "execution_count": 10, 478 | "metadata": {}, 479 | "outputs": [ 480 | { 481 | "data": { 482 | "application/vnd.jupyter.widget-view+json": { 483 | "model_id": "f8b823926f80494390c7a285e56433d0", 484 | "version_major": 2, 485 | "version_minor": 0 486 | }, 487 | "text/plain": [ 488 | "interactive(children=(IntSlider(value=1, description='Show results of colorization', max=99, min=1), Output())…" 489 | ] 490 | }, 491 | "metadata": {}, 492 | "output_type": "display_data" 493 | } 494 | ], 495 | "source": [ 496 | "# Show results\n", 497 | "\n", 498 | "from ipywidgets import interact\n", 499 | "from ipywidgets import widgets\n", 500 | "from support import show_img \n", 501 | "\n", 502 | "def show_sample(sample_n):\n", 503 | " image_path = os.path.join(RESULT, \"img_\"+str(sample_n-1)+\".png\")\n", 504 | " img = image.load_img(image_path)\n", 505 | " img = image.img_to_array(img)/255\n", 506 | " ax = show_img(img, figsize=(9,9))\n", 507 | " ax.set_title(image_path)\n", 508 | " \n", 509 | "interact(show_sample, sample_n=widgets.IntSlider(value=1, min=1, max=END-START-1, description='Show results of colorization'));" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "## It's your turn\n", 517 | "\n", 518 | "Test out the model you just trained. Run the code Cell below and enter a URL with your pic in the widget below. 
Have fun!🎉\n", 519 | "\n", 520 | "Here's some URL for testing:\n", 521 | "\n", 522 | "- (man, colored) http://www.bolsamania.com/cine/wp-content/uploads/2017/03/26-2.jpg\n", 523 | "- (landscape, colored) https://cdn.pixabay.com/photo/2017/04/07/18/23/landscape-2211587_960_720.jpg\n", 524 | "- (lion, b&w) https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQTXYpHhz45gaDHPsNulPFotlc72i3MDv_1RoOcQjEQx3sX-dWj\n", 525 | "\n", 526 | "**Note**: *You can also consider to use URL of colored pictures, in this way you can fully test the colorization on new images.*" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 11, 532 | "metadata": {}, 533 | "outputs": [ 534 | { 535 | "data": { 536 | "application/vnd.jupyter.widget-view+json": { 537 | "model_id": "d432f70dedc343d3986357c351a1a45b", 538 | "version_major": 2, 539 | "version_minor": 0 540 | }, 541 | "text/plain": [ 542 | "interactive(children=(Text(value='', description='URL', placeholder='Insert URL of a pic'), Button(description…" 543 | ] 544 | }, 545 | "metadata": {}, 546 | "output_type": "display_data" 547 | } 548 | ], 549 | "source": [ 550 | "# Testing on url images\n", 551 | "from ipywidgets import interact_manual\n", 552 | "from ipywidgets import widgets\n", 553 | "from support import prediction_from_url\n", 554 | "\n", 555 | "def get_prediction(URL):\n", 556 | " prediction_from_url(URL, model, inception)\n", 557 | "\n", 558 | "interact_manual(get_prediction, URL=widgets.Text(placeholder='Insert URL of a pic'));" 559 | ] 560 | }, 561 | { 562 | "cell_type": "markdown", 563 | "metadata": {}, 564 | "source": [ 565 | "## Save the result" 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "execution_count": 12, 571 | "metadata": {}, 572 | "outputs": [], 573 | "source": [ 574 | "model.save_weights(\"models/color_tensorflow_ds_small_{}.h5\".format(EPOCHS))" 575 | ] 576 | }, 577 | { 578 | "cell_type": "markdown", 579 | "metadata": {}, 580 | "source": [ 581 | "### What's next\n", 582 | "\n", 583 | "Colorizing images is a deeply fascinating problem. It is as much as a scientific problem as artistic one. I wrote this article so you can get up to speed in coloring and continue where I left off. Here are some suggestions to get started:\n", 584 | "\n", 585 | "- Implement it with another pre-trained model\n", 586 | "- A different dataset (you can use **ds-big**)\n", 587 | "- Enable the network to grow in accuracy with more pictures\n", 588 | "- Build an amplifier within the RGB color space. Create a similar model to the coloring network, that takes a saturated colored image as input and the correct colored image as output.\n", 589 | "- Implement a weighted classification\n", 590 | "- Use a classification neural network as a loss function. Pictures that are classified as fake produce an error. It then decides how much each pixel contributed to the error.\n", 591 | "- *Apply it to video* (This is a killer AI product). Don’t worry too much about the colorization, but make the switch between images consistent. You could also do something similar for larger images, by tiling smaller ones." 
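As a concrete, hypothetical starting point for the weighted-classification suggestion above, one option is to keep the regression setup but weight the per-pixel error by the saturation of the true ab values, so vibrant regions contribute more to the loss; this is only an illustration, not something the original notebook implements.

```python
# Sketch of a saturation-weighted MSE for the ab regression (hypothetical).
import keras.backend as K

def saturation_weighted_mse(y_true, y_pred):
    # y_true / y_pred hold the ab channels scaled to [-1, 1].
    # Pixels with larger true ab magnitude (more vibrant colors) get larger weights.
    weights = 1.0 + K.sqrt(K.sum(K.square(y_true), axis=-1, keepdims=True))
    return K.mean(weights * K.square(y_true - y_pred))

# model.compile(optimizer='adam', loss=saturation_weighted_mse)
```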
592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "##### That's all folks - don't forget to shutdown your workspace once you're done 🙂" 599 | ] 600 | } 601 | ], 602 | "metadata": { 603 | "kernelspec": { 604 | "display_name": "Python 2", 605 | "language": "python", 606 | "name": "python2" 607 | }, 608 | "language_info": { 609 | "codemirror_mode": { 610 | "name": "ipython", 611 | "version": 2 612 | }, 613 | "file_extension": ".py", 614 | "mimetype": "text/x-python", 615 | "name": "python", 616 | "nbconvert_exporter": "python", 617 | "pygments_lexer": "ipython2", 618 | "version": "2.7.10" 619 | } 620 | }, 621 | "nbformat": 4, 622 | "nbformat_minor": 2 623 | } 624 | --------------------------------------------------------------------------------