46 |
47 |
48 |
--------------------------------------------------------------------------------
/serving.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.preprocessing import image
3 | from keras.preprocessing.image import array_to_img
4 | from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb
5 | from support import (
6 | load_pretrained_model,
7 | create_inception_embedding,
8 | )
9 |
10 |
11 | INCEPTION_PATH = ('/colornet/models/'
12 | 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')
13 | MODEL_PATH = '/colornet/models/color_tensorflow_real_mode_300.h5'
14 |
15 | ALLOWED_EXTENSIONS = set(['jpg', 'png', 'jpeg'])
16 |
17 |
18 | # MODELS
19 | model = None
20 | inception = None
21 |
22 |
23 | def load_model():
24 | """Load the model"""
25 | global model, inception
26 | (model, inception) = load_pretrained_model(INCEPTION_PATH, MODEL_PATH)
27 |
28 |
29 | def evaluate_input(input_filepath: str):
30 |     global model
31 |     (color_me, color_me_embed) = _data_preprocessing(input_filepath)
32 | output = model.predict([color_me, color_me_embed])
33 | # Rescale the output from [-1,1] to [-128, 128]
34 | output = output * 128
35 |     # Output colorization
36 |     # The serving path handles a single image, so take the first prediction
37 |     cur = np.zeros((256, 256, 3))
38 |     # LAB representation: L channel from the input, ab channels from the prediction
39 |     cur[:, :, 0] = color_me[0][:, :, 0]
40 |     cur[:, :, 1:] = output[0]
41 |     img = array_to_img(lab2rgb(cur))
42 |     return img
43 |
44 |
45 | def _data_preprocessing(input_filepath):
46 |     """Convert the RGB image to its L (grayscale) channel and Inception embedding."""
47 | global inception
48 |
49 | img = image.load_img(input_filepath, target_size=(256, 256))
50 | img = image.img_to_array(img)
51 | color_me = [img]
52 |
53 | #################
54 | # Preprocessing #
55 | #################
56 | # From RGB to B&W and embedding
57 | color_me = np.array(color_me, dtype=float)
58 | color_me_embed = create_inception_embedding(inception, gray2rgb(rgb2gray(1.0/255*color_me)))
59 | color_me = rgb2lab(1.0/255*color_me)[:, :, :, 0]
60 | color_me = color_me.reshape(color_me.shape+(1,))
61 | return (color_me, color_me_embed)
62 |
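
# Example usage (a sketch; the Flask app that wires these functions to HTTP routes
# is not part of this file, so the calls below are only illustrative):
#
#   from serving import load_model, evaluate_input
#   load_model()
#   img = evaluate_input('/path/to/bw_photo.jpg')  # returns a PIL image
#   img.save('/path/to/colorized.png')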
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Colornet
2 |
3 | Today, colorization is usually done by hand in Photoshop; a single picture can take up to a month to colorize. It requires extensive research: a face alone needs up to 20 layers of pink, green and blue shades to get it just right. But something changed when Amir Avni used neural networks to [troll the subreddit](http://www.whatimade.today/our-frst-reddit-bot-coloring-b-2/) [/r/Colorization](https://www.reddit.com/r/Colorization/) - a community where people colorize historical black-and-white images manually in Photoshop. They were astonished by Amir’s deep learning bot - what could take up to a month of manual labour can now be done in just a few seconds.
4 |
5 | ### Try it now
6 |
7 | [](https://floydhub.com/run?template=https://github.com/floydhub/colornet-template)
8 |
9 | Click this button to open a Workspace on FloydHub where you can train this model.
10 |
11 | ### Colorizing Black&White photos
12 |
13 | Fascinated by Amir’s neural network, Emil reproduced it and documented the process in his famous blog post: [Colorizing B&W Photos with Neural Networks](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/). In the accompanying notebook we reproduce Emil's work using the full version of his experiments.
14 |
15 | 
16 | *The middle picture is done with our neural network and the picture to the right is the original color photo - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)*
17 |
18 | We will:
19 | - Preprocess the image data for this CV task
20 | - Build and train the `colornet` model using Keras and Tensorflow
21 | - Evaluate our model on the test set
22 | - Run the model on your own black&white and colored pictures!
23 |
24 |
25 | ### Serve an interactive web page for your own model
26 |
27 | You can easily spin up a serve job on FloydHub to demo your model through an
28 | interactive website. Just run the following command from the workspace terminal or
29 | from your local machine:
30 |
31 | ```bash
32 | floyd run --mode serve
33 | ```
34 |
35 | You should be able to see the following page when visiting the FloydHub serve URL:
36 |
37 | 
38 |
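Once the serve job is up, you can also send an image to it programmatically. The snippet
below is only a sketch: the endpoint path (`/image`) and the form-field name (`file`) depend
on how the Flask app is wired, so adjust them to match your setup.

```python
import requests

SERVE_URL = 'https://<your-floydhub-serve-url>'  # printed in the serve job output

# Send a black & white photo and save the colorized result
with open('my_bw_photo.jpg', 'rb') as f:
    response = requests.post(SERVE_URL + '/image', files={'file': f})

with open('colorized.png', 'wb') as out:
    out.write(response.content)
```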
--------------------------------------------------------------------------------
/templates/serving_template.html:
--------------------------------------------------------------------------------
1 | {% extends "layout.html" %}
2 | {% block body %}
3 |
  Colorizing Black & White Photo
117 | {% endblock %}
118 |
--------------------------------------------------------------------------------
/support.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 | import numpy as np
4 |
5 | from keras.applications.inception_resnet_v2 import InceptionResNetV2
6 | from keras.applications.inception_resnet_v2 import preprocess_input
7 | from keras.layers.core import RepeatVector
8 | from keras.preprocessing import image
9 | from keras.preprocessing.image import img_to_array, load_img
10 | from keras.models import Model
11 | from keras.layers import (
12 | Conv2D,
13 | UpSampling2D,
14 | Input,
15 | Reshape,
16 | concatenate,
17 | )
18 |
19 | from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb
20 | from skimage.transform import resize
21 | from skimage.io import imsave
22 |
23 | import matplotlib.pyplot as plt
24 | import tensorflow as tf
25 |
26 |
27 | # Create embedding
28 | def create_inception_embedding(inception, grayscaled_rgb):
29 | grayscaled_rgb_resized = []
30 | for i in grayscaled_rgb:
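        # InceptionResNetV2 expects 299x299 RGB inputs, so resize each grayscale RGB frame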
31 | i = resize(i, (299, 299, 3), mode='constant', anti_aliasing=True)
32 | grayscaled_rgb_resized.append(i)
33 | grayscaled_rgb_resized = np.array(grayscaled_rgb_resized)
34 | grayscaled_rgb_resized = preprocess_input(grayscaled_rgb_resized)
35 | with inception.graph.as_default():
36 | embed = inception.predict(grayscaled_rgb_resized)
37 | return embed
38 |
39 |
40 | def show_img(im, figsize=None, ax=None):
41 | if not ax:
42 | fig, ax = plt.subplots(figsize=figsize)
43 | ax.imshow(im)
44 | ax.get_xaxis().set_visible(False)
45 | ax.get_yaxis().set_visible(False)
46 | return ax
47 |
48 |
49 | def read_img(img_id, data_dir, train_or_test, size):
50 | """Read and resize image.
51 | # Arguments
52 | img_id: string
53 | train_or_test: string 'train' or 'test'.
54 | size: resize the original image.
55 | # Returns
56 | Image as numpy array.
57 | """
58 | img = image.load_img(os.path.join(data_dir, train_or_test, '%s.jpg' % img_id), target_size=size)
59 | img = image.img_to_array(img)
60 | return img
61 |
62 |
63 | def color_result(PATH, START, END, RESULT, model, inception):
64 | # Make predictions on validation images
65 | color_me = []
66 | i = 0
67 |     # Take the files with index in the open range (START, END) from the PATH folder
68 | for filename in os.listdir(PATH):
69 | if i > START and i < END:
70 | color_me.append(img_to_array(load_img(os.path.join(PATH, filename))))
71 | i += 1
72 |
73 | #################
74 | # Preprocessing #
75 | #################
76 | # From RGB to B&W and embedding
77 | color_me = np.array(color_me, dtype=float)
78 | color_me_embed = create_inception_embedding(inception, gray2rgb(rgb2gray(1.0/255*color_me)))
79 | color_me = rgb2lab(1.0/255*color_me)[:, :, :, 0]
80 | color_me = color_me.reshape(color_me.shape+(1,))
81 |
82 | # Test model
83 | output = model.predict([color_me, color_me_embed])
84 | # Rescale the output from [-1,1] to [-128, 128]
85 | output = output * 128
86 |
87 |     # Create the result directory if it does not exist
88 |     if not os.path.exists(RESULT):
89 |         os.makedirs(RESULT)
90 |
91 | # Output colorizations
92 | for i in range(len(output)):
93 | cur = np.zeros((256, 256, 3))
94 | # LAB representation
95 | cur[:, :, 0] = color_me[i][:, :, 0]
96 | cur[:, :, 1:] = output[i]
97 | # Save images as RGB
98 |         imsave(os.path.join(RESULT, "img_" + str(i) + ".png"), lab2rgb(cur))
99 |
100 |
101 | def prediction_from_url(url, model, inception):
102 | test_image_path = '/tmp/test.jpg'
103 |
104 |     # Download the image (fail fast if the request was unsuccessful)
105 |     response = requests.get(url)
106 |     response.raise_for_status()
107 |     with open(test_image_path, 'wb') as f:
108 |         f.write(response.content)
109 |
110 | color_me = []
111 | color_me.append(read_img('test', '/', 'tmp', (256, 256)))
112 |
113 | #################
114 | # Preprocessing #
115 | #################
116 | # From RGB to B&W and embedding
117 | color_me = np.array(color_me, dtype=float)
118 | color_me_embed = create_inception_embedding(inception, gray2rgb(rgb2gray(1.0/255*color_me)))
119 | color_me = rgb2lab(1.0/255*color_me)[:, :, :, 0]
120 | color_me = color_me.reshape(color_me.shape+(1,))
121 |
122 | # Test model
123 | output = model.predict([color_me, color_me_embed])
124 | # Rescale the output from [-1,1] to [-128, 128]
125 | output = output * 128
126 |
127 | # Output colorizations
128 | for i in range(len(output)):
129 | cur = np.zeros((256, 256, 3))
130 | # LAB representation
131 | cur[:, :, 0] = color_me[i][:, :, 0]
132 | cur[:, :, 1:] = output[i]
133 |
134 | # B&W
135 | fig = plt.figure(figsize=(9, 9))
136 | ax1 = fig.add_subplot(1, 3, 1)
137 | ax1.axis('off')
138 | ax1.set_title('B&W')
139 | ax1.imshow(rgb2gray(read_img('test', '/', 'tmp', (256, 256))/255), cmap='gray')
140 |
141 | # Prediction
142 | ax2 = fig.add_subplot(1, 3, 2)
143 | ax2.axis('off')
144 | ax2.set_title('Prediction')
145 | ax2.imshow(lab2rgb(cur))
146 |
147 | # Original
148 | ax3 = fig.add_subplot(1, 3, 3)
149 | ax3.axis('off')
150 | ax3.set_title('Original')
151 | ax3.imshow(read_img('test', '/', 'tmp', (256, 256))/255)
152 |
153 |
154 | def load_pretrained_model(inception_wpath, colornet_wpath):
155 | '''Load Emil's pretrained model'''
156 | print('Loading pretrained model... (it could take a while)')
157 |
158 | # Load weights of InceptionResNet model for embedding extraction
159 | inception = InceptionResNetV2(weights=None, include_top=True)
160 | inception.load_weights(inception_wpath)
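    # Keep a handle to the TF graph so create_inception_embedding can run predictions
    # outside this thread (e.g. from the notebook widgets or the serving app)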
161 | inception.graph = tf.get_default_graph()
162 |
163 | # The Model
164 | def conv_stack(data, filters, s):
165 | """Utility for building conv layer"""
166 | output = Conv2D(filters, (3, 3), strides=s, activation='relu', padding='same')(data)
167 | return output
168 |
169 | embed_input = Input(shape=(1000,))
170 |
171 | # Encoder
172 | encoder_input = Input(shape=(256, 256, 1,))
173 | encoder_output = conv_stack(encoder_input, 64, 2)
174 | encoder_output = conv_stack(encoder_output, 128, 1)
175 | encoder_output = conv_stack(encoder_output, 128, 2)
176 | encoder_output = conv_stack(encoder_output, 256, 1)
177 | encoder_output = conv_stack(encoder_output, 256, 2)
178 | encoder_output = conv_stack(encoder_output, 512, 1)
179 | encoder_output = conv_stack(encoder_output, 512, 1)
180 | encoder_output = conv_stack(encoder_output, 256, 1)
181 |
182 | # Fusion
183 |     # Tile the 1000-d embedding over the 32x32 encoder grid and fuse the two
184 | fusion_output = RepeatVector(32 * 32)(embed_input)
185 | fusion_output = Reshape(([32, 32, 1000]))(fusion_output)
186 | fusion_output = concatenate([encoder_output, fusion_output], axis=3)
187 | fusion_output = Conv2D(256, (1, 1), activation='relu')(fusion_output)
188 |
189 | # Decoder
190 | decoder_output = conv_stack(fusion_output, 128, 1)
191 | decoder_output = UpSampling2D((2, 2))(decoder_output)
192 | decoder_output = conv_stack(decoder_output, 64, 1)
193 | decoder_output = UpSampling2D((2, 2))(decoder_output)
194 | decoder_output = conv_stack(decoder_output, 32, 1)
195 | decoder_output = conv_stack(decoder_output, 16, 1)
196 | decoder_output = Conv2D(2, (2, 2), activation='tanh', padding='same')(decoder_output)
197 | decoder_output = UpSampling2D((2, 2))(decoder_output)
198 |
199 | model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)
200 |
201 | # Load colornet weights
202 | model.load_weights(colornet_wpath)
203 |
204 | print('Model loaded!')
205 |     return (model, inception)
206 |
--------------------------------------------------------------------------------
/colornet.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Colornet\n",
8 | "\n",
9 | "Today, colorization is done by hand in Photoshop, a picture can take up to one month to colorize. It requires extensive research. A face alone needs up to 20 layers of pink, green and blue shades to get it just right. But something changed this year when Amir Avni used neural networks to [troll the subreddit](http://www.whatimade.today/our-frst-reddit-bot-coloring-b-2/) [/r/Colorization](https://www.reddit.com/r/Colorization/) - a community where people colorize historical black and white images manually using Photoshop. They were astonished with Amir’s deep learning bot - what could take up to a month of manual labour could now be done in just a few seconds.\n",
10 | "\n",
11 | "### Colorizing Black&White photos\n",
12 | "\n",
13 | "Fascinated by Amir’s neural network, Emill reproduced it and documented the process in the famous blog post: [Colorizing B&W Photos with Neural Networks](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/). In this notebook we will reproduce Emil's work by using the Full Version of his experiments.\n",
14 | "\n",
15 | "\n",
16 | "*The middle picture is done with our neural network and the picture to the right is the original color photo - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)*\n",
17 | "\n",
18 | "We will:\n",
19 | "- Preprocess the image data for this CV task\n",
20 | "- Build and train the `colornet` model using Keras and Tensorflow\n",
21 | "- Evaluate our model on the test set\n",
22 | "- Run the model on your own black&white and colored pictures!\n",
23 | "\n",
24 | "### Instructions\n",
25 | "\n",
26 | "- To execute a code cell, click on the cell and press `Shift + Enter` (shortcut for Run).\n",
27 | "- To learn more about Workspaces, check out the [Getting Started Notebook](get_started_workspace.ipynb).\n",
28 | "- **Tip**: *Feel free to try this Notebook with your own data and on your own super awesome colorization task.*\n",
29 | "\n",
30 | "Now, let's get started! 🚀"
31 | ]
32 | },
33 | {
34 | "cell_type": "markdown",
35 | "metadata": {},
36 | "source": [
37 | "## Try it now!\n",
38 | "\n",
39 | "Test out the Emil's pretrained model. Run the code Cell below and enter a URL with your pic in the widget below. Have fun!🎉\n",
40 | "\n",
41 | "Here are some URLs for testing:\n",
42 | "\n",
43 | "- (man, colored) http://www.bolsamania.com/cine/wp-content/uploads/2017/03/26-2.jpg\n",
44 | "- (landscape, colored) https://cdn.pixabay.com/photo/2017/04/07/18/23/landscape-2211587_960_720.jpg\n",
45 | "- (lion, b&w) https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQTXYpHhz45gaDHPsNulPFotlc72i3MDv_1RoOcQjEQx3sX-dWj\n",
46 | "\n",
47 | "\n",
48 | "Note: \n",
49 | "- You can also consider to use URL of colored pictures, in this way you can fully test the colorization on new images.\n",
50 | "- The first prediction can take up to one minute."
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 1,
56 | "metadata": {},
57 | "outputs": [
58 | {
59 | "name": "stderr",
60 | "output_type": "stream",
61 | "text": [
62 | "Using TensorFlow backend.\n"
63 | ]
64 | },
65 | {
66 | "name": "stdout",
67 | "output_type": "stream",
68 | "text": [
69 | "Loading pretrained model... (it could take a while)\n",
70 | "Model loaded!\n"
71 | ]
72 | },
73 | {
74 | "data": {
75 | "application/vnd.jupyter.widget-view+json": {
76 | "model_id": "9db845b0310c4d0f85a0d8787c3ba761",
77 | "version_major": 2,
78 | "version_minor": 0
79 | },
80 | "text/plain": [
81 | "interactive(children=(Text(value='', description='URL', placeholder='Insert URL of a pic'), Button(description…"
82 | ]
83 | },
84 | "metadata": {},
85 | "output_type": "display_data"
86 | }
87 | ],
88 | "source": [
89 | "# Testing on url images\n",
90 | "from ipywidgets import interact_manual\n",
91 | "from ipywidgets import widgets\n",
92 | "from support import prediction_from_url, load_pretrained_model\n",
93 | "\n",
94 | "(model, inception) = load_pretrained_model('/floyd/input/colornet/models/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5',\n",
95 | " '/floyd/input/colornet/models/color_tensorflow_real_mode_300.h5')\n",
96 | "\n",
97 | "def get_prediction(URL):\n",
98 | " prediction_from_url(URL, model, inception)\n",
99 | "\n",
100 | "interact_manual(get_prediction, URL=widgets.Text(placeholder='Insert URL of a pic'));"
101 | ]
102 | },
103 | {
104 | "cell_type": "markdown",
105 | "metadata": {},
106 | "source": [
107 | "## Initial Setup\n",
108 | "\n",
109 | "Let's start by importing some packages"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 2,
115 | "metadata": {},
116 | "outputs": [],
117 | "source": [
118 | "%load_ext autoreload\n",
119 | "%autoreload 2\n",
120 | "\n",
121 | "import tensorflow as tf\n",
122 | "import numpy as np\n",
123 | "\n",
124 | "import os\n",
125 | "import random\n",
126 | "import keras\n",
127 | "\n",
128 | "from keras.applications.inception_resnet_v2 import InceptionResNetV2\n",
129 | "from keras.applications.inception_resnet_v2 import preprocess_input\n",
130 | "\n",
131 | "from keras.preprocessing import image\n",
132 | "from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img\n",
133 | "\n",
134 | "from keras.models import Sequential, Model\n",
135 | "from keras.callbacks import TensorBoard \n",
136 | "\n",
137 | "from keras.engine import Layer\n",
138 | "from keras.layers import Conv2D, UpSampling2D, InputLayer, Conv2DTranspose, Input, Reshape, merge, concatenate, Activation, Dense, Dropout, Flatten\n",
139 | "from keras.layers.normalization import BatchNormalization\n",
140 | "from keras.layers.core import RepeatVector, Permute\n",
141 | "\n",
142 | "from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb\n",
143 | "from skimage.transform import resize\n",
144 | "from skimage.io import imsave"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "## Training Parameters\n",
152 | "\n",
153 | "We'll set the hyperparameters for training our model. If you understand what they mean, feel free to play around - otherwise, we recommend keeping the defaults for your first run 🙂"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 3,
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 | "# Hyperparams if GPU is available\n",
163 | "if tf.test.is_gpu_available():\n",
164 | " # GPU\n",
165 | " BATCH_SIZE = 20 # Number of examples used in each iteration\n",
166 | " EPOCHS = 1000 # Number of passes through entire dataset\n",
167 | "# Hyperparams for CPU training\n",
168 | "else:\n",
169 | " # CPU\n",
170 | " BATCH_SIZE = 20\n",
171 | " EPOCHS = 250"
172 | ]
173 | },
174 | {
175 | "cell_type": "markdown",
176 | "metadata": {},
177 | "source": [
178 | "## Data Loading\n",
179 | "\n",
180 | "Converting images into tensors and rescaling the pixel values from [0-255] to [0,1].\n",
181 | "\n",
182 | "The colornet dataset provides 3 datasets:\n",
183 | "- **ds-big** with 9600 images\n",
184 | "- **ds-medium** with 200 images (the pretrained models in the `/floyd/input/colornet/models` folder are trained on this one)\n",
185 | "- **ds-small** with 20 images (the one used by Emil in the **Full-Version** section of the Blog post)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 4,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": [
194 | "DS_PATH = '/floyd/input/colornet/ds-small' # ADD path/to/dataset\n",
195 | "\n",
196 | "# Get images\n",
197 | "X = []\n",
198 | "for filename in os.listdir(DS_PATH):\n",
199 | " if os.path.isfile(os.path.join(DS_PATH, filename)):\n",
200 | " X.append(img_to_array(load_img(os.path.join(DS_PATH, filename))))\n",
201 | " \n",
202 | "# Normalization => Converting pixel value from [0-255] to [0,1] \n",
203 | "X = np.array(X, dtype=float)\n",
204 | "Xtrain = 1.0/255*X"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "## Data preprocessing\n",
212 | "\n",
213 | "We’ll use an algorithm to change the color channels, from RGB to Lab. L stands for lightness, and a and b for the color spectrums green–red and blue–yellow.\n",
214 | "As you can see below, a Lab encoded image has one layer for grayscale and have packed three color layers into two. This means that we can use the original grayscale image in our final prediction. Also, we only have to two channels to predict.\n",
215 | "\n",
216 | "\n",
217 | "\n",
218 | "\n",
219 | "*L/Greyscale to AB - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)*\n",
220 | "\n",
221 | "We have a grayscale layer for input, and we want to predict two color layers, the ab in Lab. To create the final color image we’ll include the L/grayscale image we used for the input, thus, creating a Lab image.\n",
222 | "\n",
223 | "\n",
224 | "*More formally, we want to learn a mapping from the greyscale to AB - Image from the [Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)* "
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 5,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "from support import create_inception_embedding\n",
234 | "\n",
235 | "# Image transformer\n",
236 | "datagen = ImageDataGenerator(\n",
237 | " shear_range=0.1,\n",
238 | " zoom_range=0.1,\n",
239 | " rotation_range=10,\n",
240 | " horizontal_flip=True)\n",
241 | "\n",
242 | "def image_a_b_gen(batch_size):\n",
243 | " \"\"\"Wrapper on top of ImageDataGenerator which\n",
244 | " converts RGB images to B&W, extract the feature using Inception,\n",
245 | " and get the LAB from the original image. \n",
246 | " \n",
247 | " All this information will compose the current batch used \n",
248 | " during the training.\"\"\"\n",
249 | " for batch in datagen.flow(Xtrain, batch_size=batch_size):\n",
250 | " # RGB to B&W\n",
251 | " grayscaled_rgb = gray2rgb(rgb2gray(batch))\n",
252 | " # Feature Extraction\n",
253 | " embed = create_inception_embedding(inception, grayscaled_rgb)\n",
254 | " # RGB to LAB\n",
255 | " lab_batch = rgb2lab(batch)\n",
256 | " X_batch = lab_batch[:,:,:,0]\n",
257 | " X_batch = X_batch.reshape(X_batch.shape+(1,))\n",
258 | " # Convert LAB value from [-128, 128] to [-1, 1]\n",
259 | " Y_batch = lab_batch[:,:,:,1:] / 128\n",
260 | " # The new Batch (B&W, Embedding, LAB)\n",
261 | " yield ([X_batch, create_inception_embedding(inception, grayscaled_rgb)], Y_batch)"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "## Model\n",
269 | "\n",
270 | "We will implement a model similar to Federico Baldassarre’s [Deep Koalarization: Image Colorization using CNNs and Inception-ResNet-v2](https://arxiv.org/abs/1712.03400). Here are 2 images for the same model:\n",
271 | "\n",
272 | "\n",
273 | "*Deep Koalarization - Image from [the paper](https://arxiv.org/abs/1712.03400)*\n",
274 | "\n",
275 | "\n",
276 | "\n",
277 | "*Colornet - Image from [the Blog](https://blog.floydhub.com/colorizing-b-w-photos-with-neural-networks/)*"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": 6,
283 | "metadata": {},
284 | "outputs": [],
285 | "source": [
286 | "#Load weights of InceptionResNet model for embedding extraction \n",
287 | "inception = InceptionResNetV2(weights=None, include_top=True)\n",
288 | "inception.load_weights('/floyd/input/colornet/models/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')\n",
289 | "inception.graph = tf.get_default_graph()"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 7,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "name": "stdout",
299 | "output_type": "stream",
300 | "text": [
301 | "__________________________________________________________________________________________________\n",
302 | "Layer (type) Output Shape Param # Connected to \n",
303 | "==================================================================================================\n",
304 | "input_6 (InputLayer) (None, 256, 256, 1) 0 \n",
305 | "__________________________________________________________________________________________________\n",
306 | "conv2d_421 (Conv2D) (None, 128, 128, 64) 640 input_6[0][0] \n",
307 | "__________________________________________________________________________________________________\n",
308 | "conv2d_422 (Conv2D) (None, 128, 128, 128 73856 conv2d_421[0][0] \n",
309 | "__________________________________________________________________________________________________\n",
310 | "conv2d_423 (Conv2D) (None, 64, 64, 128) 147584 conv2d_422[0][0] \n",
311 | "__________________________________________________________________________________________________\n",
312 | "conv2d_424 (Conv2D) (None, 64, 64, 256) 295168 conv2d_423[0][0] \n",
313 | "__________________________________________________________________________________________________\n",
314 | "conv2d_425 (Conv2D) (None, 32, 32, 256) 590080 conv2d_424[0][0] \n",
315 | "__________________________________________________________________________________________________\n",
316 | "conv2d_426 (Conv2D) (None, 32, 32, 512) 1180160 conv2d_425[0][0] \n",
317 | "__________________________________________________________________________________________________\n",
318 | "input_5 (InputLayer) (None, 1000) 0 \n",
319 | "__________________________________________________________________________________________________\n",
320 | "conv2d_427 (Conv2D) (None, 32, 32, 512) 2359808 conv2d_426[0][0] \n",
321 | "__________________________________________________________________________________________________\n",
322 | "repeat_vector_2 (RepeatVector) (None, 1024, 1000) 0 input_5[0][0] \n",
323 | "__________________________________________________________________________________________________\n",
324 | "conv2d_428 (Conv2D) (None, 32, 32, 256) 1179904 conv2d_427[0][0] \n",
325 | "__________________________________________________________________________________________________\n",
326 | "reshape_2 (Reshape) (None, 32, 32, 1000) 0 repeat_vector_2[0][0] \n",
327 | "__________________________________________________________________________________________________\n",
328 | "concatenate_2 (Concatenate) (None, 32, 32, 1256) 0 conv2d_428[0][0] \n",
329 | " reshape_2[0][0] \n",
330 | "__________________________________________________________________________________________________\n",
331 | "conv2d_429 (Conv2D) (None, 32, 32, 256) 321792 concatenate_2[0][0] \n",
332 | "__________________________________________________________________________________________________\n",
333 | "conv2d_430 (Conv2D) (None, 32, 32, 128) 295040 conv2d_429[0][0] \n",
334 | "__________________________________________________________________________________________________\n",
335 | "up_sampling2d_4 (UpSampling2D) (None, 64, 64, 128) 0 conv2d_430[0][0] \n",
336 | "__________________________________________________________________________________________________\n",
337 | "conv2d_431 (Conv2D) (None, 64, 64, 64) 73792 up_sampling2d_4[0][0] \n",
338 | "__________________________________________________________________________________________________\n",
339 | "up_sampling2d_5 (UpSampling2D) (None, 128, 128, 64) 0 conv2d_431[0][0] \n",
340 | "__________________________________________________________________________________________________\n",
341 | "conv2d_432 (Conv2D) (None, 128, 128, 32) 18464 up_sampling2d_5[0][0] \n",
342 | "__________________________________________________________________________________________________\n",
343 | "conv2d_433 (Conv2D) (None, 128, 128, 16) 4624 conv2d_432[0][0] \n",
344 | "__________________________________________________________________________________________________\n",
345 | "conv2d_434 (Conv2D) (None, 128, 128, 2) 130 conv2d_433[0][0] \n",
346 | "__________________________________________________________________________________________________\n",
347 | "up_sampling2d_6 (UpSampling2D) (None, 256, 256, 2) 0 conv2d_434[0][0] \n",
348 | "==================================================================================================\n",
349 | "Total params: 6,541,042\n",
350 | "Trainable params: 6,541,042\n",
351 | "Non-trainable params: 0\n",
352 | "__________________________________________________________________________________________________\n"
353 | ]
354 | }
355 | ],
356 | "source": [
357 | "# The Model\n",
358 | "def conv_stack(data, filters, s):\n",
359 | " \"\"\"Utility for building conv layer\"\"\"\n",
360 | " output = Conv2D(filters, (3, 3), strides=s, activation='relu', padding='same')(data)\n",
361 | " return output\n",
362 | "\n",
363 | "embed_input = Input(shape=(1000,))\n",
364 | "\n",
365 | "#Encoder\n",
366 | "encoder_input = Input(shape=(256, 256, 1,))\n",
367 | "encoder_output = conv_stack(encoder_input, 64, 2)\n",
368 | "encoder_output = conv_stack(encoder_output, 128, 1)\n",
369 | "encoder_output = conv_stack(encoder_output, 128, 2)\n",
370 | "encoder_output = conv_stack(encoder_output, 256, 1)\n",
371 | "encoder_output = conv_stack(encoder_output, 256, 2)\n",
372 | "encoder_output = conv_stack(encoder_output, 512, 1)\n",
373 | "encoder_output = conv_stack(encoder_output, 512, 1)\n",
374 | "encoder_output = conv_stack(encoder_output, 256, 1)\n",
375 | "\n",
376 | "#Fusion\n",
377 | "# y_mid: (None, 256, 28, 28)\n",
378 | "fusion_output = RepeatVector(32 * 32)(embed_input) \n",
379 | "fusion_output = Reshape(([32, 32, 1000]))(fusion_output)\n",
380 | "fusion_output = concatenate([encoder_output, fusion_output], axis=3) \n",
381 | "fusion_output = Conv2D(256, (1, 1), activation='relu')(fusion_output) \n",
382 | "\n",
383 | "\n",
384 | "\n",
385 | "#Decoder\n",
386 | "decoder_output = conv_stack(fusion_output, 128, 1)\n",
387 | "decoder_output = UpSampling2D((2, 2))(decoder_output)\n",
388 | "decoder_output = conv_stack(decoder_output, 64, 1)\n",
389 | "decoder_output = UpSampling2D((2, 2))(decoder_output)\n",
390 | "decoder_output = conv_stack(decoder_output, 32, 1)\n",
391 | "decoder_output = conv_stack(decoder_output, 16, 1)\n",
392 | "decoder_output = Conv2D(2, (2, 2), activation='tanh', padding='same')(decoder_output)\n",
393 | "decoder_output = UpSampling2D((2, 2))(decoder_output)\n",
394 | "\n",
395 | "model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)\n",
396 | "model.summary()"
397 | ]
398 | },
399 | {
400 | "cell_type": "markdown",
401 | "metadata": {},
402 | "source": [
403 | "## Train & Evaluate\n",
404 | "\n",
405 | "If you left the default hyperpameters in the Notebook untouched, your training should take approximately: \n",
406 | "\n",
407 | "- On CPU machine: 4-5 hours for 250 epochs.\n",
408 | "- On GPU machine: 50 minutes for 1000 epochs.\n",
409 | "\n",
410 | "**Note**: In the dataset you can find different pretrained models that you can use for testing or as a starting point for fine tuning, e.g.: \n",
411 | "```python\n",
412 | "# model.load_weights('')\n",
413 | "model.load_weights('/floyd/input/colornet/models/color_tensorflow_real_mode_300.h5')\n",
414 | "```\n",
415 | "\n",
416 | "**Emil's advice**\n",
417 | "\n",
418 | "It's tricky to get good results. A lot of has to do with how many epochs you train it and which training data you use. *I'd recommend starting with 20-100 images* and **saving at regular intervals**. Once you get a feel for it, you can increase the number of images. Also, use a lot of validation images to understand where it's good and where it struggles.\n",
419 | "\n",
420 | "Analyzing the loss data can also be hard. Initially, I noticed that the batch normalization makes the pictures sepia looking. Then it needs additional training to create colors. The loss curve can be misleading because of this.\n",
421 | "\n",
422 | "For better results, I'd recommend adding a weighted classification, to favor vibrant colors. If I were to redo it today, I'd experiment with the pix2pixHD GAN structure: https://github.com/NVIDIA/pix2pixHD."
423 | ]
424 | },
425 | {
426 | "cell_type": "code",
427 | "execution_count": null,
428 | "metadata": {},
429 | "outputs": [],
430 | "source": [
431 | "#Train model \n",
432 | "tensorboard = TensorBoard(log_dir=\"/floyd/home/run\")\n",
433 | "model.compile(optimizer='adam', loss='mse')\n",
434 | "model.fit_generator(image_a_b_gen(BATCH_SIZE), \n",
435 | " callbacks=[tensorboard], \n",
436 | " epochs=EPOCHS, steps_per_epoch=1, verbose=2)"
437 | ]
438 | },
439 | {
440 | "cell_type": "markdown",
441 | "metadata": {},
442 | "source": [
443 | "### Eval\n",
444 | "\n",
445 | "We will use the images in the range [START, END] of the Train for evaluating our model as Emil did during his experiments."
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "execution_count": 9,
451 | "metadata": {},
452 | "outputs": [
453 | {
454 | "name": "stderr",
455 | "output_type": "stream",
456 | "text": [
457 | "/usr/local/lib/python3.6/site-packages/skimage/util/dtype.py:130: UserWarning: Possible precision loss when converting from float64 to uint8\n",
458 | " .format(dtypeobj_in, dtypeobj_out))\n"
459 | ]
460 | }
461 | ],
462 | "source": [
463 | "# Eval Colorization\n",
464 | "from support import color_result\n",
465 | "\n",
466 | "START = 0\n",
467 | "END = 100\n",
468 | "PATH = '/floyd/input/colornet/ds-big/Train/'\n",
469 | "RESULT = 'result'\n",
470 | "\n",
471 | "# It could take some minutes on CPU\n",
472 | "color_result(PATH, START, END, RESULT, model, inception)"
473 | ]
474 | },
475 | {
476 | "cell_type": "code",
477 | "execution_count": 10,
478 | "metadata": {},
479 | "outputs": [
480 | {
481 | "data": {
482 | "application/vnd.jupyter.widget-view+json": {
483 | "model_id": "f8b823926f80494390c7a285e56433d0",
484 | "version_major": 2,
485 | "version_minor": 0
486 | },
487 | "text/plain": [
488 | "interactive(children=(IntSlider(value=1, description='Show results of colorization', max=99, min=1), Output())…"
489 | ]
490 | },
491 | "metadata": {},
492 | "output_type": "display_data"
493 | }
494 | ],
495 | "source": [
496 | "# Show results\n",
497 | "\n",
498 | "from ipywidgets import interact\n",
499 | "from ipywidgets import widgets\n",
500 | "from support import show_img \n",
501 | "\n",
502 | "def show_sample(sample_n):\n",
503 | " image_path = os.path.join(RESULT, \"img_\"+str(sample_n-1)+\".png\")\n",
504 | " img = image.load_img(image_path)\n",
505 | " img = image.img_to_array(img)/255\n",
506 | " ax = show_img(img, figsize=(9,9))\n",
507 | " ax.set_title(image_path)\n",
508 | " \n",
509 | "interact(show_sample, sample_n=widgets.IntSlider(value=1, min=1, max=END-START-1, description='Show results of colorization'));"
510 | ]
511 | },
512 | {
513 | "cell_type": "markdown",
514 | "metadata": {},
515 | "source": [
516 | "## It's your turn\n",
517 | "\n",
518 | "Test out the model you just trained. Run the code Cell below and enter a URL with your pic in the widget below. Have fun!🎉\n",
519 | "\n",
520 | "Here's some URL for testing:\n",
521 | "\n",
522 | "- (man, colored) http://www.bolsamania.com/cine/wp-content/uploads/2017/03/26-2.jpg\n",
523 | "- (landscape, colored) https://cdn.pixabay.com/photo/2017/04/07/18/23/landscape-2211587_960_720.jpg\n",
524 | "- (lion, b&w) https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQTXYpHhz45gaDHPsNulPFotlc72i3MDv_1RoOcQjEQx3sX-dWj\n",
525 | "\n",
526 | "**Note**: *You can also consider to use URL of colored pictures, in this way you can fully test the colorization on new images.*"
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": 11,
532 | "metadata": {},
533 | "outputs": [
534 | {
535 | "data": {
536 | "application/vnd.jupyter.widget-view+json": {
537 | "model_id": "d432f70dedc343d3986357c351a1a45b",
538 | "version_major": 2,
539 | "version_minor": 0
540 | },
541 | "text/plain": [
542 | "interactive(children=(Text(value='', description='URL', placeholder='Insert URL of a pic'), Button(description…"
543 | ]
544 | },
545 | "metadata": {},
546 | "output_type": "display_data"
547 | }
548 | ],
549 | "source": [
550 | "# Testing on url images\n",
551 | "from ipywidgets import interact_manual\n",
552 | "from ipywidgets import widgets\n",
553 | "from support import prediction_from_url\n",
554 | "\n",
555 | "def get_prediction(URL):\n",
556 | " prediction_from_url(URL, model, inception)\n",
557 | "\n",
558 | "interact_manual(get_prediction, URL=widgets.Text(placeholder='Insert URL of a pic'));"
559 | ]
560 | },
561 | {
562 | "cell_type": "markdown",
563 | "metadata": {},
564 | "source": [
565 | "## Save the result"
566 | ]
567 | },
568 | {
569 | "cell_type": "code",
570 | "execution_count": 12,
571 | "metadata": {},
572 | "outputs": [],
573 | "source": [
574 | "model.save_weights(\"models/color_tensorflow_ds_small_{}.h5\".format(EPOCHS))"
575 | ]
576 | },
577 | {
578 | "cell_type": "markdown",
579 | "metadata": {},
580 | "source": [
581 | "### What's next\n",
582 | "\n",
583 | "Colorizing images is a deeply fascinating problem. It is as much as a scientific problem as artistic one. I wrote this article so you can get up to speed in coloring and continue where I left off. Here are some suggestions to get started:\n",
584 | "\n",
585 | "- Implement it with another pre-trained model\n",
586 | "- A different dataset (you can use **ds-big**)\n",
587 | "- Enable the network to grow in accuracy with more pictures\n",
588 | "- Build an amplifier within the RGB color space. Create a similar model to the coloring network, that takes a saturated colored image as input and the correct colored image as output.\n",
589 | "- Implement a weighted classification\n",
590 | "- Use a classification neural network as a loss function. Pictures that are classified as fake produce an error. It then decides how much each pixel contributed to the error.\n",
591 | "- *Apply it to video* (This is a killer AI product). Don’t worry too much about the colorization, but make the switch between images consistent. You could also do something similar for larger images, by tiling smaller ones."
592 | ]
593 | },
594 | {
595 | "cell_type": "markdown",
596 | "metadata": {},
597 | "source": [
598 | "##### That's all folks - don't forget to shutdown your workspace once you're done 🙂"
599 | ]
600 | }
601 | ],
602 | "metadata": {
603 | "kernelspec": {
604 | "display_name": "Python 2",
605 | "language": "python",
606 | "name": "python2"
607 | },
608 | "language_info": {
609 | "codemirror_mode": {
610 | "name": "ipython",
611 | "version": 2
612 | },
613 | "file_extension": ".py",
614 | "mimetype": "text/x-python",
615 | "name": "python",
616 | "nbconvert_exporter": "python",
617 | "pygments_lexer": "ipython2",
618 | "version": "2.7.10"
619 | }
620 | },
621 | "nbformat": 4,
622 | "nbformat_minor": 2
623 | }
624 |
--------------------------------------------------------------------------------