├── README.md
└── convert_vgg_grayscale.py

/README.md:
--------------------------------------------------------------------------------
# VGG_Imagenet_Weights_GrayScale_Images
Convert VGG ImageNet pre-trained weights for use with grayscale images.

2 methods:

1. Convert your images to grayscale and stack the single grayscale channel 3 times, so each image still has 3 (identical) channels (see the sketch below).

2. Convert the weights of VGG16's first convolutional layer to accommodate grayscale images.
   e.g. the dimension of VGG16's block1_conv1 kernel is (3, 3, 3, 64) -> (height, width, in_channels, out_channels). By default, in_channels corresponds to the number of channels your training images have. Since VGG16 is pre-trained on ImageNet, which contains RGB images, in_channels is 3. The idea is to extract these weight values, take a weighted average of the filters across the channel axis, and assign the result back to the block1_conv1 kernel, so that its dimension becomes (3, 3, 1, 64).

The luminosity formula is used to calculate the weighted average:

    value = (feature_red * 0.2989) + (feature_green * 0.5870) + (feature_blue * 0.1140)
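A minimal sketch of method 1 (assuming each grayscale image is already a 2-D NumPy array; `to_three_channel` is just an illustrative helper name):

```python
import numpy as np

def to_three_channel(gray_image):
    # VGG16 pre-trained on ImageNet expects 3 input channels, so the single
    # grayscale channel (height, width) is repeated 3 times along a new last
    # axis, giving an array of shape (height, width, 3).
    return np.repeat(gray_image[..., np.newaxis], 3, axis=-1)

# e.g. a (1024, 1024) grayscale image becomes a (1024, 1024, 3) array:
# rgb_like = to_three_channel(gray_image)
```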
--------------------------------------------------------------------------------
/convert_vgg_grayscale.py:
--------------------------------------------------------------------------------
import os

# Select the GPU before importing Keras/TensorFlow so the setting takes effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import keras
import numpy as np

from keras.applications.vgg16 import VGG16
from keras.models import load_model, Model
from keras.layers import Input, Conv2D, MaxPooling2D

model = VGG16(weights='imagenet', include_top=False)

# block1_conv1 weights have shape (3, 3, 3, 64) -> this is for RGB images.
# For grayscale images the shape should be (3, 3, 1, 64), so a weighted average
# of the filters has to be calculated across the input channels.
# Luminosity coefficients: Red 0.2989, Green 0.5870, Blue 0.1140

# Get the weights of block1_conv1.
block1_conv1 = model.get_layer('block1_conv1').get_weights()
weights, biases = block1_conv1

# :weights shape = (3, 3, 3, 64) -> axes (0, 1, 2, 3)
# Transpose :weights to (64, 3, 3, 3) -> (out_channels, in_channels, rows, columns)
weights = np.transpose(weights, (3, 2, 0, 1))

kernel_out_channels, kernel_in_channels, kernel_rows, kernel_columns = weights.shape

# Dimensions: (kernel_out_channels, 1 (since grayscale), kernel_rows, kernel_columns)
grayscale_weights = np.zeros((kernel_out_channels, 1, kernel_rows, kernel_columns))

# Iterate over the output channels
for i in range(kernel_out_channels):

    # get the kernel for this out_channel
    get_kernel = weights[i, :, :, :]

    temp_kernel = np.zeros((3, 3))

    # :get_kernel shape = (3, 3, 3)
    # axis, dims = (0, in_channel), (1, row), (2, col)

    # calculate the weighted average across the channel axis
    in_channels, in_rows, in_columns = get_kernel.shape

    for in_row in range(in_rows):
        for in_col in range(in_columns):
            feature_red = get_kernel[0, in_row, in_col]
            feature_green = get_kernel[1, in_row, in_col]
            feature_blue = get_kernel[2, in_row, in_col]

            # weighted average of the RGB filter values (luminosity formula)
            total = (feature_red * 0.2989) + (feature_green * 0.5870) + (feature_blue * 0.1140)

            temp_kernel[in_row, in_col] = total

    # :temp_kernel is a 3x3 matrix (rows x columns).
    # Add an axis to make the in_channels dimension explicit (= 1).
    #
    # Two ways of doing this:
    #
    # First: add an axis at the end of :temp_kernel to make its shape (3, 3, 1),
    # but this might be an issue when concatenating all feature maps.
    #
    # Second: add an axis at the start of :temp_kernel to make its shape (1, 3, 3),
    # which is (in_channels, rows, columns).
    temp_kernel = np.expand_dims(temp_kernel, axis=0)

    # Now :temp_kernel has shape (1, 3, 3).

    # Assign :temp_kernel to :grayscale_weights along axis=0
    grayscale_weights[i, :, :, :] = temp_kernel

# Dimension of :grayscale_weights is (64, 1, 3, 3).
# To bring it to the TensorFlow/Keras weight format, transpose :grayscale_weights.

# dimension, axis of :grayscale_weights = (out_channels: 0), (in_channels: 1), (rows: 2), (columns: 3)
# tf format of weights = (rows: 0), (columns: 1), (in_channels: 2), (out_channels: 3)

# Go from (0, 1, 2, 3) to (2, 3, 1, 0)
grayscale_weights = np.transpose(grayscale_weights, (2, 3, 1, 0))  # (3, 3, 1, 64)

# Combine :grayscale_weights and :biases
new_block1_conv1 = [grayscale_weights, biases]
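
# Optional sanity check (a sketch): the nested loop above is equivalent to
# contracting the input-channel axis of :weights (64, 3, 3, 3) against the
# luminosity coefficients in a single step.
luminosity = np.array([0.2989, 0.5870, 0.1140])
vectorized = np.tensordot(weights, luminosity, axes=([1], [0]))        # (64, 3, 3)
vectorized = np.transpose(vectorized, (1, 2, 0))[:, :, np.newaxis, :]  # (3, 3, 1, 64)
assert np.allclose(vectorized, grayscale_weights)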


# Reconstruct the layers of VGG16, but replace the block1_conv1 weights with :grayscale_weights.

# Get the weights of all the convolutional layers starting from 'block1_conv2'.
vgg16_weights = {}
for layer in model.layers[2:]:
    if "conv" in layer.name:
        vgg16_weights["1024_" + layer.name] = model.get_layer(layer.name).get_weights()

del model


# Custom-built VGG16 for 1024x1024 grayscale input
input = Input(shape=(1024, 1024, 1), name='1024_input')

# Block 1
x = Conv2D(64, (3, 3), activation='relu', padding='same', data_format="channels_last", name='1024_block1_conv1')(input)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='1024_block1_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block1_pool')(x)

# Block 2
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='1024_block2_conv1')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='1024_block2_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block2_pool')(x)

# Block 3
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='1024_block3_conv1')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='1024_block3_conv2')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='1024_block3_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block3_pool')(x)

# Block 4
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block4_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block4_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block4_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block4_pool')(x)

# Block 5
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block5_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block5_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block5_conv3')(x)
x = MaxPooling2D((8, 8), strides=(8, 8), name='1024_block5_pool')(x)

base_model = Model(inputs=input, outputs=x)

# Assign the grayscale weights to the first convolutional layer, then copy the
# remaining pre-trained convolutional weights over by (prefixed) layer name.
base_model.get_layer('1024_block1_conv1').set_weights(new_block1_conv1)
for layer in base_model.layers[2:]:
    if 'conv' in layer.name:
        base_model.get_layer(layer.name).set_weights(vgg16_weights[layer.name])

base_model.summary()

# print(base_model.get_layer('1024_block3_conv2').get_weights())
base_model.save('vgg_grayscale_1024.hdf5')
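
# Minimal usage sketch: reload the saved model and run a grayscale batch through
# it. The random array below is only a placeholder for a real (1, 1024, 1024, 1)
# batch of grayscale images.
feature_extractor = load_model('vgg_grayscale_1024.hdf5')
gray_batch = np.random.rand(1, 1024, 1024, 1).astype('float32')
features = feature_extractor.predict(gray_batch)
print(features.shape)  # (1, 8, 8, 512) given the (8, 8) pooling in block 5
--------------------------------------------------------------------------------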