├── README.md
└── convert_vgg_grayscale.py

/README.md:
--------------------------------------------------------------------------------
# VGG_Imagenet_Weights_GrayScale_Images
Convert VGG ImageNet pre-trained weights for use with grayscale images.

2 methods:

1. Convert your images to grayscale and stack the single grayscale channel 3 times, so each image still has 3 (identical) channels (see the sketch below).

2. Convert the weights of VGG16's first convolutional layer to accommodate grayscale images.
   e.g. the dimension of VGG16's block1_conv1 kernel is (3, 3, 3, 64) -> (height, width, in_channels, out_channels). By default, in_channels corresponds to the number of channels your training images have. Since VGG16 is pre-trained on ImageNet, which contains RGB images, in_channels is 3. The idea is to extract these weight values, take a weighted average of the filters across the channel axis, and assign the result back to the block1_conv1 kernel, so that its dimension becomes (3, 3, 1, 64).

The luminosity formula is used to calculate the weighted average:

    value = (feature_red * 0.2989) + (feature_green * 0.5870) + (feature_blue * 0.1140)
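A minimal sketch of method 1 (assuming each grayscale image is already a 2-D NumPy array; `to_three_channel` is just an illustrative helper name):

```python
import numpy as np

def to_three_channel(gray_image):
    # VGG16 pre-trained on ImageNet expects 3 input channels, so the single
    # grayscale channel (height, width) is repeated 3 times along a new last
    # axis, giving an array of shape (height, width, 3).
    return np.repeat(gray_image[..., np.newaxis], 3, axis=-1)

# e.g. a (1024, 1024) grayscale image becomes a (1024, 1024, 3) array:
# rgb_like = to_three_channel(gray_image)
```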
--------------------------------------------------------------------------------
/convert_vgg_grayscale.py:
--------------------------------------------------------------------------------
import os

# Select the GPU before importing Keras/TensorFlow so the setting takes effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

import keras
import numpy as np

from keras.applications.vgg16 import VGG16
from keras.models import load_model, Model
from keras.layers import Input, Conv2D, MaxPooling2D

model = VGG16(weights='imagenet', include_top=False)

# block1_conv1 weights have shape (3, 3, 3, 64) -> this is for RGB images.
# For grayscale images the shape should be (3, 3, 1, 64), so a weighted average
# of the filters has to be calculated across the input channels.
# Luminosity coefficients: Red 0.2989, Green 0.5870, Blue 0.1140

# Get the weights of block1_conv1.
block1_conv1 = model.get_layer('block1_conv1').get_weights()
weights, biases = block1_conv1

# :weights shape = (3, 3, 3, 64) -> axes (0, 1, 2, 3)
# Transpose :weights to (64, 3, 3, 3) -> (out_channels, in_channels, rows, columns)
weights = np.transpose(weights, (3, 2, 0, 1))

kernel_out_channels, kernel_in_channels, kernel_rows, kernel_columns = weights.shape

# Dimensions: (kernel_out_channels, 1 (since grayscale), kernel_rows, kernel_columns)
grayscale_weights = np.zeros((kernel_out_channels, 1, kernel_rows, kernel_columns))

# Iterate over the output channels
for i in range(kernel_out_channels):

    # get the kernel for this out_channel
    get_kernel = weights[i, :, :, :]

    temp_kernel = np.zeros((3, 3))

    # :get_kernel shape = (3, 3, 3)
    # axis, dims = (0, in_channel), (1, row), (2, col)

    # calculate the weighted average across the channel axis
    in_channels, in_rows, in_columns = get_kernel.shape

    for in_row in range(in_rows):
        for in_col in range(in_columns):
            feature_red = get_kernel[0, in_row, in_col]
            feature_green = get_kernel[1, in_row, in_col]
            feature_blue = get_kernel[2, in_row, in_col]

            # weighted average of the RGB filter values (luminosity formula)
            total = (feature_red * 0.2989) + (feature_green * 0.5870) + (feature_blue * 0.1140)

            temp_kernel[in_row, in_col] = total

    # :temp_kernel is a 3x3 matrix (rows x columns).
    # Add an axis to make the in_channels dimension explicit (= 1).
    #
    # Two ways of doing this:
    #
    # First: add an axis at the end of :temp_kernel to make its shape (3, 3, 1),
    # but this might be an issue when concatenating all feature maps.
    #
    # Second: add an axis at the start of :temp_kernel to make its shape (1, 3, 3),
    # which is (in_channels, rows, columns).
    temp_kernel = np.expand_dims(temp_kernel, axis=0)

    # Now :temp_kernel has shape (1, 3, 3).

    # Assign :temp_kernel to :grayscale_weights along axis=0
    grayscale_weights[i, :, :, :] = temp_kernel

# Dimension of :grayscale_weights is (64, 1, 3, 3).
# To bring it to the TensorFlow/Keras weight format, transpose :grayscale_weights.

# dimension, axis of :grayscale_weights = (out_channels: 0), (in_channels: 1), (rows: 2), (columns: 3)
# tf format of weights = (rows: 0), (columns: 1), (in_channels: 2), (out_channels: 3)

# Go from (0, 1, 2, 3) to (2, 3, 1, 0)
grayscale_weights = np.transpose(grayscale_weights, (2, 3, 1, 0))  # (3, 3, 1, 64)

# Combine :grayscale_weights and :biases
new_block1_conv1 = [grayscale_weights, biases]
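
# Optional sanity check (a sketch): the nested loop above is equivalent to
# contracting the input-channel axis of :weights (64, 3, 3, 3) against the
# luminosity coefficients in a single step.
luminosity = np.array([0.2989, 0.5870, 0.1140])
vectorized = np.tensordot(weights, luminosity, axes=([1], [0]))        # (64, 3, 3)
vectorized = np.transpose(vectorized, (1, 2, 0))[:, :, np.newaxis, :]  # (3, 3, 1, 64)
assert np.allclose(vectorized, grayscale_weights)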


# Reconstruct the layers of VGG16, but replace the block1_conv1 weights with :grayscale_weights.

# Get the weights of all the convolutional layers starting from 'block1_conv2'.
vgg16_weights = {}
for layer in model.layers[2:]:
    if "conv" in layer.name:
        vgg16_weights["1024_" + layer.name] = model.get_layer(layer.name).get_weights()

del model


# Custom-built VGG16 for 1024x1024 grayscale input
input = Input(shape=(1024, 1024, 1), name='1024_input')

# Block 1
x = Conv2D(64, (3, 3), activation='relu', padding='same', data_format="channels_last", name='1024_block1_conv1')(input)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='1024_block1_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block1_pool')(x)

# Block 2
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='1024_block2_conv1')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='1024_block2_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block2_pool')(x)

# Block 3
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='1024_block3_conv1')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='1024_block3_conv2')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='1024_block3_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block3_pool')(x)

# Block 4
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block4_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block4_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block4_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='1024_block4_pool')(x)

# Block 5
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block5_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block5_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='1024_block5_conv3')(x)
x = MaxPooling2D((8, 8), strides=(8, 8), name='1024_block5_pool')(x)

base_model = Model(inputs=input, outputs=x)

# Assign the grayscale weights to the first convolutional layer, then copy the
# remaining pre-trained convolutional weights over by (prefixed) layer name.
base_model.get_layer('1024_block1_conv1').set_weights(new_block1_conv1)
for layer in base_model.layers[2:]:
    if 'conv' in layer.name:
        base_model.get_layer(layer.name).set_weights(vgg16_weights[layer.name])

base_model.summary()

# print(base_model.get_layer('1024_block3_conv2').get_weights())
base_model.save('vgg_grayscale_1024.hdf5')
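
# Minimal usage sketch: reload the saved model and run a grayscale batch through
# it. The random array below is only a placeholder for a real (1, 1024, 1024, 1)
# batch of grayscale images.
feature_extractor = load_model('vgg_grayscale_1024.hdf5')
gray_batch = np.random.rand(1, 1024, 1024, 1).astype('float32')
features = feature_extractor.predict(gray_batch)
print(features.shape)  # (1, 8, 8, 512) given the (8, 8) pooling in block 5
--------------------------------------------------------------------------------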