├── download_models.sh
├── convert_models.sh
├── README.md
├── check_outputs.py
├── predict.py
├── alexnet.py
└── vgg19.py

/download_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | mkdir -p converted-models/
4 | cd converted-models
5 | 
6 | for MODEL in 'hybrid_finetuned_fc6+' 'hybrid_finetuned_all' 'vgg19_finetuned_fc6+' 'vgg19_finetuned_all'; do
7 |     if [ ! -f "${MODEL}.pth" ]; then
8 |         echo "Downloading: ${MODEL}.pth"
9 |         wget https://github.com/fabiocarrara/visual-sentiment-analysis/releases/download/torch-models/${MODEL}.pth
10 |     else
11 |         echo "Skipping: ${MODEL}.pth already downloaded"
12 |     fi
13 | done
14 | 
15 | 
--------------------------------------------------------------------------------
/convert_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | mkdir -p converted-models
4 | 
5 | for MODEL in 'hybrid_finetuned_fc6+' 'hybrid_finetuned_all' 'vgg19_finetuned_fc6+' 'vgg19_finetuned_all'; do
6 |     pushd original-models/${MODEL}
7 |     mmtoir -f caffe -w snapshot_iter_*.caffemodel -n deploy.prototxt -d ir
8 |     mmtocode -f pytorch -n ir.pb -w ir.npy -dw ${MODEL}.pth -d model.py
9 |     popd
10 |     mv original-models/${MODEL}/${MODEL}.pth converted-models/
11 | done
12 | 
13 | mv original-models/hybrid_finetuned_all/model.py alexnet.py
14 | mv original-models/vgg19_finetuned_all/model.py vgg19.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🔥 CHECK OUT v2 (new data, new models) 🔥 https://fabiocarrara.github.io/cross-modal-visual-sentiment-analysis/
2 | 
3 | ---
4 | 
5 | # Cross-Media Learning for Image Sentiment Analysis in the Wild
6 | 
7 | This repo contains the PyTorch-converted models for visual sentiment analysis trained on the
8 | [T4SA](http://www.t4sa.it) (Twitter for Sentiment Analysis) dataset presented in \[1\].
9 | 
10 | [1] Vadicamo, L., Carrara, F., Cimino, A., Cresci, S., Dell'Orletta, F., Falchi, F. and Tesconi, M., 2017.
11 | Cross-media learning for image sentiment analysis in the wild.
12 | In Proceedings of the IEEE International Conference on Computer Vision Workshops (pp. 308-317).
13 | 
14 | ## Usage
15 | 
16 | 0. Install the requirements: [PyTorch](https://pytorch.org/get-started/)
17 | 
18 | 1. Download the pretrained models:
19 |    ```sh
20 |    ./download_models.sh
21 |    ```
22 | 
23 | 2. Use the `predict.py` script to make predictions on images. Example:
24 |    ```sh
25 |    python predict.py images_list.txt --model vgg19_finetuned_all --batch-size 64 > predictions.csv
26 |    ```
27 |    The output file contains three columns with the probability of each image belonging to the *negative*, *neutral*, and *positive* classes, in this order.
28 | 
29 | ## Converting the original Caffe models
30 | 
31 | We adopted [MMdnn](https://github.com/microsoft/MMdnn) to convert the original Caffe models to PyTorch.
32 | We recommend using the pre-built Docker image:
33 | ```sh
34 | docker pull mmdnn/mmdnn:cpu.small
35 | ```
36 | 
37 | First, download the original models available at http://www.t4sa.it and extract them following this
38 | folder structure:
39 | ```
40 | original-models/
41 | ├── hybrid_finetuned_all/
42 | │   ├── deploy.prototxt
43 | │   ├── mean.binaryproto
44 | │   ├── snapshot_iter_34560.caffemodel
45 | │   └── ...
46 | ├── hybrid_finetuned_fc6+/
47 | │   ├── 
48 | │   └── ...
49 | ├── vgg19_finetuned_all/
50 | │   ├── 
51 | │   └── ...
52 | └── vgg19_finetuned_fc6+/
53 |     ├── 
54 |     └── ...
55 | ```
56 | 
57 | Then, run `convert_models.sh`:
58 | 
59 | ```sh
60 | docker run --rm -it -v $(pwd):/workspace -w /workspace mmdnn/mmdnn:cpu.small bash ./convert_models.sh
61 | ```
62 | 
--------------------------------------------------------------------------------
/check_outputs.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import caffe
3 | import glob
4 | import torch
5 | 
6 | import numpy as np
7 | import torchvision.transforms.functional as F
8 | 
9 | from PIL import Image
10 | 
11 | from alexnet import KitModel as AlexNet
12 | from vgg19 import KitModel as VGG19
13 | 
14 | if __name__ == '__main__':
15 |     models = ('hybrid_finetuned_fc6+',
16 |               'hybrid_finetuned_all',
17 |               'vgg19_finetuned_fc6+',
18 |               'vgg19_finetuned_all')
19 | 
20 |     parser = argparse.ArgumentParser(description='Check outputs of original and converted models')
21 |     parser.add_argument('model', type=str, choices=models, help='model to test')
22 |     parser.add_argument('-i', '--image', type=str, default='dummy-data/lenna.jpg', help='input image')
23 |     args = parser.parse_args()
24 | 
25 |     model = AlexNet if 'hybrid' in args.model else VGG19
26 | 
27 |     converted_model_weights = 'converted-models/{}.pth'.format(args.model)
28 |     converted_model = model(converted_model_weights)
29 |     converted_model.eval()
30 | 
31 |     original_model_net = 'original-models/{}/deploy.prototxt'.format(args.model)
32 |     original_model_weights = 'original-models/{}/snapshot_iter_*.caffemodel'.format(args.model)
33 |     original_model_weights = glob.glob(original_model_weights)[0]
34 |     original_model = caffe.Net(original_model_net, caffe.TEST, weights=original_model_weights)
35 | 
36 |     # image
37 |     pil_image = Image.open(args.image).convert('RGB')
38 |     image = F.to_tensor(F.resize(pil_image, (224, 224)))  # resize to 224
39 |     image = image[[2,1,0]] * 255  # RGB -> BGR (expected by caffe nets), [0,1] -> [0, 255]
40 | 
41 |     # mean
42 |     mean_file = 'original-models/{}/mean.binaryproto'.format(args.model)
43 |     blob = caffe.proto.caffe_pb2.BlobProto()
44 |     blob.ParseFromString(open(mean_file, 'rb').read())
45 |     mean_image = caffe.io.blobproto_to_array(blob).squeeze().astype(np.uint8)
46 |     mean_pixel = torch.from_numpy(mean_image.mean(axis=(1,2), keepdims=True).astype(np.float32))
47 | 
48 |     print(mean_pixel)
49 | 
50 |     # input
51 |     net_input = (image - mean_pixel).unsqueeze(0)
52 | 
53 |     # forward
54 |     original_model.blobs['data'].data[...] = net_input
55 |     original_output = original_model.forward()
56 | 
57 |     converted_output = converted_model(net_input)
58 | 
59 |     # outputs
60 |     print(original_output)
61 |     print(converted_output)
62 | 
63 | 
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import argparse
3 | import numpy as np
4 | import torch
5 | import torchvision.transforms as t
6 | 
7 | from torch.utils.data import Dataset, DataLoader
8 | from torchvision.datasets.folder import default_loader
9 | from tqdm import tqdm
10 | 
11 | from alexnet import KitModel as AlexNet
12 | from vgg19 import KitModel as VGG19
13 | 
14 | 
15 | class ImageListDataset(Dataset):
16 | 
17 |     def __init__(self, list_filename, root=None, transform=None):
18 |         super().__init__()
19 | 
20 |         with open(list_filename, 'r') as list_file:
21 |             self.list = list(map(str.rstrip, list_file))
22 | 
23 |         self.root = root
24 |         self.transform = transform
25 | 
26 |     def __getitem__(self, index):
27 |         path = self.list[index]
28 |         if self.root:
29 |             path = os.path.join(self.root, path)
30 | 
31 |         x = default_loader(path)
32 |         if self.transform:
33 |             x = self.transform(x)
34 | 
35 |         return x
36 | 
37 |     def __len__(self):
38 |         return len(self.list)
39 | 
40 | 
41 | def main(args):
42 | 
43 |     transform = t.Compose([
44 |         t.Resize((224, 224)),
45 |         t.ToTensor(),
46 |         t.Lambda(lambda x: x[[2,1,0], ...] * 255),  # RGB -> BGR and [0,1] -> [0,255]
47 |         t.Normalize(mean=[116.8007, 121.2751, 130.4602], std=[1,1,1]),  # mean subtraction
48 |     ])
49 | 
50 |     data = ImageListDataset(args.image_list, root=args.root, transform=transform)
51 |     dataloader = DataLoader(data, batch_size=args.batch_size, num_workers=8, pin_memory=True)
52 | 
53 |     model = AlexNet if 'hybrid' in args.model else VGG19
54 |     model = model('converted-models/{}.pth'.format(args.model)).to('cuda')
55 |     model.eval()
56 | 
57 |     with torch.no_grad():
58 |         for x in tqdm(dataloader):
59 |             p = model(x.to('cuda')).cpu().numpy()  # order is (NEG, NEU, POS)
60 |             np.savetxt(sys.stdout.buffer, p, delimiter=',')
61 | 
62 | 
63 | if __name__ == '__main__':
64 |     models = ('hybrid_finetuned_fc6+',
65 |               'hybrid_finetuned_all',
66 |               'vgg19_finetuned_fc6+',
67 |               'vgg19_finetuned_all')
68 | 
69 |     parser = argparse.ArgumentParser(description='Predict Visual Sentiment')
70 |     parser.add_argument('image_list', type=str, help='Image list (txt, one path per line)')
71 |     parser.add_argument('-r', '--root', default=None, help='Root path to prepend to image list')
72 |     parser.add_argument('-m', '--model', type=str, choices=models, default='vgg19_finetuned_all', help='Pretrained model')
73 |     parser.add_argument('-b', '--batch-size', type=int, default=48, help='Batch size')
74 |     args = parser.parse_args()
75 |     main(args)
76 | 
--------------------------------------------------------------------------------
/alexnet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | 
6 | __weights_dict = dict()
7 | 
8 | def load_weights(weight_file):
9 |     if weight_file == None:
10 |         return
11 | 
12 |     try:
13 |         weights_dict = np.load(weight_file, allow_pickle=True).item()
14 |     except:
15 |         weights_dict = np.load(weight_file, encoding='bytes', allow_pickle=True).item()
16 | 
17 |     return weights_dict
18 | 
19 | class KitModel(nn.Module):
20 | 
21 | 
22 |     def __init__(self, weight_file):
super(KitModel, self).__init__() 24 | global __weights_dict 25 | __weights_dict = load_weights(weight_file) 26 | 27 | self.conv1 = self.__conv(2, name='conv1', in_channels=3, out_channels=96, kernel_size=(11, 11), stride=(4, 4), groups=1, bias=True) 28 | self.conv2 = self.__conv(2, name='conv2', in_channels=96, out_channels=256, kernel_size=(5, 5), stride=(1, 1), groups=2, bias=True) 29 | self.conv3 = self.__conv(2, name='conv3', in_channels=256, out_channels=384, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 30 | self.conv4 = self.__conv(2, name='conv4', in_channels=384, out_channels=384, kernel_size=(3, 3), stride=(1, 1), groups=2, bias=True) 31 | self.conv5 = self.__conv(2, name='conv5', in_channels=384, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=2, bias=True) 32 | self.fc6_1 = self.__dense(name = 'fc6_1', in_features = 9216, out_features = 4096, bias = True) 33 | self.fc7_1 = self.__dense(name = 'fc7_1', in_features = 4096, out_features = 4096, bias = True) 34 | self.fc8_retrain_1 = self.__dense(name = 'fc8-retrain_1', in_features = 4096, out_features = 3, bias = True) 35 | 36 | def forward(self, x): 37 | conv1_pad = F.pad(x, (0, 1, 0, 1)) 38 | conv1 = self.conv1(conv1_pad) 39 | relu1 = F.relu(conv1) 40 | norm1 = F.local_response_norm(relu1, size=5, alpha=9.999999747378752e-05, beta=0.75, k=1.0) 41 | pool1_pad = F.pad(norm1, (0, 1, 0, 1), value=float('-inf')) 42 | pool1 = F.max_pool2d(pool1_pad, kernel_size=(3, 3), stride=(2, 2), padding=0, ceil_mode=False) 43 | conv2_pad = F.pad(pool1, (2, 2, 2, 2)) 44 | conv2 = self.conv2(conv2_pad) 45 | relu2 = F.relu(conv2) 46 | norm2 = F.local_response_norm(relu2, size=5, alpha=9.999999747378752e-05, beta=0.75, k=1.0) 47 | pool2_pad = F.pad(norm2, (0, 1, 0, 1), value=float('-inf')) 48 | pool2 = F.max_pool2d(pool2_pad, kernel_size=(3, 3), stride=(2, 2), padding=0, ceil_mode=False) 49 | conv3_pad = F.pad(pool2, (1, 1, 1, 1)) 50 | conv3 = self.conv3(conv3_pad) 51 | relu3 = F.relu(conv3) 52 | conv4_pad = F.pad(relu3, (1, 1, 1, 1)) 53 | conv4 = self.conv4(conv4_pad) 54 | relu4 = F.relu(conv4) 55 | conv5_pad = F.pad(relu4, (1, 1, 1, 1)) 56 | conv5 = self.conv5(conv5_pad) 57 | relu5 = F.relu(conv5) 58 | pool5_pad = F.pad(relu5, (0, 1, 0, 1), value=float('-inf')) 59 | pool5 = F.max_pool2d(pool5_pad, kernel_size=(3, 3), stride=(2, 2), padding=0, ceil_mode=False) 60 | fc6_0 = pool5.view(pool5.size(0), -1) 61 | fc6_1 = self.fc6_1(fc6_0) 62 | relu6 = F.relu(fc6_1) 63 | drop6 = F.dropout(input = relu6, p = 0.5, training = self.training, inplace = True) 64 | fc7_0 = drop6.view(drop6.size(0), -1) 65 | fc7_1 = self.fc7_1(fc7_0) 66 | relu7 = F.relu(fc7_1) 67 | drop7 = F.dropout(input = relu7, p = 0.5, training = self.training, inplace = True) 68 | fc8_retrain_0 = drop7.view(drop7.size(0), -1) 69 | fc8_retrain_1 = self.fc8_retrain_1(fc8_retrain_0) 70 | softmax = F.softmax(fc8_retrain_1) 71 | return softmax 72 | 73 | 74 | @staticmethod 75 | def __conv(dim, name, **kwargs): 76 | if dim == 1: layer = nn.Conv1d(**kwargs) 77 | elif dim == 2: layer = nn.Conv2d(**kwargs) 78 | elif dim == 3: layer = nn.Conv3d(**kwargs) 79 | else: raise NotImplementedError() 80 | 81 | layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 82 | if 'bias' in __weights_dict[name]: 83 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 84 | return layer 85 | 86 | @staticmethod 87 | def __dense(name, **kwargs): 88 | layer = nn.Linear(**kwargs) 89 | 
layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 90 | if 'bias' in __weights_dict[name]: 91 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 92 | return layer 93 | -------------------------------------------------------------------------------- /vgg19.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | __weights_dict = dict() 7 | 8 | def load_weights(weight_file): 9 | if weight_file == None: 10 | return 11 | 12 | try: 13 | weights_dict = np.load(weight_file, allow_pickle=True).item() 14 | except: 15 | weights_dict = np.load(weight_file, encoding='bytes', allow_pickle=True).item() 16 | 17 | return weights_dict 18 | 19 | class KitModel(nn.Module): 20 | 21 | 22 | def __init__(self, weight_file): 23 | super(KitModel, self).__init__() 24 | global __weights_dict 25 | __weights_dict = load_weights(weight_file) 26 | 27 | self.conv1_1 = self.__conv(2, name='conv1_1', in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 28 | self.conv1_2 = self.__conv(2, name='conv1_2', in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 29 | self.conv2_1 = self.__conv(2, name='conv2_1', in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 30 | self.conv2_2 = self.__conv(2, name='conv2_2', in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 31 | self.conv3_1 = self.__conv(2, name='conv3_1', in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 32 | self.conv3_2 = self.__conv(2, name='conv3_2', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 33 | self.conv3_3 = self.__conv(2, name='conv3_3', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 34 | self.conv3_4 = self.__conv(2, name='conv3_4', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 35 | self.conv4_1 = self.__conv(2, name='conv4_1', in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 36 | self.conv4_2 = self.__conv(2, name='conv4_2', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 37 | self.conv4_3 = self.__conv(2, name='conv4_3', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 38 | self.conv4_4 = self.__conv(2, name='conv4_4', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 39 | self.conv5_1 = self.__conv(2, name='conv5_1', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 40 | self.conv5_2 = self.__conv(2, name='conv5_2', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 41 | self.conv5_3 = self.__conv(2, name='conv5_3', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 42 | self.conv5_4 = self.__conv(2, name='conv5_4', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 43 | self.fc6_1 = self.__dense(name = 'fc6_1', in_features = 25088, out_features = 4096, bias = True) 44 | self.fc7_1 = self.__dense(name = 'fc7_1', in_features = 4096, out_features = 4096, bias = True) 45 | self.fc8_retrain_1 = self.__dense(name = 
'fc8-retrain_1', in_features = 4096, out_features = 3, bias = True) 46 | 47 | def forward(self, x): 48 | conv1_1_pad = F.pad(x, (1, 1, 1, 1)) 49 | conv1_1 = self.conv1_1(conv1_1_pad) 50 | relu1_1 = F.relu(conv1_1) 51 | conv1_2_pad = F.pad(relu1_1, (1, 1, 1, 1)) 52 | conv1_2 = self.conv1_2(conv1_2_pad) 53 | relu1_2 = F.relu(conv1_2) 54 | pool1_pad = F.pad(relu1_2, (0, 1, 0, 1), value=float('-inf')) 55 | pool1 = F.max_pool2d(pool1_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 56 | conv2_1_pad = F.pad(pool1, (1, 1, 1, 1)) 57 | conv2_1 = self.conv2_1(conv2_1_pad) 58 | relu2_1 = F.relu(conv2_1) 59 | conv2_2_pad = F.pad(relu2_1, (1, 1, 1, 1)) 60 | conv2_2 = self.conv2_2(conv2_2_pad) 61 | relu2_2 = F.relu(conv2_2) 62 | pool2_pad = F.pad(relu2_2, (0, 1, 0, 1), value=float('-inf')) 63 | pool2 = F.max_pool2d(pool2_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 64 | conv3_1_pad = F.pad(pool2, (1, 1, 1, 1)) 65 | conv3_1 = self.conv3_1(conv3_1_pad) 66 | relu3_1 = F.relu(conv3_1) 67 | conv3_2_pad = F.pad(relu3_1, (1, 1, 1, 1)) 68 | conv3_2 = self.conv3_2(conv3_2_pad) 69 | relu3_2 = F.relu(conv3_2) 70 | conv3_3_pad = F.pad(relu3_2, (1, 1, 1, 1)) 71 | conv3_3 = self.conv3_3(conv3_3_pad) 72 | relu3_3 = F.relu(conv3_3) 73 | conv3_4_pad = F.pad(relu3_3, (1, 1, 1, 1)) 74 | conv3_4 = self.conv3_4(conv3_4_pad) 75 | relu3_4 = F.relu(conv3_4) 76 | pool3_pad = F.pad(relu3_4, (0, 1, 0, 1), value=float('-inf')) 77 | pool3 = F.max_pool2d(pool3_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 78 | conv4_1_pad = F.pad(pool3, (1, 1, 1, 1)) 79 | conv4_1 = self.conv4_1(conv4_1_pad) 80 | relu4_1 = F.relu(conv4_1) 81 | conv4_2_pad = F.pad(relu4_1, (1, 1, 1, 1)) 82 | conv4_2 = self.conv4_2(conv4_2_pad) 83 | relu4_2 = F.relu(conv4_2) 84 | conv4_3_pad = F.pad(relu4_2, (1, 1, 1, 1)) 85 | conv4_3 = self.conv4_3(conv4_3_pad) 86 | relu4_3 = F.relu(conv4_3) 87 | conv4_4_pad = F.pad(relu4_3, (1, 1, 1, 1)) 88 | conv4_4 = self.conv4_4(conv4_4_pad) 89 | relu4_4 = F.relu(conv4_4) 90 | pool4_pad = F.pad(relu4_4, (0, 1, 0, 1), value=float('-inf')) 91 | pool4 = F.max_pool2d(pool4_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 92 | conv5_1_pad = F.pad(pool4, (1, 1, 1, 1)) 93 | conv5_1 = self.conv5_1(conv5_1_pad) 94 | relu5_1 = F.relu(conv5_1) 95 | conv5_2_pad = F.pad(relu5_1, (1, 1, 1, 1)) 96 | conv5_2 = self.conv5_2(conv5_2_pad) 97 | relu5_2 = F.relu(conv5_2) 98 | conv5_3_pad = F.pad(relu5_2, (1, 1, 1, 1)) 99 | conv5_3 = self.conv5_3(conv5_3_pad) 100 | relu5_3 = F.relu(conv5_3) 101 | conv5_4_pad = F.pad(relu5_3, (1, 1, 1, 1)) 102 | conv5_4 = self.conv5_4(conv5_4_pad) 103 | relu5_4 = F.relu(conv5_4) 104 | pool5_pad = F.pad(relu5_4, (0, 1, 0, 1), value=float('-inf')) 105 | pool5 = F.max_pool2d(pool5_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 106 | fc6_0 = pool5.view(pool5.size(0), -1) 107 | fc6_1 = self.fc6_1(fc6_0) 108 | relu6 = F.relu(fc6_1) 109 | drop6 = F.dropout(input = relu6, p = 0.5, training = self.training, inplace = True) 110 | fc7_0 = drop6.view(drop6.size(0), -1) 111 | fc7_1 = self.fc7_1(fc7_0) 112 | relu7 = F.relu(fc7_1) 113 | drop7 = F.dropout(input = relu7, p = 0.5, training = self.training, inplace = True) 114 | fc8_retrain_0 = drop7.view(drop7.size(0), -1) 115 | fc8_retrain_1 = self.fc8_retrain_1(fc8_retrain_0) 116 | softmax = F.softmax(fc8_retrain_1) 117 | return softmax 118 | 119 | 120 | @staticmethod 121 | def __conv(dim, name, **kwargs): 122 | if dim == 1: layer = nn.Conv1d(**kwargs) 123 | elif dim == 2: layer = 
nn.Conv2d(**kwargs) 124 | elif dim == 3: layer = nn.Conv3d(**kwargs) 125 | else: raise NotImplementedError() 126 | 127 | layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 128 | if 'bias' in __weights_dict[name]: 129 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 130 | return layer 131 | 132 | @staticmethod 133 | def __dense(name, **kwargs): 134 | layer = nn.Linear(**kwargs) 135 | layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 136 | if 'bias' in __weights_dict[name]: 137 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 138 | return layer 139 | 140 | if __name__ == '__main__': 141 | converted_model = KitModel('vgg19_finetuned_all.pth') 142 | converted_model.eval() 143 | 144 | import caffe 145 | original_model = caffe.Net('deploy.prototxt', caffe.TEST, weights='snapshot_iter_74880.caffemodel') 146 | 147 | from PIL import Image 148 | import scipy 149 | import torchvision.transforms.functional as tf 150 | 151 | # image 152 | pil_image = Image.open('../../dummy-data/neutral.jpeg').convert('RGB') 153 | #image = np.array(pil_image).astype(np.float32) 154 | #image = scipy.misc.imresize(image, (224, 224), 'bilinear') 155 | #image = image.transpose((2, 0, 1)) # HWC to CHW 156 | #image = image[[2,1,0]] # RGB to BGR 157 | image = tf.to_tensor(tf.resize(pil_image, (224, 224))) # resize to 224 158 | image = image[[2,1,0]] * 255 # RGB -> BGR (expected by caffe nets), [0,1] -> [0, 255] 159 | 160 | # mean 161 | blob = caffe.proto.caffe_pb2.BlobProto() 162 | blob.ParseFromString(open('mean.binaryproto', 'rb').read()) 163 | mean_image = caffe.io.blobproto_to_array(blob).squeeze().astype(np.uint8) 164 | mean_pixel = torch.from_numpy(mean_image.mean(axis=(1,2), keepdims=True).astype(np.float32)) 165 | 166 | print(mean_pixel) 167 | 168 | # pil_mean_image = tf.to_pil_image(torch.from_numpy(mean_image)) 169 | # mean_image = tf.to_tensor(tf.resize(pil_mean_image, 224)) 170 | 171 | # input 172 | net_input = (image - mean_pixel).unsqueeze(0) 173 | print(net_input.mean()) 174 | 175 | # forward 176 | original_model.blobs['data'].data[...] = net_input 177 | original_output = original_model.forward() 178 | 179 | converted_output = converted_model(net_input) 180 | 181 | # outputs 182 | print(original_output) 183 | print(converted_output) 184 | 185 | --------------------------------------------------------------------------------
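
Below is a minimal, CPU-only sketch of single-image inference that mirrors the preprocessing hard-coded in `predict.py` (resize to 224x224, RGB to BGR, scaling to [0, 255], and BGR mean-pixel subtraction) and the model loading used there. It assumes the converted weights were already fetched with `./download_models.sh` and that it runs from the repository root so `vgg19.py` is importable; the script name and the command-line handling are illustrative and not part of the repository.

```python
# single_image_predict.py (hypothetical helper, not part of the repo)
import sys

import torch
import torchvision.transforms as t
from PIL import Image

from vgg19 import KitModel as VGG19

# Same preprocessing as predict.py: the Caffe-derived nets expect BGR inputs
# in [0, 255] with the T4SA mean pixel subtracted per channel.
transform = t.Compose([
    t.Resize((224, 224)),
    t.ToTensor(),
    t.Lambda(lambda x: x[[2, 1, 0], ...] * 255),                       # RGB -> BGR, [0,1] -> [0,255]
    t.Normalize(mean=[116.8007, 121.2751, 130.4602], std=[1, 1, 1]),   # BGR mean subtraction
])

# Load the converted weights exactly as predict.py does (here on CPU).
model = VGG19('converted-models/vgg19_finetuned_all.pth')
model.eval()

image = Image.open(sys.argv[1]).convert('RGB')
x = transform(image).unsqueeze(0)  # add the batch dimension

with torch.no_grad():
    neg, neu, pos = model(x)[0].tolist()  # output order is (NEG, NEU, POS)

print('negative={:.3f} neutral={:.3f} positive={:.3f}'.format(neg, neu, pos))
```

Run it as `python single_image_predict.py path/to/image.jpg`; to test the AlexNet-based variants, swap in `from alexnet import KitModel as AlexNet` and a `hybrid_*` checkpoint instead.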