├── download_models.sh
├── convert_models.sh
├── README.md
├── check_outputs.py
├── predict.py
├── alexnet.py
└── vgg19.py

/download_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | mkdir -p converted-models/
4 | cd converted-models
5 | 
6 | for MODEL in 'hybrid_finetuned_fc6+' 'hybrid_finetuned_all' 'vgg19_finetuned_fc6+' 'vgg19_finetuned_all'; do
7 |     if [ ! -f "${MODEL}.pth" ]; then
8 |         echo "Downloading: ${MODEL}.pth"
9 |         wget https://github.com/fabiocarrara/visual-sentiment-analysis/releases/download/torch-models/${MODEL}.pth
10 |     else
11 |         echo "Skipping: ${MODEL}.pth already downloaded"
12 |     fi
13 | done
14 | 
15 | 
--------------------------------------------------------------------------------
/convert_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | mkdir -p converted-models
4 | 
5 | for MODEL in 'hybrid_finetuned_fc6+' 'hybrid_finetuned_all' 'vgg19_finetuned_fc6+' 'vgg19_finetuned_all'; do
6 |     pushd original-models/${MODEL}
7 |     mmtoir -f caffe -w snapshot_iter_*.caffemodel -n deploy.prototxt -d ir
8 |     mmtocode -f pytorch -n ir.pb -w ir.npy -dw ${MODEL}.pth -d model.py
9 |     popd
10 |     mv original-models/${MODEL}/${MODEL}.pth converted-models/
11 | done
12 | 
13 | mv original-models/hybrid_finetuned_all/model.py alexnet.py
14 | mv original-models/vgg19_finetuned_all/model.py vgg19.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🔥 CHECK OUT v2 (new data, new models) 🔥 https://fabiocarrara.github.io/cross-modal-visual-sentiment-analysis/
2 | 
3 | ---
4 | 
5 | # Cross-Media Learning for Image Sentiment Analysis in the Wild
6 | 
7 | This repo contains the PyTorch-converted models for visual sentiment analysis trained on the
8 | [T4SA](http://www.t4sa.it) (Twitter for Sentiment Analysis) dataset presented in \[1\].
9 | 
10 | [1] Vadicamo, L., Carrara, F., Cimino, A., Cresci, S., Dell'Orletta, F., Falchi, F. and Tesconi, M., 2017.
11 | Cross-media learning for image sentiment analysis in the wild.
12 | In Proceedings of the IEEE International Conference on Computer Vision Workshops (pp. 308-317).
13 | 
14 | ## Usage
15 | 
16 | 0. Install the requirements: [PyTorch](https://pytorch.org/get-started/)
17 | 
18 | 1. Download the pretrained models:
19 |    ```sh
20 |    ./download_models.sh
21 |    ```
22 | 
23 | 2. Use the `predict.py` script to make predictions on images. Example:
24 |    ```sh
25 |    python predict.py images_list.txt --model vgg19_finetuned_all --batch-size 64 > predictions.csv
26 |    ```
27 |    The output file contains three columns with the probability of each image belonging to the *negative*, *neutral*, and *positive* classes, in this order.
28 | 
29 | ## Converting the original Caffe models
30 | 
31 | We adopted [MMdnn](https://github.com/microsoft/MMdnn) to convert the original Caffe models to PyTorch.
32 | We recommend using the pre-built Docker image:
33 | ```sh
34 | docker pull mmdnn/mmdnn:cpu.small
35 | ```
36 | 
37 | First, download the original models available at http://www.t4sa.it and extract them following this
38 | folder structure:
39 | ```
40 | original-models/
41 | ├── hybrid_finetuned_all/
42 | │   ├── deploy.prototxt
43 | │   ├── mean.binaryproto
44 | │   ├── snapshot_iter_34560.caffemodel
45 | │   └── ...
46 | ├── hybrid_finetuned_fc6+/
47 | │   ├── 
48 | │   └── ...
49 | ├── vgg19_finetuned_all/
50 | │   ├── 
51 | │   └── ...
52 | └── vgg19_finetuned_fc6+/
53 |     ├── 
54 |     └── ...
55 | ```
56 | 
57 | Then, run `convert_models.sh`:
58 | 
59 | ```sh
60 | docker run --rm -it -v $(pwd):/workspace -w /workspace mmdnn/mmdnn:cpu.small bash ./convert_models.sh
61 | ```
62 | 
--------------------------------------------------------------------------------
/check_outputs.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import caffe
3 | import glob
4 | import torch
5 | 
6 | import numpy as np
7 | import torchvision.transforms.functional as F
8 | 
9 | from PIL import Image
10 | 
11 | from alexnet import KitModel as AlexNet
12 | from vgg19 import KitModel as VGG19
13 | 
14 | if __name__ == '__main__':
15 |     models = ('hybrid_finetuned_fc6+',
16 |               'hybrid_finetuned_all',
17 |               'vgg19_finetuned_fc6+',
18 |               'vgg19_finetuned_all')
19 | 
20 |     parser = argparse.ArgumentParser(description='Check outputs of original and converted models')
21 |     parser.add_argument('model', type=str, choices=models, help='model to test')
22 |     parser.add_argument('-i', '--image', type=str, default='dummy-data/lenna.jpg', help='input image')
23 |     args = parser.parse_args()
24 | 
25 |     model = AlexNet if 'hybrid' in args.model else VGG19
26 | 
27 |     converted_model_weights = 'converted-models/{}.pth'.format(args.model)
28 |     converted_model = model(converted_model_weights)
29 |     converted_model.eval()
30 | 
31 |     original_model_net = 'original-models/{}/deploy.prototxt'.format(args.model)
32 |     original_model_weights = 'original-models/{}/snapshot_iter_*.caffemodel'.format(args.model)
33 |     original_model_weights = glob.glob(original_model_weights)[0]
34 |     original_model = caffe.Net(original_model_net, caffe.TEST, weights=original_model_weights)
35 | 
36 |     # image
37 |     pil_image = Image.open(args.image).convert('RGB')
38 |     image = F.to_tensor(F.resize(pil_image, (224, 224)))  # resize to 224
39 |     image = image[[2,1,0]] * 255  # RGB -> BGR (expected by caffe nets), [0,1] -> [0, 255]
40 | 
41 |     # mean
42 |     mean_file = 'original-models/{}/mean.binaryproto'.format(args.model)
43 |     blob = caffe.proto.caffe_pb2.BlobProto()
44 |     blob.ParseFromString(open(mean_file, 'rb').read())
45 |     mean_image = caffe.io.blobproto_to_array(blob).squeeze().astype(np.uint8)
46 |     mean_pixel = torch.from_numpy(mean_image.mean(axis=(1,2), keepdims=True).astype(np.float32))
47 | 
48 |     print(mean_pixel)
49 | 
50 |     # input
51 |     net_input = (image - mean_pixel).unsqueeze(0)
52 | 
53 |     # forward
54 |     original_model.blobs['data'].data[...] = net_input
55 |     original_output = original_model.forward()
56 | 
57 |     converted_output = converted_model(net_input)
58 | 
59 |     # outputs
60 |     print(original_output)
61 |     print(converted_output)
62 | 
63 | 
--------------------------------------------------------------------------------
/predict.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import argparse
3 | import numpy as np
4 | import torch
5 | import torchvision.transforms as t
6 | 
7 | from torch.utils.data import Dataset, DataLoader
8 | from torchvision.datasets.folder import default_loader
9 | from tqdm import tqdm
10 | 
11 | from alexnet import KitModel as AlexNet
12 | from vgg19 import KitModel as VGG19
13 | 
14 | 
15 | class ImageListDataset(Dataset):
16 | 
17 |     def __init__(self, list_filename, root=None, transform=None):
18 |         super().__init__()
19 | 
20 |         with open(list_filename, 'r') as list_file:
21 |             self.list = list(map(str.rstrip, list_file))
22 | 
23 |         self.root = root
24 |         self.transform = transform
25 | 
26 |     def __getitem__(self, index):
27 |         path = self.list[index]
28 |         if self.root:
29 |             path = os.path.join(self.root, path)
30 | 
31 |         x = default_loader(path)
32 |         if self.transform:
33 |             x = self.transform(x)
34 | 
35 |         return x
36 | 
37 |     def __len__(self):
38 |         return len(self.list)
39 | 
40 | 
41 | def main(args):
42 | 
43 |     transform = t.Compose([
44 |         t.Resize((224, 224)),
45 |         t.ToTensor(),
46 |         t.Lambda(lambda x: x[[2,1,0], ...] * 255),  # RGB -> BGR and [0,1] -> [0,255]
47 |         t.Normalize(mean=[116.8007, 121.2751, 130.4602], std=[1,1,1]),  # mean subtraction
48 |     ])
49 | 
50 |     data = ImageListDataset(args.image_list, root=args.root, transform=transform)
51 |     dataloader = DataLoader(data, batch_size=args.batch_size, num_workers=8, pin_memory=True)
52 | 
53 |     model = AlexNet if 'hybrid' in args.model else VGG19
54 |     model = model('converted-models/{}.pth'.format(args.model)).to('cuda')
55 |     model.eval()
56 | 
57 |     with torch.no_grad():
58 |         for x in tqdm(dataloader):
59 |             p = model(x.to('cuda')).cpu().numpy()  # order is (NEG, NEU, POS)
60 |             np.savetxt(sys.stdout.buffer, p, delimiter=',')
61 | 
62 | 
63 | if __name__ == '__main__':
64 |     models = ('hybrid_finetuned_fc6+',
65 |               'hybrid_finetuned_all',
66 |               'vgg19_finetuned_fc6+',
67 |               'vgg19_finetuned_all')
68 | 
69 |     parser = argparse.ArgumentParser(description='Predict Visual Sentiment')
70 |     parser.add_argument('image_list', type=str, help='Image list (txt, one path per line)')
71 |     parser.add_argument('-r', '--root', default=None, help='Root path to prepend to image list')
72 |     parser.add_argument('-m', '--model', type=str, choices=models, default='vgg19_finetuned_all', help='Pretrained model')
73 |     parser.add_argument('-b', '--batch-size', type=int, default=48, help='Batch size')
74 |     args = parser.parse_args()
75 |     main(args)
76 | 
--------------------------------------------------------------------------------
/alexnet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | 
6 | __weights_dict = dict()
7 | 
8 | def load_weights(weight_file):
9 |     if weight_file == None:
10 |         return
11 | 
12 |     try:
13 |         weights_dict = np.load(weight_file, allow_pickle=True).item()
14 |     except:
15 |         weights_dict = np.load(weight_file, encoding='bytes', allow_pickle=True).item()
16 | 
17 |     return weights_dict
18 | 
19 | class KitModel(nn.Module):
20 | 
21 | 
22 |     def __init__(self, weight_file):
super(KitModel, self).__init__() 24 | global __weights_dict 25 | __weights_dict = load_weights(weight_file) 26 | 27 | self.conv1 = self.__conv(2, name='conv1', in_channels=3, out_channels=96, kernel_size=(11, 11), stride=(4, 4), groups=1, bias=True) 28 | self.conv2 = self.__conv(2, name='conv2', in_channels=96, out_channels=256, kernel_size=(5, 5), stride=(1, 1), groups=2, bias=True) 29 | self.conv3 = self.__conv(2, name='conv3', in_channels=256, out_channels=384, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 30 | self.conv4 = self.__conv(2, name='conv4', in_channels=384, out_channels=384, kernel_size=(3, 3), stride=(1, 1), groups=2, bias=True) 31 | self.conv5 = self.__conv(2, name='conv5', in_channels=384, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=2, bias=True) 32 | self.fc6_1 = self.__dense(name = 'fc6_1', in_features = 9216, out_features = 4096, bias = True) 33 | self.fc7_1 = self.__dense(name = 'fc7_1', in_features = 4096, out_features = 4096, bias = True) 34 | self.fc8_retrain_1 = self.__dense(name = 'fc8-retrain_1', in_features = 4096, out_features = 3, bias = True) 35 | 36 | def forward(self, x): 37 | conv1_pad = F.pad(x, (0, 1, 0, 1)) 38 | conv1 = self.conv1(conv1_pad) 39 | relu1 = F.relu(conv1) 40 | norm1 = F.local_response_norm(relu1, size=5, alpha=9.999999747378752e-05, beta=0.75, k=1.0) 41 | pool1_pad = F.pad(norm1, (0, 1, 0, 1), value=float('-inf')) 42 | pool1 = F.max_pool2d(pool1_pad, kernel_size=(3, 3), stride=(2, 2), padding=0, ceil_mode=False) 43 | conv2_pad = F.pad(pool1, (2, 2, 2, 2)) 44 | conv2 = self.conv2(conv2_pad) 45 | relu2 = F.relu(conv2) 46 | norm2 = F.local_response_norm(relu2, size=5, alpha=9.999999747378752e-05, beta=0.75, k=1.0) 47 | pool2_pad = F.pad(norm2, (0, 1, 0, 1), value=float('-inf')) 48 | pool2 = F.max_pool2d(pool2_pad, kernel_size=(3, 3), stride=(2, 2), padding=0, ceil_mode=False) 49 | conv3_pad = F.pad(pool2, (1, 1, 1, 1)) 50 | conv3 = self.conv3(conv3_pad) 51 | relu3 = F.relu(conv3) 52 | conv4_pad = F.pad(relu3, (1, 1, 1, 1)) 53 | conv4 = self.conv4(conv4_pad) 54 | relu4 = F.relu(conv4) 55 | conv5_pad = F.pad(relu4, (1, 1, 1, 1)) 56 | conv5 = self.conv5(conv5_pad) 57 | relu5 = F.relu(conv5) 58 | pool5_pad = F.pad(relu5, (0, 1, 0, 1), value=float('-inf')) 59 | pool5 = F.max_pool2d(pool5_pad, kernel_size=(3, 3), stride=(2, 2), padding=0, ceil_mode=False) 60 | fc6_0 = pool5.view(pool5.size(0), -1) 61 | fc6_1 = self.fc6_1(fc6_0) 62 | relu6 = F.relu(fc6_1) 63 | drop6 = F.dropout(input = relu6, p = 0.5, training = self.training, inplace = True) 64 | fc7_0 = drop6.view(drop6.size(0), -1) 65 | fc7_1 = self.fc7_1(fc7_0) 66 | relu7 = F.relu(fc7_1) 67 | drop7 = F.dropout(input = relu7, p = 0.5, training = self.training, inplace = True) 68 | fc8_retrain_0 = drop7.view(drop7.size(0), -1) 69 | fc8_retrain_1 = self.fc8_retrain_1(fc8_retrain_0) 70 | softmax = F.softmax(fc8_retrain_1) 71 | return softmax 72 | 73 | 74 | @staticmethod 75 | def __conv(dim, name, **kwargs): 76 | if dim == 1: layer = nn.Conv1d(**kwargs) 77 | elif dim == 2: layer = nn.Conv2d(**kwargs) 78 | elif dim == 3: layer = nn.Conv3d(**kwargs) 79 | else: raise NotImplementedError() 80 | 81 | layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 82 | if 'bias' in __weights_dict[name]: 83 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 84 | return layer 85 | 86 | @staticmethod 87 | def __dense(name, **kwargs): 88 | layer = nn.Linear(**kwargs) 89 | 
layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 90 | if 'bias' in __weights_dict[name]: 91 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 92 | return layer 93 | -------------------------------------------------------------------------------- /vgg19.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | __weights_dict = dict() 7 | 8 | def load_weights(weight_file): 9 | if weight_file == None: 10 | return 11 | 12 | try: 13 | weights_dict = np.load(weight_file, allow_pickle=True).item() 14 | except: 15 | weights_dict = np.load(weight_file, encoding='bytes', allow_pickle=True).item() 16 | 17 | return weights_dict 18 | 19 | class KitModel(nn.Module): 20 | 21 | 22 | def __init__(self, weight_file): 23 | super(KitModel, self).__init__() 24 | global __weights_dict 25 | __weights_dict = load_weights(weight_file) 26 | 27 | self.conv1_1 = self.__conv(2, name='conv1_1', in_channels=3, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 28 | self.conv1_2 = self.__conv(2, name='conv1_2', in_channels=64, out_channels=64, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 29 | self.conv2_1 = self.__conv(2, name='conv2_1', in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 30 | self.conv2_2 = self.__conv(2, name='conv2_2', in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 31 | self.conv3_1 = self.__conv(2, name='conv3_1', in_channels=128, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 32 | self.conv3_2 = self.__conv(2, name='conv3_2', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 33 | self.conv3_3 = self.__conv(2, name='conv3_3', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 34 | self.conv3_4 = self.__conv(2, name='conv3_4', in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 35 | self.conv4_1 = self.__conv(2, name='conv4_1', in_channels=256, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 36 | self.conv4_2 = self.__conv(2, name='conv4_2', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 37 | self.conv4_3 = self.__conv(2, name='conv4_3', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 38 | self.conv4_4 = self.__conv(2, name='conv4_4', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 39 | self.conv5_1 = self.__conv(2, name='conv5_1', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 40 | self.conv5_2 = self.__conv(2, name='conv5_2', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 41 | self.conv5_3 = self.__conv(2, name='conv5_3', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 42 | self.conv5_4 = self.__conv(2, name='conv5_4', in_channels=512, out_channels=512, kernel_size=(3, 3), stride=(1, 1), groups=1, bias=True) 43 | self.fc6_1 = self.__dense(name = 'fc6_1', in_features = 25088, out_features = 4096, bias = True) 44 | self.fc7_1 = self.__dense(name = 'fc7_1', in_features = 4096, out_features = 4096, bias = True) 45 | self.fc8_retrain_1 = self.__dense(name = 
'fc8-retrain_1', in_features = 4096, out_features = 3, bias = True) 46 | 47 | def forward(self, x): 48 | conv1_1_pad = F.pad(x, (1, 1, 1, 1)) 49 | conv1_1 = self.conv1_1(conv1_1_pad) 50 | relu1_1 = F.relu(conv1_1) 51 | conv1_2_pad = F.pad(relu1_1, (1, 1, 1, 1)) 52 | conv1_2 = self.conv1_2(conv1_2_pad) 53 | relu1_2 = F.relu(conv1_2) 54 | pool1_pad = F.pad(relu1_2, (0, 1, 0, 1), value=float('-inf')) 55 | pool1 = F.max_pool2d(pool1_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 56 | conv2_1_pad = F.pad(pool1, (1, 1, 1, 1)) 57 | conv2_1 = self.conv2_1(conv2_1_pad) 58 | relu2_1 = F.relu(conv2_1) 59 | conv2_2_pad = F.pad(relu2_1, (1, 1, 1, 1)) 60 | conv2_2 = self.conv2_2(conv2_2_pad) 61 | relu2_2 = F.relu(conv2_2) 62 | pool2_pad = F.pad(relu2_2, (0, 1, 0, 1), value=float('-inf')) 63 | pool2 = F.max_pool2d(pool2_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 64 | conv3_1_pad = F.pad(pool2, (1, 1, 1, 1)) 65 | conv3_1 = self.conv3_1(conv3_1_pad) 66 | relu3_1 = F.relu(conv3_1) 67 | conv3_2_pad = F.pad(relu3_1, (1, 1, 1, 1)) 68 | conv3_2 = self.conv3_2(conv3_2_pad) 69 | relu3_2 = F.relu(conv3_2) 70 | conv3_3_pad = F.pad(relu3_2, (1, 1, 1, 1)) 71 | conv3_3 = self.conv3_3(conv3_3_pad) 72 | relu3_3 = F.relu(conv3_3) 73 | conv3_4_pad = F.pad(relu3_3, (1, 1, 1, 1)) 74 | conv3_4 = self.conv3_4(conv3_4_pad) 75 | relu3_4 = F.relu(conv3_4) 76 | pool3_pad = F.pad(relu3_4, (0, 1, 0, 1), value=float('-inf')) 77 | pool3 = F.max_pool2d(pool3_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 78 | conv4_1_pad = F.pad(pool3, (1, 1, 1, 1)) 79 | conv4_1 = self.conv4_1(conv4_1_pad) 80 | relu4_1 = F.relu(conv4_1) 81 | conv4_2_pad = F.pad(relu4_1, (1, 1, 1, 1)) 82 | conv4_2 = self.conv4_2(conv4_2_pad) 83 | relu4_2 = F.relu(conv4_2) 84 | conv4_3_pad = F.pad(relu4_2, (1, 1, 1, 1)) 85 | conv4_3 = self.conv4_3(conv4_3_pad) 86 | relu4_3 = F.relu(conv4_3) 87 | conv4_4_pad = F.pad(relu4_3, (1, 1, 1, 1)) 88 | conv4_4 = self.conv4_4(conv4_4_pad) 89 | relu4_4 = F.relu(conv4_4) 90 | pool4_pad = F.pad(relu4_4, (0, 1, 0, 1), value=float('-inf')) 91 | pool4 = F.max_pool2d(pool4_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 92 | conv5_1_pad = F.pad(pool4, (1, 1, 1, 1)) 93 | conv5_1 = self.conv5_1(conv5_1_pad) 94 | relu5_1 = F.relu(conv5_1) 95 | conv5_2_pad = F.pad(relu5_1, (1, 1, 1, 1)) 96 | conv5_2 = self.conv5_2(conv5_2_pad) 97 | relu5_2 = F.relu(conv5_2) 98 | conv5_3_pad = F.pad(relu5_2, (1, 1, 1, 1)) 99 | conv5_3 = self.conv5_3(conv5_3_pad) 100 | relu5_3 = F.relu(conv5_3) 101 | conv5_4_pad = F.pad(relu5_3, (1, 1, 1, 1)) 102 | conv5_4 = self.conv5_4(conv5_4_pad) 103 | relu5_4 = F.relu(conv5_4) 104 | pool5_pad = F.pad(relu5_4, (0, 1, 0, 1), value=float('-inf')) 105 | pool5 = F.max_pool2d(pool5_pad, kernel_size=(2, 2), stride=(2, 2), padding=0, ceil_mode=False) 106 | fc6_0 = pool5.view(pool5.size(0), -1) 107 | fc6_1 = self.fc6_1(fc6_0) 108 | relu6 = F.relu(fc6_1) 109 | drop6 = F.dropout(input = relu6, p = 0.5, training = self.training, inplace = True) 110 | fc7_0 = drop6.view(drop6.size(0), -1) 111 | fc7_1 = self.fc7_1(fc7_0) 112 | relu7 = F.relu(fc7_1) 113 | drop7 = F.dropout(input = relu7, p = 0.5, training = self.training, inplace = True) 114 | fc8_retrain_0 = drop7.view(drop7.size(0), -1) 115 | fc8_retrain_1 = self.fc8_retrain_1(fc8_retrain_0) 116 | softmax = F.softmax(fc8_retrain_1) 117 | return softmax 118 | 119 | 120 | @staticmethod 121 | def __conv(dim, name, **kwargs): 122 | if dim == 1: layer = nn.Conv1d(**kwargs) 123 | elif dim == 2: layer = 
nn.Conv2d(**kwargs) 124 | elif dim == 3: layer = nn.Conv3d(**kwargs) 125 | else: raise NotImplementedError() 126 | 127 | layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 128 | if 'bias' in __weights_dict[name]: 129 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 130 | return layer 131 | 132 | @staticmethod 133 | def __dense(name, **kwargs): 134 | layer = nn.Linear(**kwargs) 135 | layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights'])) 136 | if 'bias' in __weights_dict[name]: 137 | layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias'])) 138 | return layer 139 | 140 | if __name__ == '__main__': 141 | converted_model = KitModel('vgg19_finetuned_all.pth') 142 | converted_model.eval() 143 | 144 | import caffe 145 | original_model = caffe.Net('deploy.prototxt', caffe.TEST, weights='snapshot_iter_74880.caffemodel') 146 | 147 | from PIL import Image 148 | import scipy 149 | import torchvision.transforms.functional as tf 150 | 151 | # image 152 | pil_image = Image.open('../../dummy-data/neutral.jpeg').convert('RGB') 153 | #image = np.array(pil_image).astype(np.float32) 154 | #image = scipy.misc.imresize(image, (224, 224), 'bilinear') 155 | #image = image.transpose((2, 0, 1)) # HWC to CHW 156 | #image = image[[2,1,0]] # RGB to BGR 157 | image = tf.to_tensor(tf.resize(pil_image, (224, 224))) # resize to 224 158 | image = image[[2,1,0]] * 255 # RGB -> BGR (expected by caffe nets), [0,1] -> [0, 255] 159 | 160 | # mean 161 | blob = caffe.proto.caffe_pb2.BlobProto() 162 | blob.ParseFromString(open('mean.binaryproto', 'rb').read()) 163 | mean_image = caffe.io.blobproto_to_array(blob).squeeze().astype(np.uint8) 164 | mean_pixel = torch.from_numpy(mean_image.mean(axis=(1,2), keepdims=True).astype(np.float32)) 165 | 166 | print(mean_pixel) 167 | 168 | # pil_mean_image = tf.to_pil_image(torch.from_numpy(mean_image)) 169 | # mean_image = tf.to_tensor(tf.resize(pil_mean_image, 224)) 170 | 171 | # input 172 | net_input = (image - mean_pixel).unsqueeze(0) 173 | print(net_input.mean()) 174 | 175 | # forward 176 | original_model.blobs['data'].data[...] = net_input 177 | original_output = original_model.forward() 178 | 179 | converted_output = converted_model(net_input) 180 | 181 | # outputs 182 | print(original_output) 183 | print(converted_output) 184 | 185 | --------------------------------------------------------------------------------
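
Below is a minimal, CPU-only sketch of single-image inference that mirrors the preprocessing hard-coded in `predict.py` (resize to 224x224, RGB to BGR, scaling to [0, 255], and BGR mean-pixel subtraction) and the model loading used there. It assumes the converted weights were already fetched with `./download_models.sh` and that it runs from the repository root so `vgg19.py` is importable; the script name and the command-line handling are illustrative and not part of the repository.

```python
# single_image_predict.py (hypothetical helper, not part of the repo)
import sys

import torch
import torchvision.transforms as t
from PIL import Image

from vgg19 import KitModel as VGG19

# Same preprocessing as predict.py: the Caffe-derived nets expect BGR inputs
# in [0, 255] with the T4SA mean pixel subtracted per channel.
transform = t.Compose([
    t.Resize((224, 224)),
    t.ToTensor(),
    t.Lambda(lambda x: x[[2, 1, 0], ...] * 255),                       # RGB -> BGR, [0,1] -> [0,255]
    t.Normalize(mean=[116.8007, 121.2751, 130.4602], std=[1, 1, 1]),   # BGR mean subtraction
])

# Load the converted weights exactly as predict.py does (here on CPU).
model = VGG19('converted-models/vgg19_finetuned_all.pth')
model.eval()

image = Image.open(sys.argv[1]).convert('RGB')
x = transform(image).unsqueeze(0)  # add the batch dimension

with torch.no_grad():
    neg, neu, pos = model(x)[0].tolist()  # output order is (NEG, NEU, POS)

print('negative={:.3f} neutral={:.3f} positive={:.3f}'.format(neg, neu, pos))
```

Run it as `python single_image_predict.py path/to/image.jpg`; to test the AlexNet-based variants, swap in `from alexnet import KitModel as AlexNet` and a `hybrid_*` checkpoint instead.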