├── CocoDetection.py
├── README.md
├── RMA_recognition.py
├── RMA_server.py
├── coco-label.txt
├── main.py
├── models
    ├── RMA_module.py
    ├── RMA_module_with_priori.py
    ├── loss.py
    └── loss_with_priori.py
├── priori_main.py
├── priori_main_tencrop.py
├── utils.py
├── visualize.py
├── visualizeImg.py
├── visualizeImg_test0712.py
└── visualize_test0712.py


/CocoDetection.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data as data
 2 | from PIL import Image
 3 | import os
 4 | import os.path
 5 | import torch.tensor as tensor
 6 | import torchvision.transforms as transforms
 7 | #import torch
 8 | import numpy as np
 9 | import copy
10 | 
11 | class CocoDetection(data.Dataset):
12 |     """`MS Coco Detection <http://mscoco.org/dataset/#detections-challenge2016>`_ Dataset.
13 | 
14 |     Args:
15 |         root (string): Root directory where images are downloaded to.
16 |         annFile (string): Path to json annotation file.
17 |         transform (callable, optional): A function/transform that  takes in an PIL image
18 |             and returns a transformed version. E.g, ``transforms.ToTensor``
19 |         target_transform (callable, optional): A function/transform that takes in the
20 |             target and transforms it.
21 |     """
22 | 
23 |     def __init__(self, root, annFile, transform=None, target_transform=None):
24 |         from pycocotools.coco import COCO
25 |         self.root = root
26 |         self.coco = COCO(annFile)
27 |         self.ids = list(self.coco.imgs.keys())
28 |         self.transform = transform
29 |         self.target_transform = target_transform
30 | 
31 |     def __getitem__(self, index):
32 |         """
33 |         Args:
34 |             index (int): Index
35 | 
36 |         Returns:
37 |             tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.
38 |         """
39 |         coco = self.coco
40 |         img_id = self.ids[index]
41 |         ann_ids = coco.getAnnIds(imgIds=img_id)
42 |         target = coco.loadAnns(ann_ids)
43 | 
44 |         path = coco.loadImgs(img_id)[0]['file_name']
45 |         #image_id = coco.loadImgs(img_id)[0]['id']
46 | 
47 |         original_img = Image.open(os.path.join(self.root, path)).convert('RGB')
48 |         #print(transforms.ToTensor()(original_img))
49 |         if self.transform is not None:
50 |             original_img = self.transform(original_img)
51 |             #print(original_img)
52 |             img       = copy.deepcopy(original_img)
53 |             img = transforms.Compose([
54 |                 #transforms.ToTensor(),
55 |                 transforms.Normalize(mean=[0.485, 0.456, 0.406],
56 |                                      std=[0.229, 0.224, 0.225])])(img)
57 |             #print(original_img)
58 | 
59 |         if self.target_transform is not None:
60 |             target = self.target_transform(target)
61 |         
62 |         return img, target, original_img
63 | 
64 | 
65 |     def __len__(self):
66 |         return len(self.ids)
67 | 
68 |     def __repr__(self):
69 |         fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
70 |         fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
71 |         fmt_str += '    Root Location: {}\n'.format(self.root)
72 |         tmp = '    Transforms (if any): '
73 |         fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
74 |         tmp = '    Target Transforms (if any): '
75 |         fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
76 |         return fmt_str
77 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AttentionImageClass
 2 | Multi-label Image Recognition by Recurrently Discovering Attentional Regions (Pytorch implementation)
 3 | 
 4 | ## Training
 5 | 
 6 | ```bash
 7 | python main.py
 8 | ```
 9 | 
10 | ## Running the server for recognition
11 | 
12 | ```bash
13 | python RMA_server.py
14 | ```
15 | 


--------------------------------------------------------------------------------
/RMA_recognition.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.optim as optim
  3 | import torch.utils.data
  4 | import torch.backends.cudnn as cudnn
  5 | import torchvision
  6 | from torchvision import transforms as transforms
  7 | import numpy as np
  8 | import visdom
  9 | import torch.nn.functional as F
 10 | import PIL.Image as Image
 11 | 
 12 | 
 13 | import argparse
 14 | import os
 15 | 
 16 | from models.RMA_module_with_priori import RMA_module
 17 | from models.loss_with_priori import loss_function
 18 | from utils import get_target_transform as target_trans
 19 | from utils import id2label
 20 | 
 21 | 
 22 | # GPU setting
 23 | os.environ.setdefault("CUDA_VISIBLE_DEVICES", "6")
 24 | 
 25 | 
 26 | # ==================================================================
 27 | # Constants
 28 | # ==================================================================
 29 | EPOCH         = 45            # number of times for each run-through
 30 | BATCH_SIZE    = 8             # number of images for each epoch
 31 | N             = 512           # size of input images (512 or 640)
 32 | TOPK          = 3             # top k highest-ranked labels  
 33 | GPU_IN_USE    = torch.cuda.is_available()  # whether using GPU
 34 | PATH_MODEL_PARAMS  = './params/params_with_priori.pkl'
 35 | 
 36 | 
 37 | # ==================================================================
 38 | # Parser Initialization
 39 | # ==================================================================
 40 | parser = argparse.ArgumentParser(description='Pytorch Implementation of ICCV2017_AttentionImageClass')
 41 | parser.add_argument('--testBatchSize',   default=BATCH_SIZE,        type=int,   help='testing batch size')
 42 | parser.add_argument('--pathModelParams', default=PATH_MODEL_PARAMS, type=str,   help='path of model parameters')
 43 | parser.add_argument('--loadModel',       default=True,              type=bool,  help='load model parameters')
 44 | args = parser.parse_args()
 45 | 
 46 | 
 47 | # ==================================================================
 48 | # Transforms for the Input Images
 49 | # ==================================================================
 50 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
 51 |                                   std=[0.229, 0.224, 0.225])
 52 | transforms = transforms.Compose([
 53 |              transforms.Resize((N, N)), 
 54 |              transforms.ToTensor(),
 55 |              normalize
 56 |              ]) 
 57 |         
 58 | 
 59 | class RMA_model(object):
 60 |     # 构造函数里加载模型，比如 tensorflow 的 graph, sess 等
 61 |     def __init__(self):
 62 |         # prepare model
 63 |         print('\n***** Prepare Model *****')
 64 |         vgg16 = torchvision.models.vgg16(pretrained=True)
 65 |         self.extract_features = vgg16.features
 66 |         self.RMA = RMA_module(lstm_input_size=14, lstm_hidden_size=4096, zk_size=4096)
 67 |         if args.loadModel:
 68 |             self.RMA.load_state_dict(torch.load(args.pathModelParams))
 69 |         if GPU_IN_USE:
 70 |             print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES'])
 71 |             print('cuda: move all model parameters and buffers to the GPU')
 72 |             self.extract_features.cuda()
 73 |             self.RMA.cuda()
 74 |             cudnn.benchmark = True
 75 |         print('Model Preparation : Finished')
 76 | 
 77 |     # Test
 78 |     def evaluate(self, data):
 79 |         print('evaluate:')
 80 |         self.RMA.eval()        # set the module in evaluation mode
 81 |         print('before transforms')
 82 |         data = transforms(data).unsqueeze(0)
 83 |         if GPU_IN_USE:
 84 |             data = data.cuda()  # set up GPU Tensor
 85 |         
 86 |         print('before extracting features')
 87 |         f_I = self.extract_features(data)        
 88 |         output, _ = self.RMA(f_I)
 89 |         print('after RMA')    
 90 |         prediction  = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
 91 |         filter      = prediction[0].eq(0.1) + prediction[0].gt(0.1)
 92 |         category_id = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
 93 |         print(prediction[0])
 94 |         #print(category_id)
 95 |         return id2label(category_id)[0].tolist()
 96 | 
 97 | 
 98 |     # 需要对外提供一个 API，可以直接拿到你们的结果
 99 |     def image_recognition(self, image_path):
100 |         # 业务逻辑
101 |         print('image path: ', image_path)
102 |         image = Image.open(image_path)
103 |         if not image.mode == 'RGB':
104 |             image = image.convert('RGB')
105 |         with torch.no_grad():
106 |             label = self.evaluate(image)
107 |         print(label)
108 |         return dict(
109 |             data = label
110 |         )
111 |     
112 |     def __del__(self):
113 |         print("delete!")
114 | 
115 | # 生成模型实例
116 | # 这里生成模型实例供 server 导入并调用
117 | print("生成 RMA Model 实例.................")
118 | RMA_model_instance = RMA_model()
119 | print("RMA Model 实例生成完成...............")
120 | 


--------------------------------------------------------------------------------
/RMA_server.py:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | # 用了 Python 的 Tornado 框架，当然也可以使用其它框架
 3 | # 英文文档 (v5.0) http://www.tornadoweb.org/en/stable/
 4 | # 中文文档 (v4.3) http://tornado-zh.readthedocs.io/zh/latest/
 5 | import json
 6 | import os
 7 | 
 8 | import tornado.httpserver
 9 | import tornado.ioloop
10 | import tornado.options
11 | import tornado.web
12 | from tornado.options import define, options
13 | from tornado.escape import json_decode, json_encode
14 | from tornado.concurrent import Future
15 | 
16 | # 这里导入模型实例
17 | from RMA_recognition import RMA_model_instance
18 | 
19 | # 定义端口默认值
20 | define("port", default=8080, help="run on the given port", type=int)
21 | 
22 | # 业务层，负责调用模型 API
23 | class RecognitionService(object):
24 |     def upload_image(self, file_metas):
25 |         file_path = None
26 |         if (file_metas):
27 |             for meta in file_metas:
28 |                 # 上传图片保存路径为 {当前目录}/realimg
29 |                 upload_path = os.path.join(os.path.dirname(__file__), "realimg")
30 |                 
31 |                 filename = meta['filename']
32 |                 file_path = os.path.join(upload_path, filename)
33 | 
34 |                 with open(file_path, 'wb') as f:
35 |                     f.write(meta['body'])
36 |        
37 |         return file_path
38 | 
39 |     def recognition_model_run(self, image_path):
40 |         res = dict(
41 |             rtn = 0,
42 |             msg = "",
43 |             data = {}
44 |         )
45 |         # 调用模型 API
46 |         try:
47 |             data = RMA_model_instance.image_recognition(image_path)
48 |             res = dict(
49 |                 rtn = 200,
50 |                 msg = "成功",
51 |                 data = data
52 |             )
53 |         except Exception as e:
54 |             res["rtn"] = 500
55 |             res["msg"] = str(e)
56 |         
57 |         return res
58 | 
59 | 
60 | # 同步 Handler 示例
61 | class SyncHandler(tornado.web.RequestHandler):
62 |     def initialize(self):
63 |         self.recognition_service = RecognitionService()
64 | 
65 |     def post(self):
66 |         file_metas = self.request.files.get("img")
67 | 
68 |         file_path = self.recognition_service.upload_image(file_metas)
69 | 
70 |         res = self.recognition_service.recognition_model_run(file_path)
71 | 
72 |         self.set_status(res.get("rtn"))
73 |         self.set_header("Content-Type", "application/json")
74 |         self.write(json_encode(res))
75 | 
76 |         self.finish()
77 |         
78 | 
79 | 
80 | 
81 | if __name__ == "__main__":
82 |     tornado.options.parse_command_line()
83 |     # 下面用正则表达式匹配 url
84 |     app = tornado.web.Application(handlers=[
85 |         (r"/api/recognition", SyncHandler)
86 |     ])
87 |     http_server = tornado.httpserver.HTTPServer(app)
88 |     http_server.listen(options.port)
89 |     tornado.ioloop.IOLoop.instance().start()
90 | 
91 | # http://localhost:8080/api/async_api
92 | # http://localhost:8080/api/sync_api
93 | 


--------------------------------------------------------------------------------
/coco-label.txt:
--------------------------------------------------------------------------------
 1 | 1,1,person
 2 | 2,2,bicycle
 3 | 3,3,car
 4 | 4,4,motorcycle
 5 | 5,5,airplane
 6 | 6,6,bus
 7 | 7,7,train
 8 | 8,8,truck
 9 | 9,9,boat
10 | 10,10,traffic light
11 | 11,11,fire hydrant
12 | 13,12,stop sign
13 | 14,13,parking meter
14 | 15,14,bench
15 | 16,15,bird
16 | 17,16,cat
17 | 18,17,dog
18 | 19,18,horse
19 | 20,19,sheep
20 | 21,20,cow
21 | 22,21,elephant
22 | 23,22,bear
23 | 24,23,zebra
24 | 25,24,giraffe
25 | 27,25,backpack
26 | 28,26,umbrella
27 | 31,27,handbag
28 | 32,28,tie
29 | 33,29,suitcase
30 | 34,30,frisbee
31 | 35,31,skis
32 | 36,32,snowboard
33 | 37,33,sports ball
34 | 38,34,kite
35 | 39,35,baseball bat
36 | 40,36,baseball glove
37 | 41,37,skateboard
38 | 42,38,surfboard
39 | 43,39,tennis racket
40 | 44,40,bottle
41 | 46,41,wine glass
42 | 47,42,cup
43 | 48,43,fork
44 | 49,44,knife
45 | 50,45,spoon
46 | 51,46,bowl
47 | 52,47,banana
48 | 53,48,apple
49 | 54,49,sandwich
50 | 55,50,orange
51 | 56,51,broccoli
52 | 57,52,carrot
53 | 58,53,hot dog
54 | 59,54,pizza
55 | 60,55,donut
56 | 61,56,cake
57 | 62,57,chair
58 | 63,58,couch
59 | 64,59,potted plant
60 | 65,60,bed
61 | 67,61,dining table
62 | 70,62,toilet
63 | 72,63,tv
64 | 73,64,laptop
65 | 74,65,mouse
66 | 75,66,remote
67 | 76,67,keyboard
68 | 77,68,cell phone
69 | 78,69,microwave
70 | 79,70,oven
71 | 80,71,toaster
72 | 81,72,sink
73 | 82,73,refrigerator
74 | 84,74,book
75 | 85,75,clock
76 | 86,76,vase
77 | 87,77,scissors
78 | 88,78,teddy bear
79 | 89,79,hair drier
80 | 90,80,toothbrush


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.optim as optim
  3 | import torch.utils.data
  4 | import torch.backends.cudnn as cudnn
  5 | import torchvision
  6 | from torchvision import transforms as transforms
  7 | import numpy as np
  8 | import visdom
  9 | import torch.nn.functional as F
 10 | 
 11 | import argparse
 12 | import os
 13 | 
 14 | from models.RMA_module import RMA_module
 15 | from models.loss import loss_function
 16 | from utils import get_target_transform as target_trans
 17 | 
 18 | # data visualization
 19 | vis = visdom.Visdom(env='baseline(no priori)')
 20 | # GPU setting
 21 | os.environ.setdefault("CUDA_VISIBLE_DEVICES", "2")
 22 | 
 23 | # ==================================================================
 24 | # Constants
 25 | # ==================================================================
 26 | EPOCH         = 45            # number of times for each run-through
 27 | BATCH_SIZE    = 16            # number of images for each epoch
 28 | LEARNING_RATE = 1e-5          # default learning rate 
 29 | WEIGHT_DECAY  = 0             # default weight decay
 30 | N             = 512           # size of input images (512 or 640)
 31 | MOMENTUM      = (0.9, 0.999)  # momentum in Adam optimization
 32 | TOPK          = 3             # top k highest-ranked labels  
 33 | GPU_IN_USE    = torch.cuda.is_available()  # whether using GPU
 34 | DIR_TRAIN_IMAGES   = '../dataset/train2017/'
 35 | DIR_TEST_IMAGES    = '../dataset/val2017/'
 36 | PATH_TRAIN_ANNFILE = '../dataset/annotations/instances_train2017.json'
 37 | PATH_TEST_ANNFILE  = '../dataset/annotations/instances_val2017.json'
 38 | PATH_MODEL_PARAMS  = './params/params_no_priori.pkl'
 39 | NUM_CATEGORIES     = 80
 40 | LOSS_OUTPUT_INTERVAL = 100
 41 | 
 42 | # ==================================================================
 43 | # Global Variables
 44 | # ==================================================================
 45 | # one iteration means one mini-batch finishs a forward-backward process
 46 | current_training_iteration = torch.tensor([1])
 47 | current_test_iteration     = torch.tensor([1])
 48 | loss_graph_window          = 'loss graph'
 49 | test_f1_graph_window       = 'test OF1 and CF1 graph'
 50 | evaluation_window          = 'six evaluation metrics'
 51 | #category_id_window         = 'category ids of prediction and ground-truth'
 52 | of1 = 0.
 53 | cf1 = 0.
 54 | 
 55 | # ==================================================================
 56 | # Parser Initialization
 57 | # ==================================================================
 58 | parser = argparse.ArgumentParser(description='Pytorch Implementation of ICCV2017_AttentionImageClass')
 59 | parser.add_argument('--lr',              default=LEARNING_RATE,     type=float, help='learning rate')
 60 | parser.add_argument('--epoch',           default=EPOCH,             type=int,   help='number of epochs')
 61 | parser.add_argument('--trainBatchSize',  default=BATCH_SIZE,        type=int,   help='training batch size')
 62 | parser.add_argument('--testBatchSize',   default=BATCH_SIZE,        type=int,   help='testing batch size')
 63 | parser.add_argument('--weightDecay',     default=WEIGHT_DECAY,      type=float, help='weight decay')
 64 | parser.add_argument('--pathModelParams', default=PATH_MODEL_PARAMS, type=str,   help='path of model parameters')
 65 | parser.add_argument('--saveModel',       default=True,              type=bool,  help='save model parameters')
 66 | parser.add_argument('--loadModel',       default=False,             type=bool,  help='load model parameters')
 67 | args = parser.parse_args()
 68 | 
 69 | 
 70 | # ==================================================================
 71 | # Prepare Dataset(training & test)
 72 | # ==================================================================
 73 | print('***** Prepare Data ******')
 74 | 
 75 | # transforms of training dataset 
 76 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
 77 |                                   std=[0.229, 0.224, 0.225])
 78 | train_transforms = transforms.Compose([
 79 |                      transforms.RandomHorizontalFlip(p=0.5), # default value is 0.5
 80 |                      transforms.Resize((N, N)),
 81 |                      transforms.ToTensor(),
 82 |                      normalize
 83 |                   ])
 84 | 
 85 | # transforms of test dataset
 86 | test_transforms = transforms.Compose([
 87 |                     transforms.Resize((N, N)), 
 88 |                     transforms.ToTensor(),
 89 |                     normalize
 90 |                   ]) 
 91 | 
 92 | train_dataset = torchvision.datasets.CocoDetection(root=DIR_TRAIN_IMAGES, annFile=PATH_TRAIN_ANNFILE, 
 93 |                                                    transform=train_transforms, target_transform=target_trans)
 94 | test_dataset  = torchvision.datasets.CocoDetection(root=DIR_TEST_IMAGES,  annFile=PATH_TEST_ANNFILE,
 95 |                                                    transform=test_transforms,  target_transform=target_trans)
 96 | train_loader  = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.trainBatchSize, shuffle=True,  num_workers=2)
 97 | test_loader   = torch.utils.data.DataLoader(dataset=test_dataset,  batch_size=args.testBatchSize,  shuffle=False, num_workers=2)
 98 | print('Data Preparation : Finished')
 99 | 
100 | 
101 | # ==================================================================
102 | # Prepare Model
103 | # ==================================================================
104 | print('\n***** Prepare Model *****')
105 | 
106 | vgg16 = torchvision.models.vgg16(pretrained=True)
107 | 
108 | for param in vgg16.features.parameters():
109 |     param.requires_grad=False
110 | 
111 | extract_features = vgg16.features
112 | 
113 | RMA = RMA_module(lstm_input_size=14, lstm_hidden_size=4096, zk_size=4096)
114 | if args.loadModel:
115 |     RMA.load_state_dict(torch.load(args.pathModelParams))
116 | 
117 | if GPU_IN_USE:
118 |     print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES'])
119 |     print('cuda: move all model parameters and buffers to the GPU')
120 |     extract_features.cuda()
121 |     RMA.cuda()
122 |     cudnn.benchmark = True
123 | 
124 | # Adam optimization
125 | optimizer = optim.Adam(RMA.parameters(), lr=args.lr, weight_decay=args.weightDecay, betas=MOMENTUM)  
126 | # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)  # lr decay
127 | print('Model Preparation : Finished')
128 | 
129 | 
130 | # Train
131 | # ================================================================================
132 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
133 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
134 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
135 | # prediction: [
136 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
137 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
138 | #             ]
139 | # ================================================================================
140 | def train():
141 |     print('train:')
142 |     RMA.train()     # set the module in training  mode
143 |     train_loss = 0. # sum of train loss up to current batch
144 | 
145 |     global current_training_iteration
146 |     
147 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
148 |     sum_correct_prediction_label = torch.zeros(1, 80)
149 |     sum_ground_truth_label       = torch.zeros(1, 80)
150 |     
151 |     for batch_num, (data, target) in enumerate(train_loader):
152 |         if target.sum() == 0:
153 |             continue
154 |         target = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
155 |         data   = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
156 |         
157 |         if GPU_IN_USE:
158 |             data, target = data.cuda(), target.cuda() 
159 | 
160 |         # -----forward-----
161 |         optimizer.zero_grad()
162 |         f_I = extract_features(data)        
163 |         output, M = RMA(f_I)
164 |         # ---end forward---
165 |         
166 |         # ---calculate loss and backward---
167 |         loss = loss_function(output, target, M, add_constraint=True)
168 |         loss.backward()
169 |         optimizer.step()
170 |         # ----------end backward-----------
171 |         
172 |         train_loss   += loss
173 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
174 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
175 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
176 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
177 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
178 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
179 |         
180 |         #count the sum of label vector
181 |         sum_prediction_label         += prediction_label.sum(dim=0)
182 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
183 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
184 |         
185 |         #for i in range(0, target.size(0)):
186 |         #    print('-----------------')
187 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
188 |         #    print('prediction:   ', prediction[1][i])
189 |         #    print('-----------------')
190 |         
191 |         if batch_num % LOSS_OUTPUT_INTERVAL == 0:
192 |             # visualization: draw the train loss graph 
193 |             vis.line(
194 |                 X=current_training_iteration, 
195 |                 Y=torch.tensor([train_loss.data]) / (batch_num+1), 
196 |                 win=loss_graph_window,
197 |                 name='train loss',
198 |                 update=None if current_training_iteration == 1 else 'append',
199 |                 opts=dict(xlabel='iteration', ylabel='loss', showlegend=True)
200 |             )
201 |             print('loss %.3f (batch %d)' % (train_loss/(batch_num+1), batch_num+1))
202 |             current_training_iteration += LOSS_OUTPUT_INTERVAL
203 | 
204 |     # evaluation metrics
205 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
206 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
207 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
208 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
209 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
210 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
211 |    
212 |     return c_p, c_r, cf1, o_p, o_r, of1
213 | 
214 | 
215 | # Test
216 | # ================================================================================
217 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
218 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
219 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
220 | # prediction: [
221 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
222 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
223 | #             ]
224 | # ================================================================================
225 | def test():
226 |     print('test:')
227 |     RMA.eval()        # set the module in evaluation mode
228 |     test_loss    = 0. # sum of train loss up to current batch
229 | 
230 |     global current_test_iteration
231 |     
232 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
233 |     sum_correct_prediction_label = torch.zeros(1, 80)
234 |     sum_ground_truth_label       = torch.zeros(1, 80)
235 | 
236 |     for batch_num, (data, target) in enumerate(test_loader):
237 |         if target.sum() == 0:
238 |             continue
239 |         target = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
240 |         data   = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
241 | 
242 |         if GPU_IN_USE:
243 |             data, target = data.cuda(), target.cuda()  # set up GPU Tensor
244 | 
245 |         f_I = extract_features(data)        
246 |         output, M = RMA(f_I)
247 |         loss = loss_function(output, target, M, add_constraint=True)
248 |         
249 |         test_loss    += loss
250 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
251 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
252 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
253 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
254 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
255 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
256 |         
257 |         #count the sum of label vector
258 |         sum_prediction_label         += prediction_label.sum(dim=0)
259 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
260 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
261 | 
262 |         #for i in range(0, target.size(0)):
263 |         #    print('-----------------')
264 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
265 |         #    print('prediction:   ', prediction_index[i] - 1)
266 |         #    print('-----------------')
267 |         # 
268 | 
269 |         if batch_num % LOSS_OUTPUT_INTERVAL == 0:
270 |             # visualization: draw the test loss graph
271 |             vis.line(
272 |                 X=current_test_iteration, 
273 |                 Y=torch.tensor([test_loss.data]) / (batch_num+1), 
274 |                 win=loss_graph_window,
275 |                 name='test loss',
276 |                 update='insert' if current_test_iteration == 1 else 'append',
277 |                 opts=dict(showlegend=True),
278 |             )
279 |             print('loss %.3f (batch %d)' % (test_loss / (batch_num+1), batch_num+1))
280 |             current_test_iteration += LOSS_OUTPUT_INTERVAL
281 | 
282 |     # evaluation metrics
283 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
284 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
285 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
286 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
287 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
288 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
289 |    
290 |     return c_p, c_r, cf1, o_p, o_r, of1
291 | 
292 | 
293 | # ==================================================================
294 | # Save Model
295 | # ==================================================================
296 | def save():
297 |     torch.save(RMA.state_dict(), args.pathModelParams)
298 |     print('Checkpoint saved to {}'.format(args.pathModelParams))
299 | 
300 | 
301 | # ==================================================================
302 | # Main Loop
303 | # ==================================================================
304 | for current_epoch in range(1, args.epoch + 1):
305 |     print('\n===> epoch: %d/%d' % (current_epoch, args.epoch))
306 |     train_cp, train_cr, train_cf1, train_op, train_or, train_of1 = train()
307 |     with torch.no_grad():
308 |         test_cp, test_cr, test_cf1, test_op, test_or, test_of1 = test()
309 |     
310 |     evaluation_metrics = '''
311 | <pre>
312 | ===> epoch: %d/%d<br/>
313 | -------------------------------------------------------------
314 | |    CP   |    CR   |   CF1   |    OP   |    OR   |   OF1   |
315 | -------------------------------------------------------------
316 | |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |
317 | -------------------------------------------------------------
318 | </pre>
319 | ''' % (current_epoch, args.epoch, test_cp, test_cr, test_cf1, test_op, test_or, test_of1)
320 |     
321 |     # visualization
322 |     vis.line(
323 |         X=torch.tensor([current_epoch]), 
324 |         Y=torch.tensor([test_cf1]),
325 |         name='test_CF1',
326 |         win=test_f1_graph_window,
327 |         update=None if current_epoch == 1 else 'append', 
328 |         opts=dict(xlabel='epoch', ylabel='F1', showlegend=True, title='Evaluation of Test (CF1 / OF1)')
329 |     )
330 |     vis.line(
331 |         X=torch.tensor([current_epoch]), 
332 |         Y=torch.tensor([test_of1]),
333 |         name='test_OF1',
334 |         win=test_f1_graph_window,
335 |         update='insert' if current_epoch == 1 else 'append', 
336 |         opts=dict(showlegend=True)
337 |     )
338 |     vis.text(
339 |         evaluation_metrics,
340 |         win=evaluation_window,
341 |         append=False if current_epoch == 1 else True
342 |     )
343 |     
344 |     if test_of1 > of1 and test_cf1 > cf1:
345 |         if args.saveModel:
346 |             save()
347 |         of1 = test_of1
348 |         cf1 = test_cf1
349 | 
350 |     if current_epoch == args.epoch:
351 |         print('===> BEST PERFORMANCE (OF1/CF1): %.3f / %.3f' % (of1, cf1))
352 | 
353 | 


--------------------------------------------------------------------------------
/models/RMA_module.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.nn.functional as F
  3 | import torch.tensor as tensor
  4 | import torch
  5 | 
  6 | COCO_CATEGORIES = 80
  7 | 
  8 | '''
  9 | Recurrent Memorized-Attention Module
 10 | ==============================================================================================
 11 | @Parameters:
 12 |     lstm_input_size  : number of expected features in the input x of LSTM
 13 |     lstm_hidden_size : number of features in the hidden state of LSTM
 14 |     zk_size          : size of z_k (about z_k, see 'Update rule of M' in the paper)
 15 |     num_itreations   : number of iterations in RMA module (default: 5)
 16 |     num_classes      : number of classes/categories (default: 80, using COCO dataset)
 17 |     use_gpu          : whether using gpu (default: True)
 18 | @Input:
 19 |     f_I : feature map (torch.cuda.FloatTensor[batch_size, num_channels, height, width])
 20 | @Output:
 21 |     fused_scores : final fused score vectors (torch.cuda.FloatTensor[batch_size, num_classes])
 22 |     M            : transformation matrices in ST
 23 | ==============================================================================================
 24 | '''
 25 | class RMA_module(nn.Module):
 26 |     def __init__(self, lstm_input_size, lstm_hidden_size, zk_size,
 27 |                  num_iterations=5, num_classes=COCO_CATEGORIES,
 28 |                  use_gpu=True):
 29 |         
 30 |         super(RMA_module, self).__init__()
 31 | 
 32 |         self.K           = num_iterations 
 33 |         self.C           = num_classes
 34 |         self.use_gpu     = use_gpu
 35 |         self.input_size  = lstm_input_size
 36 |         self.hidden_size = lstm_hidden_size
 37 | 
 38 |         self.pooling = nn.MaxPool2d(kernel_size=2)
 39 |         self.fc      = nn.Linear(lstm_input_size * lstm_input_size / 4 * 512, 4096)
 40 |         self.lstm = nn.LSTMCell(4096, lstm_hidden_size)
 41 |         
 42 |         self.get_zk = nn.Sequential(
 43 |             # channels of output feature map in vgg16 = 512
 44 |             nn.Linear(lstm_hidden_size, zk_size),     
 45 |             nn.ReLU(inplace=True)
 46 |         )
 47 |         self.get_score = nn.Linear(zk_size, num_classes)
 48 |         self.update_m  = nn.Linear(zk_size, 6)
 49 |         self.update_m.weight.data = torch.zeros(6, zk_size)
 50 |         self.update_m.bias.data   = tensor([1., 0., 0., 0., 1., 0.])
 51 | 
 52 |     # ST: spatial transformer network forward function
 53 |     # ================================================
 54 |     def ST(self, x, theta):
 55 |         # determine the output size of STN
 56 |         num_channels = x.size()[1]
 57 |         batch_size   = x.size()[0]
 58 |         output_size =  torch.Size((batch_size, num_channels, self.input_size, self.input_size))
 59 |         
 60 |         grid = F.affine_grid(theta, output_size) 
 61 |         if self.use_gpu:
 62 |             grid = grid.cuda()
 63 |         # use bilinear interpolation(default) to sample the input pixels
 64 |         x = F.grid_sample(x, grid)
 65 |         return x
 66 |     
 67 |     # init_hidden: initialize the (h0, c0) in LSTM
 68 |     # ============================================
 69 |     def init_hidden(self, N):
 70 |         if self.use_gpu:
 71 |             h0 = torch.zeros(N, self.hidden_size).cuda()
 72 |             c0 = torch.zeros(N, self.hidden_size).cuda()
 73 |         else:
 74 |             h0 = torch.zeros(N, self.hidden_size)
 75 |             c0 = torch.zeros(N, self.hidden_size)
 76 |         return (h0, c0)
 77 |     
 78 |     # RMA moudule forward function
 79 |     # ============================
 80 |     def forward(self, f_I, return_whole_scores=False):
 81 |         # initialization
 82 |         batch_size = f_I.size()[0]
 83 |         hidden = self.init_hidden(batch_size)
 84 |         if self.use_gpu:
 85 |             scores = torch.randn(self.K, batch_size, self.C).cuda()
 86 |         else:
 87 |             scores = torch.randn(self.K, batch_size, self.C)
 88 |         M      = torch.randn(self.K+1, batch_size, 2, 3) 
 89 |         M[0]   = tensor([[1., 0., 0.], [0., 1., 0.]])
 90 |         
 91 |         # for each iteration
 92 |         for k in range(0, self.K+1):
 93 |             # locate an attentional region
 94 |             f_k = self.ST(f_I, M[k])
 95 |             
 96 |             # descend dimension for lower GPU memory requirement
 97 |             f_k = self.pooling(f_k)
 98 |             f_k = self.fc(f_k.view(batch_size, -1))
 99 | 
100 |             # predict the scores regarding this region
101 |             hidden = self.lstm(f_k, hidden)
102 |             
103 |             # get z_k for further caculating M and scores
104 |             z_k = self.get_zk(hidden[0])
105 | 
106 |             if k != 0:
107 |                 # obtain the score vector of current iteration
108 |                 scores[k-1] = self.get_score(z_k)
109 |                 
110 |             if k != self.K:
111 |                 # update transformation matrix for next iteration
112 |                 M[k+1] = self.update_m(z_k).view(batch_size, 2, 3)
113 |                 M[k+1, :, 0, 1] = tensor(0.)
114 |                 M[k+1, :, 1, 0] = tensor(0.)
115 |         
116 |         # max pooling to obtain the final fused scores
117 |         fused_scores = scores.max(0)
118 |         
119 |         if return_whole_scores:
120 |             return fused_scores[0], M[1:, :, :, :], scores
121 |         else:
122 |             return fused_scores[0], M
123 | 
124 | 


--------------------------------------------------------------------------------
/models/RMA_module_with_priori.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.nn.functional as F
  3 | import torch.tensor as tensor
  4 | import torch
  5 | 
  6 | COCO_CATEGORIES = 80
  7 | 
  8 | '''
  9 | Recurrent Memorized-Attention Module
 10 | ==============================================================================================
 11 | @Parameters:
 12 |     lstm_input_size  : number of expected features in the input x of LSTM
 13 |     lstm_hidden_size : number of features in the hidden state of LSTM
 14 |     zk_size          : size of z_k (about z_k, see 'Update rule of M' in the paper)
 15 |     num_itreations   : number of iterations in RMA module (default: 5)
 16 |     num_classes      : number of classes/categories (default: 80, using COCO dataset)
 17 |     use_gpu          : whether using gpu (default: True)
 18 | @Input:
 19 |     f_I : feature map (torch.cuda.FloatTensor[batch_size, num_channels, height, width])
 20 | @Output:
 21 |     fused_scores : final fused score vectors (torch.cuda.FloatTensor[batch_size, num_classes])
 22 |     M            : transformation matrices in ST
 23 | ==============================================================================================
 24 | '''
 25 | class RMA_module(nn.Module):
 26 |     def __init__(self, lstm_input_size, lstm_hidden_size, zk_size,
 27 |                  num_iterations=5, num_classes=COCO_CATEGORIES,
 28 |                  use_gpu=True):
 29 |         
 30 |         super(RMA_module, self).__init__()
 31 | 
 32 |         self.K           = num_iterations 
 33 |         self.C           = num_classes
 34 |         self.use_gpu     = use_gpu
 35 |         self.input_size  = lstm_input_size
 36 |         self.hidden_size = lstm_hidden_size
 37 | 
 38 |         self.cx = tensor([0.5,  0.5, -0.5, -0.5]).view(4, -1)
 39 |         self.cy = tensor([0.5, -0.5,  0.5, -0.5]).view(4, -1)
 40 |         
 41 |         self.pooling = nn.MaxPool2d(kernel_size=2)
 42 |         self.fc      = nn.Linear(lstm_input_size * lstm_input_size / 4 * 512, 4096)
 43 |         self.lstm = nn.LSTMCell(4096, lstm_hidden_size)
 44 |         
 45 |         self.get_zk = nn.Sequential(
 46 |             # channels of output feature map in vgg16 = 512
 47 |             nn.Linear(lstm_hidden_size, zk_size),     
 48 |             nn.ReLU(inplace=True)
 49 |         )
 50 |         self.get_score = nn.Linear(zk_size, num_classes)
 51 |         self.update_m  = nn.Linear(zk_size, 6)
 52 |         self.update_m.weight.data = torch.zeros(6, zk_size)
 53 |         self.update_m.bias.data   = tensor([1., 0., 0., 0., 1., 0.])
 54 | 
 55 |     # ST: spatial transformer network forward function
 56 |     # ================================================
 57 |     def ST(self, x, theta, k):
 58 |         # determine the output size of STN
 59 |         num_channels = x.size()[1]
 60 |         batch_size   = x.size()[0]
 61 |         output_size =  torch.Size((batch_size, num_channels, self.input_size, self.input_size))
 62 |         
 63 |         if k > 1:
 64 |             theta[:, 0, 2] = theta[:, 0, 2] + self.cx[k-2]
 65 |             theta[:, 1, 2] = theta[:, 1, 2] + self.cy[k-2]
 66 | 
 67 |         grid = F.affine_grid(theta, output_size) 
 68 |         if self.use_gpu:
 69 |             grid = grid.cuda()
 70 |         # use bilinear interpolation(default) to sample the input pixels
 71 |         x = F.grid_sample(x, grid)
 72 |         return x, theta
 73 |     
 74 |     # init_hidden: initialize the (h0, c0) in LSTM
 75 |     # ============================================
 76 |     def init_hidden(self, N):
 77 |         if self.use_gpu:
 78 |             h0 = torch.zeros(N, self.hidden_size).cuda()
 79 |             c0 = torch.zeros(N, self.hidden_size).cuda()
 80 |         else:
 81 |             h0 = torch.zeros(N, self.hidden_size)
 82 |             c0 = torch.zeros(N, self.hidden_size)
 83 |         return (h0, c0)
 84 |     
 85 |     # RMA moudule forward function
 86 |     # ============================
 87 |     def forward(self, f_I, return_whole_scores=False):
 88 |         # initialization
 89 |         batch_size = f_I.size()[0]
 90 |         hidden = self.init_hidden(batch_size)
 91 |         if self.use_gpu:
 92 |             scores = torch.randn(self.K, batch_size, self.C).cuda()
 93 |         else:
 94 |             scores = torch.randn(self.K, batch_size, self.C)
 95 |         M      = torch.randn(self.K+1, batch_size, 2, 3) 
 96 |         M[0]   = tensor([[1., 0., 0.], [0., 1., 0.]])
 97 |         M_for_visual = torch.randn(self.K, batch_size, 2, 3)
 98 | 
 99 |         # for each iteration
100 |         for k in range(0, self.K+1):
101 |             # locate an attentional region
102 |             f_k, M_k_for_visual = self.ST(f_I, M[k].clone(), k)
103 |             
104 |             # descend dimension for lower GPU memory requirement
105 |             f_k = self.pooling(f_k)
106 |             f_k = self.fc(f_k.view(batch_size, -1))
107 | 
108 |             # predict the scores regarding this region
109 |             hidden = self.lstm(f_k, hidden)
110 |             
111 |             # get z_k for further caculating M and scores
112 |             z_k = self.get_zk(hidden[0])
113 | 
114 |             if k != 0:
115 |                 # obtain the score vector of current iteration
116 |                 scores[k-1] = self.get_score(z_k)
117 |                 M_for_visual[k-1] = M_k_for_visual
118 |                 
119 |             if k != self.K:
120 |                 # update transformation matrix for next iteration
121 |                 M[k+1] = self.update_m(z_k).view(batch_size, 2, 3)
122 |                 M[k+1, :, 0, 1] = tensor(0.)
123 |                 M[k+1, :, 1, 0] = tensor(0.)
124 |         
125 |         # max pooling to obtain the final fused scores
126 |         fused_scores = scores.max(0)
127 |        
128 |         #print('M for visual ', M_for_visual)
129 | 
130 |         if return_whole_scores:
131 |             return fused_scores[0], M_for_visual, scores
132 |         else:
133 |             return fused_scores[0], M
134 | 
135 | 


--------------------------------------------------------------------------------
/models/loss.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.tensor as tensor
  3 | import torch.nn.functional as F
  4 | import torch
  5 | from math import *
  6 | 
  7 | # hyperparameters
  8 | alpha   = 0.5
  9 | beta    = 0.1
 10 | lambda1 = 0.01
 11 | lambda2 = 0.1
 12 | gama    = 0.1
 13 | 
 14 | 
 15 | def getAnchorPoints(num_points):
 16 |     radius = 0.5 * sqrt(2)
 17 |     # difference between two anchor points
 18 |     diff = 2 * pi / num_points
 19 |     cx = [radius * cos(i * diff) for i in range(0, num_points)]
 20 |     cy = [radius * sin(i * diff) for i in range(0, num_points)]
 21 | 
 22 |     return tensor(cx).view(num_points, -1), tensor(cy).view(num_points, -1)
 23 | 
 24 | 
 25 | '''
 26 | Loss Function for AttentionImageClass
 27 | =======================================================================================
 28 | @Args:
 29 |     input  : score vectors         (torch.cuda.FloatTensor[batch_size, num_categories])
 30 |     target : target                (torch.cuda.FloatTensor[batch_size, num_categories])
 31 |     M      : transformation matrix (torch.FloatTensor[num_iterations, batch_size, 2, 3])
 32 | 
 33 | @Returns:
 34 |     total_loss
 35 | =======================================================================================
 36 | '''
 37 | def loss_function(input, target, M, add_constraint=False):
 38 |     '''
 39 |     [variable] 'pp'       : predicted probability vector    
 40 |     [variable] 'gtp'      : ground-truth probability vector
 41 |     [variable] 'loss_cls' : loss for classification
 42 |     [variable] 'loss_loc' : loss for localizatoin
 43 |     '''
 44 |     
 45 |     # extra arguments from theta(that is transformation matrix)
 46 |     # =========================================================
 47 |     sx = M[1:, :, 0, 0]
 48 |     sy = M[1:, :, 1, 1]
 49 |     tx = M[1:, :, 0, 2]
 50 |     ty = M[1:, :, 1, 2]
 51 |      
 52 |     # anchor point
 53 |     # ============
 54 |     cx = tensor([0., 0.5,  0.5, -0.5, -0.5]).view(5, -1)
 55 |     cy = tensor([0., 0.5, -0.5,  0.5, -0.5]).view(5, -1)
 56 |     #cx, cy = getAnchorPoints(M.size(0) - 2)
 57 | 
 58 |     # calculate the predicted & ground-truth iprobability vector
 59 |     # ==========================================================
 60 |     pp  = F.softmax(input, dim=1)
 61 |     gtp = target.div(target.norm(p=1, dim=1).view(input.size()[0], -1))
 62 |     
 63 |     # calculate loss for classification
 64 |     # =================================
 65 |     loss_cls = F.mse_loss(pp, gtp, size_average=False)
 66 |     
 67 |     if not add_constraint:
 68 |         return loss_cls
 69 | 
 70 |     # calculate loss for localization
 71 |     # ===============================
 72 |     # anchor constraint
 73 |     loss_A = torch.sum(0.5 * ((tx - cx)**2 + (ty - cy)**2))
 74 |     
 75 |     # scale constraint
 76 |     loss_sx = torch.sum(torch.max(abs(sx) - alpha, tensor(0.)) ** 2)
 77 |     loss_sy = torch.sum(torch.max(abs(sy) - alpha, tensor(0.)) ** 2)
 78 |     loss_S  = loss_sx + loss_sy 
 79 | 
 80 |     # positive constraint
 81 |     loss_P = torch.sum(torch.max(beta - sx, tensor(0.)) + torch.max(beta - sy, tensor(0.)))
 82 | 
 83 |     loss_loc = (loss_S + lambda1 * loss_A + lambda2 * loss_P).cuda()
 84 |     
 85 |     # calculate total loss
 86 |     # ====================
 87 |     total_loss = loss_cls + gama * loss_loc
 88 |     
 89 |     print("M ", M)
 90 |     #print('sx ', sx)
 91 |     #print('sy ', sy)
 92 |     #print('tx ', tx)
 93 |     #print('ty ', ty)
 94 | 
 95 |     #print('cx ', cx)
 96 |     #print('cy ', cy)
 97 |     
 98 |     print('loss_A ', loss_A)
 99 |     #print('loss_S ', loss_S)
100 |     #print('loss_P ', loss_P)
101 |     print('loss_loc ', loss_loc)
102 |     print("loss_cls ", loss_cls)
103 |     print('total_loss ', total_loss)
104 |     
105 |     return total_loss
106 | 
107 | 


--------------------------------------------------------------------------------
/models/loss_with_priori.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.tensor as tensor
  3 | import torch.nn.functional as F
  4 | import torch
  5 | from math import *
  6 | 
  7 | 
  8 | # hyperparameters
  9 | alpha   = 0.5
 10 | beta    = 0.1
 11 | lambda1 = 0.01
 12 | lambda2 = 0.1
 13 | gama    = 0.1
 14 | 
 15 | 
 16 | def getAnchorPoints(num_points):
 17 |     radius = 0.5 * sqrt(2)
 18 |     # difference between two anchor points
 19 |     diff = 2 * pi / num_points
 20 |     cx = [radius * cos(i * diff) for i in range(0, num_points)]
 21 |     cy = [radius * sin(i * diff) for i in range(0, num_points)]
 22 | 
 23 |     return tensor(cx).view(num_points, -1), tensor(cy).view(num_points, -1)
 24 | 
 25 | 
 26 | '''
 27 | Loss Function for AttentionImageClass
 28 | =======================================================================================
 29 | @Args:
 30 |     input  : score vectors         (torch.cuda.FloatTensor[batch_size, num_categories])
 31 |     target : target                (torch.cuda.FloatTensor[batch_size, num_categories])
 32 |     M      : transformation matrix (torch.FloatTensor[num_iterations, batch_size, 2, 3])
 33 | 
 34 | @Returns:
 35 |     total_loss
 36 | =======================================================================================
 37 | '''
 38 | def loss_function(input, target, M, add_constraint=False):
 39 |     '''
 40 |     [variable] 'pp'       : predicted probability vector    
 41 |     [variable] 'gtp'      : ground-truth probability vector
 42 |     [variable] 'loss_cls' : loss for classification
 43 |     [variable] 'loss_loc' : loss for localizatoin
 44 |     '''
 45 |     
 46 |     # extra arguments from theta(that is transformation matrix)
 47 |     # =========================================================
 48 |     sx = M[1:, :, 0, 0]
 49 |     sy = M[1:, :, 1, 1]
 50 |     tx = M[2:, :, 0, 2]
 51 |     ty = M[2:, :, 1, 2]
 52 |      
 53 |     # anchor point
 54 |     # ============
 55 |     cx = tensor([0.5,  0.5, -0.5, -0.5]).view(4, -1)
 56 |     cy = tensor([0.5, -0.5,  0.5, -0.5]).view(4, -1)
 57 |     #cx, cy = getAnchorPoints(M.size(0) - 2)
 58 | 
 59 |     # calculate the predicted & ground-truth iprobability vector
 60 |     # ==========================================================
 61 |     pp  = F.softmax(input, dim=1)
 62 |     gtp = target.div(target.norm(p=1, dim=1).view(input.size()[0], -1))
 63 |     
 64 |     # calculate loss for classification
 65 |     # =================================
 66 |     loss_cls = F.mse_loss(pp, gtp, size_average=False)
 67 |     
 68 |     if not add_constraint:
 69 |         return loss_cls
 70 | 
 71 |     # calculate loss for localization
 72 |     # ===============================
 73 |     # anchor constraint
 74 |     loss_A = torch.sum(0.5 * (tx**2 + ty**2))
 75 | 
 76 |     # scale constraint
 77 |     loss_sx = torch.sum(torch.max(abs(sx) - alpha, tensor(0.)) ** 2)
 78 |     loss_sy = torch.sum(torch.max(abs(sy) - alpha, tensor(0.)) ** 2)
 79 |     loss_S  = loss_sx + loss_sy 
 80 | 
 81 |     # positive constraint
 82 |     loss_P = torch.sum(torch.max(beta - sx, tensor(0.)) + torch.max(beta - sy, tensor(0.)))
 83 | 
 84 |     loss_loc = (loss_S + lambda1 * loss_A + lambda2 * loss_P).cuda()
 85 |     
 86 |     # calculate total loss
 87 |     # ====================
 88 |     total_loss = loss_cls + gama * loss_loc
 89 |     
 90 |     print("M ", M)
 91 |     #print('sx ', sx)
 92 |     #print('sy ', sy)
 93 |     #print('tx ', tx)
 94 |     #print('ty ', ty)
 95 | 
 96 |     #print('cx ', cx)
 97 |     #print('cy ', cy)
 98 |     
 99 |     print('loss_A ', loss_A)
100 |     #print('loss_S ', loss_S)
101 |     #print('loss_P ', loss_P)
102 |     print('loss_loc ', loss_loc)
103 |     print("loss_cls ", loss_cls)
104 |     print('total_loss ', total_loss)
105 |     
106 |     return total_loss
107 | 
108 | 


--------------------------------------------------------------------------------
/priori_main.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.optim as optim
  3 | import torch.utils.data
  4 | import torch.backends.cudnn as cudnn
  5 | import torchvision
  6 | from torchvision import transforms as transforms
  7 | import numpy as np
  8 | import visdom
  9 | import torch.nn.functional as F
 10 | 
 11 | import argparse
 12 | import os
 13 | 
 14 | from models.RMA_module_with_priori import RMA_module
 15 | from models.loss_with_priori import loss_function
 16 | from utils import get_target_transform as target_trans
 17 | 
 18 | # data visualization
 19 | vis = visdom.Visdom(env='baseline(with priori)')
 20 | # GPU setting
 21 | os.environ.setdefault("CUDA_VISIBLE_DEVICES", "3")
 22 | 
 23 | # ==================================================================
 24 | # Constants
 25 | # ==================================================================
 26 | EPOCH         = 45            # number of times for each run-through
 27 | BATCH_SIZE    = 16            # number of images for each epoch
 28 | LEARNING_RATE = 1e-5          # default learning rate 
 29 | WEIGHT_DECAY  = 0             # default weight decay
 30 | N             = 512           # size of input images (512 or 640)
 31 | MOMENTUM      = (0.9, 0.999)  # momentum in Adam optimization
 32 | TOPK          = 3             # top k highest-ranked labels  
 33 | GPU_IN_USE    = torch.cuda.is_available()  # whether using GPU
 34 | DIR_TRAIN_IMAGES   = '../dataset/train2017/'
 35 | DIR_TEST_IMAGES    = '../dataset/val2017/'
 36 | PATH_TRAIN_ANNFILE = '../dataset/annotations/instances_train2017.json'
 37 | PATH_TEST_ANNFILE  = '../dataset/annotations/instances_val2017.json'
 38 | PATH_MODEL_PARAMS  = './params/params_with_priori.pkl'
 39 | NUM_CATEGORIES     = 80
 40 | LOSS_OUTPUT_INTERVAL = 100
 41 | 
 42 | # ==================================================================
 43 | # Global Variables
 44 | # ==================================================================
 45 | # one iteration means one mini-batch finishs a forward-backward process
 46 | current_training_iteration = torch.tensor([1])
 47 | current_test_iteration     = torch.tensor([1])
 48 | loss_graph_window          = 'loss graph'
 49 | test_f1_graph_window       = 'test OF1 and CF1 graph'
 50 | evaluation_window          = 'six evaluation metrics'
 51 | #category_id_window         = 'category ids of prediction and ground-truth'
 52 | of1 = 0.
 53 | cf1 = 0.
 54 | 
 55 | # ==================================================================
 56 | # Parser Initialization
 57 | # ==================================================================
 58 | parser = argparse.ArgumentParser(description='Pytorch Implementation of ICCV2017_AttentionImageClass')
 59 | parser.add_argument('--lr',              default=LEARNING_RATE,     type=float, help='learning rate')
 60 | parser.add_argument('--epoch',           default=EPOCH,             type=int,   help='number of epochs')
 61 | parser.add_argument('--trainBatchSize',  default=BATCH_SIZE,        type=int,   help='training batch size')
 62 | parser.add_argument('--testBatchSize',   default=BATCH_SIZE,        type=int,   help='testing batch size')
 63 | parser.add_argument('--weightDecay',     default=WEIGHT_DECAY,      type=float, help='weight decay')
 64 | parser.add_argument('--pathModelParams', default=PATH_MODEL_PARAMS, type=str,   help='path of model parameters')
 65 | parser.add_argument('--saveModel',       default=True,              type=bool,  help='save model parameters')
 66 | parser.add_argument('--loadModel',       default=False,             type=bool,  help='load model parameters')
 67 | args = parser.parse_args()
 68 | 
 69 | 
 70 | # ==================================================================
 71 | # Prepare Dataset(training & test)
 72 | # ==================================================================
 73 | print('***** Prepare Data ******')
 74 | 
 75 | # transforms of training dataset 
 76 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
 77 |                                   std=[0.229, 0.224, 0.225])
 78 | train_transforms = transforms.Compose([
 79 |                      transforms.RandomHorizontalFlip(p=0.5), # default value is 0.5
 80 |                      transforms.Resize((N, N)),
 81 |                      transforms.ToTensor(),
 82 |                      normalize
 83 |                   ])
 84 | 
 85 | # transforms of test dataset
 86 | test_transforms = transforms.Compose([
 87 |                     transforms.Resize((N, N)), 
 88 |                     transforms.ToTensor(),
 89 |                     normalize
 90 |                   ]) 
 91 | 
 92 | train_dataset = torchvision.datasets.CocoDetection(root=DIR_TRAIN_IMAGES, annFile=PATH_TRAIN_ANNFILE, 
 93 |                                                    transform=train_transforms, target_transform=target_trans)
 94 | test_dataset  = torchvision.datasets.CocoDetection(root=DIR_TEST_IMAGES,  annFile=PATH_TEST_ANNFILE,
 95 |                                                    transform=test_transforms,  target_transform=target_trans)
 96 | train_loader  = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.trainBatchSize, shuffle=True,  num_workers=2)
 97 | test_loader   = torch.utils.data.DataLoader(dataset=test_dataset,  batch_size=args.testBatchSize,  shuffle=False, num_workers=2)
 98 | print('Data Preparation : Finished')
 99 | 
100 | 
101 | # ==================================================================
102 | # Prepare Model
103 | # ==================================================================
104 | print('\n***** Prepare Model *****')
105 | 
106 | vgg16 = torchvision.models.vgg16(pretrained=True)
107 | 
108 | for param in vgg16.features.parameters():
109 |     param.requires_grad=False
110 | 
111 | extract_features = vgg16.features
112 | 
113 | RMA = RMA_module(lstm_input_size=14, lstm_hidden_size=4096, zk_size=4096)
114 | if args.loadModel:
115 |     RMA.load_state_dict(torch.load(args.pathModelParams))
116 | 
117 | if GPU_IN_USE:
118 |     print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES'])
119 |     print('cuda: move all model parameters and buffers to the GPU')
120 |     extract_features.cuda()
121 |     RMA.cuda()
122 |     cudnn.benchmark = True
123 | 
124 | # Adam optimization
125 | optimizer = optim.Adam(RMA.parameters(), lr=args.lr, weight_decay=args.weightDecay, betas=MOMENTUM)  
126 | # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)  # lr decay
127 | print('Model Preparation : Finished')
128 | 
129 | 
130 | # Train
131 | # ================================================================================
132 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
133 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
134 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
135 | # prediction: [
136 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
137 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
138 | #             ]
139 | # ================================================================================
140 | def train():
141 |     print('train:')
142 |     RMA.train()     # set the module in training  mode
143 |     train_loss = 0. # sum of train loss up to current batch
144 | 
145 |     global current_training_iteration
146 |     
147 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
148 |     sum_correct_prediction_label = torch.zeros(1, 80)
149 |     sum_ground_truth_label       = torch.zeros(1, 80)
150 |     
151 |     for batch_num, (data, target) in enumerate(train_loader):
152 |         if target.sum() == 0:
153 |             continue
154 |         target = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
155 |         data   = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
156 |         
157 |         if GPU_IN_USE:
158 |             data, target = data.cuda(), target.cuda() 
159 | 
160 |         # -----forward-----
161 |         optimizer.zero_grad()
162 |         f_I = extract_features(data)        
163 |         output, M = RMA(f_I)
164 |         # ---end forward---
165 |         
166 |         # ---calculate loss and backward---
167 |         loss = loss_function(output, target, M, add_constraint=True)
168 |         loss.backward()
169 |         optimizer.step()
170 |         # ----------end backward-----------
171 |         
172 |         train_loss   += loss
173 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
174 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
175 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
176 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
177 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
178 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
179 |         
180 |         #count the sum of label vector
181 |         sum_prediction_label         += prediction_label.sum(dim=0)
182 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
183 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
184 |         
185 |         #for i in range(0, target.size(0)):
186 |         #    print('-----------------')
187 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
188 |         #    print('prediction:   ', prediction[1][i])
189 |         #    print('-----------------')
190 |         
191 |         if batch_num % LOSS_OUTPUT_INTERVAL == 0:
192 |             # visualization: draw the train loss graph 
193 |             vis.line(
194 |                 X=current_training_iteration, 
195 |                 Y=torch.tensor([train_loss.data]) / (batch_num+1), 
196 |                 win=loss_graph_window,
197 |                 name='train loss',
198 |                 update=None if current_training_iteration == 1 else 'append',
199 |                 opts=dict(xlabel='iteration', ylabel='loss', showlegend=True)
200 |             )
201 |             print('loss %.3f (batch %d)' % (train_loss/(batch_num+1), batch_num+1))
202 |             current_training_iteration += LOSS_OUTPUT_INTERVAL
203 | 
204 |     # evaluation metrics
205 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
206 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
207 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
208 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
209 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
210 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
211 |    
212 |     return c_p, c_r, cf1, o_p, o_r, of1
213 | 
214 | 
215 | # Test
216 | # ================================================================================
217 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
218 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
219 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
220 | # prediction: [
221 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
222 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
223 | #             ]
224 | # ================================================================================
225 | def test():
226 |     print('test:')
227 |     RMA.eval()        # set the module in evaluation mode
228 |     test_loss    = 0. # sum of train loss up to current batch
229 | 
230 |     global current_test_iteration
231 |     
232 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
233 |     sum_correct_prediction_label = torch.zeros(1, 80)
234 |     sum_ground_truth_label       = torch.zeros(1, 80)
235 | 
236 |     for batch_num, (data, target) in enumerate(test_loader):
237 |         if target.sum() == 0:
238 |             continue
239 |         target = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
240 |         data   = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
241 | 
242 |         if GPU_IN_USE:
243 |             data, target = data.cuda(), target.cuda()  # set up GPU Tensor
244 | 
245 |         f_I = extract_features(data)        
246 |         output, M = RMA(f_I)
247 |         loss = loss_function(output, target, M, add_constraint=True)
248 |         
249 |         test_loss    += loss
250 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
251 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
252 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
253 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
254 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
255 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
256 |         
257 |         #count the sum of label vector
258 |         sum_prediction_label         += prediction_label.sum(dim=0)
259 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
260 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
261 | 
262 |         #for i in range(0, target.size(0)):
263 |         #    print('-----------------')
264 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
265 |         #    print('prediction:   ', prediction_index[i] - 1)
266 |         #    print('-----------------')
267 |         # 
268 | 
269 |         if batch_num % LOSS_OUTPUT_INTERVAL == 0:
270 |             # visualization: draw the test loss graph
271 |             vis.line(
272 |                 X=current_test_iteration, 
273 |                 Y=torch.tensor([test_loss.data]) / (batch_num+1), 
274 |                 win=loss_graph_window,
275 |                 name='test loss',
276 |                 update='insert' if current_test_iteration == 1 else 'append',
277 |                 opts=dict(showlegend=True),
278 |             )
279 |             print('loss %.3f (batch %d)' % (test_loss / (batch_num+1), batch_num+1))
280 |             current_test_iteration += LOSS_OUTPUT_INTERVAL
281 | 
282 |     # evaluation metrics
283 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
284 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
285 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
286 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
287 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
288 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
289 |    
290 |     return c_p, c_r, cf1, o_p, o_r, of1
291 | 
292 | 
293 | # ==================================================================
294 | # Save Model
295 | # ==================================================================
296 | def save():
297 |     torch.save(RMA.state_dict(), args.pathModelParams)
298 |     print('Checkpoint saved to {}'.format(args.pathModelParams))
299 | 
300 | 
301 | # ==================================================================
302 | # Main Loop
303 | # ==================================================================
304 | for current_epoch in range(1, args.epoch + 1):
305 |     print('\n===> epoch: %d/%d' % (current_epoch, args.epoch))
306 |     train_cp, train_cr, train_cf1, train_op, train_or, train_of1 = train()
307 |     with torch.no_grad():
308 |         test_cp, test_cr, test_cf1, test_op, test_or, test_of1 = test()
309 |     
310 |     evaluation_metrics = '''
311 | <pre>
312 | ===> epoch: %d/%d<br/>
313 | -------------------------------------------------------------
314 | |    CP   |    CR   |   CF1   |    OP   |    OR   |   OF1   |
315 | -------------------------------------------------------------
316 | |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |
317 | -------------------------------------------------------------
318 | </pre>
319 | ''' % (current_epoch, args.epoch, test_cp, test_cr, test_cf1, test_op, test_or, test_of1)
320 |     
321 |     # visualization
322 |     vis.line(
323 |         X=torch.tensor([current_epoch]), 
324 |         Y=torch.tensor([test_cf1]),
325 |         name='test_CF1',
326 |         win=test_f1_graph_window,
327 |         update=None if current_epoch == 1 else 'append', 
328 |         opts=dict(xlabel='epoch', ylabel='F1', showlegend=True, title='Evaluation of Test (CF1 / OF1)')
329 |     )
330 |     vis.line(
331 |         X=torch.tensor([current_epoch]), 
332 |         Y=torch.tensor([test_of1]),
333 |         name='test_OF1',
334 |         win=test_f1_graph_window,
335 |         update='insert' if current_epoch == 1 else 'append', 
336 |         opts=dict(showlegend=True)
337 |     )
338 |     vis.text(
339 |         evaluation_metrics,
340 |         win=evaluation_window,
341 |         append=False if current_epoch == 1 else True
342 |     )
343 |     
344 |     if test_of1 > of1 and test_cf1 > cf1:
345 |         if args.saveModel:
346 |             save()
347 |         of1 = test_of1
348 |         cf1 = test_cf1
349 | 
350 |     if current_epoch == args.epoch:
351 |         print('===> BEST PERFORMANCE (OF1/CF1): %.3f / %.3f' % (of1, cf1))
352 | 
353 | 


--------------------------------------------------------------------------------
/priori_main_tencrop.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.optim as optim
  3 | import torch.utils.data
  4 | import torch.backends.cudnn as cudnn
  5 | import torchvision
  6 | from torchvision import transforms as transforms
  7 | import numpy as np
  8 | import visdom
  9 | import torch.nn.functional as F
 10 | 
 11 | import argparse
 12 | import os
 13 | 
 14 | from models.RMA_module_with_priori import RMA_module
 15 | from models.loss_with_priori import loss_function
 16 | from utils import get_target_transform as target_trans
 17 | from utils import*
 18 | 
 19 | # data visualization
 20 | vis = visdom.Visdom(env='priori_tencrop')
 21 | # GPU setting
 22 | os.environ.setdefault("CUDA_VISIBLE_DEVICES", "5")
 23 | 
 24 | # ==================================================================
 25 | # Constants
 26 | # ==================================================================
 27 | EPOCH         = 45            # number of times for each run-through
 28 | BATCH_SIZE    = 16            # number of images for each epoch
 29 | LEARNING_RATE = 1e-5          # default learning rate 
 30 | WEIGHT_DECAY  = 0             # default weight decay
 31 | N             = 576           # size of input images (512 or 640)
 32 | MOMENTUM      = (0.9, 0.999)  # momentum in Adam optimization
 33 | TOPK          = 3             # top k highest-ranked labels  
 34 | GPU_IN_USE    = torch.cuda.is_available()  # whether using GPU
 35 | DIR_TRAIN_IMAGES   = '../dataset/train2017/'
 36 | DIR_TEST_IMAGES    = '../dataset/val2017/'
 37 | PATH_TRAIN_ANNFILE = '../dataset/annotations/instances_train2017.json'
 38 | PATH_TEST_ANNFILE  = '../dataset/annotations/instances_val2017.json'
 39 | PATH_MODEL_PARAMS  = './params/params_with_priori.pkl'
 40 | NUM_CATEGORIES     = 80
 41 | LOSS_OUTPUT_INTERVAL = 100
 42 | CROPSIZE_512 = 16
 43 | 
 44 | # ==================================================================
 45 | # Global Variables
 46 | # ==================================================================
 47 | # one iteration means one mini-batch finishs a forward-backward process
 48 | current_training_iteration = torch.tensor([1])
 49 | current_test_iteration     = torch.tensor([1])
 50 | loss_graph_window          = 'loss graph'
 51 | test_f1_graph_window       = 'test OF1 and CF1 graph'
 52 | evaluation_window          = 'six evaluation metrics'
 53 | #category_id_window         = 'category ids of prediction and ground-truth'
 54 | of1 = 0.
 55 | cf1 = 0.
 56 | 
 57 | # ==================================================================
 58 | # Parser Initialization
 59 | # ==================================================================
 60 | parser = argparse.ArgumentParser(description='Pytorch Implementation of ICCV2017_AttentionImageClass')
 61 | parser.add_argument('--lr',              default=LEARNING_RATE,     type=float, help='learning rate')
 62 | parser.add_argument('--epoch',           default=EPOCH,             type=int,   help='number of epochs')
 63 | parser.add_argument('--trainBatchSize',  default=BATCH_SIZE,        type=int,   help='training batch size')
 64 | parser.add_argument('--testBatchSize',   default=BATCH_SIZE,        type=int,   help='testing batch size')
 65 | parser.add_argument('--weightDecay',     default=WEIGHT_DECAY,      type=float, help='weight decay')
 66 | parser.add_argument('--pathModelParams', default=PATH_MODEL_PARAMS, type=str,   help='path of model parameters')
 67 | parser.add_argument('--saveModel',       default=True,              type=bool,  help='save model parameters')
 68 | parser.add_argument('--loadModel',       default=False,             type=bool,  help='load model parameters')
 69 | args = parser.parse_args()
 70 | 
 71 | 
 72 | # ==================================================================
 73 | # Prepare Dataset(training & test)
 74 | # ==================================================================
 75 | print('***** Prepare Data ******')
 76 | 
 77 | # transforms of training dataset 
 78 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
 79 |                                   std=[0.229, 0.224, 0.225])
 80 | train_transforms = transforms.Compose([
 81 |                      transforms.RandomHorizontalFlip(p=0.5), # default value is 0.5
 82 |                      transforms.Resize((N, N)),
 83 |                      transforms.ToTensor(),
 84 |                      normalize
 85 |                   ])
 86 | 
 87 | # transforms of test dataset
 88 | test_transforms = transforms.Compose([
 89 |                     transforms.Resize((N, N)), 
 90 |                     transforms.ToTensor(),
 91 |                     normalize
 92 |                   ]) 
 93 | 
 94 | train_dataset = torchvision.datasets.CocoDetection(root=DIR_TRAIN_IMAGES, annFile=PATH_TRAIN_ANNFILE, 
 95 |                                                    transform=train_transforms, target_transform=target_trans)
 96 | test_dataset  = torchvision.datasets.CocoDetection(root=DIR_TEST_IMAGES,  annFile=PATH_TEST_ANNFILE,
 97 |                                                    transform=test_transforms,  target_transform=target_trans)
 98 | train_loader  = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=args.trainBatchSize, shuffle=True,  num_workers=2)
 99 | test_loader   = torch.utils.data.DataLoader(dataset=test_dataset,  batch_size=args.testBatchSize,  shuffle=False, num_workers=2)
100 | print('Data Preparation : Finished')
101 | 
102 | 
103 | # ==================================================================
104 | # Prepare Model
105 | # ==================================================================
106 | print('\n***** Prepare Model *****')
107 | 
108 | vgg16 = torchvision.models.vgg16(pretrained=True)
109 | 
110 | for param in vgg16.features.parameters():
111 |     param.requires_grad=False
112 | 
113 | extract_features = vgg16.features
114 | 
115 | RMA = RMA_module(lstm_input_size=14, lstm_hidden_size=4096, zk_size=4096)
116 | if args.loadModel:
117 |     RMA.load_state_dict(torch.load(args.pathModelParams))
118 | 
119 | if GPU_IN_USE:
120 |     print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES'])
121 |     print('cuda: move all model parameters and buffers to the GPU')
122 |     extract_features.cuda()
123 |     RMA.cuda()
124 |     cudnn.benchmark = True
125 | 
126 | # Adam optimization
127 | optimizer = optim.Adam(RMA.parameters(), lr=args.lr, weight_decay=args.weightDecay, betas=MOMENTUM)  
128 | # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[75, 150], gamma=0.5)  # lr decay
129 | print('Model Preparation : Finished')
130 | 
131 | 
132 | # Train
133 | # ================================================================================
134 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
135 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
136 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
137 | # prediction: [
138 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
139 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
140 | #             ]
141 | # ================================================================================
142 | def train():
143 |     print('train:')
144 |     RMA.train()     # set the module in training  mode
145 |     train_loss = 0. # sum of train loss up to current batch
146 | 
147 |     global current_training_iteration
148 |     
149 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
150 |     sum_correct_prediction_label = torch.zeros(1, 80)
151 |     sum_ground_truth_label       = torch.zeros(1, 80)
152 |     
153 |     for batch_num, (data, target) in enumerate(train_loader):
154 |         if target.sum() == 0:
155 |             continue
156 |         target = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
157 |         data   = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
158 |         
159 |         if GPU_IN_USE:
160 |             data, target = data.cuda(), target.cuda() 
161 | 
162 |         # -----forward-----
163 |         optimizer.zero_grad()
164 |         f_I = extract_features(data)        
165 |         output, M = RMA(f_I)
166 |         # ---end forward---
167 |         
168 |         # ---calculate loss and backward---
169 |         loss = loss_function(output, target, M, add_constraint=True)
170 |         loss.backward()
171 |         optimizer.step()
172 |         # ----------end backward-----------
173 |         
174 |         train_loss   += loss
175 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
176 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
177 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
178 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
179 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
180 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
181 |         
182 |         #count the sum of label vector
183 |         sum_prediction_label         += prediction_label.sum(dim=0)
184 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
185 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
186 |         
187 |         #for i in range(0, target.size(0)):
188 |         #    print('-----------------')
189 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
190 |         #    print('prediction:   ', prediction[1][i])
191 |         #    print('-----------------')
192 |         
193 |         if batch_num % LOSS_OUTPUT_INTERVAL == 0:
194 |             # visualization: draw the train loss graph 
195 |             vis.line(
196 |                 X=current_training_iteration, 
197 |                 Y=torch.tensor([train_loss.data]) / (batch_num+1), 
198 |                 win=loss_graph_window,
199 |                 name='train loss',
200 |                 update=None if current_training_iteration == 1 else 'append',
201 |                 opts=dict(xlabel='iteration', ylabel='loss', showlegend=True)
202 |             )
203 |             print('loss %.3f (batch %d)' % (train_loss/(batch_num+1), batch_num+1))
204 |             current_training_iteration += LOSS_OUTPUT_INTERVAL
205 | 
206 |     # evaluation metrics
207 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
208 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
209 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
210 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
211 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
212 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
213 |    
214 |     return c_p, c_r, cf1, o_p, o_r, of1
215 | 
216 | 
217 | # Test
218 | # ================================================================================
219 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
220 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
221 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
222 | # prediction: [
223 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
224 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
225 | #             ]
226 | # ================================================================================
227 | def test():
228 |     print('test:')
229 |     RMA.eval()        # set the module in evaluation mode
230 |     test_loss    = 0. # sum of train loss up to current batch
231 | 
232 |     global current_test_iteration
233 |     
234 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
235 |     sum_correct_prediction_label = torch.zeros(1, 80)
236 |     sum_ground_truth_label       = torch.zeros(1, 80)
237 | 
238 |     for batch_num, (data, target) in enumerate(test_loader):
239 |         if target.sum() == 0:
240 |             continue
241 |         target = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
242 |         data   = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
243 | 
244 |         if GPU_IN_USE:
245 |             data, target = data.cuda(), target.cuda()  # set up GPU Tensor
246 | 
247 |         f_I = extract_features(data)   
248 |         # ten-crop
249 |         # f_I: batchsize*channel*inputSize*inputSize
250 |         # tencrop_results: 10*batchsize*channel*cropSize*cropSize
251 |         tencrop_results = tencrop(f_I, f_I.size(0), f_I.size(1), f_I.size(2), CROPSIZE_512)
252 |         RMA_outputs = torch.zeros(target.size())
253 |         RMA_losses = 0
254 |         tencrop_results = tencrop_results.cuda()
255 |         RMA_outputs = RMA_outputs.cuda()
256 | 
257 |         for i in range(10):
258 |             crop_RMA_output, crop_RMA_M = RMA(tencrop_results[i])
259 |             RMA_outputs += crop_RMA_output
260 |             RMA_losses  += loss_function(crop_RMA_output, target, crop_RMA_M, add_constraint=True)
261 | 
262 |         output = RMA_outputs * 0.1
263 |         loss = RMA_losses * 0.1     
264 | 
265 |         # output, M = RMA(f_I)
266 |         # loss = loss_function(output, target, M, add_constraint=True)
267 |         
268 |         test_loss    += loss
269 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
270 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
271 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
272 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
273 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
274 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
275 |         
276 |         #count the sum of label vector
277 |         sum_prediction_label         += prediction_label.sum(dim=0)
278 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
279 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
280 | 
281 |         #for i in range(0, target.size(0)):
282 |         #    print('-----------------')
283 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
284 |         #    print('prediction:   ', prediction_index[i] - 1)
285 |         #    print('-----------------')
286 |         # 
287 | 
288 |         if batch_num % LOSS_OUTPUT_INTERVAL == 0:
289 |             # visualization: draw the test loss graph
290 |             vis.line(
291 |                 X=current_test_iteration, 
292 |                 Y=torch.tensor([test_loss.data]) / (batch_num+1), 
293 |                 win=loss_graph_window,
294 |                 name='test loss',
295 |                 update=None if current_test_iteration == 1 else 'append',
296 |                 # update='insert' if current_test_iteration == 1 else 'append',
297 |                 opts=dict(showlegend=True),
298 |             )
299 |             print('loss %.3f (batch %d)' % (test_loss / (batch_num+1), batch_num+1))
300 |             current_test_iteration += LOSS_OUTPUT_INTERVAL
301 | 
302 |     # evaluation metrics
303 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
304 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
305 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
306 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
307 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
308 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
309 |    
310 |     return c_p, c_r, cf1, o_p, o_r, of1
311 | 
312 | 
313 | # ==================================================================
314 | # Save Model
315 | # ==================================================================
316 | def save():
317 |     torch.save(RMA.state_dict(), args.pathModelParams)
318 |     print('Checkpoint saved to {}'.format(args.pathModelParams))
319 | 
320 | 
321 | # ==================================================================
322 | # Main Loop
323 | # ==================================================================
324 | for current_epoch in range(1, args.epoch + 1):
325 |     print('\n===> epoch: %d/%d' % (current_epoch, args.epoch))
326 |     # train_cp, train_cr, train_cf1, train_op, train_or, train_of1 = train()
327 |     with torch.no_grad():
328 |         test_cp, test_cr, test_cf1, test_op, test_or, test_of1 = test()
329 |     
330 |     evaluation_metrics = '''
331 | <pre>
332 | ===> epoch: %d/%d<br/>
333 | -------------------------------------------------------------
334 | |    CP   |    CR   |   CF1   |    OP   |    OR   |   OF1   |
335 | -------------------------------------------------------------
336 | |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |
337 | -------------------------------------------------------------
338 | </pre>
339 | ''' % (current_epoch, args.epoch, test_cp, test_cr, test_cf1, test_op, test_or, test_of1)
340 |     
341 |     # visualization
342 |     vis.line(
343 |         X=torch.tensor([current_epoch]), 
344 |         Y=torch.tensor([test_cf1]),
345 |         name='test_CF1',
346 |         win=test_f1_graph_window,
347 |         update=None if current_epoch == 1 else 'append', 
348 |         opts=dict(xlabel='epoch', ylabel='F1', showlegend=True, title='Evaluation of Test (CF1 / OF1)')
349 |     )
350 |     vis.line(
351 |         X=torch.tensor([current_epoch]), 
352 |         Y=torch.tensor([test_of1]),
353 |         name='test_OF1',
354 |         win=test_f1_graph_window,
355 |         update='insert' if current_epoch == 1 else 'append', 
356 |         opts=dict(showlegend=True)
357 |     )
358 |     vis.text(
359 |         evaluation_metrics,
360 |         win=evaluation_window,
361 |         append=False if current_epoch == 1 else True
362 |     )
363 |     
364 |     if test_of1 > of1 and test_cf1 > cf1:
365 |         if args.saveModel:
366 |             save()
367 |         of1 = test_of1
368 |         cf1 = test_cf1
369 | 
370 |     if current_epoch == args.epoch:
371 |         print('===> BEST PERFORMANCE (OF1/CF1): %.3f / %.3f' % (of1, cf1))
372 | 
373 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from numpy import *
 4 | import torch.utils.data as data
 5 | from PIL import Image
 6 | import os
 7 | import os.path
 8 | import torch.nn.functional as F
 9 | 
10 | CLASS = np.array(['None', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'])
11 | 
12 | 
13 | def get_target_transform(target):
14 |     labelsmap = {}
15 |     target_transform = torch.zeros(80)
16 |     labels = open('coco-label.txt', 'r')
17 |     for line in labels:
18 |         ids = line.split(',')
19 |         labelsmap[int(ids[0])] = int(ids[1])
20 |     for obj in target:
21 |         if 'category_id' in obj:
22 |             catId = obj['category_id']
23 |             target_transform[labelsmap[catId] - 1] = 1
24 |     #print(target[0]['image_id'])
25 |     #print(target['image']['id']) 
26 |     return target_transform
27 | 
28 |  # inputs: batchsize*channel*inputSize*inputSize(tensor)
29 |  # batchSize = 8
30 |  # channel   =
31 |  # inputSize =
32 |  # cropSize  =
33 |  # return: 10*batchsize*channel*cropSize*cropSize(tensor)
34 | def tencrop(inputs, batchSize, channel, inputSize, cropSize):
35 |     crops = torch.zeros(10, batchSize, channel, cropSize, cropSize)
36 |     crops = crops.numpy()
37 |     edgestart = inputSize-cropSize;
38 |     midstart = int(floor(inputSize/2)-floor(cropSize/2))
39 |     midend = midstart+cropSize;
40 | 
41 |     crops[0] = inputs[:,:,0:cropSize,0:cropSize]
42 |     crops[1] = inputs[:,:,0:cropSize,edgestart:inputSize]
43 |     crops[2] = inputs[:,:,edgestart:inputSize,0:cropSize]
44 |     crops[3] = inputs[:,:,edgestart:inputSize,edgestart:inputSize]
45 |     crops[4] = inputs[:,:,midstart:midend,midstart:midend]
46 |     crops[5] = crops[0][:,:,:,::-1]
47 |     crops[6] = crops[1][:,:,:,::-1]
48 |     crops[7] = crops[2][:,:,:,::-1]
49 |     crops[8] = crops[3][:,:,:,::-1]
50 |     crops[9] = crops[4][:,:,:,::-1]
51 |     crops = torch.from_numpy(crops)
52 |     return crops
53 | 
54 | 
55 | # input: 'scores' tensor[K, batch_size, num_categories]
56 | # return : 'confidence' tensor[K, batch_size]
57 | #          'className'  numpy.ndarray[K, batch_size]
58 | def getPredictedInfo(scores): 
59 |     confidence, category_id = torch.max(F.softmax(scores, dim=2), dim=2)
60 |     #print(category_id)
61 |     className = CLASS[category_id+1]
62 |     return confidence, className
63 | 
64 | 
65 | def id2label(category_id):
66 |     return [CLASS[i.index_select(0, i.nonzero().view(-1))] for i in category_id]
67 | 
68 | 


--------------------------------------------------------------------------------
/visualize.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.optim as optim
  3 | import torch.utils.data
  4 | import torch.backends.cudnn as cudnn
  5 | import torchvision
  6 | from torchvision import transforms as transforms
  7 | import numpy as np
  8 | import visdom
  9 | import torch.nn.functional as F
 10 | from CocoDetection import CocoDetection
 11 | from visualizeImg import *
 12 | #from PIL import Image
 13 | #import torch
 14 | 
 15 | import argparse
 16 | import os
 17 | 
 18 | from models.RMA_module_with_priori import RMA_module
 19 | from models.loss_with_priori import loss_function
 20 | from utils import get_target_transform as target_trans
 21 | 
 22 | 
 23 | # GPU setting
 24 | os.environ.setdefault("CUDA_VISIBLE_DEVICES", "2")
 25 | 
 26 | 
 27 | # ==================================================================
 28 | # Constants
 29 | # ==================================================================
 30 | EPOCH         = 45            # number of times for each run-through
 31 | BATCH_SIZE    = 16            # number of images for each epoch
 32 | N             = 512           # size of input images (512 or 640)
 33 | TOPK          = 3             # top k highest-ranked labels  
 34 | GPU_IN_USE    = torch.cuda.is_available()  # whether using GPU
 35 | DIR_TEST_IMAGES    = '../dataset/val2017/'
 36 | PATH_TEST_ANNFILE  = '../dataset/annotations/instances_val2017.json'
 37 | PATH_MODEL_PARAMS  = './params/params_with_priori.pkl'
 38 | NUM_CATEGORIES     = 80
 39 | OUTPUT_INTERVAL    = 100
 40 | 
 41 | 
 42 | # ==================================================================
 43 | # Parser Initialization
 44 | # ==================================================================
 45 | parser = argparse.ArgumentParser(description='Pytorch Implementation of ICCV2017_AttentionImageClass')
 46 | parser.add_argument('--testBatchSize',   default=BATCH_SIZE,        type=int,   help='testing batch size')
 47 | parser.add_argument('--pathModelParams', default=PATH_MODEL_PARAMS, type=str,   help='path of model parameters')
 48 | parser.add_argument('--loadModel',       default=True,              type=bool,  help='load model parameters')
 49 | args = parser.parse_args()
 50 | 
 51 | 
 52 | # ==================================================================
 53 | # Prepare Dataset(training & test)
 54 | # ==================================================================
 55 | print('***** Prepare Data ******')
 56 | 
 57 | # transforms of test dataset
 58 | #normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
 59 | #                                  std=[0.229, 0.224, 0.225])
 60 | test_transforms = transforms.Compose([
 61 |                     transforms.Resize((N, N)), 
 62 |                     transforms.ToTensor(),
 63 |                   ]) 
 64 | 
 65 | test_dataset  = CocoDetection(root=DIR_TEST_IMAGES,  annFile=PATH_TEST_ANNFILE,
 66 |                               transform=test_transforms,  target_transform=target_trans)
 67 | test_loader   = torch.utils.data.DataLoader(dataset=test_dataset,  batch_size=args.testBatchSize,  shuffle=False, num_workers=2)
 68 | print('Data Preparation : Finished')
 69 | 
 70 | # ==================================================================
 71 | # Prepare Model
 72 | # ==================================================================
 73 | print('\n***** Prepare Model *****')
 74 | 
 75 | vgg16 = torchvision.models.vgg16(pretrained=True)
 76 | extract_features = vgg16.features
 77 | RMA = RMA_module(lstm_input_size=14, lstm_hidden_size=4096, zk_size=4096)
 78 | if args.loadModel:
 79 |     RMA.load_state_dict(torch.load(args.pathModelParams))
 80 | 
 81 | if GPU_IN_USE:
 82 |     print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES'])
 83 |     print('cuda: move all model parameters and buffers to the GPU')
 84 |     extract_features.cuda()
 85 |     RMA.cuda()
 86 |     cudnn.benchmark = True
 87 | 
 88 | print('Model Preparation : Finished')
 89 | 
 90 | 
 91 | # Test
 92 | # ================================================================================
 93 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
 94 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
 95 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
 96 | # prediction: [
 97 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
 98 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
 99 | #             ]
100 | # ================================================================================
101 | def test():
102 |     print('test:')
103 |     #RMA.eval()        # set the module in evaluation mode
104 | 
105 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
106 |     sum_correct_prediction_label = torch.zeros(1, 80)
107 |     sum_ground_truth_label       = torch.zeros(1, 80)
108 | 
109 |     for batch_num, (data, target, original_imgs) in enumerate(test_loader):
110 |         if target.sum() == 0:
111 |             continue
112 |         target        = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
113 |         data          = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
114 |         original_imgs = original_imgs.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
115 |         
116 |         #print('original_imgs ', original_imgs.size())
117 | 
118 |         if GPU_IN_USE:
119 |             data, target = data.cuda(), target.cuda()  # set up GPU Tensor
120 | 
121 |         f_I = extract_features(data)        
122 |         output, M, scores = RMA(f_I, return_whole_scores=True)
123 |         
124 |         #total_thetas.append(M)
125 |         #total_scores.append(scores)
126 |         
127 |         #visualize_attentional_regions(original_imgs, M[1:, :, :, :], scores)
128 |         visualize_attentional_regions(original_imgs, M, scores)
129 |         
130 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
131 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
132 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
133 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
134 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
135 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
136 |         
137 |         #count the sum of label vector
138 |         sum_prediction_label         += prediction_label.sum(dim=0)
139 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
140 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
141 |         
142 |         #for i in range(0, target.size(0)):
143 |         #    print('-----------------')
144 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
145 |         #    print('prediction:   ', prediction_index[i] - 1)
146 |         #    print('-----------------')
147 |         
148 |         if batch_num % OUTPUT_INTERVAL == 0:
149 |             print(batch_num)
150 |             #print('loss %.3f (batch %d)' % (test_loss / (batch_num+1), batch_num+1))
151 |     
152 |     #evaluation metrics
153 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
154 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
155 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
156 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
157 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
158 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
159 |    
160 |     print('-------------------------------------------------------------')
161 |     print('|    CP   |    CR   |    CF1  |    OP   |    OR   |    OF1  |')
162 |     print('-------------------------------------------------------------')
163 |     print('|  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |' % (c_p, c_r, cf1, o_p, o_r, of1))
164 |     print('-------------------------------------------------------------')
165 |     
166 | 
167 | 
168 | # ==================================================================
169 | # Save Parameters of Test
170 | # ==================================================================
171 | #def save():
172 | #    torch.save(RMA.state_dict(), args.pathModelParams)
173 | #    print('Checkpoint saved to {}'.format(args.pathModelParams))
174 | 
175 | 
176 | # ==================================================================
177 | # Main
178 | # ==================================================================
179 | with torch.no_grad():
180 |     test()
181 | 
182 | 


--------------------------------------------------------------------------------
/visualizeImg.py:
--------------------------------------------------------------------------------
  1 | from PIL import Image, ImageDraw, ImageFont, ImageFilter
  2 | import matplotlib.pyplot as plt
  3 | import matplotlib
  4 | import numpy
  5 | import random
  6 | import torch
  7 | import os
  8 | from utils import *
  9 | from torchvision import transforms as transforms
 10 | 
 11 | RECTANGLE_SIZE = 4
 12 | imgId = 0
 13 | 
 14 | # random color
 15 | def randomColor():
 16 |     return (random.randint(64, 255), random.randint(64, 255), random.randint(64, 255))
 17 | 
 18 | # input: numpy.ndarray
 19 | # topleft_Corner: (x,y)
 20 | # width: rectangle width
 21 | # height: rectangle height
 22 | # size: line size
 23 | def drawRectangle(input, x, y, width, height, size, k, className, confidence):
 24 | 	# numpy->PIL
 25 | 	inputImg = Image.fromarray(numpy.uint8(input))
 26 | 	draw = ImageDraw.Draw(inputImg)
 27 | 	color  = randomColor()
 28 | 	font = ImageFont.truetype('Arial.ttf', 24)
 29 | 	for i in range(1, size + 1):
 30 | 		draw.rectangle((x + (size - i), y + (size - i), x + width + i, y + height + i), outline=color)
 31 | 		draw.text((x+size+6,y), k, font = font, fill = color)
 32 | 		draw.text((x+size+30,y), className, font = font, fill = color)
 33 | 		draw.text((x+len(className)*14+36,y), confidence, font = font, fill = color)
 34 | 	return inputImg
 35 | 
 36 | 
 37 | # M: k*batchsize*2*3
 38 | # sourceCoordinate: k*batchsize*2*2
 39 | # return: topleft, bottomright corners
 40 | def getSourceCoordinate(M):
 41 | 	target = torch.tensor([-1.,1.,1.,-1.,1.,1.])
 42 | 	target = target.view(3,2)
 43 | 	sourceCoordinate = torch.matmul(M,target)
 44 | 	if imgId == 0:
 45 | 		for batch_index in range(M.size(1)):
 46 | 			print("img", batch_index,"-----------")
 47 | 			for k_index in range(M.size(0)):
 48 | 				print("k: ", k_index)
 49 | 				print(M[k_index, batch_index, :, :])
 50 | 	x0 = sourceCoordinate[:,:,0,0]*256.+256.
 51 | 	x1 = sourceCoordinate[:,:,0,1]*256.+256.
 52 | 	y0 = -(sourceCoordinate[:,:,1,0]*256.-256.)
 53 | 	y1 = -(sourceCoordinate[:,:,1,1]*256.-256.)
 54 | 	sourceCoordinate[:,:,0,0] = torch.min(x0, x1)
 55 | 	sourceCoordinate[:,:,0,1] = torch.max(x0, x1)
 56 | 	sourceCoordinate[:,:,1,0] = torch.min(y0, y1)
 57 | 	sourceCoordinate[:,:,1,1] = torch.max(y0, y1)
 58 | 	return sourceCoordinate
 59 | 
 60 | # sourceCoordinate: k*batchsize*2*2
 61 | # rectangleInfo: k*batchsize*4
 62 | # return:k*batchsize*[x,y,width,height]
 63 | def getPredictedRectangle(M):
 64 | 	coordinate = getSourceCoordinate(M)
 65 | 	rectangleInfo = torch.zeros(coordinate.size(0), coordinate.size(1), 4)
 66 | 	rectangleInfo[:,:,0] = coordinate[:,:,0,0]
 67 | 	rectangleInfo[:,:,1] = coordinate[:,:,1,0]
 68 | 	rectangleInfo[:,:,2] = coordinate[:,:,0,1]-coordinate[:,:,0,0]
 69 | 	rectangleInfo[:,:,3] = coordinate[:,:,1,1]-coordinate[:,:,1,0]
 70 | 	return rectangleInfo
 71 | 
 72 | 
 73 | def drawPictures(original_img, rectangle, className, confidence):
 74 | 	for batch_index in range(shape(className)[1]):
 75 | 		current_picture = transforms.ToPILImage()(original_img[batch_index]).convert('RGB')
 76 | 		for iterator_index in range(shape(className)[0]):
 77 | 			k          = str(iterator_index)
 78 | 			x          = rectangle[iterator_index, batch_index, 0]
 79 | 			y          = rectangle[iterator_index, batch_index, 1]
 80 | 			width      = rectangle[iterator_index, batch_index, 2]
 81 | 			height     = rectangle[iterator_index, batch_index, 3]
 82 | 			classname  = className[iterator_index, batch_index]
 83 | 			conf_value = str(round(confidence[iterator_index, batch_index].item(),3))
 84 | 			current_picture = drawRectangle(current_picture, x, y, width, height, RECTANGLE_SIZE, k, classname, conf_value)
 85 | 		global imgId
 86 | 		current_picture.save("./result_visual/visualize_imgs_with_priori/"+str(imgId)+".jpg")
 87 | 		imgId += 1
 88 | 	return
 89 | 
 90 | 
 91 | def visualize_attentional_regions(original_img, M, scores):
 92 | 	# rectangle
 93 | 	rectangle = getPredictedRectangle(M)
 94 | 	# information
 95 | 	confidence, className = getPredictedInfo(scores)
 96 | 	drawPictures(original_img, rectangle, className, confidence)
 97 | 	return
 98 | 
 99 | 
100 | 
101 | 


--------------------------------------------------------------------------------
/visualizeImg_test0712.py:
--------------------------------------------------------------------------------
  1 | from PIL import Image, ImageDraw, ImageFont, ImageFilter
  2 | import matplotlib.pyplot as plt
  3 | import matplotlib
  4 | import numpy
  5 | import random
  6 | import torch
  7 | import os
  8 | from utils import *
  9 | from torchvision import transforms as transforms
 10 | 
 11 | RECTANGLE_SIZE = 4
 12 | imgId = 0
 13 | imgTextId = 0
 14 | 
 15 | # random color
 16 | def randomColor():
 17 |     return (random.randint(64, 255), random.randint(64, 255), random.randint(64, 255))
 18 | 
 19 | # input: numpy.ndarray
 20 | # topleft_Corner: (x,y)
 21 | # width: rectangle width
 22 | # height: rectangle height
 23 | # size: line size
 24 | def drawRectangle(input, x, y, width, height, size, k, className, confidence):
 25 | 	# numpy->PIL
 26 | 	inputImg = Image.fromarray(numpy.uint8(input))
 27 | 	draw = ImageDraw.Draw(inputImg)
 28 | 	color  = randomColor()
 29 | 	font = ImageFont.truetype('Arial.ttf', 24)
 30 | 	for i in range(1, size + 1):
 31 | 		draw.rectangle((x + (size - i), y + (size - i), x + width + i, y + height + i), outline=color)
 32 | 		draw.text((x+size+6,y), k, font = font, fill = color)
 33 | 		draw.text((x+size+30,y), className, font = font, fill = color)
 34 | 		draw.text((x+len(className)*14+36,y), confidence, font = font, fill = color)
 35 | 	return inputImg
 36 | 
 37 | 
 38 | # M: k*batchsize*2*3
 39 | # sourceCoordinate: k*batchsize*2*2
 40 | # return: topleft, bottomright corners
 41 | def getSourceCoordinate(M):
 42 | 	target = torch.tensor([-1.,1.,1.,-1.,1.,1.])
 43 | 	target = target.view(3,2)
 44 | 	sourceCoordinate = torch.matmul(M,target)
 45 | 	if imgId == 0:
 46 | 		for batch_index in range(M.size(1)):
 47 | 			print("img", batch_index,"-----------")
 48 | 			for k_index in range(M.size(0)):
 49 | 				print("k: ", k_index)
 50 | 				print(M[k_index, batch_index, :, :])
 51 | 	x0 = sourceCoordinate[:,:,0,0]*256.+256.
 52 | 	x1 = sourceCoordinate[:,:,0,1]*256.+256.
 53 | 	y0 = -(sourceCoordinate[:,:,1,0]*256.-256.)
 54 | 	y1 = -(sourceCoordinate[:,:,1,1]*256.-256.)
 55 | 	sourceCoordinate[:,:,0,0] = torch.min(x0, x1)
 56 | 	sourceCoordinate[:,:,0,1] = torch.max(x0, x1)
 57 | 	sourceCoordinate[:,:,1,0] = torch.min(y0, y1)
 58 | 	sourceCoordinate[:,:,1,1] = torch.max(y0, y1)
 59 | 	return sourceCoordinate
 60 | 
 61 | # sourceCoordinate: k*batchsize*2*2
 62 | # rectangleInfo: k*batchsize*4
 63 | # return:k*batchsize*[x,y,width,height]
 64 | def getPredictedRectangle(M):
 65 | 	coordinate = getSourceCoordinate(M)
 66 | 	rectangleInfo = torch.zeros(coordinate.size(0), coordinate.size(1), 4)
 67 | 	rectangleInfo[:,:,0] = coordinate[:,:,0,0]
 68 | 	rectangleInfo[:,:,1] = coordinate[:,:,1,0]
 69 | 	rectangleInfo[:,:,2] = coordinate[:,:,0,1]-coordinate[:,:,0,0]
 70 | 	rectangleInfo[:,:,3] = coordinate[:,:,1,1]-coordinate[:,:,1,0]
 71 | 	return rectangleInfo
 72 | 
 73 | 
 74 | def drawPictures(original_img, rectangle, className, confidence):
 75 | 	for batch_index in range(shape(className)[1]):
 76 | 		current_picture = transforms.ToPILImage()(original_img[batch_index]).convert('RGB')
 77 | 		for iterator_index in range(shape(className)[0]):
 78 | 			k          = str(iterator_index)
 79 | 			x          = rectangle[iterator_index, batch_index, 0]
 80 | 			y          = rectangle[iterator_index, batch_index, 1]
 81 | 			width      = rectangle[iterator_index, batch_index, 2]
 82 | 			height     = rectangle[iterator_index, batch_index, 3]
 83 | 			classname  = className[iterator_index, batch_index]
 84 | 			conf_value = str(round(confidence[iterator_index, batch_index].item(),3))
 85 | 			current_picture = drawRectangle(current_picture, x, y, width, height, RECTANGLE_SIZE, k, classname, conf_value)
 86 | 		global imgId
 87 | 		current_picture.save("./result_visual/visualize_imgs_test0712/"+str(imgId)+".jpg")
 88 | 		imgId += 1
 89 | 	return
 90 | 
 91 | 
 92 | def visualize_attentional_regions(original_img, M, scores):
 93 | 	# rectangle
 94 | 	rectangle = getPredictedRectangle(M)
 95 | 	# information
 96 | 	confidence, className = getPredictedInfo(scores)
 97 | 	drawPictures(original_img, rectangle, className, confidence)
 98 | 	return
 99 | 
100 | 
101 | # write_prediction_target
102 | # ================================================================================
103 | # input:       numpy (one picture)
104 | # prediction:  list  (prediction classname)
105 | # target:      list  (target classname)
106 | # return       PIL
107 | # ================================================================================
108 | def write_prediction_target(input, prediction, target):
109 | 	# numpy->PIL
110 | 	inputImg = Image.fromarray(numpy.uint8(input))
111 | 	draw     = ImageDraw.Draw(inputImg)
112 | 	color    = randomColor()
113 | 	font     = ImageFont.truetype('Arial.ttf', 24)
114 | 	# initialize x coordinates
115 | 	prediction_x = 10
116 | 	target_x     = 10
117 | 	# prediction
118 | 	for p_index in range(len(prediction)):
119 | 		draw.text((prediction_x,10), prediction[p_index], font = font, fill = color)
120 | 		prediction_x = p_index*10 + prediction_x + len(prediction[p_index])*14
121 | 	# target
122 | 	for t_index in range(len(target)):
123 | 		draw.text((target_x,40), target[t_index], font = font, fill = color)
124 | 		target_x = t_index*10 + target_x + len(target[t_index])*14
125 | 
126 | 	return inputImg
127 | 
128 | 
129 | 
130 | def visualize_prediction(original_imgs, prediction_list, target_list):
131 | 	for batch_index in range(original_imgs.shape[0]):
132 | 		current_picture = transforms.ToPILImage()(original_img[batch_index]).convert('RGB')
133 | 		current_picture = write_prediction_target(current_picture,prediction_list[batch_index],target_list[batch_index])
134 | 		global imgTextId
135 | 		current_picture.save("./result_visual/visualize_imgs_test0712/"+str(imgTextId)+".jpg")
136 | 		imgTextId += 1
137 | 	return


--------------------------------------------------------------------------------
/visualize_test0712.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.optim as optim
  3 | import torch.utils.data
  4 | import torch.backends.cudnn as cudnn
  5 | import torchvision
  6 | from torchvision import transforms as transforms
  7 | import numpy as np
  8 | import visdom
  9 | import torch.nn.functional as F
 10 | from CocoDetection import CocoDetection
 11 | # from visualizeImg import *
 12 | from visualizeImg_test0712 import *
 13 | #from PIL import Image
 14 | #import torch
 15 | 
 16 | import argparse
 17 | import os
 18 | 
 19 | from models.RMA_module_with_priori import RMA_module
 20 | from models.loss_with_priori import loss_function
 21 | from utils import get_target_transform as target_trans
 22 | 
 23 | 
 24 | # GPU setting
 25 | os.environ.setdefault("CUDA_VISIBLE_DEVICES", "3")
 26 | 
 27 | 
 28 | # ==================================================================
 29 | # Constants
 30 | # ==================================================================
 31 | EPOCH         = 45            # number of times for each run-through
 32 | BATCH_SIZE    = 16             # number of images for each epoch
 33 | N             = 512           # size of input images (512 or 640)
 34 | TOPK          = 3             # top k highest-ranked labels  
 35 | GPU_IN_USE    = torch.cuda.is_available()  # whether using GPU
 36 | DIR_TEST_IMAGES    = '../dataset/val2017/'
 37 | PATH_TEST_ANNFILE  = '../dataset/annotations/instances_val2017.json'
 38 | PATH_MODEL_PARAMS  = './params/params_with_priori.pkl'
 39 | NUM_CATEGORIES     = 80
 40 | OUTPUT_INTERVAL    = 100
 41 | 
 42 | 
 43 | # ==================================================================
 44 | # Parser Initialization
 45 | # ==================================================================
 46 | parser = argparse.ArgumentParser(description='Pytorch Implementation of ICCV2017_AttentionImageClass')
 47 | parser.add_argument('--testBatchSize',   default=BATCH_SIZE,        type=int,   help='testing batch size')
 48 | parser.add_argument('--pathModelParams', default=PATH_MODEL_PARAMS, type=str,   help='path of model parameters')
 49 | parser.add_argument('--loadModel',       default=True,              type=bool,  help='load model parameters')
 50 | args = parser.parse_args()
 51 | 
 52 | 
 53 | # ==================================================================
 54 | # Prepare Dataset(training & test)
 55 | # ==================================================================
 56 | print('***** Prepare Data ******')
 57 | 
 58 | # transforms of test dataset
 59 | #normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
 60 | #                                  std=[0.229, 0.224, 0.225])
 61 | test_transforms = transforms.Compose([
 62 |                     transforms.Resize((N, N)), 
 63 |                     transforms.ToTensor(),
 64 |                   ]) 
 65 | 
 66 | test_dataset  = CocoDetection(root=DIR_TEST_IMAGES,  annFile=PATH_TEST_ANNFILE,
 67 |                               transform=test_transforms,  target_transform=target_trans)
 68 | test_loader   = torch.utils.data.DataLoader(dataset=test_dataset,  batch_size=args.testBatchSize,  shuffle=False, num_workers=2)
 69 | print('Data Preparation : Finished')
 70 | 
 71 | # ==================================================================
 72 | # Prepare Model
 73 | # ==================================================================
 74 | print('\n***** Prepare Model *****')
 75 | 
 76 | vgg16 = torchvision.models.vgg16(pretrained=True)
 77 | extract_features = vgg16.features
 78 | RMA = RMA_module(lstm_input_size=14, lstm_hidden_size=4096, zk_size=4096)
 79 | if args.loadModel:
 80 |     RMA.load_state_dict(torch.load(args.pathModelParams))
 81 | 
 82 | if GPU_IN_USE:
 83 |     print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES'])
 84 |     print('cuda: move all model parameters and buffers to the GPU')
 85 |     extract_features.cuda()
 86 |     RMA.cuda()
 87 |     cudnn.benchmark = True
 88 | 
 89 | print('Model Preparation : Finished')
 90 | 
 91 | 
 92 | # Test
 93 | # ================================================================================
 94 | # data:        [torch.cuda.FloatTensor of size [batch_size, 3, N, N] N=512/640]
 95 | # target:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
 96 | # output:      [torch.cuda.FloatTensor of size [batch_size, num_categories]]
 97 | # prediction: [
 98 | #              [torch.cuda.FloatTensor of size [batch_size, TOPK] (TOPK)],
 99 | #              [torch.cuda.LongTensor  of size [batch_size, TOPK] (index of TOPK)]
100 | #             ]
101 | # ================================================================================
102 | def test():
103 |     print('test:')
104 |     #RMA.eval()        # set the module in evaluation mode
105 | 
106 |     sum_prediction_label         = torch.zeros(1, 80) + 1e-6
107 |     sum_correct_prediction_label = torch.zeros(1, 80)
108 |     sum_ground_truth_label       = torch.zeros(1, 80)
109 | 
110 |     for batch_num, (data, target, original_imgs) in enumerate(test_loader):
111 |         if target.sum() == 0:
112 |             continue
113 |         target        = target.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
114 |         data          = data.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
115 |         original_imgs = original_imgs.index_select(0, torch.nonzero(target.sum(dim=1)).view(-1))
116 |         
117 |         #print('original_imgs ', original_imgs.size())
118 | 
119 |         if GPU_IN_USE:
120 |             data, target = data.cuda(), target.cuda()  # set up GPU Tensor
121 | 
122 |         f_I = extract_features(data)        
123 |         output, M, scores = RMA(f_I, return_whole_scores=True)
124 |         
125 |         #total_thetas.append(M)
126 |         #total_scores.append(scores)
127 |         
128 |         #visualize_attentional_regions(original_imgs, M[1:, :, :, :], scores)
129 |         #visualize_attentional_regions(original_imgs, M, scores)
130 |         
131 |         prediction    = torch.topk(F.softmax(output, dim=1), 10, dim=1) 
132 |         filter        = prediction[0].eq(0.1) + prediction[0].gt(0.1)
133 |         prediction_index         = torch.mul(prediction[1]+1, filter.type(torch.cuda.LongTensor))
134 |         extend_eye_mat           = torch.cat((torch.zeros(1, 80), torch.eye(80)), 0)
135 |         prediction_label         = extend_eye_mat[prediction_index.view(-1)].view(-1, 10, 80).sum(dim=1)
136 |         correct_prediction_label = (target.cpu().byte() & prediction_label.byte()).type(torch.FloatTensor)
137 |         
138 |         #count the sum of label vector
139 |         sum_prediction_label         += prediction_label.sum(dim=0)
140 |         sum_correct_prediction_label += correct_prediction_label.sum(dim=0)
141 |         sum_ground_truth_label       += target.cpu().sum(dim=0)
142 | 
143 |         # visualize 未测试
144 |         prediction_list = id2label(prediction_index)[0].tolist()
145 |         target_list = id2label(target.nonzero()+1)[0].tolist()
146 |         print("prediction_list: ", prediction_list)
147 |         print("target_list: ", target_list)
148 |         # visualize_prediction(original_imgs, prediction_list, target_list)
149 | 
150 |         #for i in range(0, target.size(0)):
151 |         #    print('-----------------')
152 |         #    print('ground-truth: ', target[i].nonzero().view(-1))
153 |         #    print('prediction:   ', prediction_index[i] - 1)
154 |         #    print('-----------------')
155 |         
156 |         if batch_num % OUTPUT_INTERVAL == 0:
157 |             print(batch_num)
158 |             #print('loss %.3f (batch %d)' % (test_loss / (batch_num+1), batch_num+1))
159 |     
160 |     #evaluation metrics
161 |     o_p = torch.div(sum_correct_prediction_label.sum(), sum_prediction_label.sum())
162 |     o_r = torch.div(sum_correct_prediction_label.sum(), sum_ground_truth_label.sum())
163 |     of1 = torch.div(2 * o_p * o_r, o_p + o_r)
164 |     c_p = (torch.div(sum_correct_prediction_label, sum_prediction_label)).sum() / NUM_CATEGORIES
165 |     c_r = (torch.div(sum_correct_prediction_label, sum_ground_truth_label)).sum() / NUM_CATEGORIES
166 |     cf1 = torch.div(2 * c_p * c_r, c_p + c_r)
167 |    
168 |     print('-------------------------------------------------------------')
169 |     print('|    CP   |    CR   |    CF1  |    OP   |    OR   |    OF1  |')
170 |     print('-------------------------------------------------------------')
171 |     print('|  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |  %.3f  |' % (c_p, c_r, cf1, o_p, o_r, of1))
172 |     print('-------------------------------------------------------------')
173 |     
174 | 
175 | 
176 | # ==================================================================
177 | # Save Parameters of Test
178 | # ==================================================================
179 | #def save():
180 | #    torch.save(RMA.state_dict(), args.pathModelParams)
181 | #    print('Checkpoint saved to {}'.format(args.pathModelParams))
182 | 
183 | 
184 | # ==================================================================
185 | # Main
186 | # ==================================================================
187 | with torch.no_grad():
188 |     test()
189 | 
190 | 


--------------------------------------------------------------------------------