├── .idea └── vcs.xml ├── Demo.ipynb ├── README.md ├── __pycache__ └── parser.cpython-35.pyc ├── checkpoint └── ReadMe ├── demo_attr.png ├── lib ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-35.pyc │ └── nms_wrapper.cpython-35.pyc ├── bilinear_pooling │ ├── CompactBilinearPooling.py │ └── __pycache__ │ │ └── CompactBilinearPooling.cpython-35.pyc ├── configure │ ├── __pycache__ │ │ ├── config.cpython-35.pyc │ │ └── net_util.cpython-35.pyc │ ├── config.py │ └── net_util.py ├── dataset │ ├── __pycache__ │ │ └── coco_dataset.cpython-35.pyc │ └── coco_dataset.py ├── make.sh ├── nms │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── pth_nms.cpython-35.pyc │ ├── _ext │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ └── nms │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ │ └── _nms.so │ ├── build.py │ ├── pth_nms.py │ ├── pth_nms.pyc │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ ├── nms_kernel.cu.o │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h ├── nms_wrapper.py ├── nms_wrapper.pyc ├── pytorch_fft │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-35.pyc │ ├── _ext │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ └── th_fft │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ │ └── _th_fft.so │ ├── fft │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── autograd.cpython-35.pyc │ │ │ └── fft.cpython-35.pyc │ │ ├── autograd.py │ │ └── fft.py │ └── src │ │ ├── generic │ │ ├── helpers.c │ │ ├── th_fft_cuda.c │ │ ├── th_fft_cuda.h │ │ ├── th_irfft_cuda.c │ │ └── th_rfft_cuda.c │ │ ├── th_fft_cuda.c │ │ ├── th_fft_cuda.h │ │ ├── th_fft_generate_double.h │ │ ├── th_fft_generate_float.h │ │ └── th_fft_generate_helpers.h ├── resnet │ ├── __pycache__ │ │ └── resnet.cpython-35.pyc │ └── resnet.py └── roi_align │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ └── crop_and_resize.cpython-35.pyc │ ├── _ext │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ └── __init__.cpython-35.pyc │ └── crop_and_resize │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ └── __init__.cpython-35.pyc │ │ └── _crop_and_resize.so │ ├── build.py │ ├── crop_and_resize.py │ ├── crop_and_resize.pyc │ ├── roi_align.py │ ├── roi_align.pyc │ └── src │ ├── crop_and_resize.c │ ├── crop_and_resize.h │ ├── crop_and_resize_gpu.c │ ├── crop_and_resize_gpu.h │ └── cuda │ ├── crop_and_resize_kernel.cu │ ├── crop_and_resize_kernel.cu.o │ └── crop_and_resize_kernel.h ├── models ├── Model7.py └── __pycache__ │ └── Model7.cpython-35.pyc ├── others ├── README.md ├── coco_person_list.txt ├── dictionary_emb.pkl └── low-level-attr.txt ├── parser.py ├── results ├── architecture.png ├── test.log ├── train.log └── train_batch.log ├── runs ├── Oct05_13-58-18_apg395-001 │ └── events.out.tfevents.1538773098.apg395-001 ├── Oct05_14-08-13_apg395-001 │ └── events.out.tfevents.1538773693.apg395-001 ├── Oct05_14-08-27_apg395-001 │ └── events.out.tfevents.1538773707.apg395-001 ├── Oct05_14-08-58_apg395-001 │ └── events.out.tfevents.1538773738.apg395-001 ├── Oct05_14-17-30_apg395-001 │ └── events.out.tfevents.1538774250.apg395-001 ├── Oct05_14-17-42_apg395-001 │ └── events.out.tfevents.1538774262.apg395-001 ├── Oct05_14-18-03_apg395-001 │ └── events.out.tfevents.1538774283.apg395-001 ├── 
Oct05_14-18-55_apg395-001 │ └── events.out.tfevents.1538774335.apg395-001 └── Oct05_14-19-46_apg395-001 │ └── events.out.tfevents.1538774386.apg395-001 └── train_attr_attention_embedding.py /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visual Cues Grounding Through Weak Supervision 2 | 3 | PyTorch implementation of **[Modularized Textual Grounding for Counterfactual Resilience 4 | ](http://openaccess.thecvf.com/content_CVPR_2019/papers/Fang_Modularized_Textual_Grounding_for_Counterfactual_Resilience_CVPR_2019_paper.pdf)**, CVPR 2019. 5 | 6 | Qualitative grounding results can be found on our **[webpage](http://www.public.asu.edu/~zfang29/textual_grounding_cvpr2019/website.html)**. 7 | 8 | ## Introduction 9 | We propose a cross-modal grounding method trained through weak supervision. 10 | 11 | ![architecture](./results/architecture.png "Ground Visual Cue Through a Top-down Guided Design.") 12 | 13 | A demonstration of how to load the model and ground attributes can be found in Demo.ipynb. 14 | 15 | Image --> 'Boy' Attribute --> 'Lady' Attribute 16 |

17 | ![demo](./demo_attr.png) 18 |

19 | 20 | ## Requirements 21 | 1. PyTorch 0.4. 22 | 2. Python 3.6. 23 | 3. FFT package (pytorch_fft, included under lib/pytorch_fft). 24 | 25 | ## Dataset 26 | Weakly trained on both COCO and Flickr 30k. 27 | 28 | ## Usage 29 | Training script for attribute grounding:
train_attr_attention_embedding.py
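For example, once the dataset paths in parser.py point to your local COCO data (see below), training can be launched from the repository root; a minimal invocation, with all further options left at the defaults defined in parser.py:

```bash
python3 train_attr_attention_embedding.py
```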
30 | 31 | Attention model for attribute grounding; it is built on a ResNet-50 network pre-trained for person gender/age classification: 32 |
/models/Model7.py
33 | 34 | `lib/` contains all the necessary dependencies for our framework; it consists of: 35 | 36 | 43 | 44 | In order to re-train our framework, several things may need to be modified: 45 |
 parser.py 
46 | 47 | In parser.py, the img_path and annotations entries need to be changed to point to your local coco_2017_train image directory and annotation file: 48 |
 /path/to/your/local/coco17/image path/annotations/ 
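Purely as an illustration (the actual argument names and defaults live in parser.py; the paths below are placeholders), the edited entries might look like:

```python
# Hypothetical sketch of the relevant parser.py settings -- adjust names and
# paths to match the argument definitions in your checkout.
parser.add_argument('--img_path', default='/path/to/coco17/train2017/')
parser.add_argument('--annotations', default='/path/to/coco17/annotations/captions_train2017.json')
```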
49 | 50 | Argument resume is for loading pre-trained overall model. 51 | 52 | ## Download 53 | To download the pre-trained unsupervised network: 54 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /__pycache__/parser.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/__pycache__/parser.cpython-35.pyc -------------------------------------------------------------------------------- /checkpoint/ReadMe: -------------------------------------------------------------------------------- 1 | This directory contains the pretrained model. 2 | -------------------------------------------------------------------------------- /demo_attr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/demo_attr.png -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__init__.py -------------------------------------------------------------------------------- /lib/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__init__.pyc -------------------------------------------------------------------------------- /lib/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/__pycache__/nms_wrapper.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__pycache__/nms_wrapper.cpython-35.pyc -------------------------------------------------------------------------------- /lib/bilinear_pooling/CompactBilinearPooling.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '/../../') 3 | import numpy as np 4 | import torch 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | import lib.pytorch_fft.fft.autograd as afft 9 | 10 | 11 | class CompactBilinearPooling(nn.Module): 12 | """ 13 | Compute compact bilinear pooling over two bottom inputs. 14 | Args: 15 | output_dim: output dimension for compact bilinear pooling. 16 | sum_pool: (Optional) If True, sum the output along height and width 17 | dimensions and return output shape [batch_size, output_dim]. 18 | Otherwise return [batch_size, height, width, output_dim]. 19 | Default: True. 20 | rand_h_1: (Optional) an 1D numpy array containing indices in interval 21 | `[0, output_dim)`. Automatically generated from `seed_h_1` 22 | if is None. 23 | rand_s_1: (Optional) an 1D numpy array of 1 and -1, having the same shape 24 | as `rand_h_1`. Automatically generated from `seed_s_1` if is 25 | None. 26 | rand_h_2: (Optional) an 1D numpy array containing indices in interval 27 | `[0, output_dim)`. 
Automatically generated from `seed_h_2` 28 | if is None. 29 | rand_s_2: (Optional) an 1D numpy array of 1 and -1, having the same shape 30 | as `rand_h_2`. Automatically generated from `seed_s_2` if is 31 | None. 32 | """ 33 | 34 | def __init__(self, input_dim1, input_dim2, output_dim, 35 | sum_pool=False, cuda=True, 36 | rand_h_1=None, rand_s_1=None, rand_h_2=None, rand_s_2=None): 37 | super(CompactBilinearPooling, self).__init__() 38 | self.input_dim1 = input_dim1 39 | self.input_dim2 = input_dim2 40 | self.output_dim = output_dim 41 | self.sum_pool = sum_pool 42 | 43 | if rand_h_1 is None: 44 | np.random.seed(1) 45 | rand_h_1 = np.random.randint(output_dim, size=self.input_dim1) 46 | if rand_s_1 is None: 47 | np.random.seed(3) 48 | rand_s_1 = 2 * np.random.randint(2, size=self.input_dim1) - 1 49 | 50 | sparse_sketch_matrix1 = Variable(self.generate_sketch_matrix( 51 | rand_h_1, rand_s_1, self.output_dim)) 52 | 53 | if rand_h_2 is None: 54 | np.random.seed(5) 55 | rand_h_2 = np.random.randint(output_dim, size=self.input_dim2) 56 | if rand_s_2 is None: 57 | np.random.seed(7) 58 | rand_s_2 = 2 * np.random.randint(2, size=self.input_dim2) - 1 59 | 60 | sparse_sketch_matrix2 = Variable(self.generate_sketch_matrix( 61 | rand_h_2, rand_s_2, self.output_dim)) 62 | self.register_buffer("sparse_sketch_matrix1", sparse_sketch_matrix1) 63 | self.register_buffer("sparse_sketch_matrix2", sparse_sketch_matrix2) 64 | 65 | 66 | def forward(self, bottom1, bottom2): 67 | """ 68 | bottom1: 1st input, 4D Tensor of shape [batch_size, input_dim1, height, width]. 69 | bottom2: 2nd input, 4D Tensor of shape [batch_size, input_dim2, height, width]. 70 | """ 71 | assert bottom1.size(1) == self.input_dim1 and \ 72 | bottom2.size(1) == self.input_dim2 73 | 74 | 75 | batch_size, _, height, width = bottom1.size() 76 | 77 | bottom1_flat = bottom1.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim1) 78 | bottom2_flat = bottom2.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim2) 79 | 80 | sketch_1 = bottom1_flat.mm(self.sparse_sketch_matrix1) 81 | sketch_2 = bottom2_flat.mm(self.sparse_sketch_matrix2) 82 | 83 | fft1_real, fft1_imag = afft.Fft()(sketch_1, Variable(torch.zeros(sketch_1.size())).cuda()) 84 | fft2_real, fft2_imag = afft.Fft()(sketch_2, Variable(torch.zeros(sketch_2.size())).cuda()) 85 | 86 | fft_product_real, fft_product_imag = fft1_real.mul(fft2_real), fft1_imag.mul(fft2_imag) 87 | 88 | cbp_flat = afft.Ifft()(fft_product_real, fft_product_imag)[0] 89 | 90 | cbp = cbp_flat.view(batch_size, height, width, self.output_dim) 91 | 92 | if self.sum_pool: 93 | cbp = cbp.sum(dim=1).sum(dim=1) 94 | 95 | return cbp.permute(0, 3, 1, 2) 96 | 97 | @staticmethod 98 | def generate_sketch_matrix(rand_h, rand_s, output_dim): 99 | """ 100 | Return a sparse matrix used for tensor sketch operation in compact bilinear 101 | pooling 102 | Args: 103 | rand_h: an 1D numpy array containing indices in interval `[0, output_dim)`. 104 | rand_s: an 1D numpy array of 1 and -1, having the same shape as `rand_h`. 105 | output_dim: the output dimensions of compact bilinear pooling. 106 | Returns: 107 | a sparse matrix of shape [input_dim, output_dim] for tensor sketch. 
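            Example (illustrative): with rand_h = [0, 2, 1], rand_s = [1, -1, 1]
            and output_dim = 3, the returned dense matrix is
                [[ 1,  0,  0],
                 [ 0,  0, -1],
                 [ 0,  1,  0]],
            i.e. row i carries rand_s[i] in column rand_h[i], so x.mm(M) adds
            rand_s[i] * x[i] into output bucket rand_h[i] -- the count sketch of x.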
108 | """ 109 | 110 | # Generate a sparse matrix for tensor count sketch 111 | rand_h = rand_h.astype(np.int64) 112 | rand_s = rand_s.astype(np.float32) 113 | assert(rand_h.ndim == 1 and rand_s.ndim == 114 | 1 and len(rand_h) == len(rand_s)) 115 | assert(np.all(rand_h >= 0) and np.all(rand_h < output_dim)) 116 | 117 | input_dim = len(rand_h) 118 | indices = np.concatenate((np.arange(input_dim)[..., np.newaxis], 119 | rand_h[..., np.newaxis]), axis=1) 120 | indices = torch.from_numpy(indices) 121 | rand_s = torch.from_numpy(rand_s) 122 | sparse_sketch_matrix = torch.sparse.FloatTensor( 123 | indices.t(), rand_s, torch.Size([input_dim, output_dim])) 124 | return sparse_sketch_matrix.to_dense() 125 | 126 | 127 | if __name__ == '__main__': 128 | 129 | bottom1 = Variable(torch.randn(48, 2048, 7, 7)).cuda() 130 | bottom2 = Variable(torch.randn(48, 2048, 7, 7)).cuda() 131 | 132 | layer = CompactBilinearPooling(2048, 2048, 16000) 133 | layer.cuda() 134 | layer.train() 135 | out = layer(bottom1, bottom2) -------------------------------------------------------------------------------- /lib/bilinear_pooling/__pycache__/CompactBilinearPooling.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/bilinear_pooling/__pycache__/CompactBilinearPooling.cpython-35.pyc -------------------------------------------------------------------------------- /lib/configure/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/configure/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /lib/configure/__pycache__/net_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/configure/__pycache__/net_util.cpython-35.pyc -------------------------------------------------------------------------------- /lib/configure/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | 12 | import numpy as np 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | 19 | class Config(object): 20 | """Base configuration class. For custom configurations, create a 21 | sub-class that inherits from this one and override properties 22 | that need to be changed. 23 | """ 24 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 25 | # Useful if your code needs to do things differently depending on which 26 | # experiment is running. 27 | NAME = None # Override in sub-classes 28 | 29 | # NUMBER OF GPUs to use. For CPU training, use 1 30 | GPU_COUNT = 1 31 | 32 | # Number of images to train with on each GPU. A 12GB GPU can typically 33 | # handle 2 images of 1024x1024px. 34 | # Adjust based on your GPU memory and image sizes. Use the highest 35 | # number that your GPU can handle for best performance. 
36 | IMAGES_PER_GPU = 2 37 | 38 | # Number of training steps per epoch 39 | # This doesn't need to match the size of the training set. Tensorboard 40 | # updates are saved at the end of each epoch, so setting this to a 41 | # smaller number means getting more frequent TensorBoard updates. 42 | # Validation stats are also calculated at each epoch end and they 43 | # might take a while, so don't set this too small to avoid spending 44 | # a lot of time on validation stats. 45 | STEPS_PER_EPOCH = 1000 46 | 47 | # Number of validation steps to run at the end of every training epoch. 48 | # A bigger number improves accuracy of validation stats, but slows 49 | # down the training. 50 | VALIDATION_STEPS = 50 51 | 52 | # The strides of each layer of the FPN Pyramid. These values 53 | # are based on a Resnet101 backbone. 54 | BACKBONE_STRIDES = [4, 8, 16, 16, 16] 55 | 56 | # Number of classification classes (including background) 57 | NUM_CLASSES = 1 # Override in sub-classes 58 | 59 | # Length of square anchor side in pixels 60 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 256) 61 | 62 | # Ratios of anchors at each cell (width/height) 63 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 64 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 65 | 66 | # Anchor stride 67 | # If 1 then anchors are created for each cell in the backbone feature map. 68 | # If 2, then anchors are created for every other cell, and so on. 69 | RPN_ANCHOR_STRIDE = 1 70 | 71 | # Non-max suppression threshold to filter RPN proposals. 72 | # You can reduce this during training to generate more propsals. 73 | RPN_NMS_THRESHOLD = 0.7 74 | 75 | # How many anchors per image to use for RPN training 76 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 77 | 78 | # ROIs kept after non-maximum supression (training and inference) 79 | POST_NMS_ROIS_TRAINING = 500 80 | POST_NMS_ROIS_INFERENCE = 500 81 | 82 | # If enabled, re-sizes instance masks to a smaller size to reduce 83 | # memory load. Recommended when using high-resolution images. 84 | USE_MINI_MASK = True 85 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 86 | 87 | # Input image resizing 88 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 89 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 90 | # be satisfied together the IMAGE_MAX_DIM is enforced. 91 | IMAGE_MIN_DIM = 800 92 | IMAGE_MAX_DIM = 1024 93 | # If True, pad images with zeros such that they're (max_dim by max_dim) 94 | IMAGE_PADDING = True # currently, the False option is not supported 95 | 96 | # Image mean (RGB) 97 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 98 | 99 | # Number of ROIs per image to feed to classifier/mask heads1024 100 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 101 | # enough positive proposals to fill this and keep a positive:negative 102 | # ratio of 1:3. You can increase the number of proposals by adjusting 103 | # the RPN NMS threshold. 104 | TRAIN_ROIS_PER_IMAGE = 200 105 | 106 | # Percent of positive ROIs used to train classifier/mask heads 107 | ROI_POSITIVE_RATIO = 0.33 108 | 109 | # Pooled ROIs 110 | POOL_SIZE = 7 111 | MASK_POOL_SIZE = 14 112 | MASK_SHAPE = [128, 128] 113 | 114 | # Maximum number of ground truth instances to use in one image 115 | MAX_GT_INSTANCES = 100 116 | 117 | # Bounding box refinement standard deviation for RPN and final detections. 
118 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 119 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 120 | 121 | # Max number of final detections 122 | DETECTION_MAX_INSTANCES = 100 123 | 124 | # Minimum probability value to accept a detected instance 125 | # ROIs below this threshold are skipped 126 | DETECTION_MIN_CONFIDENCE = 0.7 127 | 128 | # Non-maximum suppression threshold for detection 129 | DETECTION_NMS_THRESHOLD = 0.3 130 | 131 | # Learning rate and momentum 132 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 133 | # weights to explode. Likely due to differences in optimzer 134 | # implementation. 135 | LEARNING_RATE = 0.001 136 | LEARNING_MOMENTUM = 0.9 137 | 138 | # Weight decay regularization 139 | WEIGHT_DECAY = 0.0001 140 | 141 | # Use RPN ROIs or externally generated ROIs for training 142 | # Keep this True for most situations. Set to False if you want to train 143 | # the head branches on ROI generated by code rather than the ROIs from 144 | # the RPN. For example, to debug the classifier head without having to 145 | # train the RPN. 146 | USE_RPN_ROIS = True 147 | 148 | def __init__(self): 149 | """Set values of computed attributes.""" 150 | # Effective batch size 151 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 152 | 153 | # Input image size 154 | self.IMAGE_SHAPE = np.array( 155 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 156 | 157 | # Compute backbone size from input image size 158 | self.BACKBONE_SHAPES = np.array( 159 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 160 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 161 | for stride in self.BACKBONE_STRIDES]) 162 | 163 | def display(self): 164 | """Display Configuration values.""" 165 | print("\nConfigurations:") 166 | for a in dir(self): 167 | if not a.startswith("__") and not callable(getattr(self, a)): 168 | print("{:30} {}".format(a, getattr(self, a))) 169 | print("\n") 170 | -------------------------------------------------------------------------------- /lib/configure/net_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import time 5 | import csv 6 | import torch 7 | import numpy as np 8 | from random import randint 9 | from torch.autograd import Variable 10 | from torch.utils.data.sampler import SubsetRandomSampler 11 | 12 | 13 | def set_parameters(opts): 14 | ''' 15 | This function is called before training/testing to set parameters 16 | :param opts: 17 | :return opts: 18 | ''' 19 | 20 | if not opts.__contains__('train_losses'): 21 | opts.train_losses=[] 22 | 23 | if not opts.__contains__('train_accuracies'): 24 | opts.train_accuracies = [] 25 | 26 | if not opts.__contains__('valid_losses'): 27 | opts.valid_losses = [] 28 | if not opts.__contains__('valid_accuracies'): 29 | opts.valid_accuracies = [] 30 | 31 | if not opts.__contains__('test_losses'): 32 | opts.test_loss=[] 33 | 34 | if not opts.__contains__('test_accuracies'): 35 | opts.test_accuracies = [] 36 | 37 | if not opts.__contains__('best_acc'): 38 | opts.best_acc = 0.0 39 | 40 | if not opts.__contains__('lowest_loss'): 41 | opts.lowest_loss = 1e4 42 | 43 | if not opts.__contains__('checkpoint_path'): 44 | opts.checkpoint_path = 'checkpoint' 45 | 46 | if not os.path.exists(opts.checkpoint_path): 47 | os.mkdir(opts.checkpoint_path) 48 | 49 | if not opts.__contains__('checkpoint_epoch'): 50 | opts.checkpoint_epoch = 5 51 | 52 | if not opts.__contains__('valid_pearson_r'): 53 | opts.valid_pearson_r = [] 54 | 
55 | if not opts.__contains__('test_pearson_r'): 56 | opts.test_pearson_r = [] 57 | 58 | 59 | class Logger(object): 60 | def __init__(self, path, header): 61 | self.log_file = open(path, 'w') 62 | self.logger = csv.writer(self.log_file, delimiter='\t') 63 | 64 | self.logger.writerow(header) 65 | self.header = header 66 | 67 | def __del(self): 68 | self.log_file.close() 69 | 70 | def log(self, values): 71 | write_values = [] 72 | for col in self.header: 73 | assert col in values 74 | write_values.append(values[col]) 75 | 76 | self.logger.writerow(write_values) 77 | self.log_file.flush() 78 | 79 | 80 | -------------------------------------------------------------------------------- /lib/dataset/__pycache__/coco_dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/dataset/__pycache__/coco_dataset.cpython-35.pyc -------------------------------------------------------------------------------- /lib/dataset/coco_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import nltk 3 | import os.path 4 | import numpy as np 5 | from PIL import Image 6 | import torch.utils.data as data 7 | from pycocotools.coco import COCO 8 | from nltk.stem import WordNetLemmatizer 9 | 10 | 11 | class CocoCaptions(data.Dataset): 12 | """`MS Coco Captions `_ Dataset. 13 | Args: 14 | root (string): Root directory where images are downloaded to. 15 | annFile (string): Path to json annotation file. 16 | transform (callable, optional): A function/transform that takes in an PIL image 17 | and returns a transformed version. E.g, ``transforms.ToTensor`` 18 | target_transform (callable, optional): A function/transform that takes in the 19 | target and transforms it. 20 | Example: 21 | .. 
code:: python 22 | import torchvision.datasets as dset 23 | import torchvision.transforms as transforms 24 | cap = dset.CocoCaptions(root = 'dir where images are', 25 | annFile = 'json annotation file', 26 | transform=transforms.ToTensor()) 27 | print('Number of samples: ', len(cap)) 28 | img, target = cap[3] # load 4th sample 29 | print("Image Size: ", img.size()) 30 | print(target) 31 | Output: :: 32 | Number of samples: 82783 33 | Image Size: (3L, 427L, 640L) 34 | [u'A plane emitting smoke stream flying over a mountain.', 35 | u'A plane darts across a bright blue sky behind a mountain covered in snow', 36 | u'A plane leaves a contrail above the snowy mountain top.', 37 | u'A mountain that has a plane flying overheard in the distance.', 38 | u'A mountain view with a plume of smoke in the background'] 39 | """ 40 | 41 | def __init__(self, root, annFile, transform=None, target_transform=None, embed=False): 42 | 43 | # Load COCO image IDs 44 | list_file = open('./others/coco_person_list.txt', 'r') 45 | ids = [] 46 | for i in list_file.readlines(): 47 | ids.append(int(i.replace('\n', ''))) 48 | 49 | # Load entity-attribute dictionary 50 | att_dict = [] 51 | ent_dict = [] 52 | list_file = open('./others/low-level-attr.txt', 'r') 53 | for i in list_file.readlines(): 54 | att_dict.append(i.replace('\n', '')) 55 | 56 | ent_dict = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 57 | 'bus', 'train', 'truck', 'boat', 'traffic light', 58 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 59 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 60 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 61 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 62 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 63 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 64 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 65 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 66 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 67 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 68 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 69 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 70 | 'teddy bear', 'hair drier', 'toothbrush'] 71 | 72 | self.ids = ids 73 | self.embed = embed 74 | self.coco = COCO(annFile) 75 | self.transform = transform 76 | self.att_dict = att_dict 77 | self.ent_dict = ent_dict 78 | self.root = os.path.expanduser(root) 79 | self.target_transform = target_transform 80 | 81 | def __getitem__(self, index): 82 | """ 83 | Args: 84 | index (int): Index 85 | Returns: 86 | tuple: Tuple (image, target). target is a list of captions for the image. 
87 | """ 88 | coco = self.coco 89 | img_id = self.ids[index] 90 | ann_ids = coco.getAnnIds(imgIds=img_id) 91 | anns = coco.loadAnns(ann_ids) 92 | target = [ann['caption'] for ann in anns] 93 | 94 | path = coco.loadImgs(img_id)[0]['file_name'] 95 | 96 | img = Image.open(os.path.join(self.root, path)).convert('RGB') 97 | if self.transform is not None: 98 | img = self.transform(img) 99 | 100 | if self.target_transform is not None: 101 | target = self.target_transform(target) 102 | 103 | img = np.asarray(img) 104 | att_lable = np.zeros(10) 105 | ent_lable = np.zeros(81) 106 | 107 | for sentence in target: 108 | words = nltk.pos_tag([item for item in sentence.replace('.', ' ').split(' ') if len(item) > 0]) 109 | for item in words: 110 | word = item[0].lower() 111 | word = WordNetLemmatizer().lemmatize(word) 112 | 113 | # att = item[1] 114 | if word in self.att_dict: 115 | att_id = self.att_dict.index(word) 116 | att_lable[att_id] = 1 117 | if word in self.ent_dict: 118 | ent_id = self.ent_dict.index(word) 119 | ent_lable[ent_id] = 1 120 | 121 | return img, att_lable, ent_lable 122 | 123 | def __len__(self): 124 | return len(self.ids) 125 | 126 | 127 | # if __name__ == '__main__': 128 | # size = (512, 512) 129 | # img_path = '/media/drive1/Data/coco17/train2017/' 130 | # json = '/media/drive1/Data/coco17/annotations/captions_train2017.json' 131 | # coco = COCO(json) 132 | # transform = transforms.Compose([transforms.Resize(size), transforms.ToTensor()]) 133 | # data_set = CocoCaptions(img_path, json, transform) 134 | # data_loader = torch.utils.data.DataLoader(data_set, batch_size=1, shuffle=False) 135 | # 136 | # img_ids = [] 137 | # count = 1 138 | # for index, (img, target) in enumerate(data_loader): 139 | # print(target) 140 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda 4 | 5 | echo "Compiling crop_and_resize kernels by nvcc..." 6 | cd roi_align/src/cuda 7 | $CUDA_PATH/bin/nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC -arch=61 8 | 9 | cd ../../ 10 | python3 build.py 11 | 12 | cd ../ 13 | echo "Compiling nms kernels by nvcc..." 
14 | 15 | cd nms/src/cuda 16 | $CUDA_PATH/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 17 | 18 | cd ../../ 19 | python3 build.py 20 | 21 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__init__.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/pth_nms.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__pycache__/pth_nms.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__init__.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/__init__.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__pycache__/__init__.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/nms/_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/_nms.so -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | 35 | dets = dets[order].contiguous() 36 | 37 | keep = torch.LongTensor(dets.size(0)) 38 | num_out = torch.LongTensor(1) 39 | 40 | nms.gpu_nms(keep, num_out, dets, thresh) 41 | 42 | return order[keep[:num_out[0]].cuda()].contiguous() 43 | 44 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/pth_nms.pyc -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The 
MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define 
DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // 
------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from lib.nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 
16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms_wrapper.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/__init__.py: -------------------------------------------------------------------------------- 1 | from . import fft -------------------------------------------------------------------------------- /lib/pytorch_fft/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/__init__.py -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/th_fft/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._th_fft import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/th_fft/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/th_fft/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/th_fft/_th_fft.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/th_fft/_th_fft.so -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__init__.py: -------------------------------------------------------------------------------- 1 | from .fft import * 2 | from .autograd import * -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__pycache__/autograd.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/autograd.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__pycache__/fft.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/fft.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/autograd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .fft import fft,ifft,fft2,ifft2,fft3,ifft3,rfft,irfft,rfft2,irfft2,rfft3,irfft3 3 | 4 | def make_contiguous(*Xs): 5 | return tuple(X if X.is_contiguous() else X.contiguous() for X in Xs) 6 | 7 | def contiguous_clone(X): 8 | if X.is_contiguous(): 9 | return X.clone() 10 | else: 11 | return X.contiguous() 12 | 13 | class Fft(torch.autograd.Function): 14 | def forward(self, X_re, X_im): 15 | X_re, X_im = make_contiguous(X_re, X_im) 16 | return fft(X_re, X_im) 17 | 18 | def backward(self, grad_output_re, grad_output_im): 19 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 20 | grad_output_im) 21 | gi, gr = fft(grad_output_im,grad_output_re) 22 | return gr,gi 23 | 24 | 25 | class Ifft(torch.autograd.Function): 26 | 27 | def forward(self, k_re, k_im): 28 | k_re, k_im = make_contiguous(k_re, k_im) 29 | return ifft(k_re, k_im) 30 | 31 | def backward(self, grad_output_re, grad_output_im): 32 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 33 | grad_output_im) 34 | gi, gr = ifft(grad_output_im,grad_output_re) 35 | return gr, gi 36 | 37 | 38 | class Fft2d(torch.autograd.Function): 39 | def forward(self, X_re, X_im): 40 | X_re, X_im = make_contiguous(X_re, X_im) 41 | return fft2(X_re, X_im) 42 | 43 | def backward(self, grad_output_re, grad_output_im): 44 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 45 | grad_output_im) 46 | gi, gr = fft2(grad_output_im,grad_output_re) 47 | return gr,gi 48 | 49 | 50 | class Ifft2d(torch.autograd.Function): 51 | 52 | def forward(self, k_re, k_im): 53 | k_re, k_im = make_contiguous(k_re, k_im) 54 | return ifft2(k_re, k_im) 55 | 56 | def backward(self, grad_output_re, grad_output_im): 57 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 58 | grad_output_im) 59 | gi, gr = ifft2(grad_output_im,grad_output_re) 60 | return gr, gi 61 | 62 | 63 | class Fft3d(torch.autograd.Function): 64 | def forward(self, X_re, X_im): 65 | X_re, X_im = make_contiguous(X_re, X_im) 66 | return fft3(X_re, X_im) 67 | 68 | def backward(self, grad_output_re, grad_output_im): 69 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 70 | grad_output_im) 71 | gi, gr = fft3(grad_output_im,grad_output_re) 72 | return gr,gi 73 | 74 | 75 | class Ifft3d(torch.autograd.Function): 76 | 77 | def forward(self, k_re, k_im): 78 | k_re, k_im = make_contiguous(k_re, k_im) 79 | return ifft3(k_re, k_im) 80 | 81 | def backward(self, 
grad_output_re, grad_output_im): 82 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 83 | grad_output_im) 84 | gi, gr = ifft3(grad_output_im,grad_output_re) 85 | return gr, gi 86 | 87 | 88 | class Rfft(torch.autograd.Function): 89 | def forward(self, X_re): 90 | X_re = X_re.contiguous() 91 | self._to_save_input_size = X_re.size(-1) 92 | return rfft(X_re) 93 | 94 | def backward(self, grad_output_re, grad_output_im): 95 | # Clone the array and make contiguous if needed 96 | grad_output_re = contiguous_clone(grad_output_re) 97 | grad_output_im = contiguous_clone(grad_output_im) 98 | 99 | if self._to_save_input_size & 1: 100 | grad_output_re[...,1:] /= 2 101 | else: 102 | grad_output_re[...,1:-1] /= 2 103 | 104 | if self._to_save_input_size & 1: 105 | grad_output_im[...,1:] /= 2 106 | else: 107 | grad_output_im[...,1:-1] /= 2 108 | 109 | gr = irfft(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False) 110 | return gr 111 | 112 | 113 | class Irfft(torch.autograd.Function): 114 | 115 | def forward(self, k_re, k_im): 116 | k_re, k_im = make_contiguous(k_re, k_im) 117 | return irfft(k_re, k_im) 118 | 119 | def backward(self, grad_output_re): 120 | grad_output_re = grad_output_re.contiguous() 121 | gr, gi = rfft(grad_output_re) 122 | 123 | N = grad_output_re.size(-1) 124 | gr[...,0] /= N 125 | gr[...,1:-1] /= N/2 126 | gr[...,-1] /= N 127 | 128 | gi[...,0] /= N 129 | gi[...,1:-1] /= N/2 130 | gi[...,-1] /= N 131 | return gr, gi 132 | 133 | 134 | class Rfft2d(torch.autograd.Function): 135 | def forward(self, X_re): 136 | X_re = X_re.contiguous() 137 | self._to_save_input_size = X_re.size(-1) 138 | return rfft2(X_re) 139 | 140 | def backward(self, grad_output_re, grad_output_im): 141 | # Clone the array and make contiguous if needed 142 | grad_output_re = contiguous_clone(grad_output_re) 143 | grad_output_im = contiguous_clone(grad_output_im) 144 | 145 | if self._to_save_input_size & 1: 146 | grad_output_re[...,1:] /= 2 147 | else: 148 | grad_output_re[...,1:-1] /= 2 149 | 150 | if self._to_save_input_size & 1: 151 | grad_output_im[...,1:] /= 2 152 | else: 153 | grad_output_im[...,1:-1] /= 2 154 | 155 | gr = irfft2(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False) 156 | return gr 157 | 158 | 159 | class Irfft2d(torch.autograd.Function): 160 | 161 | def forward(self, k_re, k_im): 162 | k_re, k_im = make_contiguous(k_re, k_im) 163 | return irfft2(k_re, k_im) 164 | 165 | def backward(self, grad_output_re): 166 | grad_output_re = grad_output_re.contiguous() 167 | gr, gi = rfft2(grad_output_re) 168 | 169 | N = grad_output_re.size(-1) * grad_output_re.size(-2) 170 | gr[...,0] /= N 171 | gr[...,1:-1] /= N/2 172 | gr[...,-1] /= N 173 | 174 | gi[...,0] /= N 175 | gi[...,1:-1] /= N/2 176 | gi[...,-1] /= N 177 | return gr, gi 178 | 179 | 180 | class Rfft3d(torch.autograd.Function): 181 | def forward(self, X_re): 182 | X_re = X_re.contiguous() 183 | self._to_save_input_size = X_re.size(-1) 184 | return rfft3(X_re) 185 | 186 | def backward(self, grad_output_re, grad_output_im): 187 | # Clone the array and make contiguous if needed 188 | grad_output_re = contiguous_clone(grad_output_re) 189 | grad_output_im = contiguous_clone(grad_output_im) 190 | 191 | if self._to_save_input_size & 1: 192 | grad_output_re[...,1:] /= 2 193 | else: 194 | grad_output_re[...,1:-1] /= 2 195 | 196 | if self._to_save_input_size & 1: 197 | grad_output_im[...,1:] /= 2 198 | else: 199 | grad_output_im[...,1:-1] /= 2 200 | 201 | gr = 
irfft3(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False) 202 | return gr 203 | 204 | 205 | class Irfft3d(torch.autograd.Function): 206 | 207 | def forward(self, k_re, k_im): 208 | k_re, k_im = make_contiguous(k_re, k_im) 209 | return irfft3(k_re, k_im) 210 | 211 | def backward(self, grad_output_re): 212 | grad_output_re = grad_output_re.contiguous() 213 | gr, gi = rfft3(grad_output_re) 214 | 215 | N = grad_output_re.size(-1) * grad_output_re.size(-2) * grad_output_re.size(-3) 216 | gr[...,0] /= N 217 | gr[...,1:-1] /= N/2 218 | gr[...,-1] /= N 219 | 220 | gi[...,0] /= N 221 | gi[...,1:-1] /= N/2 222 | gi[...,-1] /= N 223 | return gr, gi 224 | 225 | -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/fft.py: -------------------------------------------------------------------------------- 1 | # functions/fft.py 2 | import torch 3 | from .._ext import th_fft 4 | 5 | def _fft(X_re, X_im, f, rank): 6 | if not(X_re.size() == X_im.size()): 7 | raise ValueError("Real and imaginary tensors must have the same dimension.") 8 | if not(X_re.dim() >= rank+1 and X_im.dim() >= rank+1): 9 | raise ValueError("Inputs must have at least {} dimensions.".format(rank+1)) 10 | if not(X_re.is_cuda and X_im.is_cuda): 11 | raise ValueError("Input must be a CUDA tensor.") 12 | if not(X_re.is_contiguous() and X_im.is_contiguous()): 13 | raise ValueError("Input must be contiguous.") 14 | 15 | Y1, Y2 = tuple(X_re.new(*X_re.size()).zero_() for _ in range(2)) 16 | f(X_re, X_im, Y1, Y2) 17 | return (Y1, Y2) 18 | 19 | def fft(X_re, X_im): 20 | if 'Float' in type(X_re).__name__ : 21 | f = th_fft.th_Float_fft1 22 | elif 'Double' in type(X_re).__name__: 23 | f = th_fft.th_Double_fft1 24 | else: 25 | raise NotImplementedError 26 | return _fft(X_re, X_im, f, 1) 27 | 28 | def ifft(X_re, X_im): 29 | N = X_re.size(-1) 30 | if 'Float' in type(X_re).__name__ : 31 | f = th_fft.th_Float_ifft1 32 | elif 'Double' in type(X_re).__name__: 33 | f = th_fft.th_Double_ifft1 34 | else: 35 | raise NotImplementedError 36 | Y1, Y2 = _fft(X_re, X_im, f, 1) 37 | return (Y1/N, Y2/N) 38 | 39 | def fft2(X_re, X_im): 40 | if 'Float' in type(X_re).__name__ : 41 | f = th_fft.th_Float_fft2 42 | elif 'Double' in type(X_re).__name__: 43 | f = th_fft.th_Double_fft2 44 | else: 45 | raise NotImplementedError 46 | return _fft(X_re, X_im, f, 2) 47 | 48 | def ifft2(X_re, X_im): 49 | N = X_re.size(-1)*X_re.size(-2) 50 | if 'Float' in type(X_re).__name__ : 51 | f = th_fft.th_Float_ifft2 52 | elif 'Double' in type(X_re).__name__: 53 | f = th_fft.th_Double_ifft2 54 | else: 55 | raise NotImplementedError 56 | Y1, Y2 = _fft(X_re, X_im, f, 2) 57 | return (Y1/N, Y2/N) 58 | 59 | def fft3(X_re, X_im): 60 | if 'Float' in type(X_re).__name__ : 61 | f = th_fft.th_Float_fft3 62 | elif 'Double' in type(X_re).__name__: 63 | f = th_fft.th_Double_fft3 64 | else: 65 | raise NotImplementedError 66 | return _fft(X_re, X_im, f, 3) 67 | 68 | def ifft3(X_re, X_im): 69 | N = X_re.size(-1)*X_re.size(-2)*X_re.size(-3) 70 | if 'Float' in type(X_re).__name__ : 71 | f = th_fft.th_Float_ifft3 72 | elif 'Double' in type(X_re).__name__: 73 | f = th_fft.th_Double_ifft3 74 | else: 75 | raise NotImplementedError 76 | Y1, Y2 = _fft(X_re, X_im, f, 3) 77 | return (Y1/N, Y2/N) 78 | 79 | _s = slice(None, None, None) 80 | 81 | def _rfft(X, f, rank): 82 | if not(X.dim() >= rank+1): 83 | raise ValueError("Input must have at least {} dimensions.".format(rank+1)) 84 | if not(X.is_cuda): 85 | raise ValueError("Input must be a CUDA 
tensor.") 86 | if not(X.is_contiguous()): 87 | raise ValueError("Input must be contiguous.") 88 | 89 | new_size = tuple(X.size())[:-1] + (X.size(-1)//2 + 1,) 90 | # new_size = tuple(X.size()) 91 | Y1, Y2 = tuple(X.new(*new_size).zero_() for _ in range(2)) 92 | f(X, Y1, Y2) 93 | # i = tuple(_s for _ in range(X.dim()-1)) + (slice(None, X.size(-1)//2 + 1, ),) 94 | # print(Y1, i) 95 | # return (Y1[i], Y2[i]) 96 | return (Y1, Y2) 97 | 98 | def rfft(X): 99 | if 'Float' in type(X).__name__ : 100 | f = th_fft.th_Float_rfft1 101 | elif 'Double' in type(X).__name__: 102 | f = th_fft.th_Double_rfft1 103 | else: 104 | raise NotImplementedError 105 | return _rfft(X, f, 1) 106 | 107 | def rfft2(X): 108 | if 'Float' in type(X).__name__ : 109 | f = th_fft.th_Float_rfft2 110 | elif 'Double' in type(X).__name__: 111 | f = th_fft.th_Double_rfft2 112 | else: 113 | raise NotImplementedError 114 | return _rfft(X, f, 2) 115 | 116 | def rfft3(X): 117 | if 'Float' in type(X).__name__ : 118 | f = th_fft.th_Float_rfft3 119 | elif 'Double' in type(X).__name__: 120 | f = th_fft.th_Double_rfft3 121 | else: 122 | raise NotImplementedError 123 | return _rfft(X, f, 3) 124 | 125 | def _irfft(X_re, X_im, f, rank, N, normalize): 126 | if not(X_re.size() == X_im.size()): 127 | raise ValueError("Real and imaginary tensors must have the same dimension.") 128 | if not(X_re.dim() >= rank+1 and X_im.dim() >= rank+1): 129 | raise ValueError("Inputs must have at least {} dimensions.".format(rank+1)) 130 | if not(X_re.is_cuda and X_im.is_cuda): 131 | raise ValueError("Input must be a CUDA tensor.") 132 | if not(X_re.is_contiguous() and X_im.is_contiguous()): 133 | raise ValueError("Input must be contiguous.") 134 | 135 | input_size = X_re.size(-1) 136 | 137 | if N is not None: 138 | if input_size != int(N/2) + 1: 139 | raise ValueError("Input size must be equal to n/2 + 1") 140 | else: 141 | N = (X_re.size(-1) - 1)*2 142 | 143 | new_size = tuple(X_re.size())[:-1] + (N,) 144 | Y = X_re.new(*new_size).zero_() 145 | f(X_re, X_im, Y) 146 | 147 | if normalize: 148 | M = 1 149 | for i in range(rank): 150 | M *= new_size[-(i+1)] 151 | return Y/M 152 | else: 153 | return Y 154 | 155 | def irfft(X_re, X_im, n=None, normalize=True): 156 | if 'Float' in type(X_re).__name__ : 157 | f = th_fft.th_Float_irfft1 158 | elif 'Double' in type(X_re).__name__: 159 | f = th_fft.th_Double_irfft1 160 | else: 161 | raise NotImplementedError 162 | return _irfft(X_re, X_im, f, 1, n, normalize) 163 | 164 | def irfft2(X_re, X_im, n=None, normalize=True): 165 | if 'Float' in type(X_re).__name__ : 166 | f = th_fft.th_Float_irfft2 167 | elif 'Double' in type(X_re).__name__: 168 | f = th_fft.th_Double_irfft2 169 | else: 170 | raise NotImplementedError 171 | return _irfft(X_re, X_im, f, 2, n, normalize) 172 | 173 | def irfft3(X_re, X_im, n=None, normalize=True): 174 | if 'Float' in type(X_re).__name__ : 175 | f = th_fft.th_Float_irfft3 176 | elif 'Double' in type(X_re).__name__: 177 | f = th_fft.th_Double_irfft3 178 | else: 179 | raise NotImplementedError 180 | return _irfft(X_re, X_im, f, 3, n, normalize) 181 | 182 | def reverse(X, group_size=1): 183 | if not(X.is_cuda): 184 | raise ValueError("Input must be a CUDA tensor.") 185 | if not(X.is_contiguous()): 186 | raise ValueError("Input must be contiguous.") 187 | 188 | if 'Float' in type(X).__name__: 189 | f = th_fft.reverse_Float 190 | elif 'Double' in type(X).__name__: 191 | f = th_fft.reverse_Double 192 | else: 193 | raise NotImplementedError 194 | Y = X.new(*X.size()) 195 | f(X,Y, group_size) 196 | return Y 197 
| 198 | 199 | def expand(X, imag=False, odd=False): 200 | N1, N2 = X.size(-2), X.size(-1) 201 | N3 = (X.size(-1) - 1)*2 202 | if odd: 203 | N3 += 1 204 | new_size = tuple(X.size())[:-1] + (N3,) 205 | Y = X.new(*new_size).zero_() 206 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(None,N2, None),) 207 | Y[i] = X 208 | 209 | if odd: 210 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(-(N3-N2),None, None),) 211 | else: 212 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(-(1+N3-N2),-1, None),) 213 | X0 = X[i].contiguous() 214 | 215 | X0 = reverse(X0) 216 | i0 = (tuple(slice(None, None, None) for _ in range(X.dim() - 2)) + 217 | (slice(-1,None, None), slice(None, None, None))) 218 | i1 = (tuple(slice(None, None, None) for _ in range(X.dim() - 2)) + 219 | (slice(None, -1, None), slice(None, None, None))) 220 | X0 = torch.cat([X0[i0], X0[i1]], -2) 221 | X0 = reverse(X0, N1*(N3-N2)) 222 | 223 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(N2, None, None),) 224 | if not imag: 225 | Y[i] = X0 226 | else: 227 | Y[i] = -X0 228 | return Y 229 | 230 | def roll_n(X, axis, n): 231 | f_idx = tuple(slice(None, None, None) if i != axis else slice(0,n,None) 232 | for i in range(X.dim())) 233 | b_idx = tuple(slice(None, None, None) if i != axis else slice(n,None,None) 234 | for i in range(X.dim())) 235 | front = X[f_idx] 236 | back = X[b_idx] 237 | return torch.cat([back, front],axis) 238 | -------------------------------------------------------------------------------- /lib/pytorch_fft/src/generic/helpers.c: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "generic/helpers.c" 3 | #else 4 | 5 | // helper to convert a pair of real arrays into a complex array 6 | void pair2complex(real *a, real *b, cufft_complex *c, int n) 7 | { 8 | real *c_tmp = (real*)c; 9 | cudaMemcpy2D(c_tmp, 2*sizeof(real), 10 | a, sizeof(real), 11 | sizeof(real), n, cudaMemcpyDeviceToDevice); 12 | cudaMemcpy2D(c_tmp+1, 2*sizeof(real), 13 | b, sizeof(real), 14 | sizeof(real), n, cudaMemcpyDeviceToDevice); 15 | } 16 | 17 | void complex2pair(cufft_complex *a, real *b, real *c, int n) 18 | { 19 | real *a_tmp = (real*)a; 20 | cudaMemcpy2D(b, sizeof(real), 21 | a_tmp, 2*sizeof(real), 22 | sizeof(real), n, cudaMemcpyDeviceToDevice); 23 | cudaMemcpy2D(c, sizeof(real), 24 | a_tmp+1, 2*sizeof(real), 25 | sizeof(real), n, cudaMemcpyDeviceToDevice); 26 | } 27 | 28 | void reverse_(THCTensor *input, THCTensor *output, int group_size) 29 | { 30 | real *input_data = THCTensor_(data)(state, input); 31 | real *output_data = THCTensor_(data)(state, output); 32 | int n = THCTensor_(nElement)(state, input); 33 | 34 | cudaMemcpy2D(output_data, sizeof(real)*group_size, 35 | input_data+n-group_size, -sizeof(real)*group_size, 36 | sizeof(real)*group_size, n/group_size, cudaMemcpyDeviceToDevice); 37 | } 38 | 39 | #endif -------------------------------------------------------------------------------- /lib/pytorch_fft/src/generic/th_fft_cuda.c: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "generic/th_fft_cuda.c" 3 | #else 4 | 5 | int th_(THCTensor *input1, THCTensor *input2, THCTensor *output1, THCTensor *output2) 6 | { 7 | // Require that all tensors be of the same size. 
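// (Descriptive note: if any of the size checks below fails, this macro-generated wrapper returns 0 instead of running the transform; the Python caller in fft.py pre-allocates the output tensors with the same shape as the inputs, so in normal use these checks pass.)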
8 | if (!THCTensor_(isSameSizeAs)(state, input1, output1)) 9 | return 0; 10 | if (!THCTensor_(isSameSizeAs)(state, input1, output2)) 11 | return 0; 12 | if (!THCTensor_(isSameSizeAs)(state, input1, input2)) 13 | return 0; 14 | 15 | // Get the tensor dimensions (batchsize, rows, cols). 16 | int ndim = THCTensor_(nDimension)(state, input1); 17 | int batch = 1; 18 | int i, d; 19 | for(i=0; i 2 | #include 3 | #include 4 | #include 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | #define th_ TH_CONCAT_4(th_, Real, _, func_name) 9 | #define pair2complex TH_CONCAT_2(Real, 2complex) 10 | #define complex2pair TH_CONCAT_2(complex2, Real) 11 | #define reverse_ TH_CONCAT_2(reverse_, Real) 12 | 13 | #include "th_fft_generate_helpers.h" 14 | 15 | #define cufft_rank 1 16 | #include "th_fft_generate_float.h" 17 | #include "th_fft_generate_double.h" 18 | #undef cufft_rank 19 | 20 | #define cufft_rank 2 21 | #include "th_fft_generate_float.h" 22 | #include "th_fft_generate_double.h" 23 | #undef cufft_rank 24 | 25 | #define cufft_rank 3 26 | #include "th_fft_generate_float.h" 27 | #include "th_fft_generate_double.h" 28 | #undef cufft_rank 29 | -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_cuda.h: -------------------------------------------------------------------------------- 1 | int th_Float_fft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 2 | int th_Float_ifft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 3 | int th_Double_fft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 4 | int th_Double_ifft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 5 | 6 | int th_Float_fft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 7 | int th_Float_ifft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 8 | int th_Double_fft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 9 | int th_Double_ifft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 10 | 11 | int th_Float_fft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 12 | int th_Float_ifft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 13 | int th_Double_fft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 14 | int th_Double_ifft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 15 | 16 | int th_Float_rfft1(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2); 17 | int th_Float_irfft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1); 18 | int th_Double_rfft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 19 | int th_Double_irfft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1); 20 | 21 | int th_Float_rfft2(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2); 22 | int th_Float_irfft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1); 23 | int 
th_Double_rfft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 24 | int th_Double_irfft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1); 25 | 26 | int th_Float_rfft3(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2); 27 | int th_Float_irfft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1); 28 | int th_Double_rfft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 29 | int th_Double_irfft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1); 30 | 31 | void reverse_Float(THCudaTensor *input, THCudaTensor *output, int group_size); 32 | void reverse_Double(THCudaDoubleTensor *input, THCudaDoubleTensor *output, int group_size); 33 | 34 | // void expand_2D_Float(THCudaTensor *input, THCudaTensor *output); 35 | // void expand_2D_Double(THCudaDoubleTensor *input, THCudaDoubleTensor *output); -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_generate_double.h: -------------------------------------------------------------------------------- 1 | // Generate Double FFTs 2 | #define cufft_complex cufftDoubleComplex 3 | 4 | #define cufft_type CUFFT_Z2Z 5 | #define cufft_exec cufftExecZ2Z 6 | 7 | #define cufft_direction CUFFT_FORWARD 8 | #define func_name TH_CONCAT_2(fft, cufft_rank) 9 | 10 | #include "generic/th_fft_cuda.c" 11 | #include "THCGenerateDoubleType.h" 12 | 13 | #undef cufft_direction 14 | #undef func_name 15 | 16 | #define cufft_direction CUFFT_INVERSE 17 | #define func_name TH_CONCAT_2(ifft, cufft_rank) 18 | 19 | #include "generic/th_fft_cuda.c" 20 | #include "THCGenerateDoubleType.h" 21 | 22 | #undef cufft_direction 23 | #undef func_name 24 | 25 | #undef cufft_type 26 | #undef cufft_exec 27 | 28 | // Generate Double rFFTs 29 | #define cufft_type CUFFT_D2Z 30 | #define cufft_exec cufftExecD2Z 31 | #define func_name TH_CONCAT_2(rfft, cufft_rank) 32 | 33 | #include "generic/th_rfft_cuda.c" 34 | #include "THCGenerateDoubleType.h" 35 | 36 | #undef cufft_type 37 | #undef cufft_exec 38 | #undef func_name 39 | 40 | #define cufft_type CUFFT_Z2D 41 | #define cufft_exec cufftExecZ2D 42 | #define func_name TH_CONCAT_2(irfft, cufft_rank) 43 | 44 | #include "generic/th_irfft_cuda.c" 45 | #include "THCGenerateDoubleType.h" 46 | 47 | #undef cufft_type 48 | #undef cufft_exec 49 | #undef func_name 50 | 51 | #undef cufft_complex -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_generate_float.h: -------------------------------------------------------------------------------- 1 | // Generate float FFTs 2 | #define cufft_complex cufftComplex 3 | 4 | #define cufft_type CUFFT_C2C 5 | #define cufft_exec cufftExecC2C 6 | 7 | #define cufft_direction CUFFT_FORWARD 8 | #define func_name TH_CONCAT_2(fft, cufft_rank) 9 | 10 | #include "generic/th_fft_cuda.c" 11 | #include "THCGenerateFloatType.h" 12 | 13 | #undef func_name 14 | #undef cufft_direction 15 | 16 | #define cufft_direction CUFFT_INVERSE 17 | #define func_name TH_CONCAT_2(ifft, cufft_rank) 18 | 19 | #include "generic/th_fft_cuda.c" 20 | #include "THCGenerateFloatType.h" 21 | 22 | #undef func_name 23 | #undef cufft_direction 24 | 25 | 26 | #undef cufft_type 27 | #undef cufft_exec 28 | 29 | // Generate float rFFTs 30 | #define cufft_type CUFFT_R2C 31 | #define cufft_exec cufftExecR2C 32 | #define func_name TH_CONCAT_2(rfft, cufft_rank) 33 | 34 | #include 
"generic/th_rfft_cuda.c" 35 | #include "THCGenerateFloatType.h" 36 | 37 | #undef func_name 38 | #undef cufft_type 39 | #undef cufft_exec 40 | 41 | #define cufft_type CUFFT_C2R 42 | #define cufft_exec cufftExecC2R 43 | #define func_name TH_CONCAT_2(irfft, cufft_rank) 44 | 45 | #include "generic/th_irfft_cuda.c" 46 | #include "THCGenerateFloatType.h" 47 | 48 | #undef func_name 49 | #undef cufft_type 50 | #undef cufft_exec 51 | 52 | #undef cufft_complex -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_generate_helpers.h: -------------------------------------------------------------------------------- 1 | // Generate float and double helpers 2 | #define cufft_complex cufftComplex 3 | 4 | #include "generic/helpers.c" 5 | #include "THCGenerateFloatType.h" 6 | 7 | #undef cufft_complex 8 | 9 | #define cufft_complex cufftDoubleComplex 10 | 11 | #include "generic/helpers.c" 12 | #include "THCGenerateDoubleType.h" 13 | 14 | #undef cufft_complex -------------------------------------------------------------------------------- /lib/resnet/__pycache__/resnet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/resnet/__pycache__/resnet.cpython-35.pyc -------------------------------------------------------------------------------- /lib/resnet/resnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | from torchvision import transforms 6 | from torch.autograd import Variable 7 | import torch.utils.model_zoo as model_zoo 8 | 9 | 10 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 11 | 'resnet152'] 12 | 13 | 14 | model_urls = { 15 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 16 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 17 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 18 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 19 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 20 | } 21 | 22 | 23 | def conv3x3(in_planes, out_planes, stride=1): 24 | "3x3 convolution with padding" 25 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | residual = self.downsample(x) 54 | 55 | out += residual 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 67 | self.bn1 = 
nn.BatchNorm2d(planes) 68 | if dilation == 1: 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | else: 72 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 73 | padding=dilation, dilation=dilation, bias=False) 74 | self.bn2 = nn.BatchNorm2d(planes) 75 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 76 | self.bn3 = nn.BatchNorm2d(planes * 4) 77 | self.relu = nn.ReLU(inplace=True) 78 | self.downsample = downsample 79 | self.stride = stride 80 | 81 | def forward(self, x): 82 | residual = x 83 | 84 | out = self.conv1(x) 85 | out = self.bn1(out) 86 | out = self.relu(out) 87 | 88 | out = self.conv2(out) 89 | out = self.bn2(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv3(out) 93 | out = self.bn3(out) 94 | 95 | if self.downsample is not None: 96 | residual = self.downsample(x) 97 | 98 | out += residual 99 | out = self.relu(out) 100 | 101 | return out 102 | 103 | 104 | # We hook up one more 1*1 conv layer in Res_block 5th, and modified the method for checkpoint loading 105 | # An attribute-entity grounding pre-trained classification ResNet was adopted 106 | class ResNet(nn.Module): 107 | 108 | def __init__(self, block, layers, num_classes=1000): 109 | self.inplanes = 64 110 | super(ResNet, self).__init__() 111 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 112 | bias=False) 113 | self.bn1 = nn.BatchNorm2d(64) 114 | self.relu = nn.ReLU(inplace=True) 115 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 116 | self.layer1 = self._make_layer(block, 64, layers[0]) 117 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 119 | self.layer4 = self._make_layer(block, 512, layers[3], dilation=4) 120 | self.avgpool = nn.AvgPool2d(32, stride=1) 121 | self.fc = nn.Linear(512 * block.expansion, num_classes) 122 | self.sigmoid = nn.Sigmoid() 123 | 124 | for m in self.modules(): 125 | if isinstance(m, nn.Conv2d): 126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 127 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 128 | elif isinstance(m, nn.BatchNorm2d): 129 | m.weight.data.fill_(1) 130 | m.bias.data.zero_() 131 | 132 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 133 | downsample = None 134 | if stride != 1 or self.inplanes != planes * block.expansion: 135 | downsample = nn.Sequential( 136 | nn.Conv2d(self.inplanes, planes * block.expansion, 137 | kernel_size=1, stride=stride, bias=False), 138 | nn.BatchNorm2d(planes * block.expansion),) 139 | layers = [] 140 | layers.append(block(self.inplanes, planes, stride, dilation, downsample)) 141 | self.inplanes = planes * block.expansion 142 | for i in range(1, blocks): 143 | layers.append(block(self.inplanes, planes)) 144 | 145 | return nn.Sequential(*layers) 146 | 147 | def forward(self, x): 148 | x = self.conv1(x) 149 | x = self.bn1(x) 150 | x = self.relu(x) 151 | x = self.maxpool(x) 152 | 153 | conv_feat1 = self.layer1(x) 154 | conv_feat2 = self.layer2(conv_feat1) 155 | conv_feat3 = self.layer3(conv_feat2) 156 | conv_feat4 = self.layer4(conv_feat3) 157 | 158 | # Shrink the feature size and do classification 159 | conv_feat = self.shrink_conv(conv_feat4) 160 | # feat = self.avgpool(conv_feat) 161 | # y = self.sigmoid(self.fc(feat.view(feat.shape[0], feat.shape[1]))) 162 | 163 | return conv_feat4, conv_feat 164 | 165 | 166 | def resnet101(pretrained=False, path='', classnum=1000, **kwargs): 167 | """Constructs a ResNet-101 model. 168 | 169 | Args: 170 | pretrained (bool): If True, returns a model pre-trained on ImageNet 171 | 172 | Note that in this time we've pre-trained our modified ResNet on Flickr 30k 173 | for entity-attribute classification 174 | 175 | """ 176 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 177 | model.fc = torch.nn.Linear(256, 4) 178 | 179 | if pretrained: 180 | state_dict = torch.load(path)['state_dict'] 181 | new_params = model.state_dict() 182 | model_keys = model.state_dict().keys() 183 | for name, param in list(state_dict.items()): 184 | if name not in model_keys: 185 | del state_dict[name] 186 | 187 | new_params.update(state_dict) 188 | model.load_state_dict(new_params) 189 | 190 | else: 191 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 192 | return model 193 | 194 | 195 | def resnet50(pretrained=False, path='', classnum=1000, **kwargs): 196 | 197 | """Constructs a ResNet-50 model. 198 | 199 | Args: 200 | pretrained (bool): If True, returns a model pre-trained on ImageNet 201 | """ 202 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 203 | 204 | if pretrained: 205 | # In pre-trained model-gender the fc is 2, while in model-person it's 4 lasses. 
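# (Note: the 4-way head set up below is assumed to match the person-attribute checkpoint loaded by Model7, e.g. ./checkpoint/AENet_clsfier_person_256d_4.pth.)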
206 | model.fc = torch.nn.Linear(256, 4) 207 | model.shrink_conv = nn.Conv2d(2048, 256, kernel_size=1, bias=False) 208 | state_dict = torch.load(path)['state_dict'] 209 | new_params = model.state_dict() 210 | model_keys = model.state_dict().keys() 211 | for name, param in list(state_dict.items()): 212 | if name not in model_keys: 213 | del state_dict[name] 214 | 215 | new_params.update(state_dict) 216 | model.load_state_dict(new_params) 217 | 218 | else: 219 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 220 | model.shrink_conv = nn.Conv2d(2048, 256, kernel_size=1, bias=False) 221 | model.fc = torch.nn.Linear(256, classnum) 222 | 223 | return model 224 | -------------------------------------------------------------------------------- /lib/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/roi_align/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__init__.pyc -------------------------------------------------------------------------------- /lib/roi_align/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/__pycache__/crop_and_resize.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__pycache__/crop_and_resize.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/roi_align/_ext/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__init__.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_and_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = 
_wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/__init__.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/_crop_and_resize.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/_crop_and_resize.so -------------------------------------------------------------------------------- /lib/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/crop_and_resize.c'] 7 | headers = ['src/crop_and_resize.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/crop_and_resize_gpu.c'] 15 | headers += ['src/crop_and_resize_gpu.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o'] 18 | with_cuda = True 19 | 20 | extra_compile_args = ['-fopenmp', '-std=c99'] 21 | 22 | this_file = os.path.dirname(os.path.realpath(__file__)) 23 | print(this_file) 24 | sources = [os.path.join(this_file, fname) for fname in sources] 25 | headers = [os.path.join(this_file, fname) for fname in headers] 26 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 27 | 28 | ffi = create_extension( 29 | '_ext.crop_and_resize', 30 | headers=headers, 31 | sources=sources, 32 | define_macros=defines, 33 | relative_to=__file__, 34 | with_cuda=with_cuda, 35 | extra_objects=extra_objects, 36 | extra_compile_args=extra_compile_args 37 | ) 38 | 39 | if __name__ == '__main__': 40 | ffi.build() 41 | -------------------------------------------------------------------------------- /lib/roi_align/crop_and_resize.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Function 6 | 7 | from ._ext import crop_and_resize as _backend 8 | 9 | 10 | class CropAndResizeFunction(Function): 11 | 12 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 13 | self.crop_height = crop_height 14 | self.crop_width = crop_width 15 | self.extrapolation_value = extrapolation_value 16 | 17 | def forward(self, image, boxes, box_ind): 18 | crops = torch.zeros_like(image) 19 | 20 | if image.is_cuda: 21 | _backend.crop_and_resize_gpu_forward( 22 | image, boxes, box_ind, 23 | self.extrapolation_value, self.crop_height, 
self.crop_width, crops) 24 | else: 25 | _backend.crop_and_resize_forward( 26 | image, boxes, box_ind, 27 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 28 | 29 | # save for backward 30 | self.im_size = image.size() 31 | self.save_for_backward(boxes, box_ind) 32 | 33 | return crops 34 | 35 | def backward(self, grad_outputs): 36 | boxes, box_ind = self.saved_tensors 37 | 38 | grad_outputs = grad_outputs.contiguous() 39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size) 40 | 41 | if grad_outputs.is_cuda: 42 | _backend.crop_and_resize_gpu_backward( 43 | grad_outputs, boxes, box_ind, grad_image 44 | ) 45 | else: 46 | _backend.crop_and_resize_backward( 47 | grad_outputs, boxes, box_ind, grad_image 48 | ) 49 | 50 | return grad_image, None, None 51 | 52 | 53 | class CropAndResize(nn.Module): 54 | """ 55 | Crop and resize ported from tensorflow 56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize 57 | """ 58 | 59 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 60 | super(CropAndResize, self).__init__() 61 | 62 | self.crop_height = crop_height 63 | self.crop_width = crop_width 64 | self.extrapolation_value = extrapolation_value 65 | 66 | def forward(self, image, boxes, box_ind): 67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind) 68 | -------------------------------------------------------------------------------- /lib/roi_align/crop_and_resize.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/crop_and_resize.pyc -------------------------------------------------------------------------------- /lib/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .crop_and_resize import CropAndResize, CropAndResizeFunction 5 | 6 | class RoIAlign(nn.Module): 7 | 8 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 9 | super(RoIAlign, self).__init__() 10 | 11 | self.crop_height = crop_height 12 | self.crop_width = crop_width 13 | self.extrapolation_value = extrapolation_value 14 | 15 | def forward(self, featuremap, boxes, box_ind): 16 | """ 17 | RoIAlign based on crop_and_resize. 
18 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301 19 | :param featuremap: NxCxHxW 20 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization** 21 | :param box_ind: M 22 | :return: MxCxoHxoW 23 | """ 24 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1) 25 | 26 | spacing_w = (x2 - x1) / float(self.crop_width) 27 | spacing_h = (y2 - y1) / float(self.crop_height) 28 | 29 | image_height, image_width = featuremap.size()[2:4] 30 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1) 31 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1) 32 | 33 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1) 34 | nh = spacing_w * float(self.crop_height - 1) / float(image_height - 1) 35 | 36 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1) 37 | 38 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind) -------------------------------------------------------------------------------- /lib/roi_align/roi_align.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/roi_align.pyc -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | void CropAndResizePerBox( 7 | const float * image_data, 8 | const int batch_size, 9 | const int depth, 10 | const int image_height, 11 | const int image_width, 12 | 13 | const float * boxes_data, 14 | const int * box_index_data, 15 | const int start_box, 16 | const int limit_box, 17 | 18 | float * corps_data, 19 | const int crop_height, 20 | const int crop_width, 21 | const float extrapolation_value 22 | ) { 23 | const int image_channel_elements = image_height * image_width; 24 | const int image_elements = depth * image_channel_elements; 25 | 26 | const int channel_elements = crop_height * crop_width; 27 | const int crop_elements = depth * channel_elements; 28 | 29 | int b; 30 | #pragma omp parallel for 31 | for (b = start_box; b < limit_box; ++b) { 32 | const float * box = boxes_data + b * 4; 33 | const float y1 = box[0]; 34 | const float x1 = box[1]; 35 | const float y2 = box[2]; 36 | const float x2 = box[3]; 37 | 38 | const int b_in = box_index_data[b]; 39 | if (b_in < 0 || b_in >= batch_size) { 40 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 41 | exit(-1); 42 | } 43 | 44 | const float height_scale = 45 | (crop_height > 1) 46 | ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 47 | : 0; 48 | const float width_scale = 49 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 50 | : 0; 51 | 52 | for (int y = 0; y < crop_height; ++y) 53 | { 54 | const float in_y = (crop_height > 1) 55 | ? 
y1 * (image_height - 1) + y * height_scale 56 | : 0.5 * (y1 + y2) * (image_height - 1); 57 | 58 | if (in_y < 0 || in_y > image_height - 1) 59 | { 60 | for (int x = 0; x < crop_width; ++x) 61 | { 62 | for (int d = 0; d < depth; ++d) 63 | { 64 | // crops(b, y, x, d) = extrapolation_value; 65 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 66 | } 67 | } 68 | continue; 69 | } 70 | 71 | const int top_y_index = floorf(in_y); 72 | const int bottom_y_index = ceilf(in_y); 73 | const float y_lerp = in_y - top_y_index; 74 | 75 | for (int x = 0; x < crop_width; ++x) 76 | { 77 | const float in_x = (crop_width > 1) 78 | ? x1 * (image_width - 1) + x * width_scale 79 | : 0.5 * (x1 + x2) * (image_width - 1); 80 | if (in_x < 0 || in_x > image_width - 1) 81 | { 82 | for (int d = 0; d < depth; ++d) 83 | { 84 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 85 | } 86 | continue; 87 | } 88 | 89 | const int left_x_index = floorf(in_x); 90 | const int right_x_index = ceilf(in_x); 91 | const float x_lerp = in_x - left_x_index; 92 | 93 | for (int d = 0; d < depth; ++d) 94 | { 95 | const float *pimage = image_data + b_in * image_elements + d * image_channel_elements; 96 | 97 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 98 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 99 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 100 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 101 | 102 | const float top = top_left + (top_right - top_left) * x_lerp; 103 | const float bottom = 104 | bottom_left + (bottom_right - bottom_left) * x_lerp; 105 | 106 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = top + (bottom - top) * y_lerp; 107 | } 108 | } // end for x 109 | } // end for y 110 | } // end for b 111 | 112 | } 113 | 114 | 115 | void crop_and_resize_forward( 116 | THFloatTensor * image, 117 | THFloatTensor * boxes, // [y1, x1, y2, x2] 118 | THIntTensor * box_index, // range in [0, batch_size) 119 | const float extrapolation_value, 120 | const int crop_height, 121 | const int crop_width, 122 | THFloatTensor * crops 123 | ) { 124 | const int batch_size = image->size[0]; 125 | const int depth = image->size[1]; 126 | const int image_height = image->size[2]; 127 | const int image_width = image->size[3]; 128 | 129 | const int num_boxes = boxes->size[0]; 130 | 131 | // init output space 132 | THFloatTensor_resize4d(crops, num_boxes, depth, crop_height, crop_width); 133 | THFloatTensor_zero(crops); 134 | 135 | // crop_and_resize for each box 136 | CropAndResizePerBox( 137 | THFloatTensor_data(image), 138 | batch_size, 139 | depth, 140 | image_height, 141 | image_width, 142 | 143 | THFloatTensor_data(boxes), 144 | THIntTensor_data(box_index), 145 | 0, 146 | num_boxes, 147 | 148 | THFloatTensor_data(crops), 149 | crop_height, 150 | crop_width, 151 | extrapolation_value 152 | ); 153 | 154 | } 155 | 156 | 157 | void crop_and_resize_backward( 158 | THFloatTensor * grads, 159 | THFloatTensor * boxes, // [y1, x1, y2, x2] 160 | THIntTensor * box_index, // range in [0, batch_size) 161 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 162 | ) 163 | { 164 | // shape 165 | const int batch_size = grads_image->size[0]; 166 | const int depth = grads_image->size[1]; 167 | const int image_height = grads_image->size[2]; 168 | const int image_width = grads_image->size[3]; 169 | 170 | const 
int num_boxes = grads->size[0]; 171 | const int crop_height = grads->size[2]; 172 | const int crop_width = grads->size[3]; 173 | 174 | // n_elements 175 | const int image_channel_elements = image_height * image_width; 176 | const int image_elements = depth * image_channel_elements; 177 | 178 | const int channel_elements = crop_height * crop_width; 179 | const int crop_elements = depth * channel_elements; 180 | 181 | // init output space 182 | THFloatTensor_zero(grads_image); 183 | 184 | // data pointer 185 | const float * grads_data = THFloatTensor_data(grads); 186 | const float * boxes_data = THFloatTensor_data(boxes); 187 | const int * box_index_data = THIntTensor_data(box_index); 188 | float * grads_image_data = THFloatTensor_data(grads_image); 189 | 190 | for (int b = 0; b < num_boxes; ++b) { 191 | const float * box = boxes_data + b * 4; 192 | const float y1 = box[0]; 193 | const float x1 = box[1]; 194 | const float y2 = box[2]; 195 | const float x2 = box[3]; 196 | 197 | const int b_in = box_index_data[b]; 198 | if (b_in < 0 || b_in >= batch_size) { 199 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 200 | exit(-1); 201 | } 202 | 203 | const float height_scale = 204 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 205 | : 0; 206 | const float width_scale = 207 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 208 | : 0; 209 | 210 | for (int y = 0; y < crop_height; ++y) 211 | { 212 | const float in_y = (crop_height > 1) 213 | ? y1 * (image_height - 1) + y * height_scale 214 | : 0.5 * (y1 + y2) * (image_height - 1); 215 | if (in_y < 0 || in_y > image_height - 1) 216 | { 217 | continue; 218 | } 219 | const int top_y_index = floorf(in_y); 220 | const int bottom_y_index = ceilf(in_y); 221 | const float y_lerp = in_y - top_y_index; 222 | 223 | for (int x = 0; x < crop_width; ++x) 224 | { 225 | const float in_x = (crop_width > 1) 226 | ? 
x1 * (image_width - 1) + x * width_scale 227 | : 0.5 * (x1 + x2) * (image_width - 1); 228 | if (in_x < 0 || in_x > image_width - 1) 229 | { 230 | continue; 231 | } 232 | const int left_x_index = floorf(in_x); 233 | const int right_x_index = ceilf(in_x); 234 | const float x_lerp = in_x - left_x_index; 235 | 236 | for (int d = 0; d < depth; ++d) 237 | { 238 | float *pimage = grads_image_data + b_in * image_elements + d * image_channel_elements; 239 | const float grad_val = grads_data[crop_elements * b + channel_elements * d + y * crop_width + x]; 240 | 241 | const float dtop = (1 - y_lerp) * grad_val; 242 | pimage[top_y_index * image_width + left_x_index] += (1 - x_lerp) * dtop; 243 | pimage[top_y_index * image_width + right_x_index] += x_lerp * dtop; 244 | 245 | const float dbottom = y_lerp * grad_val; 246 | pimage[bottom_y_index * image_width + left_x_index] += (1 - x_lerp) * dbottom; 247 | pimage[bottom_y_index * image_width + right_x_index] += x_lerp * dbottom; 248 | } // end d 249 | } // end x 250 | } // end y 251 | } // end b 252 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_forward( 2 | THFloatTensor * image, 3 | THFloatTensor * boxes, // [y1, x1, y2, x2] 4 | THIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THFloatTensor * crops 9 | ); 10 | 11 | void crop_and_resize_backward( 12 | THFloatTensor * grads, 13 | THFloatTensor * boxes, // [y1, x1, y2, x2] 14 | THIntTensor * box_index, // range in [0, batch_size) 15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "cuda/crop_and_resize_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | 7 | void crop_and_resize_gpu_forward( 8 | THCudaTensor * image, 9 | THCudaTensor * boxes, // [y1, x1, y2, x2] 10 | THCudaIntTensor * box_index, // range in [0, batch_size) 11 | const float extrapolation_value, 12 | const int crop_height, 13 | const int crop_width, 14 | THCudaTensor * crops 15 | ) { 16 | const int batch_size = THCudaTensor_size(state, image, 0); 17 | const int depth = THCudaTensor_size(state, image, 1); 18 | const int image_height = THCudaTensor_size(state, image, 2); 19 | const int image_width = THCudaTensor_size(state, image, 3); 20 | 21 | const int num_boxes = THCudaTensor_size(state, boxes, 0); 22 | 23 | // init output space 24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width); 25 | THCudaTensor_zero(state, crops); 26 | 27 | cudaStream_t stream = THCState_getCurrentStream(state); 28 | CropAndResizeLaucher( 29 | THCudaTensor_data(state, image), 30 | THCudaTensor_data(state, boxes), 31 | THCudaIntTensor_data(state, box_index), 32 | num_boxes, batch_size, image_height, image_width, 33 | crop_height, crop_width, depth, extrapolation_value, 34 | THCudaTensor_data(state, crops), 35 | stream 36 | ); 37 | } 38 | 39 | 40 | void crop_and_resize_gpu_backward( 41 | THCudaTensor * grads, 42 | THCudaTensor * boxes, // [y1, x1, y2, x2] 43 | THCudaIntTensor * box_index, // range in [0, batch_size) 44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 45 | ) { 46 | // shape 47 | const int 
batch_size = THCudaTensor_size(state, grads_image, 0); 48 | const int depth = THCudaTensor_size(state, grads_image, 1); 49 | const int image_height = THCudaTensor_size(state, grads_image, 2); 50 | const int image_width = THCudaTensor_size(state, grads_image, 3); 51 | 52 | const int num_boxes = THCudaTensor_size(state, grads, 0); 53 | const int crop_height = THCudaTensor_size(state, grads, 2); 54 | const int crop_width = THCudaTensor_size(state, grads, 3); 55 | 56 | // init output space 57 | THCudaTensor_zero(state, grads_image); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | CropAndResizeBackpropImageLaucher( 61 | THCudaTensor_data(state, grads), 62 | THCudaTensor_data(state, boxes), 63 | THCudaIntTensor_data(state, box_index), 64 | num_boxes, batch_size, image_height, image_width, 65 | crop_height, crop_width, depth, 66 | THCudaTensor_data(state, grads_image), 67 | stream 68 | ); 69 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_gpu_forward( 2 | THCudaTensor * image, 3 | THCudaTensor * boxes, // [y1, x1, y2, x2] 4 | THCudaIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THCudaTensor * crops 9 | ); 10 | 11 | void crop_and_resize_gpu_backward( 12 | THCudaTensor * grads, 13 | THCudaTensor * boxes, // [y1, x1, y2, x2] 14 | THCudaIntTensor * box_index, // range in [0, batch_size) 15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "crop_and_resize_kernel.h" 4 | 5 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 6 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 7 | i += blockDim.x * gridDim.x) 8 | 9 | 10 | __global__ 11 | void CropAndResizeKernel( 12 | const int nthreads, const float *image_ptr, const float *boxes_ptr, 13 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 14 | int image_width, int crop_height, int crop_width, int depth, 15 | float extrapolation_value, float *crops_ptr) 16 | { 17 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 18 | { 19 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 20 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 21 | int idx = out_idx; 22 | const int x = idx % crop_width; 23 | idx /= crop_width; 24 | const int y = idx % crop_height; 25 | idx /= crop_height; 26 | const int d = idx % depth; 27 | const int b = idx / depth; 28 | 29 | const float y1 = boxes_ptr[b * 4]; 30 | const float x1 = boxes_ptr[b * 4 + 1]; 31 | const float y2 = boxes_ptr[b * 4 + 2]; 32 | const float x2 = boxes_ptr[b * 4 + 3]; 33 | 34 | const int b_in = box_ind_ptr[b]; 35 | if (b_in < 0 || b_in >= batch) 36 | { 37 | continue; 38 | } 39 | 40 | const float height_scale = 41 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 42 | : 0; 43 | const float width_scale = 44 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 45 | 46 | const float in_y = (crop_height > 1) 47 | ? 
y1 * (image_height - 1) + y * height_scale 48 | : 0.5 * (y1 + y2) * (image_height - 1); 49 | if (in_y < 0 || in_y > image_height - 1) 50 | { 51 | crops_ptr[out_idx] = extrapolation_value; 52 | continue; 53 | } 54 | 55 | const float in_x = (crop_width > 1) 56 | ? x1 * (image_width - 1) + x * width_scale 57 | : 0.5 * (x1 + x2) * (image_width - 1); 58 | if (in_x < 0 || in_x > image_width - 1) 59 | { 60 | crops_ptr[out_idx] = extrapolation_value; 61 | continue; 62 | } 63 | 64 | const int top_y_index = floorf(in_y); 65 | const int bottom_y_index = ceilf(in_y); 66 | const float y_lerp = in_y - top_y_index; 67 | 68 | const int left_x_index = floorf(in_x); 69 | const int right_x_index = ceilf(in_x); 70 | const float x_lerp = in_x - left_x_index; 71 | 72 | const float *pimage = image_ptr + (b_in * depth + d) * image_height * image_width; 73 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 74 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 75 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 76 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 77 | 78 | const float top = top_left + (top_right - top_left) * x_lerp; 79 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; 80 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp; 81 | } 82 | } 83 | 84 | __global__ 85 | void CropAndResizeBackpropImageKernel( 86 | const int nthreads, const float *grads_ptr, const float *boxes_ptr, 87 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 88 | int image_width, int crop_height, int crop_width, int depth, 89 | float *grads_image_ptr) 90 | { 91 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 92 | { 93 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 94 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 95 | int idx = out_idx; 96 | const int x = idx % crop_width; 97 | idx /= crop_width; 98 | const int y = idx % crop_height; 99 | idx /= crop_height; 100 | const int d = idx % depth; 101 | const int b = idx / depth; 102 | 103 | const float y1 = boxes_ptr[b * 4]; 104 | const float x1 = boxes_ptr[b * 4 + 1]; 105 | const float y2 = boxes_ptr[b * 4 + 2]; 106 | const float x2 = boxes_ptr[b * 4 + 3]; 107 | 108 | const int b_in = box_ind_ptr[b]; 109 | if (b_in < 0 || b_in >= batch) 110 | { 111 | continue; 112 | } 113 | 114 | const float height_scale = 115 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 116 | : 0; 117 | const float width_scale = 118 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 119 | 120 | const float in_y = (crop_height > 1) 121 | ? y1 * (image_height - 1) + y * height_scale 122 | : 0.5 * (y1 + y2) * (image_height - 1); 123 | if (in_y < 0 || in_y > image_height - 1) 124 | { 125 | continue; 126 | } 127 | 128 | const float in_x = (crop_width > 1) 129 | ? 
x1 * (image_width - 1) + x * width_scale 130 | : 0.5 * (x1 + x2) * (image_width - 1); 131 | if (in_x < 0 || in_x > image_width - 1) 132 | { 133 | continue; 134 | } 135 | 136 | const int top_y_index = floorf(in_y); 137 | const int bottom_y_index = ceilf(in_y); 138 | const float y_lerp = in_y - top_y_index; 139 | 140 | const int left_x_index = floorf(in_x); 141 | const int right_x_index = ceilf(in_x); 142 | const float x_lerp = in_x - left_x_index; 143 | 144 | float *pimage = grads_image_ptr + (b_in * depth + d) * image_height * image_width; 145 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx]; 146 | atomicAdd( 147 | pimage + top_y_index * image_width + left_x_index, 148 | (1 - x_lerp) * dtop 149 | ); 150 | atomicAdd( 151 | pimage + top_y_index * image_width + right_x_index, 152 | x_lerp * dtop 153 | ); 154 | 155 | const float dbottom = y_lerp * grads_ptr[out_idx]; 156 | atomicAdd( 157 | pimage + bottom_y_index * image_width + left_x_index, 158 | (1 - x_lerp) * dbottom 159 | ); 160 | atomicAdd( 161 | pimage + bottom_y_index * image_width + right_x_index, 162 | x_lerp * dbottom 163 | ); 164 | } 165 | } 166 | 167 | 168 | void CropAndResizeLaucher( 169 | const float *image_ptr, const float *boxes_ptr, 170 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 171 | int image_width, int crop_height, int crop_width, int depth, 172 | float extrapolation_value, float *crops_ptr, cudaStream_t stream) 173 | { 174 | const int total_count = num_boxes * crop_height * crop_width * depth; 175 | const int thread_per_block = 1024; 176 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 177 | cudaError_t err; 178 | 179 | if (total_count > 0) 180 | { 181 | CropAndResizeKernel<<>>( 182 | total_count, image_ptr, boxes_ptr, 183 | box_ind_ptr, num_boxes, batch, image_height, image_width, 184 | crop_height, crop_width, depth, extrapolation_value, crops_ptr); 185 | 186 | err = cudaGetLastError(); 187 | if (cudaSuccess != err) 188 | { 189 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 190 | exit(-1); 191 | } 192 | } 193 | } 194 | 195 | 196 | void CropAndResizeBackpropImageLaucher( 197 | const float *grads_ptr, const float *boxes_ptr, 198 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 199 | int image_width, int crop_height, int crop_width, int depth, 200 | float *grads_image_ptr, cudaStream_t stream) 201 | { 202 | const int total_count = num_boxes * crop_height * crop_width * depth; 203 | const int thread_per_block = 1024; 204 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 205 | cudaError_t err; 206 | 207 | if (total_count > 0) 208 | { 209 | CropAndResizeBackpropImageKernel<<>>( 210 | total_count, grads_ptr, boxes_ptr, 211 | box_ind_ptr, num_boxes, batch, image_height, image_width, 212 | crop_height, crop_width, depth, grads_image_ptr); 213 | 214 | err = cudaGetLastError(); 215 | if (cudaSuccess != err) 216 | { 217 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 218 | exit(-1); 219 | } 220 | } 221 | } -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o -------------------------------------------------------------------------------- 
/lib/roi_align/src/cuda/crop_and_resize_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CropAndResize_Kernel 2 | #define _CropAndResize_Kernel 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void CropAndResizeLaucher( 9 | const float *image_ptr, const float *boxes_ptr, 10 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 11 | int image_width, int crop_height, int crop_width, int depth, 12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream); 13 | 14 | void CropAndResizeBackpropImageLaucher( 15 | const float *grads_ptr, const float *boxes_ptr, 16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 17 | int image_width, int crop_height, int crop_width, int depth, 18 | float *grads_image_ptr, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- /models/Model7.py: -------------------------------------------------------------------------------- 1 | """Model7 is for semantic embedding & attention. We replace the global classification with the semantic classification, 2 | thus applicable for textual grounding problem.""" 3 | 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import absolute_import 7 | import sys 8 | sys.path.insert(0, '/../') 9 | import torch 10 | import numpy as np 11 | import torch.nn as nn 12 | from lib.configure.config import Config 13 | from lib.resnet.resnet import resnet50 14 | from torch.autograd import Variable 15 | from lib.bilinear_pooling.CompactBilinearPooling import CompactBilinearPooling 16 | 17 | 18 | class Model7(nn.Module): 19 | 20 | def __init__(self, opts, body_pretrain=False): 21 | super(Model7, self).__init__() 22 | 23 | # Load pre-trained back-boned model 24 | print('==> Building backbone model...') 25 | config = Config() 26 | config.IMAGES_PER_GPU = opts.batch_size 27 | config.NUM_CLASSES = opts.class_num 28 | 29 | # Load Attribute module 30 | attr_branch = AttributeBranch(300) 31 | attr_res_net = resnet50(True, path='./checkpoint/AENet_clsfier_person_256d_4.pth', classnum=4) 32 | 33 | # Load semantic embeddings 34 | dictionary = {'man': [1, 0, 0.5, 0.5], 35 | 'woman': [0, 1, 0.5, 0.5], 36 | 'lady': [0, 1, 0.25, 0.75], 37 | 'female': [0, 1, 0.5, 0.5], 38 | 'boy': [1, 0, 1, 0], 39 | 'girl': [0, 1, 1, 0], 40 | 'kid': [0.5, 0.5, 1, 0], 41 | 'child': [0.5, 0.5, 1, 0], 42 | 'young': [0.5, 0.5, 1, 0], 43 | 'elderly': [0.5, 0.5, 0, 1]} 44 | for key in dictionary.keys(): 45 | dictionary[key] = np.asarray(dictionary[key]) 46 | 47 | # Freeze the attr-resnet model 48 | for param in attr_res_net.parameters(): 49 | param.requires_grad = False 50 | 51 | for param in attr_res_net.fc.parameters(): 52 | param.requires_grad = False 53 | 54 | # Freeze the attribute branch or not 55 | for param in attr_branch.parameters(): 56 | param.requires_grad = True 57 | 58 | self.attr_branch = attr_branch 59 | self.opts = opts 60 | self.attr_res_net = attr_res_net 61 | self.pool = nn.AvgPool2d(kernel_size=64, stride=1) 62 | self.sigmoid = nn.Sigmoid() 63 | self.regressor = nn.Linear(256, 4) 64 | self.semantic_layer = SemanticLayer(dictionary) 65 | 66 | def forward(self, img, label, embeddings): 67 | 68 | # Attribute Branch 69 | conv_feat4, conv_feat = self.attr_res_net(img) 70 | attr_map, att_conv_feature = self.attr_branch(conv_feat, embeddings) 71 | feat = self.pool(att_conv_feature) 72 | feat = 
self.regressor(feat.view(feat.shape[0], feat.shape[1])) 73 | output = self.semantic_layer(feat, label) 74 | return output, attr_map, att_conv_feature 75 | 76 | 77 | class AttributeBranch(nn.Module): 78 | 79 | def __init__(self, attr_num): 80 | super(AttributeBranch, self).__init__() 81 | 82 | self.textual_emb = nn.Linear(attr_num, 256) 83 | self.conv = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=True) 84 | self.mcb_attr = CompactBilinearPooling(256, 256, 256).cuda() 85 | self.mcb_conv1_attr = nn.Conv2d(256, 32, kernel_size=1, stride=1, padding=0, bias=True) 86 | self.mcb_relu1_attr = nn.ReLU(inplace=True) 87 | self.mcb_conv2_attr = nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0, bias=True) 88 | self.mcb_sigmoid = nn.Sigmoid() 89 | 90 | def forward(self, entity_feature, attr_one_hot): 91 | 92 | feature = self.mcb_relu1_attr(entity_feature) 93 | # Reshape the attribute one-hot input 94 | attr_one_hot = self.textual_emb(attr_one_hot) 95 | attr_one_hot = attr_one_hot.view(attr_one_hot.shape[0], attr_one_hot.shape[1], 1, 1) 96 | 97 | # Stacked attention-map generation for P3, P4, P5 98 | attr_one_hot = attr_one_hot.expand_as(feature) 99 | 100 | # Generate the attribute attention map and apply it to the entity features 101 | mcb_attr_feat = self.mcb_attr(self.conv(attr_one_hot), feature) 102 | attr_map = self.mcb_sigmoid(self.mcb_conv2_attr(self.mcb_relu1_attr(self.mcb_conv1_attr(mcb_attr_feat)))) 103 | attr_feature = (torch.mul(attr_map, entity_feature)) 104 | 105 | return attr_map, attr_feature 106 | 107 | 108 | class SemanticLayer(nn.Module): 109 | def __init__(self, dictionary): 110 | super(SemanticLayer, self).__init__() 111 | 112 | list_file = open('./others/low-level-attr.txt', 'r') 113 | entity_att = [] 114 | for i in list_file.readlines(): 115 | entity_att.append(i.replace('\n', '')) 116 | 117 | # Create the semantic matrix 118 | s_matrix = torch.zeros(10, 4).cuda() 119 | for index, item in enumerate(entity_att): 120 | emb = torch.from_numpy(dictionary[item]) 121 | s_matrix[index] = emb 122 | self.s_matrix = Variable(s_matrix) 123 | 124 | def forward(self, x, label): 125 | # x: (batch, 4) 126 | # label: (batch,) 127 | prob = Variable(torch.zeros(x.shape[0])) 128 | for index in range(x.shape[0]): 129 | lbl = label[index] 130 | prob[index] = torch.nn.functional.cosine_similarity(self.s_matrix, x[index].view(1, -1))[lbl] 131 | prob = prob.sum() / prob.shape[0] 132 | return 1-prob 133 | -------------------------------------------------------------------------------- /models/__pycache__/Model7.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/models/__pycache__/Model7.cpython-35.pyc -------------------------------------------------------------------------------- /others/README.md: -------------------------------------------------------------------------------- 1 | # coco_person_list.txt: 2 | The list of 12,000 images we extracted from coco_train_2017 for person attribute grounding. 3 | # low-level-attr.txt: 4 | The attribute dictionary. 5 | # glove.6B.300d.txt: 6 | The word embedding dictionary we use; it is the GloVe 6B version, and each embedding is 300-dimensional. 7 | # dictionary_emb.pkl: 8 | Instead of loading the whole word embedding file, we manually extract just the attribute word embeddings for faster embedding dictionary loading.
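For reference, the SemanticLayer above scores the regressed 4-d vector against a 10 x 4 semantic matrix whose rows follow others/low-level-attr.txt and whose entries come from the hand-written dictionary in Model7; the loss is one minus the average cosine similarity with the row of the sampled attribute. The following is a small, self-contained sketch of that loss with the matrix hard-coded. Reading the four columns as male/female/young/old scores is our interpretation, not something the code states, and the sketch is illustrative only, not a drop-in replacement for the module.

```python
import torch
import torch.nn.functional as F

# Rows follow others/low-level-attr.txt; values follow the dictionary in Model7.
ATTRS = ['man', 'woman', 'lady', 'female', 'boy', 'girl', 'kid', 'child', 'young', 'elderly']
S_MATRIX = torch.tensor([
    [1.0, 0.0, 0.5, 0.5],    # man
    [0.0, 1.0, 0.5, 0.5],    # woman
    [0.0, 1.0, 0.25, 0.75],  # lady
    [0.0, 1.0, 0.5, 0.5],    # female
    [1.0, 0.0, 1.0, 0.0],    # boy
    [0.0, 1.0, 1.0, 0.0],    # girl
    [0.5, 0.5, 1.0, 0.0],    # kid
    [0.5, 0.5, 1.0, 0.0],    # child
    [0.5, 0.5, 1.0, 0.0],    # young
    [0.5, 0.5, 0.0, 1.0],    # elderly
])


def semantic_loss(pred, labels, s_matrix=S_MATRIX):
    """pred: (batch, 4) regressed vectors; labels: (batch,) attribute indices.
    Returns 1 - mean cosine similarity to the matching dictionary rows."""
    target = s_matrix[labels]                        # (batch, 4)
    sims = F.cosine_similarity(pred, target, dim=1)  # (batch,)
    return 1.0 - sims.mean()


# Example: a prediction close to the 'boy' row gives a small loss.
pred = torch.tensor([[0.9, 0.1, 0.8, 0.1]])
print(semantic_loss(pred, torch.tensor([4])))  # index 4 == 'boy'
```

Because cosine similarity ignores magnitude, only the direction of the regressed 4-d vector matters for this loss.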
9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /others/dictionary_emb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/others/dictionary_emb.pkl -------------------------------------------------------------------------------- /others/low-level-attr.txt: -------------------------------------------------------------------------------- 1 | man 2 | woman 3 | lady 4 | female 5 | boy 6 | girl 7 | kid 8 | child 9 | young 10 | elderly 11 | -------------------------------------------------------------------------------- /parser.py: -------------------------------------------------------------------------------- 1 | '''Train Sun Attribute with PyTorch.''' 2 | from __future__ import print_function 3 | 4 | import torch 5 | import argparse 6 | import torch.optim as optim 7 | 8 | 9 | def parse_opts(): 10 | parser = argparse.ArgumentParser(description='PyTorch Attribute Grounding Training') 11 | parser.add_argument('--msg', default=False, type=bool, help='display message') 12 | parser.add_argument('--use_gpu', default=torch.cuda.is_available(), type=bool, help='Use GPU or not') 13 | parser.add_argument('--multi_gpu', default=(torch.cuda.device_count() > 0), type=bool, help='Use multi-GPU or not') 14 | parser.add_argument('--gpu_id', default=-1, type=int, help='Use specific GPU.') 15 | 16 | parser.add_argument('--optimizer', default=optim.SGD, help='optimizer') 17 | parser.add_argument('--num_workers', default=2, type=int, help='num of fetching threads') 18 | parser.add_argument('--batch_size', default=12, type=int, help='batch size') 19 | parser.add_argument('--weight_decay', default=1e-3, type=float, help='weight decay') 20 | parser.add_argument('--seed', default=0, type=int, help='random seed') 21 | parser.add_argument('--result_path', default='./results', help='result path') 22 | 23 | # Define the training parameters 24 | parser.add_argument('--class_num', default=5, type=int, help='number of classes') 25 | parser.add_argument('--checkpoint_epoch', default=2, type=int, help='epochs between checkpoint saves') 26 | parser.add_argument('--lr_adjust_epoch', default=5, type=int, help='epochs between lr adjustments') 27 | parser.add_argument('--n_epoch', default=1000, type=int, help='training epochs') 28 | parser.add_argument('--lr', default=0.01, type=float, help='learning rate') 29 | 30 | # Define the checkpoint reloading path 31 | parser.add_argument('--resume', default='', help='path of the checkpoint to resume from') 32 | 33 | # Define the data_set path 34 | parser.add_argument('--img_path', default='/media/drive1/Data/coco17/train2017/', help='coco_train_2017 path') 35 | parser.add_argument('--annotation', default='/media/drive1/Data/coco17/annotations/' 36 | 'captions_train2017.json', help='coco_train_2017 annotation path') 37 | parser.add_argument('--dictionary', default='./others/low-level-attr.txt', help='dict of attributes') 38 | args = parser.parse_args() 39 | 40 | return args 41 | 42 | -------------------------------------------------------------------------------- /results/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/results/architecture.png -------------------------------------------------------------------------------- /results/test.log:
-------------------------------------------------------------------------------- 1 | epoch time loss 2 | -------------------------------------------------------------------------------- /results/train.log: -------------------------------------------------------------------------------- 1 | epoch time loss 2 | -------------------------------------------------------------------------------- /results/train_batch.log: -------------------------------------------------------------------------------- 1 | epoch batch loss 2 | 1 2 0.1857217252254486 3 | 1 3 0.20769339799880981 4 | 1 4 0.2339950054883957 5 | 1 5 0.24028053283691406 6 | 1 6 0.24673599004745483 7 | 1 7 0.24947353771754674 8 | 1 8 0.24981582164764404 9 | 1 9 0.24003050724665323 10 | 1 10 0.23302733302116393 11 | 1 11 0.22751894864169034 12 | 1 12 0.22201103965441385 13 | -------------------------------------------------------------------------------- /runs/Oct05_13-58-18_apg395-001/events.out.tfevents.1538773098.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_13-58-18_apg395-001/events.out.tfevents.1538773098.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-08-13_apg395-001/events.out.tfevents.1538773693.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-13_apg395-001/events.out.tfevents.1538773693.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-08-27_apg395-001/events.out.tfevents.1538773707.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-27_apg395-001/events.out.tfevents.1538773707.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-08-58_apg395-001/events.out.tfevents.1538773738.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-58_apg395-001/events.out.tfevents.1538773738.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-17-30_apg395-001/events.out.tfevents.1538774250.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-17-30_apg395-001/events.out.tfevents.1538774250.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-17-42_apg395-001/events.out.tfevents.1538774262.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-17-42_apg395-001/events.out.tfevents.1538774262.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-18-03_apg395-001/events.out.tfevents.1538774283.apg395-001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-18-03_apg395-001/events.out.tfevents.1538774283.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-18-55_apg395-001/events.out.tfevents.1538774335.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-18-55_apg395-001/events.out.tfevents.1538774335.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-19-46_apg395-001/events.out.tfevents.1538774386.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-19-46_apg395-001/events.out.tfevents.1538774386.apg395-001 -------------------------------------------------------------------------------- /train_attr_attention_embedding.py: -------------------------------------------------------------------------------- 1 | '''Train unsuperwised entity grounding by attention+pixel classification mechanism.''' 2 | from __future__ import print_function 3 | 4 | import random 5 | import pickle 6 | from parser import * 7 | import matplotlib.pyplot as plt 8 | from models.Model7 import Model7 9 | from lib.configure.net_util import * 10 | from torchvision import transforms 11 | from tensorboardX import SummaryWriter 12 | from lib.dataset.coco_dataset import CocoCaptions 13 | 14 | 15 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152 16 | # os.environ["CUDA_VISIBLE_DEVICES"]="1" 17 | 18 | def l2_regulariza_loss(map): 19 | # return torch.mean(map.view(map.shape[0], map.shape[-2], map.shape[-1])) 20 | mean = torch.mean(map.view(map.shape[0], map.shape[-2], map.shape[-1])) 21 | return mean 22 | 23 | 24 | def load_dictionary(name): 25 | with open('./others/' + name + '.pkl', 'rb') as f: 26 | return pickle.load(f) 27 | 28 | 29 | # Randomly pick a label from multi one-hot label 30 | def random_pick(one_hot): 31 | # return a randomly selected label 32 | label = torch.zeros(one_hot.shape[0]) 33 | one_hot_return = torch.zeros_like(one_hot) 34 | 35 | for i in range(one_hot.shape[0]): 36 | # all labels to save all the labels 37 | all_labels = [] 38 | count = 0 39 | for j in range(one_hot.shape[1]): 40 | if one_hot[i][j] == 1.: 41 | all_labels.append(count) 42 | count += 1 43 | # randomly picking one label 44 | if len(all_labels) != 0: 45 | label[i] = random.choice(all_labels) 46 | else: 47 | label[i] = 2 48 | one_hot_return[i][int(label[i])] = 1 49 | return label, one_hot_return 50 | 51 | 52 | # Multi-Pixel embedding learning for multi-category picking 53 | def top_k_emb(visual_emb, model, label, single_attribute_label, K=100): 54 | # Given pixel-wise features, select top-k pixels with highest category prob out for 55 | # multi-cross entropy learning 56 | # Visual-features: (batch, emb #, pixel #) 57 | # Returning prob: (batch #, top-K, class_prob) 58 | # Returning feat: (batch #, top-K, feature_size) 59 | visual_emb = visual_emb.view((visual_emb.shape[0], visual_emb.shape[1], visual_emb.shape[2]*visual_emb.shape[3])) 60 | 61 | # i: batch number 62 | for i in range(visual_emb.shape[0]): 63 | sorting = np.zeros((visual_emb.shape[2])) 64 | # j: pixel numbers in feature maps 65 | for j in range(visual_emb.shape[2]): 66 | # extracting pixel features 
and reshape 67 | emb = visual_emb[i, :, j] 68 | # emb = F.relu(model.fc_p5(emb.contiguous().view(1, -1))) 69 | emb_ = (emb.contiguous().view(1, -1)) 70 | output = model.attr_res_net.fc(emb_) 71 | prob = opts.criterion[0](output, single_attribute_label[i]) 72 | opts.prob_set[j] = output[0] 73 | opts.features_set[j] = emb 74 | sorting[j] = prob.data.cpu().numpy()[0] 75 | 76 | # Arg-sort the probability (and inverse the order) 77 | sorting = np.argsort(sorting)[0:K] 78 | 79 | # index: number of top-K 80 | for index in range(K): 81 | opts.return_prob[i, index] = opts.prob_set[int(sorting[index])] 82 | opts.return_feat[i, index] = opts.features_set[int(sorting[index])] 83 | return opts.return_feat 84 | 85 | 86 | def train_net(net, opts): 87 | 88 | print('training at epoch {}'.format(opts.epoch+1)) 89 | 90 | if opts.use_gpu: 91 | net.cuda() 92 | 93 | net.train(True) 94 | train_loss = 0 95 | total_time = 0 96 | batch_idx = 0 97 | optimizer = opts.current_optimizer 98 | # back_bone_optimizer = opts.backbone_optimizer 99 | end_time = time.time() 100 | train_back_bone = True 101 | fig = plt.figure() 102 | 103 | # category: semantic labels for single selected label 104 | # s_entity_one_hot: randomly selected entity one-hot 105 | # s_entity_label: randomly selected entity label 106 | # att_emb: word2vec embedding for attributes 107 | # att_label: attributes pairs for margin loss learning 108 | # attr_one_hot: all attributes one-hot 109 | # textual_emb: phrase embedding 110 | # phrase/line: phrases/lines in NLP format 111 | # mask: ground truth annotations for object 112 | for batch_idx, (images, attr_one_hot, entity_one_hot) in enumerate(data_loader): 113 | 114 | # model.visual_net.config.IMAGES_PER_GPU = images.size(0) 115 | images = Variable(images).cuda() 116 | 117 | # Randomly pick one attribute per iteration 118 | single_attribute_label, single_attribute_one_hot = random_pick(attr_one_hot) 119 | attr_one_hot = Variable(single_attribute_one_hot).cuda().float() 120 | single_attribute_label = Variable(single_attribute_label).cuda().long() 121 | 122 | # Create embeddings input 123 | embeddings = Variable(torch.zeros(attr_one_hot.shape[0], 300)) 124 | for index, item in enumerate(single_attribute_label): 125 | i = opts.entity_att[item.data.cpu().numpy()[0]] 126 | embeddings[index] = Variable(torch.from_numpy(opts.embeddings_index[i])).cuda() 127 | 128 | # Feed in network 129 | y, attr_map, att_conv_feature = net(images, single_attribute_label, embeddings) 130 | 131 | loss = y 132 | 133 | if train_back_bone: 134 | optimizer.zero_grad() 135 | train_loss += loss.data[0] 136 | loss.backward() 137 | optimizer.step() 138 | 139 | # Display the generated att_map and instant loss 140 | if batch_idx % 1 == 0: 141 | plt.ion() 142 | plt.show() 143 | random = randint(0, opts.batch_size - 1) 144 | if batch_idx % 1 == 0: 145 | # Print out the attribute labels 146 | # plt.suptitle(opts.entity_att[int(single_attribute_label[random])]) 147 | plt.subplot(141) 148 | vis = torch.nn.functional.sigmoid((model.attr_res_net.fc.weight[0].view(-1, 1, 1) 149 | * att_conv_feature[random]).sum(0)).cpu().data.numpy() 150 | plt.imshow(vis) 151 | 152 | plt.subplot(142) 153 | vis = torch.nn.functional.sigmoid((model.attr_res_net.fc.weight[1].view(-1, 1, 1) 154 | * att_conv_feature[random]).sum(0)).cpu().data.numpy() 155 | plt.imshow(vis) 156 | 157 | plt.subplot(143) 158 | plt.imshow(attr_map[random, 0].data.cpu().numpy()) 159 | 160 | plt.subplot(144) 161 | plt.imshow(images[random].permute(1, 2, 0).float().data.cpu()) 162 | 
plt.pause(0.001) 163 | writer.add_scalar('Cross Entropy Loss', train_loss / (batch_idx+1), opts.iter_n) 164 | opts.iter_n += 1 165 | 166 | print('Overall Loss: %.8f' 167 | % (train_loss/(batch_idx+1))) 168 | 169 | total_time += (time.time() - end_time) 170 | end_time = time.time() 171 | batch_idx += 1 172 | 173 | opts.train_batch_logger.log({ 174 | 'epoch': (opts.epoch+1), 175 | 'batch': batch_idx+1, 176 | 'loss': train_loss / (batch_idx+1), 177 | }) 178 | 179 | if batch_idx % 100 == 0: 180 | print('100 batch.') 181 | # Save checkpoint. 182 | net_states = { 183 | 'state_dict': net.state_dict(), 184 | 'epoch': opts.epoch + 1, 185 | 'loss': opts.train_losses, 186 | 'optimizer': opts.current_optimizer.state_dict() 187 | } 188 | epo_batch = str(opts.epoch) + '-' + str(batch_idx) 189 | save_file_path = os.path.join(opts.checkpoint_path, 190 | 'Model7_exp1_{}.pth'.format(epo_batch)) 191 | torch.save(net_states, save_file_path) 192 | opts.lr /= 2 193 | opts.regularization /= 2 194 | params = filter(lambda p: p.requires_grad, model.parameters()) 195 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay) 196 | train_loss /= (batch_idx + 1) 197 | 198 | opts.train_epoch_logger.log({ 199 | 'epoch': (opts.epoch+1), 200 | 'loss': train_loss, 201 | 'time': total_time, 202 | }) 203 | 204 | opts.train_losses.append(train_loss) 205 | 206 | # Save checkpoint. 207 | net_states = { 208 | 'state_dict': net.state_dict(), 209 | 'epoch': opts.epoch + 1, 210 | 'loss': opts.train_losses, 211 | 'optimizer': opts.current_optimizer.state_dict() 212 | } 213 | 214 | if opts.epoch % opts.checkpoint_epoch == 0: 215 | save_file_path = os.path.join(opts.checkpoint_path, 'Model7_exp1_{}.pth'.format(opts.epoch)) 216 | torch.save(net_states, save_file_path) 217 | 218 | print('Batch Loss: %.8f, elapsed time: %3.f seconds.' 
% (train_loss, total_time)) 219 | 220 | 221 | if __name__ == '__main__': 222 | 223 | opts = parse_opts() 224 | writer = SummaryWriter() 225 | 226 | if opts.gpu_id >= 0: 227 | torch.cuda.set_device(opts.gpu_id) 228 | opts.multi_gpu = False 229 | 230 | torch.manual_seed(opts.seed) 231 | if opts.use_gpu: 232 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 233 | torch.cuda.manual_seed(opts.seed) 234 | 235 | # Loading Data 236 | print("Preparing Flickr data set...") 237 | opts.k = 600 238 | opts.ite = 0 239 | opts.regularization = 0.1 240 | size = (1024, 1024) 241 | feat_size = (64, 64) 242 | transform = transforms.Compose([transforms.Resize(size), transforms.ToTensor()]) 243 | data_set = CocoCaptions(opts.img_path, opts.annotation, transform) 244 | data_loader = torch.utils.data.DataLoader(data_set, batch_size=opts.batch_size, shuffle=True) 245 | 246 | # Load dictionary 247 | list_file = open(opts.dictionary, 'r') 248 | entity_att = [] 249 | for i in list_file.readlines(): 250 | entity_att.append(i.replace('\n', '')) 251 | opts.entity_att = entity_att 252 | 253 | # Load semantic embeddings 254 | embeddings_index = load_dictionary('dictionary_emb') 255 | print('Dictionary loaded.') 256 | opts.embeddings_index = embeddings_index 257 | 258 | if not os.path.exists(opts.result_path): 259 | os.mkdir(opts.result_path) 260 | 261 | opts.train_epoch_logger = Logger(os.path.join(opts.result_path, 'train.log'), 262 | ['epoch', 'time', 'loss']) 263 | opts.train_batch_logger = Logger(os.path.join(opts.result_path, 'train_batch.log'), 264 | ['epoch', 'batch', 'loss']) 265 | opts.test_epoch_logger = Logger(os.path.join(opts.result_path, 'test.log'), 266 | ['epoch', 'time', 'loss']) 267 | 268 | # Model 269 | print('==> Building model...') 270 | model = Model7(opts) 271 | 272 | # Load Back bone Module 273 | if opts.resume: 274 | state_dict = torch.load(opts.resume)['state_dict'] 275 | new_params = model.state_dict() 276 | new_params.update(state_dict) 277 | # Remove the extra keys 278 | model_keys = model.state_dict().keys() 279 | for name, param in list(new_params.items()): 280 | if name not in model_keys: 281 | del new_params[name] 282 | model.load_state_dict(new_params) 283 | start_epoch = 0 284 | print('==> model built.') 285 | opts.criterion = [torch.nn.CrossEntropyLoss(), torch.nn.MSELoss()] 286 | 287 | # Training 288 | parameters = filter(lambda p: p.requires_grad, model.parameters()) 289 | params = sum([np.prod(p.size()) for p in parameters]) 290 | print(params, 'trainable parameters in the network.') 291 | set_parameters(opts) 292 | opts.iter_n = 0 293 | 294 | for epoch in range(start_epoch, start_epoch+opts.n_epoch): 295 | opts.epoch = epoch 296 | if epoch is 0: 297 | params = filter(lambda p: p.requires_grad, model.parameters()) 298 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay) 299 | 300 | elif (epoch % opts.lr_adjust_epoch) == 0 and epoch is not 0: 301 | opts.lr /= 5 302 | params = filter(lambda p: p.requires_grad, model.parameters()) 303 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay) 304 | 305 | train_net(model, opts) 306 | 307 | # export scalar data to JSON for external processing 308 | writer.export_scalars_to_json("./all_scalars.json") 309 | writer.close() 310 | --------------------------------------------------------------------------------
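Two pieces of train_attr_attention_embedding.py are worth restating compactly. First, random_pick draws one active attribute per image from the multi-hot vector and falls back to index 2 when no attribute is set. A tensor-based sketch of the same behaviour is shown below; it targets a newer PyTorch API than this repository uses, so treat it as illustrative rather than a replacement.

```python
import torch


def random_pick_compact(one_hot):
    """Sample one active attribute index per row of a multi-hot (batch, num_attrs)
    tensor. Rows with no active attribute fall back to index 2, as in random_pick."""
    labels = torch.zeros(one_hot.shape[0], dtype=torch.long)
    selected = torch.zeros_like(one_hot)
    for i in range(one_hot.shape[0]):
        active = torch.nonzero(one_hot[i] == 1.).view(-1)
        if active.numel() > 0:
            labels[i] = active[torch.randint(active.numel(), (1,))].item()
        else:
            labels[i] = 2
        selected[i, labels[i]] = 1
    return labels, selected
```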
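Second, the --resume branch keeps only the checkpoint entries whose keys exist in the current model before calling load_state_dict, which allows partially matching checkpoints to be reloaded. A stand-alone sketch of that pattern follows; it assumes the checkpoint layout {'state_dict': ...} saved by train_net, and the helper name load_partial_checkpoint is ours.

```python
import torch


def load_partial_checkpoint(model, checkpoint_path):
    """Load a checkpoint saved as {'state_dict': ...} and keep only the keys
    present in the current model, mirroring the --resume handling above."""
    state_dict = torch.load(checkpoint_path, map_location='cpu')['state_dict']
    merged = model.state_dict()
    merged.update({k: v for k, v in state_dict.items() if k in merged})
    model.load_state_dict(merged)
    return model
```

The same idea also covers the reverse case, where the checkpoint contains extra keys (for example from a larger backbone) that the current model does not define.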