├── .idea
└── vcs.xml
├── Demo.ipynb
├── README.md
├── __pycache__
└── parser.cpython-35.pyc
├── checkpoint
└── ReadMe
├── demo_attr.png
├── lib
├── __init__.py
├── __init__.pyc
├── __pycache__
│ ├── __init__.cpython-35.pyc
│ └── nms_wrapper.cpython-35.pyc
├── bilinear_pooling
│ ├── CompactBilinearPooling.py
│ └── __pycache__
│ │ └── CompactBilinearPooling.cpython-35.pyc
├── configure
│ ├── __pycache__
│ │ ├── config.cpython-35.pyc
│ │ └── net_util.cpython-35.pyc
│ ├── config.py
│ └── net_util.py
├── dataset
│ ├── __pycache__
│ │ └── coco_dataset.cpython-35.pyc
│ └── coco_dataset.py
├── make.sh
├── nms
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── __pycache__
│ │ ├── __init__.cpython-35.pyc
│ │ └── pth_nms.cpython-35.pyc
│ ├── _ext
│ │ ├── __init__.py
│ │ ├── __init__.pyc
│ │ ├── __pycache__
│ │ │ └── __init__.cpython-35.pyc
│ │ └── nms
│ │ │ ├── __init__.py
│ │ │ ├── __init__.pyc
│ │ │ ├── __pycache__
│ │ │ └── __init__.cpython-35.pyc
│ │ │ └── _nms.so
│ ├── build.py
│ ├── pth_nms.py
│ ├── pth_nms.pyc
│ └── src
│ │ ├── cuda
│ │ ├── nms_kernel.cu
│ │ ├── nms_kernel.cu.o
│ │ └── nms_kernel.h
│ │ ├── nms.c
│ │ ├── nms.h
│ │ ├── nms_cuda.c
│ │ └── nms_cuda.h
├── nms_wrapper.py
├── nms_wrapper.pyc
├── pytorch_fft
│ ├── __init__.py
│ ├── __pycache__
│ │ └── __init__.cpython-35.pyc
│ ├── _ext
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ └── __init__.cpython-35.pyc
│ │ └── th_fft
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ └── __init__.cpython-35.pyc
│ │ │ └── _th_fft.so
│ ├── fft
│ │ ├── __init__.py
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-35.pyc
│ │ │ ├── autograd.cpython-35.pyc
│ │ │ └── fft.cpython-35.pyc
│ │ ├── autograd.py
│ │ └── fft.py
│ └── src
│ │ ├── generic
│ │ ├── helpers.c
│ │ ├── th_fft_cuda.c
│ │ ├── th_fft_cuda.h
│ │ ├── th_irfft_cuda.c
│ │ └── th_rfft_cuda.c
│ │ ├── th_fft_cuda.c
│ │ ├── th_fft_cuda.h
│ │ ├── th_fft_generate_double.h
│ │ ├── th_fft_generate_float.h
│ │ └── th_fft_generate_helpers.h
├── resnet
│ ├── __pycache__
│ │ └── resnet.cpython-35.pyc
│ └── resnet.py
└── roi_align
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── __pycache__
│ ├── __init__.cpython-35.pyc
│ └── crop_and_resize.cpython-35.pyc
│ ├── _ext
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── __pycache__
│ │ └── __init__.cpython-35.pyc
│ └── crop_and_resize
│ │ ├── __init__.py
│ │ ├── __init__.pyc
│ │ ├── __pycache__
│ │ └── __init__.cpython-35.pyc
│ │ └── _crop_and_resize.so
│ ├── build.py
│ ├── crop_and_resize.py
│ ├── crop_and_resize.pyc
│ ├── roi_align.py
│ ├── roi_align.pyc
│ └── src
│ ├── crop_and_resize.c
│ ├── crop_and_resize.h
│ ├── crop_and_resize_gpu.c
│ ├── crop_and_resize_gpu.h
│ └── cuda
│ ├── crop_and_resize_kernel.cu
│ ├── crop_and_resize_kernel.cu.o
│ └── crop_and_resize_kernel.h
├── models
├── Model7.py
└── __pycache__
│ └── Model7.cpython-35.pyc
├── others
├── README.md
├── coco_person_list.txt
├── dictionary_emb.pkl
└── low-level-attr.txt
├── parser.py
├── results
├── architecture.png
├── test.log
├── train.log
└── train_batch.log
├── runs
├── Oct05_13-58-18_apg395-001
│ └── events.out.tfevents.1538773098.apg395-001
├── Oct05_14-08-13_apg395-001
│ └── events.out.tfevents.1538773693.apg395-001
├── Oct05_14-08-27_apg395-001
│ └── events.out.tfevents.1538773707.apg395-001
├── Oct05_14-08-58_apg395-001
│ └── events.out.tfevents.1538773738.apg395-001
├── Oct05_14-17-30_apg395-001
│ └── events.out.tfevents.1538774250.apg395-001
├── Oct05_14-17-42_apg395-001
│ └── events.out.tfevents.1538774262.apg395-001
├── Oct05_14-18-03_apg395-001
│ └── events.out.tfevents.1538774283.apg395-001
├── Oct05_14-18-55_apg395-001
│ └── events.out.tfevents.1538774335.apg395-001
└── Oct05_14-19-46_apg395-001
│ └── events.out.tfevents.1538774386.apg395-001
└── train_attr_attention_embedding.py
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Visual Cues Grounding Through Weak Supervision
2 |
3 | PyTorch implementation of **[Modularized Textual Grounding for Counterfactual Resilience
4 | ](http://openaccess.thecvf.com/content_CVPR_2019/papers/Fang_Modularized_Textual_Grounding_for_Counterfactual_Resilience_CVPR_2019_paper.pdf)** , CVPR 2019.
5 |
6 | Qualitative grounding results can be found on our **[webpage](http://www.public.asu.edu/~zfang29/textual_grounding_cvpr2019/website.html)**.
7 |
8 | ## Introduction
9 | We propose a cross-modal grounding method through weak supervision.
10 |
11 | 
12 |
13 | A demonstration of how to load the model and ground attributes can be found in Demo.ipynb.
14 |
15 | Image --> 'Boy' Attribute --> 'Lady' Attribute
16 |
17 |
18 |
19 |
20 | ## Requirements
21 | 1. PyTorch 0.4.
22 | 2. Python 3.6.
23 | 3. FFT package.
24 |
25 | ## Dataset
26 | Weakly trained on both COCO and Flickr 30k.
27 |
28 | ## Usage
29 | Training script for attribute grounding: train_attr_attention_embedding.py
30 |
31 | Attention model for attribute grounding, based on a ResNet-50 network pre-trained for person gender/age classification:
32 | /models/Model7.py
33 |
34 | /lib contains all the necessary dependencies for our framework; it consists of:
35 |
36 |
37 | - bilinear pooling module: Implemented following Compact Bilinear Pooling. A Fast Fourier Transform package is needed before use; install it by running the command below (a minimal usage sketch follows this list):
38 |
pip3 install pytorch_fft
39 |
40 | - resnet: We modified the last fully connected layer from 2048-d to 256-d for a more compact representation.
41 | - nms/roi_align module: Not necessary at this time (used for entity grounding and bbox detection).
42 |
43 |
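A minimal usage sketch of the bundled pooling layer (it mirrors the `__main__` block in lib/bilinear_pooling/CompactBilinearPooling.py; a CUDA device and the pytorch_fft package installed above are assumed):

    import torch
    from torch.autograd import Variable
    from lib.bilinear_pooling.CompactBilinearPooling import CompactBilinearPooling

    # Two 2048-d conv feature maps pooled into a 16000-d compact bilinear feature map.
    bottom1 = Variable(torch.randn(4, 2048, 7, 7)).cuda()
    bottom2 = Variable(torch.randn(4, 2048, 7, 7)).cuda()
    layer = CompactBilinearPooling(2048, 2048, 16000).cuda()
    out = layer(bottom1, bottom2)  # shape [4, 16000, 7, 7] with the default sum_pool=False
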
44 | In order to re-train our framework, a few things need to be modified in:
45 | parser.py
46 |
47 | In parser.py, img_path/annotations need to point to your local coco_2017_train directory:
48 | /path/to/your/local/coco17/image path/annotations/
49 |
50 | The resume argument is for loading a pre-trained overall model.
51 |
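For example, the entries to change will look roughly like the sketch below (argument names and defaults are illustrative; the real ones are defined in parser.py). Training is then launched with python3 train_attr_attention_embedding.py.

    import argparse

    # Illustrative sketch only -- check parser.py for the actual argument names.
    parser = argparse.ArgumentParser()
    parser.add_argument('--img_path', default='/path/to/coco17/train2017/')
    parser.add_argument('--annotations', default='/path/to/coco17/annotations/captions_train2017.json')
    parser.add_argument('--resume', default=None, help='path to a pre-trained overall model checkpoint')
    args = parser.parse_args()
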
52 | ## Download
53 | To download the pre-trained unsupervised network:
54 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/__pycache__/parser.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/__pycache__/parser.cpython-35.pyc
--------------------------------------------------------------------------------
/checkpoint/ReadMe:
--------------------------------------------------------------------------------
1 | This directory contains the pretrained model.
2 |
--------------------------------------------------------------------------------
/demo_attr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/demo_attr.png
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__init__.py
--------------------------------------------------------------------------------
/lib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__init__.pyc
--------------------------------------------------------------------------------
/lib/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/__pycache__/nms_wrapper.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__pycache__/nms_wrapper.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/bilinear_pooling/CompactBilinearPooling.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0, '/../../')
3 | import numpy as np
4 | import torch
5 | from torch import nn
6 | from torch.autograd import Variable
7 |
8 | import lib.pytorch_fft.fft.autograd as afft
9 |
10 |
11 | class CompactBilinearPooling(nn.Module):
12 | """
13 | Compute compact bilinear pooling over two bottom inputs.
14 | Args:
15 | output_dim: output dimension for compact bilinear pooling.
16 | sum_pool: (Optional) If True, sum the output along height and width
17 | dimensions and return output shape [batch_size, output_dim].
18 | Otherwise return [batch_size, height, width, output_dim].
19 | Default: True.
20 | rand_h_1: (Optional) an 1D numpy array containing indices in interval
21 | `[0, output_dim)`. Automatically generated from `seed_h_1`
22 | if is None.
23 | rand_s_1: (Optional) an 1D numpy array of 1 and -1, having the same shape
24 | as `rand_h_1`. Automatically generated from `seed_s_1` if is
25 | None.
26 | rand_h_2: (Optional) an 1D numpy array containing indices in interval
27 | `[0, output_dim)`. Automatically generated from `seed_h_2`
28 | if is None.
29 | rand_s_2: (Optional) an 1D numpy array of 1 and -1, having the same shape
30 | as `rand_h_2`. Automatically generated from `seed_s_2` if is
31 | None.
32 | """
33 |
34 | def __init__(self, input_dim1, input_dim2, output_dim,
35 | sum_pool=False, cuda=True,
36 | rand_h_1=None, rand_s_1=None, rand_h_2=None, rand_s_2=None):
37 | super(CompactBilinearPooling, self).__init__()
38 | self.input_dim1 = input_dim1
39 | self.input_dim2 = input_dim2
40 | self.output_dim = output_dim
41 | self.sum_pool = sum_pool
42 |
43 | if rand_h_1 is None:
44 | np.random.seed(1)
45 | rand_h_1 = np.random.randint(output_dim, size=self.input_dim1)
46 | if rand_s_1 is None:
47 | np.random.seed(3)
48 | rand_s_1 = 2 * np.random.randint(2, size=self.input_dim1) - 1
49 |
50 | sparse_sketch_matrix1 = Variable(self.generate_sketch_matrix(
51 | rand_h_1, rand_s_1, self.output_dim))
52 |
53 | if rand_h_2 is None:
54 | np.random.seed(5)
55 | rand_h_2 = np.random.randint(output_dim, size=self.input_dim2)
56 | if rand_s_2 is None:
57 | np.random.seed(7)
58 | rand_s_2 = 2 * np.random.randint(2, size=self.input_dim2) - 1
59 |
60 | sparse_sketch_matrix2 = Variable(self.generate_sketch_matrix(
61 | rand_h_2, rand_s_2, self.output_dim))
62 | self.register_buffer("sparse_sketch_matrix1", sparse_sketch_matrix1)
63 | self.register_buffer("sparse_sketch_matrix2", sparse_sketch_matrix2)
64 |
65 |
66 | def forward(self, bottom1, bottom2):
67 | """
68 | bottom1: 1st input, 4D Tensor of shape [batch_size, input_dim1, height, width].
69 | bottom2: 2nd input, 4D Tensor of shape [batch_size, input_dim2, height, width].
70 | """
71 | assert bottom1.size(1) == self.input_dim1 and \
72 | bottom2.size(1) == self.input_dim2
73 |
74 |
75 | batch_size, _, height, width = bottom1.size()
76 |
77 | bottom1_flat = bottom1.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim1)
78 | bottom2_flat = bottom2.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim2)
79 |
80 | sketch_1 = bottom1_flat.mm(self.sparse_sketch_matrix1)
81 | sketch_2 = bottom2_flat.mm(self.sparse_sketch_matrix2)
82 |
83 | fft1_real, fft1_imag = afft.Fft()(sketch_1, Variable(torch.zeros(sketch_1.size())).cuda())
84 | fft2_real, fft2_imag = afft.Fft()(sketch_2, Variable(torch.zeros(sketch_2.size())).cuda())
85 |
86 | fft_product_real, fft_product_imag = fft1_real.mul(fft2_real), fft1_imag.mul(fft2_imag)
87 |
88 | cbp_flat = afft.Ifft()(fft_product_real, fft_product_imag)[0]
89 |
90 | cbp = cbp_flat.view(batch_size, height, width, self.output_dim)
91 |
92 | if self.sum_pool:
93 | cbp = cbp.sum(dim=1).sum(dim=1)
94 |
95 | return cbp.permute(0, 3, 1, 2)
96 |
97 | @staticmethod
98 | def generate_sketch_matrix(rand_h, rand_s, output_dim):
99 | """
100 | Return a sparse matrix used for tensor sketch operation in compact bilinear
101 | pooling
102 | Args:
103 | rand_h: an 1D numpy array containing indices in interval `[0, output_dim)`.
104 | rand_s: an 1D numpy array of 1 and -1, having the same shape as `rand_h`.
105 | output_dim: the output dimensions of compact bilinear pooling.
106 | Returns:
107 | a sparse matrix of shape [input_dim, output_dim] for tensor sketch.
108 | """
109 |
110 | # Generate a sparse matrix for tensor count sketch
111 | rand_h = rand_h.astype(np.int64)
112 | rand_s = rand_s.astype(np.float32)
113 | assert(rand_h.ndim == 1 and rand_s.ndim ==
114 | 1 and len(rand_h) == len(rand_s))
115 | assert(np.all(rand_h >= 0) and np.all(rand_h < output_dim))
116 |
117 | input_dim = len(rand_h)
118 | indices = np.concatenate((np.arange(input_dim)[..., np.newaxis],
119 | rand_h[..., np.newaxis]), axis=1)
120 | indices = torch.from_numpy(indices)
121 | rand_s = torch.from_numpy(rand_s)
122 | sparse_sketch_matrix = torch.sparse.FloatTensor(
123 | indices.t(), rand_s, torch.Size([input_dim, output_dim]))
124 | return sparse_sketch_matrix.to_dense()
125 |
126 |
127 | if __name__ == '__main__':
128 |
129 | bottom1 = Variable(torch.randn(48, 2048, 7, 7)).cuda()
130 | bottom2 = Variable(torch.randn(48, 2048, 7, 7)).cuda()
131 |
132 | layer = CompactBilinearPooling(2048, 2048, 16000)
133 | layer.cuda()
134 | layer.train()
135 | out = layer(bottom1, bottom2)
--------------------------------------------------------------------------------
/lib/bilinear_pooling/__pycache__/CompactBilinearPooling.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/bilinear_pooling/__pycache__/CompactBilinearPooling.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/configure/__pycache__/config.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/configure/__pycache__/config.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/configure/__pycache__/net_util.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/configure/__pycache__/net_util.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/configure/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Base Configurations class.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import math
11 |
12 | import numpy as np
13 |
14 | # Base Configuration Class
15 | # Don't use this class directly. Instead, sub-class it and override
16 | # the configurations you need to change.
17 |
18 |
19 | class Config(object):
20 | """Base configuration class. For custom configurations, create a
21 | sub-class that inherits from this one and override properties
22 | that need to be changed.
23 | """
24 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
25 | # Useful if your code needs to do things differently depending on which
26 | # experiment is running.
27 | NAME = None # Override in sub-classes
28 |
29 | # NUMBER OF GPUs to use. For CPU training, use 1
30 | GPU_COUNT = 1
31 |
32 | # Number of images to train with on each GPU. A 12GB GPU can typically
33 | # handle 2 images of 1024x1024px.
34 | # Adjust based on your GPU memory and image sizes. Use the highest
35 | # number that your GPU can handle for best performance.
36 | IMAGES_PER_GPU = 2
37 |
38 | # Number of training steps per epoch
39 | # This doesn't need to match the size of the training set. Tensorboard
40 | # updates are saved at the end of each epoch, so setting this to a
41 | # smaller number means getting more frequent TensorBoard updates.
42 | # Validation stats are also calculated at each epoch end and they
43 | # might take a while, so don't set this too small to avoid spending
44 | # a lot of time on validation stats.
45 | STEPS_PER_EPOCH = 1000
46 |
47 | # Number of validation steps to run at the end of every training epoch.
48 | # A bigger number improves accuracy of validation stats, but slows
49 | # down the training.
50 | VALIDATION_STEPS = 50
51 |
52 | # The strides of each layer of the FPN Pyramid. These values
53 | # are based on a Resnet101 backbone.
54 | BACKBONE_STRIDES = [4, 8, 16, 16, 16]
55 |
56 | # Number of classification classes (including background)
57 | NUM_CLASSES = 1 # Override in sub-classes
58 |
59 | # Length of square anchor side in pixels
60 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 256)
61 |
62 | # Ratios of anchors at each cell (width/height)
63 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor
64 | RPN_ANCHOR_RATIOS = [0.5, 1, 2]
65 |
66 | # Anchor stride
67 | # If 1 then anchors are created for each cell in the backbone feature map.
68 | # If 2, then anchors are created for every other cell, and so on.
69 | RPN_ANCHOR_STRIDE = 1
70 |
71 | # Non-max suppression threshold to filter RPN proposals.
72 | # You can reduce this during training to generate more proposals.
73 | RPN_NMS_THRESHOLD = 0.7
74 |
75 | # How many anchors per image to use for RPN training
76 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256
77 |
78 | # ROIs kept after non-maximum suppression (training and inference)
79 | POST_NMS_ROIS_TRAINING = 500
80 | POST_NMS_ROIS_INFERENCE = 500
81 |
82 | # If enabled, re-sizes instance masks to a smaller size to reduce
83 | # memory load. Recommended when using high-resolution images.
84 | USE_MINI_MASK = True
85 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask
86 |
87 | # Input image resizing
88 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
89 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
90 | # be satisfied together the IMAGE_MAX_DIM is enforced.
91 | IMAGE_MIN_DIM = 800
92 | IMAGE_MAX_DIM = 1024
93 | # If True, pad images with zeros such that they're (max_dim by max_dim)
94 | IMAGE_PADDING = True # currently, the False option is not supported
95 |
96 | # Image mean (RGB)
97 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
98 |
99 | # Number of ROIs per image to feed to classifier/mask heads
100 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate
101 | # enough positive proposals to fill this and keep a positive:negative
102 | # ratio of 1:3. You can increase the number of proposals by adjusting
103 | # the RPN NMS threshold.
104 | TRAIN_ROIS_PER_IMAGE = 200
105 |
106 | # Percent of positive ROIs used to train classifier/mask heads
107 | ROI_POSITIVE_RATIO = 0.33
108 |
109 | # Pooled ROIs
110 | POOL_SIZE = 7
111 | MASK_POOL_SIZE = 14
112 | MASK_SHAPE = [128, 128]
113 |
114 | # Maximum number of ground truth instances to use in one image
115 | MAX_GT_INSTANCES = 100
116 |
117 | # Bounding box refinement standard deviation for RPN and final detections.
118 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
119 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
120 |
121 | # Max number of final detections
122 | DETECTION_MAX_INSTANCES = 100
123 |
124 | # Minimum probability value to accept a detected instance
125 | # ROIs below this threshold are skipped
126 | DETECTION_MIN_CONFIDENCE = 0.7
127 |
128 | # Non-maximum suppression threshold for detection
129 | DETECTION_NMS_THRESHOLD = 0.3
130 |
131 | # Learning rate and momentum
132 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
133 | # weights to explode. Likely due to differences in optimizer
134 | # implementation.
135 | LEARNING_RATE = 0.001
136 | LEARNING_MOMENTUM = 0.9
137 |
138 | # Weight decay regularization
139 | WEIGHT_DECAY = 0.0001
140 |
141 | # Use RPN ROIs or externally generated ROIs for training
142 | # Keep this True for most situations. Set to False if you want to train
143 | # the head branches on ROI generated by code rather than the ROIs from
144 | # the RPN. For example, to debug the classifier head without having to
145 | # train the RPN.
146 | USE_RPN_ROIS = True
147 |
148 | def __init__(self):
149 | """Set values of computed attributes."""
150 | # Effective batch size
151 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
152 |
153 | # Input image size
154 | self.IMAGE_SHAPE = np.array(
155 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
156 |
157 | # Compute backbone size from input image size
158 | self.BACKBONE_SHAPES = np.array(
159 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
160 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
161 | for stride in self.BACKBONE_STRIDES])
162 |
163 | def display(self):
164 | """Display Configuration values."""
165 | print("\nConfigurations:")
166 | for a in dir(self):
167 | if not a.startswith("__") and not callable(getattr(self, a)):
168 | print("{:30} {}".format(a, getattr(self, a)))
169 | print("\n")
170 |
--------------------------------------------------------------------------------
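The Config docstring above says to subclass the base class rather than edit it; a minimal sketch of such a subclass (the class name and overridden values are illustrative, not taken from this repo):

    from lib.configure.config import Config

    class CocoPersonConfig(Config):
        """Illustrative subclass: override only the fields that differ from the base Config."""
        NAME = 'coco_person'
        GPU_COUNT = 1
        IMAGES_PER_GPU = 2
        NUM_CLASSES = 1 + 80  # background + 80 COCO classes

    config = CocoPersonConfig()   # __init__ derives BATCH_SIZE, IMAGE_SHAPE and BACKBONE_SHAPES
    config.display()              # prints every configuration value

--------------------------------------------------------------------------------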
/lib/configure/net_util.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 | import time
5 | import csv
6 | import torch
7 | import numpy as np
8 | from random import randint
9 | from torch.autograd import Variable
10 | from torch.utils.data.sampler import SubsetRandomSampler
11 |
12 |
13 | def set_parameters(opts):
14 | '''
15 | This function is called before training/testing to set parameters
16 | :param opts:
17 | :return opts:
18 | '''
19 |
20 | if not opts.__contains__('train_losses'):
21 | opts.train_losses=[]
22 |
23 | if not opts.__contains__('train_accuracies'):
24 | opts.train_accuracies = []
25 |
26 | if not opts.__contains__('valid_losses'):
27 | opts.valid_losses = []
28 | if not opts.__contains__('valid_accuracies'):
29 | opts.valid_accuracies = []
30 |
31 | if not opts.__contains__('test_losses'):
32 | opts.test_losses = []
33 |
34 | if not opts.__contains__('test_accuracies'):
35 | opts.test_accuracies = []
36 |
37 | if not opts.__contains__('best_acc'):
38 | opts.best_acc = 0.0
39 |
40 | if not opts.__contains__('lowest_loss'):
41 | opts.lowest_loss = 1e4
42 |
43 | if not opts.__contains__('checkpoint_path'):
44 | opts.checkpoint_path = 'checkpoint'
45 |
46 | if not os.path.exists(opts.checkpoint_path):
47 | os.mkdir(opts.checkpoint_path)
48 |
49 | if not opts.__contains__('checkpoint_epoch'):
50 | opts.checkpoint_epoch = 5
51 |
52 | if not opts.__contains__('valid_pearson_r'):
53 | opts.valid_pearson_r = []
54 |
55 | if not opts.__contains__('test_pearson_r'):
56 | opts.test_pearson_r = []
57 |
58 |
59 | class Logger(object):
60 | def __init__(self, path, header):
61 | self.log_file = open(path, 'w')
62 | self.logger = csv.writer(self.log_file, delimiter='\t')
63 |
64 | self.logger.writerow(header)
65 | self.header = header
66 |
67 | def __del__(self):
68 | self.log_file.close()
69 |
70 | def log(self, values):
71 | write_values = []
72 | for col in self.header:
73 | assert col in values
74 | write_values.append(values[col])
75 |
76 | self.logger.writerow(write_values)
77 | self.log_file.flush()
78 |
79 |
80 |
--------------------------------------------------------------------------------
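A short sketch of how set_parameters and Logger above are typically wired together (the opts container, log path, and column names are illustrative):

    from argparse import Namespace
    from lib.configure.net_util import set_parameters, Logger

    opts = Namespace()      # illustrative container; parser.py builds the real options object
    set_parameters(opts)    # fills in bookkeeping defaults and creates the checkpoint/ directory

    logger = Logger('results/train.log', ['epoch', 'loss', 'acc'])
    logger.log({'epoch': 1, 'loss': 0.42, 'acc': 0.81})   # one tab-separated row per call

--------------------------------------------------------------------------------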
/lib/dataset/__pycache__/coco_dataset.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/dataset/__pycache__/coco_dataset.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/dataset/coco_dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import nltk
3 | import os.path
4 | import numpy as np
5 | from PIL import Image
6 | import torch.utils.data as data
7 | from pycocotools.coco import COCO
8 | from nltk.stem import WordNetLemmatizer
9 |
10 |
11 | class CocoCaptions(data.Dataset):
12 | """`MS Coco Captions `_ Dataset.
13 | Args:
14 | root (string): Root directory where images are downloaded to.
15 | annFile (string): Path to json annotation file.
16 | transform (callable, optional): A function/transform that takes in an PIL image
17 | and returns a transformed version. E.g, ``transforms.ToTensor``
18 | target_transform (callable, optional): A function/transform that takes in the
19 | target and transforms it.
20 | Example:
21 | .. code:: python
22 | import torchvision.datasets as dset
23 | import torchvision.transforms as transforms
24 | cap = dset.CocoCaptions(root = 'dir where images are',
25 | annFile = 'json annotation file',
26 | transform=transforms.ToTensor())
27 | print('Number of samples: ', len(cap))
28 | img, target = cap[3] # load 4th sample
29 | print("Image Size: ", img.size())
30 | print(target)
31 | Output: ::
32 | Number of samples: 82783
33 | Image Size: (3L, 427L, 640L)
34 | [u'A plane emitting smoke stream flying over a mountain.',
35 | u'A plane darts across a bright blue sky behind a mountain covered in snow',
36 | u'A plane leaves a contrail above the snowy mountain top.',
37 | u'A mountain that has a plane flying overheard in the distance.',
38 | u'A mountain view with a plume of smoke in the background']
39 | """
40 |
41 | def __init__(self, root, annFile, transform=None, target_transform=None, embed=False):
42 |
43 | # Load COCO image IDs
44 | list_file = open('./others/coco_person_list.txt', 'r')
45 | ids = []
46 | for i in list_file.readlines():
47 | ids.append(int(i.replace('\n', '')))
48 |
49 | # Load entity-attribute dictionary
50 | att_dict = []
51 | ent_dict = []
52 | list_file = open('./others/low-level-attr.txt', 'r')
53 | for i in list_file.readlines():
54 | att_dict.append(i.replace('\n', ''))
55 |
56 | ent_dict = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
57 | 'bus', 'train', 'truck', 'boat', 'traffic light',
58 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
59 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
60 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
61 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
62 | 'kite', 'baseball bat', 'baseball glove', 'skateboard',
63 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
64 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
65 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
66 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
67 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
68 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
69 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
70 | 'teddy bear', 'hair drier', 'toothbrush']
71 |
72 | self.ids = ids
73 | self.embed = embed
74 | self.coco = COCO(annFile)
75 | self.transform = transform
76 | self.att_dict = att_dict
77 | self.ent_dict = ent_dict
78 | self.root = os.path.expanduser(root)
79 | self.target_transform = target_transform
80 |
81 | def __getitem__(self, index):
82 | """
83 | Args:
84 | index (int): Index
85 | Returns:
86 | tuple: Tuple (image, target). target is a list of captions for the image.
87 | """
88 | coco = self.coco
89 | img_id = self.ids[index]
90 | ann_ids = coco.getAnnIds(imgIds=img_id)
91 | anns = coco.loadAnns(ann_ids)
92 | target = [ann['caption'] for ann in anns]
93 |
94 | path = coco.loadImgs(img_id)[0]['file_name']
95 |
96 | img = Image.open(os.path.join(self.root, path)).convert('RGB')
97 | if self.transform is not None:
98 | img = self.transform(img)
99 |
100 | if self.target_transform is not None:
101 | target = self.target_transform(target)
102 |
103 | img = np.asarray(img)
104 | att_lable = np.zeros(10)
105 | ent_lable = np.zeros(81)
106 |
107 | for sentence in target:
108 | words = nltk.pos_tag([item for item in sentence.replace('.', ' ').split(' ') if len(item) > 0])
109 | for item in words:
110 | word = item[0].lower()
111 | word = WordNetLemmatizer().lemmatize(word)
112 |
113 | # att = item[1]
114 | if word in self.att_dict:
115 | att_id = self.att_dict.index(word)
116 | att_lable[att_id] = 1
117 | if word in self.ent_dict:
118 | ent_id = self.ent_dict.index(word)
119 | ent_lable[ent_id] = 1
120 |
121 | return img, att_lable, ent_lable
122 |
123 | def __len__(self):
124 | return len(self.ids)
125 |
126 |
127 | # if __name__ == '__main__':
128 | # size = (512, 512)
129 | # img_path = '/media/drive1/Data/coco17/train2017/'
130 | # json = '/media/drive1/Data/coco17/annotations/captions_train2017.json'
131 | # coco = COCO(json)
132 | # transform = transforms.Compose([transforms.Resize(size), transforms.ToTensor()])
133 | # data_set = CocoCaptions(img_path, json, transform)
134 | # data_loader = torch.utils.data.DataLoader(data_set, batch_size=1, shuffle=False)
135 | #
136 | # img_ids = []
137 | # count = 1
138 | # for index, (img, target) in enumerate(data_loader):
139 | # print(target)
140 |
--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CUDA_PATH=/usr/local/cuda
4 |
5 | echo "Compiling crop_and_resize kernels by nvcc..."
6 | cd roi_align/src/cuda
7 | $CUDA_PATH/bin/nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61
8 |
9 | cd ../../
10 | python3 build.py
11 |
12 | cd ../
13 | echo "Compiling nms kernels by nvcc..."
14 |
15 | cd nms/src/cuda
16 | $CUDA_PATH/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61
17 |
18 | cd ../../
19 | python3 build.py
20 |
21 |
--------------------------------------------------------------------------------
/lib/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__init__.py
--------------------------------------------------------------------------------
/lib/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__init__.pyc
--------------------------------------------------------------------------------
/lib/nms/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/nms/__pycache__/pth_nms.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__pycache__/pth_nms.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/nms/_ext/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__init__.pyc
--------------------------------------------------------------------------------
/lib/nms/_ext/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._nms import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/nms/_ext/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/__init__.pyc
--------------------------------------------------------------------------------
/lib/nms/_ext/nms/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/nms/_ext/nms/_nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/_nms.so
--------------------------------------------------------------------------------
/lib/nms/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/nms.c']
7 | headers = ['src/nms.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/nms_cuda.c']
14 | headers += ['src/nms_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/nms_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 |
23 | ffi = create_extension(
24 | '_ext.nms',
25 | headers=headers,
26 | sources=sources,
27 | define_macros=defines,
28 | relative_to=__file__,
29 | with_cuda=with_cuda,
30 | extra_objects=extra_objects
31 | )
32 |
33 | if __name__ == '__main__':
34 | ffi.build()
35 |
--------------------------------------------------------------------------------
/lib/nms/pth_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from ._ext import nms
3 | import numpy as np
4 |
5 | def pth_nms(dets, thresh):
6 | """
7 | dets has to be a tensor
8 | """
9 | if not dets.is_cuda:
10 | x1 = dets[:, 0]
11 | y1 = dets[:, 1]
12 | x2 = dets[:, 2]
13 | y2 = dets[:, 3]
14 | scores = dets[:, 4]
15 |
16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
17 | order = scores.sort(0, descending=True)[1]
18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long()
19 |
20 | keep = torch.LongTensor(dets.size(0))
21 | num_out = torch.LongTensor(1)
22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh)
23 |
24 | return keep[:num_out[0]]
25 | else:
26 | x1 = dets[:, 0]
27 | y1 = dets[:, 1]
28 | x2 = dets[:, 2]
29 | y2 = dets[:, 3]
30 | scores = dets[:, 4]
31 |
32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
33 | order = scores.sort(0, descending=True)[1]
34 |
35 | dets = dets[order].contiguous()
36 |
37 | keep = torch.LongTensor(dets.size(0))
38 | num_out = torch.LongTensor(1)
39 |
40 | nms.gpu_nms(keep, num_out, dets, thresh)
41 |
42 | return order[keep[:num_out[0]].cuda()].contiguous()
43 |
44 |
--------------------------------------------------------------------------------
/lib/nms/pth_nms.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/pth_nms.pyc
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | #include <stdbool.h>
12 | #include <stdio.h>
13 | #include <math.h>
14 | #include "nms_kernel.h"
15 |
16 | __device__ inline float devIoU(float const * const a, float const * const b) {
17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f);
20 | float interS = width * height;
21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
23 | return interS / (Sa + Sb - interS);
24 | }
25 |
26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
27 | const float *dev_boxes, unsigned long long *dev_mask) {
28 | const int row_start = blockIdx.y;
29 | const int col_start = blockIdx.x;
30 |
31 | // if (row_start > col_start) return;
32 |
33 | const int row_size =
34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
35 | const int col_size =
36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
37 |
38 | __shared__ float block_boxes[threadsPerBlock * 5];
39 | if (threadIdx.x < col_size) {
40 | block_boxes[threadIdx.x * 5 + 0] =
41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
42 | block_boxes[threadIdx.x * 5 + 1] =
43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
44 | block_boxes[threadIdx.x * 5 + 2] =
45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
46 | block_boxes[threadIdx.x * 5 + 3] =
47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
48 | block_boxes[threadIdx.x * 5 + 4] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
50 | }
51 | __syncthreads();
52 |
53 | if (threadIdx.x < row_size) {
54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
55 | const float *cur_box = dev_boxes + cur_box_idx * 5;
56 | int i = 0;
57 | unsigned long long t = 0;
58 | int start = 0;
59 | if (row_start == col_start) {
60 | start = threadIdx.x + 1;
61 | }
62 | for (i = start; i < col_size; i++) {
63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
64 | t |= 1ULL << i;
65 | }
66 | }
67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
68 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
69 | }
70 | }
71 |
72 |
73 | void _nms(int boxes_num, float * boxes_dev,
74 | unsigned long long * mask_dev, float nms_overlap_thresh) {
75 |
76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
77 | DIVUP(boxes_num, threadsPerBlock));
78 | dim3 threads(threadsPerBlock);
79 | nms_kernel<<<blocks, threads>>>(boxes_num,
80 | nms_overlap_thresh,
81 | boxes_dev,
82 | mask_dev);
83 | }
84 |
85 | #ifdef __cplusplus
86 | }
87 | #endif
88 |
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/src/cuda/nms_kernel.cu.o
--------------------------------------------------------------------------------
/lib/nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _NMS_KERNEL
2 | #define _NMS_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
9 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
10 |
11 | void _nms(int boxes_num, float * boxes_dev,
12 | unsigned long long * mask_dev, float nms_overlap_thresh);
13 |
14 | #ifdef __cplusplus
15 | }
16 | #endif
17 |
18 | #endif
19 |
20 |
--------------------------------------------------------------------------------
/lib/nms/src/nms.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <math.h>
3 |
4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) {
5 | // boxes has to be sorted
6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous");
7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous");
8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous");
9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous");
10 | // Number of ROIs
11 | long boxes_num = THFloatTensor_size(boxes, 0);
12 | long boxes_dim = THFloatTensor_size(boxes, 1);
13 |
14 | long * keep_out_flat = THLongTensor_data(keep_out);
15 | float * boxes_flat = THFloatTensor_data(boxes);
16 | long * order_flat = THLongTensor_data(order);
17 | float * areas_flat = THFloatTensor_data(areas);
18 |
19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num);
20 | THByteTensor_fill(suppressed, 0);
21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed);
22 |
23 | // nominal indices
24 | int i, j;
25 | // sorted indices
26 | int _i, _j;
27 | // temp variables for box i's (the box currently under consideration)
28 | float ix1, iy1, ix2, iy2, iarea;
29 | // variables for computing overlap with box j (lower scoring box)
30 | float xx1, yy1, xx2, yy2;
31 | float w, h;
32 | float inter, ovr;
33 |
34 | long num_to_keep = 0;
35 | for (_i=0; _i < boxes_num; ++_i) {
36 | i = order_flat[_i];
37 | if (suppressed_flat[i] == 1) {
38 | continue;
39 | }
40 | keep_out_flat[num_to_keep++] = i;
41 | ix1 = boxes_flat[i * boxes_dim];
42 | iy1 = boxes_flat[i * boxes_dim + 1];
43 | ix2 = boxes_flat[i * boxes_dim + 2];
44 | iy2 = boxes_flat[i * boxes_dim + 3];
45 | iarea = areas_flat[i];
46 | for (_j = _i + 1; _j < boxes_num; ++_j) {
47 | j = order_flat[_j];
48 | if (suppressed_flat[j] == 1) {
49 | continue;
50 | }
51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]);
52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]);
53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]);
54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]);
55 | w = fmaxf(0.0, xx2 - xx1 + 1);
56 | h = fmaxf(0.0, yy2 - yy1 + 1);
57 | inter = w * h;
58 | ovr = inter / (iarea + areas_flat[j] - inter);
59 | if (ovr >= nms_overlap_thresh) {
60 | suppressed_flat[j] = 1;
61 | }
62 | }
63 | }
64 |
65 | long *num_out_flat = THLongTensor_data(num_out);
66 | *num_out_flat = num_to_keep;
67 | THByteTensor_free(suppressed);
68 | return 1;
69 | }
--------------------------------------------------------------------------------
/lib/nms/src/nms.h:
--------------------------------------------------------------------------------
1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | #include <THC/THC.h>
8 | #include <TH/TH.h>
9 | #include <math.h>
10 | #include <stdio.h>
11 |
12 | #include "cuda/nms_kernel.h"
13 |
14 |
15 | extern THCState *state;
16 |
17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) {
18 | // boxes has to be sorted
19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous");
20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous");
21 | // Number of ROIs
22 | int boxes_num = THCudaTensor_size(state, boxes, 0);
23 | int boxes_dim = THCudaTensor_size(state, boxes, 1);
24 |
25 | float* boxes_flat = THCudaTensor_data(state, boxes);
26 |
27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks);
29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);
30 |
31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh);
32 |
33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks);
34 | THLongTensor_copyCuda(state, mask_cpu, mask);
35 | THCudaLongTensor_free(state, mask);
36 |
37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);
38 |
39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks);
40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);
41 | THLongTensor_fill(remv_cpu, 0);
42 |
43 | long * keep_flat = THLongTensor_data(keep);
44 | long num_to_keep = 0;
45 |
46 | int i, j;
47 | for (i = 0; i < boxes_num; i++) {
48 | int nblock = i / threadsPerBlock;
49 | int inblock = i % threadsPerBlock;
50 |
51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) {
52 | keep_flat[num_to_keep++] = i;
53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks;
54 | for (j = nblock; j < col_blocks; j++) {
55 | remv_cpu_flat[j] |= p[j];
56 | }
57 | }
58 | }
59 |
60 | long * num_out_flat = THLongTensor_data(num_out);
61 | * num_out_flat = num_to_keep;
62 |
63 | THLongTensor_free(mask_cpu);
64 | THLongTensor_free(remv_cpu);
65 |
66 | return 1;
67 | }
68 |
--------------------------------------------------------------------------------
/lib/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/lib/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | from lib.nms.pth_nms import pth_nms
12 |
13 |
14 | def nms(dets, thresh):
15 | """Dispatch to either CPU or GPU NMS implementations.
16 | Accept dets as tensor"""
17 | return pth_nms(dets, thresh)
18 |
--------------------------------------------------------------------------------
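A hedged usage sketch of the wrapper above, assuming the nms extension has been built with lib/make.sh. dets is an [N, 5] tensor whose columns are x1, y1, x2, y2, score, as read by pth_nms (the boxes below are made up):

    import torch
    from lib.nms_wrapper import nms

    # Columns: x1, y1, x2, y2, score. The first two boxes overlap heavily.
    dets = torch.FloatTensor([[ 10.,  10., 100., 100., 0.9],
                              [ 12.,  12., 102., 102., 0.8],
                              [200., 200., 300., 300., 0.7]])
    keep = nms(dets, 0.7)   # indices of the boxes that survive suppression
    print(dets[keep])       # the 0.8-score box is suppressed by the 0.9-score one

--------------------------------------------------------------------------------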
/lib/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms_wrapper.pyc
--------------------------------------------------------------------------------
/lib/pytorch_fft/__init__.py:
--------------------------------------------------------------------------------
1 | from . import fft
--------------------------------------------------------------------------------
/lib/pytorch_fft/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/pytorch_fft/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/pytorch_fft/_ext/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/pytorch_fft/_ext/th_fft/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._th_fft import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/pytorch_fft/_ext/th_fft/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/th_fft/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/pytorch_fft/_ext/th_fft/_th_fft.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/th_fft/_th_fft.so
--------------------------------------------------------------------------------
/lib/pytorch_fft/fft/__init__.py:
--------------------------------------------------------------------------------
1 | from .fft import *
2 | from .autograd import *
--------------------------------------------------------------------------------
/lib/pytorch_fft/fft/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/pytorch_fft/fft/__pycache__/autograd.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/autograd.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/pytorch_fft/fft/__pycache__/fft.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/fft.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/pytorch_fft/fft/autograd.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from .fft import fft,ifft,fft2,ifft2,fft3,ifft3,rfft,irfft,rfft2,irfft2,rfft3,irfft3
3 |
4 | def make_contiguous(*Xs):
5 | return tuple(X if X.is_contiguous() else X.contiguous() for X in Xs)
6 |
7 | def contiguous_clone(X):
8 | if X.is_contiguous():
9 | return X.clone()
10 | else:
11 | return X.contiguous()
12 |
13 | class Fft(torch.autograd.Function):
14 | def forward(self, X_re, X_im):
15 | X_re, X_im = make_contiguous(X_re, X_im)
16 | return fft(X_re, X_im)
17 |
18 | def backward(self, grad_output_re, grad_output_im):
19 | grad_output_re, grad_output_im = make_contiguous(grad_output_re,
20 | grad_output_im)
21 | gi, gr = fft(grad_output_im,grad_output_re)
22 | return gr,gi
23 |
24 |
25 | class Ifft(torch.autograd.Function):
26 |
27 | def forward(self, k_re, k_im):
28 | k_re, k_im = make_contiguous(k_re, k_im)
29 | return ifft(k_re, k_im)
30 |
31 | def backward(self, grad_output_re, grad_output_im):
32 | grad_output_re, grad_output_im = make_contiguous(grad_output_re,
33 | grad_output_im)
34 | gi, gr = ifft(grad_output_im,grad_output_re)
35 | return gr, gi
36 |
37 |
38 | class Fft2d(torch.autograd.Function):
39 | def forward(self, X_re, X_im):
40 | X_re, X_im = make_contiguous(X_re, X_im)
41 | return fft2(X_re, X_im)
42 |
43 | def backward(self, grad_output_re, grad_output_im):
44 | grad_output_re, grad_output_im = make_contiguous(grad_output_re,
45 | grad_output_im)
46 | gi, gr = fft2(grad_output_im,grad_output_re)
47 | return gr,gi
48 |
49 |
50 | class Ifft2d(torch.autograd.Function):
51 |
52 | def forward(self, k_re, k_im):
53 | k_re, k_im = make_contiguous(k_re, k_im)
54 | return ifft2(k_re, k_im)
55 |
56 | def backward(self, grad_output_re, grad_output_im):
57 | grad_output_re, grad_output_im = make_contiguous(grad_output_re,
58 | grad_output_im)
59 | gi, gr = ifft2(grad_output_im,grad_output_re)
60 | return gr, gi
61 |
62 |
63 | class Fft3d(torch.autograd.Function):
64 | def forward(self, X_re, X_im):
65 | X_re, X_im = make_contiguous(X_re, X_im)
66 | return fft3(X_re, X_im)
67 |
68 | def backward(self, grad_output_re, grad_output_im):
69 | grad_output_re, grad_output_im = make_contiguous(grad_output_re,
70 | grad_output_im)
71 | gi, gr = fft3(grad_output_im,grad_output_re)
72 | return gr,gi
73 |
74 |
75 | class Ifft3d(torch.autograd.Function):
76 |
77 | def forward(self, k_re, k_im):
78 | k_re, k_im = make_contiguous(k_re, k_im)
79 | return ifft3(k_re, k_im)
80 |
81 | def backward(self, grad_output_re, grad_output_im):
82 | grad_output_re, grad_output_im = make_contiguous(grad_output_re,
83 | grad_output_im)
84 | gi, gr = ifft3(grad_output_im,grad_output_re)
85 | return gr, gi
86 |
87 |
88 | class Rfft(torch.autograd.Function):
89 | def forward(self, X_re):
90 | X_re = X_re.contiguous()
91 | self._to_save_input_size = X_re.size(-1)
92 | return rfft(X_re)
93 |
94 | def backward(self, grad_output_re, grad_output_im):
95 | # Clone the array and make contiguous if needed
96 | grad_output_re = contiguous_clone(grad_output_re)
97 | grad_output_im = contiguous_clone(grad_output_im)
98 |
99 | if self._to_save_input_size & 1:
100 | grad_output_re[...,1:] /= 2
101 | else:
102 | grad_output_re[...,1:-1] /= 2
103 |
104 | if self._to_save_input_size & 1:
105 | grad_output_im[...,1:] /= 2
106 | else:
107 | grad_output_im[...,1:-1] /= 2
108 |
109 | gr = irfft(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False)
110 | return gr
111 |
112 |
113 | class Irfft(torch.autograd.Function):
114 |
115 | def forward(self, k_re, k_im):
116 | k_re, k_im = make_contiguous(k_re, k_im)
117 | return irfft(k_re, k_im)
118 |
119 | def backward(self, grad_output_re):
120 | grad_output_re = grad_output_re.contiguous()
121 | gr, gi = rfft(grad_output_re)
122 |
123 | N = grad_output_re.size(-1)
124 | gr[...,0] /= N
125 | gr[...,1:-1] /= N/2
126 | gr[...,-1] /= N
127 |
128 | gi[...,0] /= N
129 | gi[...,1:-1] /= N/2
130 | gi[...,-1] /= N
131 | return gr, gi
132 |
133 |
134 | class Rfft2d(torch.autograd.Function):
135 | def forward(self, X_re):
136 | X_re = X_re.contiguous()
137 | self._to_save_input_size = X_re.size(-1)
138 | return rfft2(X_re)
139 |
140 | def backward(self, grad_output_re, grad_output_im):
141 | # Clone the array and make contiguous if needed
142 | grad_output_re = contiguous_clone(grad_output_re)
143 | grad_output_im = contiguous_clone(grad_output_im)
144 |
145 | if self._to_save_input_size & 1:
146 | grad_output_re[...,1:] /= 2
147 | else:
148 | grad_output_re[...,1:-1] /= 2
149 |
150 | if self._to_save_input_size & 1:
151 | grad_output_im[...,1:] /= 2
152 | else:
153 | grad_output_im[...,1:-1] /= 2
154 |
155 | gr = irfft2(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False)
156 | return gr
157 |
158 |
159 | class Irfft2d(torch.autograd.Function):
160 |
161 | def forward(self, k_re, k_im):
162 | k_re, k_im = make_contiguous(k_re, k_im)
163 | return irfft2(k_re, k_im)
164 |
165 | def backward(self, grad_output_re):
166 | grad_output_re = grad_output_re.contiguous()
167 | gr, gi = rfft2(grad_output_re)
168 |
169 | N = grad_output_re.size(-1) * grad_output_re.size(-2)
170 | gr[...,0] /= N
171 | gr[...,1:-1] /= N/2
172 | gr[...,-1] /= N
173 |
174 | gi[...,0] /= N
175 | gi[...,1:-1] /= N/2
176 | gi[...,-1] /= N
177 | return gr, gi
178 |
179 |
180 | class Rfft3d(torch.autograd.Function):
181 | def forward(self, X_re):
182 | X_re = X_re.contiguous()
183 | self._to_save_input_size = X_re.size(-1)
184 | return rfft3(X_re)
185 |
186 | def backward(self, grad_output_re, grad_output_im):
187 | # Clone the array and make contiguous if needed
188 | grad_output_re = contiguous_clone(grad_output_re)
189 | grad_output_im = contiguous_clone(grad_output_im)
190 |
191 | if self._to_save_input_size & 1:
192 | grad_output_re[...,1:] /= 2
193 | else:
194 | grad_output_re[...,1:-1] /= 2
195 |
196 | if self._to_save_input_size & 1:
197 | grad_output_im[...,1:] /= 2
198 | else:
199 | grad_output_im[...,1:-1] /= 2
200 |
201 | gr = irfft3(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False)
202 | return gr
203 |
204 |
205 | class Irfft3d(torch.autograd.Function):
206 |
207 | def forward(self, k_re, k_im):
208 | k_re, k_im = make_contiguous(k_re, k_im)
209 | return irfft3(k_re, k_im)
210 |
211 | def backward(self, grad_output_re):
212 | grad_output_re = grad_output_re.contiguous()
213 | gr, gi = rfft3(grad_output_re)
214 |
215 | N = grad_output_re.size(-1) * grad_output_re.size(-2) * grad_output_re.size(-3)
216 | gr[...,0] /= N
217 | gr[...,1:-1] /= N/2
218 | gr[...,-1] /= N
219 |
220 | gi[...,0] /= N
221 | gi[...,1:-1] /= N/2
222 | gi[...,-1] /= N
223 | return gr, gi
224 |
225 |
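The classes above use the legacy (pre-0.4) torch.autograd.Function API, where a new Function instance is created per call. A minimal usage sketch, assuming a CUDA device, the compiled th_fft extension, and that the repository root is on sys.path (the import path below is otherwise hypothetical):

    # Minimal sketch of the legacy Function classes above (assumes CUDA + built th_fft).
    import torch
    from torch.autograd import Variable
    from lib.pytorch_fft.fft.autograd import Fft2d, Ifft2d

    x_re = Variable(torch.randn(4, 8, 16, 16).cuda(), requires_grad=True)
    x_im = Variable(torch.zeros(4, 8, 16, 16).cuda(), requires_grad=True)

    y_re, y_im = Fft2d()(x_re, x_im)      # forward 2-D FFT (one Function instance per call)
    z_re, z_im = Ifft2d()(y_re, y_im)     # inverse recovers the input up to float error

    loss = (z_re - x_re).abs().sum()
    loss.backward()                       # gradients flow through the backward() methods above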
--------------------------------------------------------------------------------
/lib/pytorch_fft/fft/fft.py:
--------------------------------------------------------------------------------
1 | # functions/fft.py
2 | import torch
3 | from .._ext import th_fft
4 |
5 | def _fft(X_re, X_im, f, rank):
6 | if not(X_re.size() == X_im.size()):
7 | raise ValueError("Real and imaginary tensors must have the same dimension.")
8 | if not(X_re.dim() >= rank+1 and X_im.dim() >= rank+1):
9 | raise ValueError("Inputs must have at least {} dimensions.".format(rank+1))
10 | if not(X_re.is_cuda and X_im.is_cuda):
11 | raise ValueError("Input must be a CUDA tensor.")
12 | if not(X_re.is_contiguous() and X_im.is_contiguous()):
13 | raise ValueError("Input must be contiguous.")
14 |
15 | Y1, Y2 = tuple(X_re.new(*X_re.size()).zero_() for _ in range(2))
16 | f(X_re, X_im, Y1, Y2)
17 | return (Y1, Y2)
18 |
19 | def fft(X_re, X_im):
20 | if 'Float' in type(X_re).__name__ :
21 | f = th_fft.th_Float_fft1
22 | elif 'Double' in type(X_re).__name__:
23 | f = th_fft.th_Double_fft1
24 | else:
25 | raise NotImplementedError
26 | return _fft(X_re, X_im, f, 1)
27 |
28 | def ifft(X_re, X_im):
29 | N = X_re.size(-1)
30 | if 'Float' in type(X_re).__name__ :
31 | f = th_fft.th_Float_ifft1
32 | elif 'Double' in type(X_re).__name__:
33 | f = th_fft.th_Double_ifft1
34 | else:
35 | raise NotImplementedError
36 | Y1, Y2 = _fft(X_re, X_im, f, 1)
37 | return (Y1/N, Y2/N)
38 |
39 | def fft2(X_re, X_im):
40 | if 'Float' in type(X_re).__name__ :
41 | f = th_fft.th_Float_fft2
42 | elif 'Double' in type(X_re).__name__:
43 | f = th_fft.th_Double_fft2
44 | else:
45 | raise NotImplementedError
46 | return _fft(X_re, X_im, f, 2)
47 |
48 | def ifft2(X_re, X_im):
49 | N = X_re.size(-1)*X_re.size(-2)
50 | if 'Float' in type(X_re).__name__ :
51 | f = th_fft.th_Float_ifft2
52 | elif 'Double' in type(X_re).__name__:
53 | f = th_fft.th_Double_ifft2
54 | else:
55 | raise NotImplementedError
56 | Y1, Y2 = _fft(X_re, X_im, f, 2)
57 | return (Y1/N, Y2/N)
58 |
59 | def fft3(X_re, X_im):
60 | if 'Float' in type(X_re).__name__ :
61 | f = th_fft.th_Float_fft3
62 | elif 'Double' in type(X_re).__name__:
63 | f = th_fft.th_Double_fft3
64 | else:
65 | raise NotImplementedError
66 | return _fft(X_re, X_im, f, 3)
67 |
68 | def ifft3(X_re, X_im):
69 | N = X_re.size(-1)*X_re.size(-2)*X_re.size(-3)
70 | if 'Float' in type(X_re).__name__ :
71 | f = th_fft.th_Float_ifft3
72 | elif 'Double' in type(X_re).__name__:
73 | f = th_fft.th_Double_ifft3
74 | else:
75 | raise NotImplementedError
76 | Y1, Y2 = _fft(X_re, X_im, f, 3)
77 | return (Y1/N, Y2/N)
78 |
79 | _s = slice(None, None, None)
80 |
81 | def _rfft(X, f, rank):
82 | if not(X.dim() >= rank+1):
83 | raise ValueError("Input must have at least {} dimensions.".format(rank+1))
84 | if not(X.is_cuda):
85 | raise ValueError("Input must be a CUDA tensor.")
86 | if not(X.is_contiguous()):
87 | raise ValueError("Input must be contiguous.")
88 |
89 | new_size = tuple(X.size())[:-1] + (X.size(-1)//2 + 1,)
90 | # new_size = tuple(X.size())
91 | Y1, Y2 = tuple(X.new(*new_size).zero_() for _ in range(2))
92 | f(X, Y1, Y2)
93 | # i = tuple(_s for _ in range(X.dim()-1)) + (slice(None, X.size(-1)//2 + 1, ),)
94 | # print(Y1, i)
95 | # return (Y1[i], Y2[i])
96 | return (Y1, Y2)
97 |
98 | def rfft(X):
99 | if 'Float' in type(X).__name__ :
100 | f = th_fft.th_Float_rfft1
101 | elif 'Double' in type(X).__name__:
102 | f = th_fft.th_Double_rfft1
103 | else:
104 | raise NotImplementedError
105 | return _rfft(X, f, 1)
106 |
107 | def rfft2(X):
108 | if 'Float' in type(X).__name__ :
109 | f = th_fft.th_Float_rfft2
110 | elif 'Double' in type(X).__name__:
111 | f = th_fft.th_Double_rfft2
112 | else:
113 | raise NotImplementedError
114 | return _rfft(X, f, 2)
115 |
116 | def rfft3(X):
117 | if 'Float' in type(X).__name__ :
118 | f = th_fft.th_Float_rfft3
119 | elif 'Double' in type(X).__name__:
120 | f = th_fft.th_Double_rfft3
121 | else:
122 | raise NotImplementedError
123 | return _rfft(X, f, 3)
124 |
125 | def _irfft(X_re, X_im, f, rank, N, normalize):
126 | if not(X_re.size() == X_im.size()):
127 | raise ValueError("Real and imaginary tensors must have the same dimension.")
128 | if not(X_re.dim() >= rank+1 and X_im.dim() >= rank+1):
129 | raise ValueError("Inputs must have at least {} dimensions.".format(rank+1))
130 | if not(X_re.is_cuda and X_im.is_cuda):
131 | raise ValueError("Input must be a CUDA tensor.")
132 | if not(X_re.is_contiguous() and X_im.is_contiguous()):
133 | raise ValueError("Input must be contiguous.")
134 |
135 | input_size = X_re.size(-1)
136 |
137 | if N is not None:
138 | if input_size != int(N/2) + 1:
139 | raise ValueError("Input size must be equal to n/2 + 1")
140 | else:
141 | N = (X_re.size(-1) - 1)*2
142 |
143 | new_size = tuple(X_re.size())[:-1] + (N,)
144 | Y = X_re.new(*new_size).zero_()
145 | f(X_re, X_im, Y)
146 |
147 | if normalize:
148 | M = 1
149 | for i in range(rank):
150 | M *= new_size[-(i+1)]
151 | return Y/M
152 | else:
153 | return Y
154 |
155 | def irfft(X_re, X_im, n=None, normalize=True):
156 | if 'Float' in type(X_re).__name__ :
157 | f = th_fft.th_Float_irfft1
158 | elif 'Double' in type(X_re).__name__:
159 | f = th_fft.th_Double_irfft1
160 | else:
161 | raise NotImplementedError
162 | return _irfft(X_re, X_im, f, 1, n, normalize)
163 |
164 | def irfft2(X_re, X_im, n=None, normalize=True):
165 | if 'Float' in type(X_re).__name__ :
166 | f = th_fft.th_Float_irfft2
167 | elif 'Double' in type(X_re).__name__:
168 | f = th_fft.th_Double_irfft2
169 | else:
170 | raise NotImplementedError
171 | return _irfft(X_re, X_im, f, 2, n, normalize)
172 |
173 | def irfft3(X_re, X_im, n=None, normalize=True):
174 | if 'Float' in type(X_re).__name__ :
175 | f = th_fft.th_Float_irfft3
176 | elif 'Double' in type(X_re).__name__:
177 | f = th_fft.th_Double_irfft3
178 | else:
179 | raise NotImplementedError
180 | return _irfft(X_re, X_im, f, 3, n, normalize)
181 |
182 | def reverse(X, group_size=1):
183 | if not(X.is_cuda):
184 | raise ValueError("Input must be a CUDA tensor.")
185 | if not(X.is_contiguous()):
186 | raise ValueError("Input must be contiguous.")
187 |
188 | if 'Float' in type(X).__name__:
189 | f = th_fft.reverse_Float
190 | elif 'Double' in type(X).__name__:
191 | f = th_fft.reverse_Double
192 | else:
193 | raise NotImplementedError
194 | Y = X.new(*X.size())
195 | f(X,Y, group_size)
196 | return Y
197 |
198 |
199 | def expand(X, imag=False, odd=False):
200 | N1, N2 = X.size(-2), X.size(-1)
201 | N3 = (X.size(-1) - 1)*2
202 | if odd:
203 | N3 += 1
204 | new_size = tuple(X.size())[:-1] + (N3,)
205 | Y = X.new(*new_size).zero_()
206 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(None,N2, None),)
207 | Y[i] = X
208 |
209 | if odd:
210 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(-(N3-N2),None, None),)
211 | else:
212 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(-(1+N3-N2),-1, None),)
213 | X0 = X[i].contiguous()
214 |
215 | X0 = reverse(X0)
216 | i0 = (tuple(slice(None, None, None) for _ in range(X.dim() - 2)) +
217 | (slice(-1,None, None), slice(None, None, None)))
218 | i1 = (tuple(slice(None, None, None) for _ in range(X.dim() - 2)) +
219 | (slice(None, -1, None), slice(None, None, None)))
220 | X0 = torch.cat([X0[i0], X0[i1]], -2)
221 | X0 = reverse(X0, N1*(N3-N2))
222 |
223 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(N2, None, None),)
224 | if not imag:
225 | Y[i] = X0
226 | else:
227 | Y[i] = -X0
228 | return Y
229 |
230 | def roll_n(X, axis, n):
231 | f_idx = tuple(slice(None, None, None) if i != axis else slice(0,n,None)
232 | for i in range(X.dim()))
233 | b_idx = tuple(slice(None, None, None) if i != axis else slice(n,None,None)
234 | for i in range(X.dim()))
235 | front = X[f_idx]
236 | back = X[b_idx]
237 | return torch.cat([back, front],axis)
238 |
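The wrappers above follow a fixed normalization convention: fft*/rfft* are unnormalized, while ifft*/irfft* divide by the product of the transformed dimensions (irfft* can skip this via normalize=False, which the autograd backward passes rely on). A round-trip sketch, assuming a CUDA device, the built th_fft extension, and the hypothetical import path below:

    import torch
    from lib.pytorch_fft.fft import fft as cfft   # assumes the repo root is on sys.path

    x = torch.randn(2, 4, 8, 8).cuda()
    zero = torch.zeros_like(x)

    # Complex round trip: ifft2 divides by 8*8, so ifft2(fft2(x)) == x up to float error.
    y_re, y_im = cfft.fft2(x, zero)
    z_re, z_im = cfft.ifft2(y_re, y_im)
    print((z_re - x).abs().max())

    # Real round trip: rfft2 keeps the non-redundant half of the last dimension (..., 8, 5);
    # irfft2 infers n = (5 - 1) * 2 = 8 and normalizes by default.
    k_re, k_im = cfft.rfft2(x)
    x_back = cfft.irfft2(k_re, k_im)
    print((x_back - x).abs().max())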
--------------------------------------------------------------------------------
/lib/pytorch_fft/src/generic/helpers.c:
--------------------------------------------------------------------------------
1 | #ifndef THC_GENERIC_FILE
2 | #define THC_GENERIC_FILE "generic/helpers.c"
3 | #else
4 |
5 | // helper to convert a pair of real arrays into a complex array
6 | void pair2complex(real *a, real *b, cufft_complex *c, int n)
7 | {
8 | real *c_tmp = (real*)c;
9 | cudaMemcpy2D(c_tmp, 2*sizeof(real),
10 | a, sizeof(real),
11 | sizeof(real), n, cudaMemcpyDeviceToDevice);
12 | cudaMemcpy2D(c_tmp+1, 2*sizeof(real),
13 | b, sizeof(real),
14 | sizeof(real), n, cudaMemcpyDeviceToDevice);
15 | }
16 |
17 | void complex2pair(cufft_complex *a, real *b, real *c, int n)
18 | {
19 | real *a_tmp = (real*)a;
20 | cudaMemcpy2D(b, sizeof(real),
21 | a_tmp, 2*sizeof(real),
22 | sizeof(real), n, cudaMemcpyDeviceToDevice);
23 | cudaMemcpy2D(c, sizeof(real),
24 | a_tmp+1, 2*sizeof(real),
25 | sizeof(real), n, cudaMemcpyDeviceToDevice);
26 | }
27 |
28 | void reverse_(THCTensor *input, THCTensor *output, int group_size)
29 | {
30 | real *input_data = THCTensor_(data)(state, input);
31 | real *output_data = THCTensor_(data)(state, output);
32 | int n = THCTensor_(nElement)(state, input);
33 |
34 | cudaMemcpy2D(output_data, sizeof(real)*group_size,
35 | input_data+n-group_size, -sizeof(real)*group_size,
36 | sizeof(real)*group_size, n/group_size, cudaMemcpyDeviceToDevice);
37 | }
38 |
39 | #endif
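pair2complex and complex2pair above use strided cudaMemcpy2D calls to convert between two separate real arrays and cuFFT's interleaved (re, im, re, im, ...) complex layout. A CPU-side numpy sketch of the same layout transform (illustration only, not part of the repo):

    import numpy as np

    a = np.arange(4, dtype=np.float32)        # real parts
    b = 10 * np.arange(4, dtype=np.float32)   # imaginary parts

    # pair2complex: scatter a into even slots and b into odd slots of one buffer.
    c = np.empty(2 * a.size, dtype=np.float32)
    c[0::2] = a
    c[1::2] = b
    print(c.view(np.complex64))               # [0+0j, 1+10j, 2+20j, 3+30j]

    # complex2pair: gather the interleaved pairs back into two real arrays.
    re, im = c[0::2].copy(), c[1::2].copy()
    assert (re == a).all() and (im == b).all()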
--------------------------------------------------------------------------------
/lib/pytorch_fft/src/generic/th_fft_cuda.c:
--------------------------------------------------------------------------------
1 | #ifndef THC_GENERIC_FILE
2 | #define THC_GENERIC_FILE "generic/th_fft_cuda.c"
3 | #else
4 |
5 | int th_(THCTensor *input1, THCTensor *input2, THCTensor *output1, THCTensor *output2)
6 | {
7 | // Require that all tensors be of the same size.
8 | if (!THCTensor_(isSameSizeAs)(state, input1, output1))
9 | return 0;
10 | if (!THCTensor_(isSameSizeAs)(state, input1, output2))
11 | return 0;
12 | if (!THCTensor_(isSameSizeAs)(state, input1, input2))
13 | return 0;
14 |
15 | // Get the tensor dimensions (batchsize, rows, cols).
16 | int ndim = THCTensor_(nDimension)(state, input1);
17 | int batch = 1;
18 | int i, d;
19 | for(i=0; i<ndim-cufft_rank; ++i) batch *= THCTensor_(size)(state, input1, i);
--------------------------------------------------------------------------------
/lib/pytorch_fft/src/th_fft_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <cufft.h>
3 | #include <cufftXt.h>
4 | #include
5 | // this symbol will be resolved automatically from PyTorch libs
6 | extern THCState *state;
7 |
8 | #define th_ TH_CONCAT_4(th_, Real, _, func_name)
9 | #define pair2complex TH_CONCAT_2(Real, 2complex)
10 | #define complex2pair TH_CONCAT_2(complex2, Real)
11 | #define reverse_ TH_CONCAT_2(reverse_, Real)
12 |
13 | #include "th_fft_generate_helpers.h"
14 |
15 | #define cufft_rank 1
16 | #include "th_fft_generate_float.h"
17 | #include "th_fft_generate_double.h"
18 | #undef cufft_rank
19 |
20 | #define cufft_rank 2
21 | #include "th_fft_generate_float.h"
22 | #include "th_fft_generate_double.h"
23 | #undef cufft_rank
24 |
25 | #define cufft_rank 3
26 | #include "th_fft_generate_float.h"
27 | #include "th_fft_generate_double.h"
28 | #undef cufft_rank
29 |
--------------------------------------------------------------------------------
/lib/pytorch_fft/src/th_fft_cuda.h:
--------------------------------------------------------------------------------
1 | int th_Float_fft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2);
2 | int th_Float_ifft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2);
3 | int th_Double_fft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
4 | int th_Double_ifft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
5 |
6 | int th_Float_fft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2);
7 | int th_Float_ifft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2);
8 | int th_Double_fft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
9 | int th_Double_ifft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
10 |
11 | int th_Float_fft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2);
12 | int th_Float_ifft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2);
13 | int th_Double_fft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
14 | int th_Double_ifft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
15 |
16 | int th_Float_rfft1(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2);
17 | int th_Float_irfft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1);
18 | int th_Double_rfft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
19 | int th_Double_irfft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1);
20 |
21 | int th_Float_rfft2(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2);
22 | int th_Float_irfft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1);
23 | int th_Double_rfft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
24 | int th_Double_irfft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1);
25 |
26 | int th_Float_rfft3(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2);
27 | int th_Float_irfft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1);
28 | int th_Double_rfft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2);
29 | int th_Double_irfft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1);
30 |
31 | void reverse_Float(THCudaTensor *input, THCudaTensor *output, int group_size);
32 | void reverse_Double(THCudaDoubleTensor *input, THCudaDoubleTensor *output, int group_size);
33 |
34 | // void expand_2D_Float(THCudaTensor *input, THCudaTensor *output);
35 | // void expand_2D_Double(THCudaDoubleTensor *input, THCudaDoubleTensor *output);
--------------------------------------------------------------------------------
/lib/pytorch_fft/src/th_fft_generate_double.h:
--------------------------------------------------------------------------------
1 | // Generate Double FFTs
2 | #define cufft_complex cufftDoubleComplex
3 |
4 | #define cufft_type CUFFT_Z2Z
5 | #define cufft_exec cufftExecZ2Z
6 |
7 | #define cufft_direction CUFFT_FORWARD
8 | #define func_name TH_CONCAT_2(fft, cufft_rank)
9 |
10 | #include "generic/th_fft_cuda.c"
11 | #include "THCGenerateDoubleType.h"
12 |
13 | #undef cufft_direction
14 | #undef func_name
15 |
16 | #define cufft_direction CUFFT_INVERSE
17 | #define func_name TH_CONCAT_2(ifft, cufft_rank)
18 |
19 | #include "generic/th_fft_cuda.c"
20 | #include "THCGenerateDoubleType.h"
21 |
22 | #undef cufft_direction
23 | #undef func_name
24 |
25 | #undef cufft_type
26 | #undef cufft_exec
27 |
28 | // Generate Double rFFTs
29 | #define cufft_type CUFFT_D2Z
30 | #define cufft_exec cufftExecD2Z
31 | #define func_name TH_CONCAT_2(rfft, cufft_rank)
32 |
33 | #include "generic/th_rfft_cuda.c"
34 | #include "THCGenerateDoubleType.h"
35 |
36 | #undef cufft_type
37 | #undef cufft_exec
38 | #undef func_name
39 |
40 | #define cufft_type CUFFT_Z2D
41 | #define cufft_exec cufftExecZ2D
42 | #define func_name TH_CONCAT_2(irfft, cufft_rank)
43 |
44 | #include "generic/th_irfft_cuda.c"
45 | #include "THCGenerateDoubleType.h"
46 |
47 | #undef cufft_type
48 | #undef cufft_exec
49 | #undef func_name
50 |
51 | #undef cufft_complex
--------------------------------------------------------------------------------
/lib/pytorch_fft/src/th_fft_generate_float.h:
--------------------------------------------------------------------------------
1 | // Generate float FFTs
2 | #define cufft_complex cufftComplex
3 |
4 | #define cufft_type CUFFT_C2C
5 | #define cufft_exec cufftExecC2C
6 |
7 | #define cufft_direction CUFFT_FORWARD
8 | #define func_name TH_CONCAT_2(fft, cufft_rank)
9 |
10 | #include "generic/th_fft_cuda.c"
11 | #include "THCGenerateFloatType.h"
12 |
13 | #undef func_name
14 | #undef cufft_direction
15 |
16 | #define cufft_direction CUFFT_INVERSE
17 | #define func_name TH_CONCAT_2(ifft, cufft_rank)
18 |
19 | #include "generic/th_fft_cuda.c"
20 | #include "THCGenerateFloatType.h"
21 |
22 | #undef func_name
23 | #undef cufft_direction
24 |
25 |
26 | #undef cufft_type
27 | #undef cufft_exec
28 |
29 | // Generate float rFFTs
30 | #define cufft_type CUFFT_R2C
31 | #define cufft_exec cufftExecR2C
32 | #define func_name TH_CONCAT_2(rfft, cufft_rank)
33 |
34 | #include "generic/th_rfft_cuda.c"
35 | #include "THCGenerateFloatType.h"
36 |
37 | #undef func_name
38 | #undef cufft_type
39 | #undef cufft_exec
40 |
41 | #define cufft_type CUFFT_C2R
42 | #define cufft_exec cufftExecC2R
43 | #define func_name TH_CONCAT_2(irfft, cufft_rank)
44 |
45 | #include "generic/th_irfft_cuda.c"
46 | #include "THCGenerateFloatType.h"
47 |
48 | #undef func_name
49 | #undef cufft_type
50 | #undef cufft_exec
51 |
52 | #undef cufft_complex
--------------------------------------------------------------------------------
/lib/pytorch_fft/src/th_fft_generate_helpers.h:
--------------------------------------------------------------------------------
1 | // Generate float and double helpers
2 | #define cufft_complex cufftComplex
3 |
4 | #include "generic/helpers.c"
5 | #include "THCGenerateFloatType.h"
6 |
7 | #undef cufft_complex
8 |
9 | #define cufft_complex cufftDoubleComplex
10 |
11 | #include "generic/helpers.c"
12 | #include "THCGenerateDoubleType.h"
13 |
14 | #undef cufft_complex
--------------------------------------------------------------------------------
/lib/resnet/__pycache__/resnet.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/resnet/__pycache__/resnet.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/resnet/resnet.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import torch
4 | import torch.nn as nn
5 | from torchvision import transforms
6 | from torch.autograd import Variable
7 | import torch.utils.model_zoo as model_zoo
8 |
9 |
10 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
11 | 'resnet152']
12 |
13 |
14 | model_urls = {
15 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
16 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
17 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
18 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
19 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
20 | }
21 |
22 |
23 | def conv3x3(in_planes, out_planes, stride=1):
24 | "3x3 convolution with padding"
25 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
26 | padding=1, bias=False)
27 |
28 |
29 | class BasicBlock(nn.Module):
30 | expansion = 1
31 |
32 | def __init__(self, inplanes, planes, stride=1, downsample=None):
33 | super(BasicBlock, self).__init__()
34 | self.conv1 = conv3x3(inplanes, planes, stride)
35 | self.bn1 = nn.BatchNorm2d(planes)
36 | self.relu = nn.ReLU(inplace=True)
37 | self.conv2 = conv3x3(planes, planes)
38 | self.bn2 = nn.BatchNorm2d(planes)
39 | self.downsample = downsample
40 | self.stride = stride
41 |
42 | def forward(self, x):
43 | residual = x
44 |
45 | out = self.conv1(x)
46 | out = self.bn1(out)
47 | out = self.relu(out)
48 |
49 | out = self.conv2(out)
50 | out = self.bn2(out)
51 |
52 | if self.downsample is not None:
53 | residual = self.downsample(x)
54 |
55 | out += residual
56 | out = self.relu(out)
57 |
58 | return out
59 |
60 |
61 | class Bottleneck(nn.Module):
62 | expansion = 4
63 |
64 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None):
65 | super(Bottleneck, self).__init__()
66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
67 | self.bn1 = nn.BatchNorm2d(planes)
68 | if dilation == 1:
69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
70 | padding=1, bias=False)
71 | else:
72 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
73 | padding=dilation, dilation=dilation, bias=False)
74 | self.bn2 = nn.BatchNorm2d(planes)
75 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
76 | self.bn3 = nn.BatchNorm2d(planes * 4)
77 | self.relu = nn.ReLU(inplace=True)
78 | self.downsample = downsample
79 | self.stride = stride
80 |
81 | def forward(self, x):
82 | residual = x
83 |
84 | out = self.conv1(x)
85 | out = self.bn1(out)
86 | out = self.relu(out)
87 |
88 | out = self.conv2(out)
89 | out = self.bn2(out)
90 | out = self.relu(out)
91 |
92 | out = self.conv3(out)
93 | out = self.bn3(out)
94 |
95 | if self.downsample is not None:
96 | residual = self.downsample(x)
97 |
98 | out += residual
99 | out = self.relu(out)
100 |
101 | return out
102 |
103 |
104 | # We hook one extra 1x1 conv layer onto the 5th residual block and modify the checkpoint-loading method.
105 | # A ResNet pre-trained for attribute-entity grounding classification is adopted.
106 | class ResNet(nn.Module):
107 |
108 | def __init__(self, block, layers, num_classes=1000):
109 | self.inplanes = 64
110 | super(ResNet, self).__init__()
111 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
112 | bias=False)
113 | self.bn1 = nn.BatchNorm2d(64)
114 | self.relu = nn.ReLU(inplace=True)
115 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
116 | self.layer1 = self._make_layer(block, 64, layers[0])
117 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
119 | self.layer4 = self._make_layer(block, 512, layers[3], dilation=4)
120 | self.avgpool = nn.AvgPool2d(32, stride=1)
121 | self.fc = nn.Linear(512 * block.expansion, num_classes)
122 | self.sigmoid = nn.Sigmoid()
123 |
124 | for m in self.modules():
125 | if isinstance(m, nn.Conv2d):
126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
127 | m.weight.data.normal_(0, math.sqrt(2. / n))
128 | elif isinstance(m, nn.BatchNorm2d):
129 | m.weight.data.fill_(1)
130 | m.bias.data.zero_()
131 |
132 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1):
133 | downsample = None
134 | if stride != 1 or self.inplanes != planes * block.expansion:
135 | downsample = nn.Sequential(
136 | nn.Conv2d(self.inplanes, planes * block.expansion,
137 | kernel_size=1, stride=stride, bias=False),
138 | nn.BatchNorm2d(planes * block.expansion),)
139 | layers = []
140 | layers.append(block(self.inplanes, planes, stride, dilation, downsample))
141 | self.inplanes = planes * block.expansion
142 | for i in range(1, blocks):
143 | layers.append(block(self.inplanes, planes))
144 |
145 | return nn.Sequential(*layers)
146 |
147 | def forward(self, x):
148 | x = self.conv1(x)
149 | x = self.bn1(x)
150 | x = self.relu(x)
151 | x = self.maxpool(x)
152 |
153 | conv_feat1 = self.layer1(x)
154 | conv_feat2 = self.layer2(conv_feat1)
155 | conv_feat3 = self.layer3(conv_feat2)
156 | conv_feat4 = self.layer4(conv_feat3)
157 |
158 | # Shrink the feature size with the 1x1 shrink_conv attached in the constructor functions below (e.g. resnet50)
159 | conv_feat = self.shrink_conv(conv_feat4)
160 | # feat = self.avgpool(conv_feat)
161 | # y = self.sigmoid(self.fc(feat.view(feat.shape[0], feat.shape[1])))
162 |
163 | return conv_feat4, conv_feat
164 |
165 |
166 | def resnet101(pretrained=False, path='', classnum=1000, **kwargs):
167 | """Constructs a ResNet-101 model.
168 |
169 | Args:
170 | pretrained (bool): If True, returns a model pre-trained on ImageNet
171 |
172 | Note that for this work the modified ResNet has been pre-trained on Flickr30k
173 | for entity-attribute classification.
174 |
175 | """
176 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
177 | model.fc = torch.nn.Linear(256, 4)
178 |
179 | if pretrained:
180 | state_dict = torch.load(path)['state_dict']
181 | new_params = model.state_dict()
182 | model_keys = model.state_dict().keys()
183 | for name, param in list(state_dict.items()):
184 | if name not in model_keys:
185 | del state_dict[name]
186 |
187 | new_params.update(state_dict)
188 | model.load_state_dict(new_params)
189 |
190 | else:
191 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
192 | return model
193 |
194 |
195 | def resnet50(pretrained=False, path='', classnum=1000, **kwargs):
196 |
197 | """Constructs a ResNet-50 model.
198 |
199 | Args:
200 | pretrained (bool): If True, returns a model pre-trained on ImageNet
201 | """
202 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
203 |
204 | if pretrained:
205 | # In the pre-trained gender model the fc layer has 2 classes, while in the person model it has 4 classes.
206 | model.fc = torch.nn.Linear(256, 4)
207 | model.shrink_conv = nn.Conv2d(2048, 256, kernel_size=1, bias=False)
208 | state_dict = torch.load(path)['state_dict']
209 | new_params = model.state_dict()
210 | model_keys = model.state_dict().keys()
211 | for name, param in list(state_dict.items()):
212 | if name not in model_keys:
213 | del state_dict[name]
214 |
215 | new_params.update(state_dict)
216 | model.load_state_dict(new_params)
217 |
218 | else:
219 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
220 | model.shrink_conv = nn.Conv2d(2048, 256, kernel_size=1, bias=False)
221 | model.fc = torch.nn.Linear(256, classnum)
222 |
223 | return model
224 |
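A construction sketch for the modified ResNet-50 above, assuming the repository root is on sys.path; 'checkpoint/attr_resnet50.pth' is a hypothetical path, and on older PyTorch versions the input may need wrapping in Variable:

    import torch
    from lib.resnet.resnet import resnet50

    # ImageNet-initialized backbone plus the extra 1x1 shrink_conv (2048 -> 256 channels).
    model = resnet50(pretrained=False, classnum=300).eval()

    x = torch.randn(1, 3, 512, 512)
    conv_feat4, conv_feat = model(x)
    # Output stride is 16 because layer4 is dilated instead of strided:
    print(conv_feat4.shape, conv_feat.shape)   # (1, 2048, 32, 32), (1, 256, 32, 32)

    # Loading the attribute-grounding checkpoint instead (hypothetical path):
    # model = resnet50(pretrained=True, path='checkpoint/attr_resnet50.pth')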
--------------------------------------------------------------------------------
/lib/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__init__.py
--------------------------------------------------------------------------------
/lib/roi_align/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__init__.pyc
--------------------------------------------------------------------------------
/lib/roi_align/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/roi_align/__pycache__/crop_and_resize.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__pycache__/crop_and_resize.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__init__.py
--------------------------------------------------------------------------------
/lib/roi_align/_ext/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__init__.pyc
--------------------------------------------------------------------------------
/lib/roi_align/_ext/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/roi_align/_ext/crop_and_resize/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._crop_and_resize import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/lib/roi_align/_ext/crop_and_resize/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/__init__.pyc
--------------------------------------------------------------------------------
/lib/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/lib/roi_align/_ext/crop_and_resize/_crop_and_resize.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/_crop_and_resize.so
--------------------------------------------------------------------------------
/lib/roi_align/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/crop_and_resize.c']
7 | headers = ['src/crop_and_resize.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | extra_objects = []
12 | if torch.cuda.is_available():
13 | print('Including CUDA code.')
14 | sources += ['src/crop_and_resize_gpu.c']
15 | headers += ['src/crop_and_resize_gpu.h']
16 | defines += [('WITH_CUDA', None)]
17 | extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o']
18 | with_cuda = True
19 |
20 | extra_compile_args = ['-fopenmp', '-std=c99']
21 |
22 | this_file = os.path.dirname(os.path.realpath(__file__))
23 | print(this_file)
24 | sources = [os.path.join(this_file, fname) for fname in sources]
25 | headers = [os.path.join(this_file, fname) for fname in headers]
26 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
27 |
28 | ffi = create_extension(
29 | '_ext.crop_and_resize',
30 | headers=headers,
31 | sources=sources,
32 | define_macros=defines,
33 | relative_to=__file__,
34 | with_cuda=with_cuda,
35 | extra_objects=extra_objects,
36 | extra_compile_args=extra_compile_args
37 | )
38 |
39 | if __name__ == '__main__':
40 | ffi.build()
41 |
--------------------------------------------------------------------------------
/lib/roi_align/crop_and_resize.py:
--------------------------------------------------------------------------------
1 | import math
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from torch.autograd import Function
6 |
7 | from ._ext import crop_and_resize as _backend
8 |
9 |
10 | class CropAndResizeFunction(Function):
11 |
12 | def __init__(self, crop_height, crop_width, extrapolation_value=0):
13 | self.crop_height = crop_height
14 | self.crop_width = crop_width
15 | self.extrapolation_value = extrapolation_value
16 |
17 | def forward(self, image, boxes, box_ind):
18 | crops = torch.zeros_like(image)
19 |
20 | if image.is_cuda:
21 | _backend.crop_and_resize_gpu_forward(
22 | image, boxes, box_ind,
23 | self.extrapolation_value, self.crop_height, self.crop_width, crops)
24 | else:
25 | _backend.crop_and_resize_forward(
26 | image, boxes, box_ind,
27 | self.extrapolation_value, self.crop_height, self.crop_width, crops)
28 |
29 | # save for backward
30 | self.im_size = image.size()
31 | self.save_for_backward(boxes, box_ind)
32 |
33 | return crops
34 |
35 | def backward(self, grad_outputs):
36 | boxes, box_ind = self.saved_tensors
37 |
38 | grad_outputs = grad_outputs.contiguous()
39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size)
40 |
41 | if grad_outputs.is_cuda:
42 | _backend.crop_and_resize_gpu_backward(
43 | grad_outputs, boxes, box_ind, grad_image
44 | )
45 | else:
46 | _backend.crop_and_resize_backward(
47 | grad_outputs, boxes, box_ind, grad_image
48 | )
49 |
50 | return grad_image, None, None
51 |
52 |
53 | class CropAndResize(nn.Module):
54 | """
55 | Crop and resize ported from tensorflow
56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize
57 | """
58 |
59 | def __init__(self, crop_height, crop_width, extrapolation_value=0):
60 | super(CropAndResize, self).__init__()
61 |
62 | self.crop_height = crop_height
63 | self.crop_width = crop_width
64 | self.extrapolation_value = extrapolation_value
65 |
66 | def forward(self, image, boxes, box_ind):
67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind)
68 |
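A usage sketch for the CropAndResize module above, assuming the crop_and_resize extension has been built with build.py and that the repository root is on sys.path; on older PyTorch versions the tensors may need wrapping in Variable:

    import torch
    from lib.roi_align.crop_and_resize import CropAndResize

    image = torch.randn(2, 3, 64, 64)                    # NCHW
    boxes = torch.FloatTensor([[0.00, 0.00, 0.5, 0.5],   # [y1, x1, y2, x2], normalized to [0, 1]
                               [0.25, 0.25, 1.0, 1.0]])
    box_ind = torch.IntTensor([0, 1])                    # which batch element each box crops from

    crop = CropAndResize(crop_height=7, crop_width=7)
    crops = crop(image, boxes, box_ind)
    print(crops.shape)                                   # (2, 3, 7, 7)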
--------------------------------------------------------------------------------
/lib/roi_align/crop_and_resize.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/crop_and_resize.pyc
--------------------------------------------------------------------------------
/lib/roi_align/roi_align.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from .crop_and_resize import CropAndResize, CropAndResizeFunction
5 |
6 | class RoIAlign(nn.Module):
7 |
8 | def __init__(self, crop_height, crop_width, extrapolation_value=0):
9 | super(RoIAlign, self).__init__()
10 |
11 | self.crop_height = crop_height
12 | self.crop_width = crop_width
13 | self.extrapolation_value = extrapolation_value
14 |
15 | def forward(self, featuremap, boxes, box_ind):
16 | """
17 | RoIAlign based on crop_and_resize.
18 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301
19 | :param featuremap: NxCxHxW
20 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization**
21 | :param box_ind: M
22 | :return: MxCxoHxoW
23 | """
24 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1)
25 |
26 | spacing_w = (x2 - x1) / float(self.crop_width)
27 | spacing_h = (y2 - y1) / float(self.crop_height)
28 |
29 | image_height, image_width = featuremap.size()[2:4]
30 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1)
31 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1)
32 |
33 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1)
34 | nh = spacing_h * float(self.crop_height - 1) / float(image_height - 1)
35 |
36 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1)
37 |
38 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind)
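A usage sketch for RoIAlign above; unlike CropAndResize it takes (x1, y1, x2, y2) boxes in un-normalized feature-map coordinates and normalizes them internally. Same assumptions as before (built extension, repo root on sys.path, possibly Variable wrapping on older PyTorch):

    import torch
    from lib.roi_align.roi_align import RoIAlign

    featuremap = torch.randn(1, 256, 32, 32)              # NxCxHxW
    boxes = torch.FloatTensor([[4.0, 4.0, 20.0, 16.0]])   # (x1, y1, x2, y2) in feature-map pixels
    box_ind = torch.IntTensor([0])                        # each box crops from batch element 0

    roi_align = RoIAlign(crop_height=7, crop_width=7)
    rois = roi_align(featuremap, boxes, box_ind)
    print(rois.shape)                                     # (1, 256, 7, 7)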
--------------------------------------------------------------------------------
/lib/roi_align/roi_align.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/roi_align.pyc
--------------------------------------------------------------------------------
/lib/roi_align/src/crop_and_resize.c:
--------------------------------------------------------------------------------
1 | #include <TH/TH.h>
2 | #include <stdio.h>
3 | #include <math.h>
4 |
5 |
6 | void CropAndResizePerBox(
7 | const float * image_data,
8 | const int batch_size,
9 | const int depth,
10 | const int image_height,
11 | const int image_width,
12 |
13 | const float * boxes_data,
14 | const int * box_index_data,
15 | const int start_box,
16 | const int limit_box,
17 |
18 | float * corps_data,
19 | const int crop_height,
20 | const int crop_width,
21 | const float extrapolation_value
22 | ) {
23 | const int image_channel_elements = image_height * image_width;
24 | const int image_elements = depth * image_channel_elements;
25 |
26 | const int channel_elements = crop_height * crop_width;
27 | const int crop_elements = depth * channel_elements;
28 |
29 | int b;
30 | #pragma omp parallel for
31 | for (b = start_box; b < limit_box; ++b) {
32 | const float * box = boxes_data + b * 4;
33 | const float y1 = box[0];
34 | const float x1 = box[1];
35 | const float y2 = box[2];
36 | const float x2 = box[3];
37 |
38 | const int b_in = box_index_data[b];
39 | if (b_in < 0 || b_in >= batch_size) {
40 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size);
41 | exit(-1);
42 | }
43 |
44 | const float height_scale =
45 | (crop_height > 1)
46 | ? (y2 - y1) * (image_height - 1) / (crop_height - 1)
47 | : 0;
48 | const float width_scale =
49 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1)
50 | : 0;
51 |
52 | for (int y = 0; y < crop_height; ++y)
53 | {
54 | const float in_y = (crop_height > 1)
55 | ? y1 * (image_height - 1) + y * height_scale
56 | : 0.5 * (y1 + y2) * (image_height - 1);
57 |
58 | if (in_y < 0 || in_y > image_height - 1)
59 | {
60 | for (int x = 0; x < crop_width; ++x)
61 | {
62 | for (int d = 0; d < depth; ++d)
63 | {
64 | // crops(b, y, x, d) = extrapolation_value;
65 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value;
66 | }
67 | }
68 | continue;
69 | }
70 |
71 | const int top_y_index = floorf(in_y);
72 | const int bottom_y_index = ceilf(in_y);
73 | const float y_lerp = in_y - top_y_index;
74 |
75 | for (int x = 0; x < crop_width; ++x)
76 | {
77 | const float in_x = (crop_width > 1)
78 | ? x1 * (image_width - 1) + x * width_scale
79 | : 0.5 * (x1 + x2) * (image_width - 1);
80 | if (in_x < 0 || in_x > image_width - 1)
81 | {
82 | for (int d = 0; d < depth; ++d)
83 | {
84 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value;
85 | }
86 | continue;
87 | }
88 |
89 | const int left_x_index = floorf(in_x);
90 | const int right_x_index = ceilf(in_x);
91 | const float x_lerp = in_x - left_x_index;
92 |
93 | for (int d = 0; d < depth; ++d)
94 | {
95 | const float *pimage = image_data + b_in * image_elements + d * image_channel_elements;
96 |
97 | const float top_left = pimage[top_y_index * image_width + left_x_index];
98 | const float top_right = pimage[top_y_index * image_width + right_x_index];
99 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index];
100 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index];
101 |
102 | const float top = top_left + (top_right - top_left) * x_lerp;
103 | const float bottom =
104 | bottom_left + (bottom_right - bottom_left) * x_lerp;
105 |
106 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = top + (bottom - top) * y_lerp;
107 | }
108 | } // end for x
109 | } // end for y
110 | } // end for b
111 |
112 | }
113 |
114 |
115 | void crop_and_resize_forward(
116 | THFloatTensor * image,
117 | THFloatTensor * boxes, // [y1, x1, y2, x2]
118 | THIntTensor * box_index, // range in [0, batch_size)
119 | const float extrapolation_value,
120 | const int crop_height,
121 | const int crop_width,
122 | THFloatTensor * crops
123 | ) {
124 | const int batch_size = image->size[0];
125 | const int depth = image->size[1];
126 | const int image_height = image->size[2];
127 | const int image_width = image->size[3];
128 |
129 | const int num_boxes = boxes->size[0];
130 |
131 | // init output space
132 | THFloatTensor_resize4d(crops, num_boxes, depth, crop_height, crop_width);
133 | THFloatTensor_zero(crops);
134 |
135 | // crop_and_resize for each box
136 | CropAndResizePerBox(
137 | THFloatTensor_data(image),
138 | batch_size,
139 | depth,
140 | image_height,
141 | image_width,
142 |
143 | THFloatTensor_data(boxes),
144 | THIntTensor_data(box_index),
145 | 0,
146 | num_boxes,
147 |
148 | THFloatTensor_data(crops),
149 | crop_height,
150 | crop_width,
151 | extrapolation_value
152 | );
153 |
154 | }
155 |
156 |
157 | void crop_and_resize_backward(
158 | THFloatTensor * grads,
159 | THFloatTensor * boxes, // [y1, x1, y2, x2]
160 | THIntTensor * box_index, // range in [0, batch_size)
161 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc]
162 | )
163 | {
164 | // shape
165 | const int batch_size = grads_image->size[0];
166 | const int depth = grads_image->size[1];
167 | const int image_height = grads_image->size[2];
168 | const int image_width = grads_image->size[3];
169 |
170 | const int num_boxes = grads->size[0];
171 | const int crop_height = grads->size[2];
172 | const int crop_width = grads->size[3];
173 |
174 | // n_elements
175 | const int image_channel_elements = image_height * image_width;
176 | const int image_elements = depth * image_channel_elements;
177 |
178 | const int channel_elements = crop_height * crop_width;
179 | const int crop_elements = depth * channel_elements;
180 |
181 | // init output space
182 | THFloatTensor_zero(grads_image);
183 |
184 | // data pointer
185 | const float * grads_data = THFloatTensor_data(grads);
186 | const float * boxes_data = THFloatTensor_data(boxes);
187 | const int * box_index_data = THIntTensor_data(box_index);
188 | float * grads_image_data = THFloatTensor_data(grads_image);
189 |
190 | for (int b = 0; b < num_boxes; ++b) {
191 | const float * box = boxes_data + b * 4;
192 | const float y1 = box[0];
193 | const float x1 = box[1];
194 | const float y2 = box[2];
195 | const float x2 = box[3];
196 |
197 | const int b_in = box_index_data[b];
198 | if (b_in < 0 || b_in >= batch_size) {
199 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size);
200 | exit(-1);
201 | }
202 |
203 | const float height_scale =
204 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1)
205 | : 0;
206 | const float width_scale =
207 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1)
208 | : 0;
209 |
210 | for (int y = 0; y < crop_height; ++y)
211 | {
212 | const float in_y = (crop_height > 1)
213 | ? y1 * (image_height - 1) + y * height_scale
214 | : 0.5 * (y1 + y2) * (image_height - 1);
215 | if (in_y < 0 || in_y > image_height - 1)
216 | {
217 | continue;
218 | }
219 | const int top_y_index = floorf(in_y);
220 | const int bottom_y_index = ceilf(in_y);
221 | const float y_lerp = in_y - top_y_index;
222 |
223 | for (int x = 0; x < crop_width; ++x)
224 | {
225 | const float in_x = (crop_width > 1)
226 | ? x1 * (image_width - 1) + x * width_scale
227 | : 0.5 * (x1 + x2) * (image_width - 1);
228 | if (in_x < 0 || in_x > image_width - 1)
229 | {
230 | continue;
231 | }
232 | const int left_x_index = floorf(in_x);
233 | const int right_x_index = ceilf(in_x);
234 | const float x_lerp = in_x - left_x_index;
235 |
236 | for (int d = 0; d < depth; ++d)
237 | {
238 | float *pimage = grads_image_data + b_in * image_elements + d * image_channel_elements;
239 | const float grad_val = grads_data[crop_elements * b + channel_elements * d + y * crop_width + x];
240 |
241 | const float dtop = (1 - y_lerp) * grad_val;
242 | pimage[top_y_index * image_width + left_x_index] += (1 - x_lerp) * dtop;
243 | pimage[top_y_index * image_width + right_x_index] += x_lerp * dtop;
244 |
245 | const float dbottom = y_lerp * grad_val;
246 | pimage[bottom_y_index * image_width + left_x_index] += (1 - x_lerp) * dbottom;
247 | pimage[bottom_y_index * image_width + right_x_index] += x_lerp * dbottom;
248 | } // end d
249 | } // end x
250 | } // end y
251 | } // end b
252 | }
--------------------------------------------------------------------------------
/lib/roi_align/src/crop_and_resize.h:
--------------------------------------------------------------------------------
1 | void crop_and_resize_forward(
2 | THFloatTensor * image,
3 | THFloatTensor * boxes, // [y1, x1, y2, x2]
4 | THIntTensor * box_index, // range in [0, batch_size)
5 | const float extrapolation_value,
6 | const int crop_height,
7 | const int crop_width,
8 | THFloatTensor * crops
9 | );
10 |
11 | void crop_and_resize_backward(
12 | THFloatTensor * grads,
13 | THFloatTensor * boxes, // [y1, x1, y2, x2]
14 | THIntTensor * box_index, // range in [0, batch_size)
15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc]
16 | );
--------------------------------------------------------------------------------
/lib/roi_align/src/crop_and_resize_gpu.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include "cuda/crop_and_resize_kernel.h"
3 |
4 | extern THCState *state;
5 |
6 |
7 | void crop_and_resize_gpu_forward(
8 | THCudaTensor * image,
9 | THCudaTensor * boxes, // [y1, x1, y2, x2]
10 | THCudaIntTensor * box_index, // range in [0, batch_size)
11 | const float extrapolation_value,
12 | const int crop_height,
13 | const int crop_width,
14 | THCudaTensor * crops
15 | ) {
16 | const int batch_size = THCudaTensor_size(state, image, 0);
17 | const int depth = THCudaTensor_size(state, image, 1);
18 | const int image_height = THCudaTensor_size(state, image, 2);
19 | const int image_width = THCudaTensor_size(state, image, 3);
20 |
21 | const int num_boxes = THCudaTensor_size(state, boxes, 0);
22 |
23 | // init output space
24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width);
25 | THCudaTensor_zero(state, crops);
26 |
27 | cudaStream_t stream = THCState_getCurrentStream(state);
28 | CropAndResizeLaucher(
29 | THCudaTensor_data(state, image),
30 | THCudaTensor_data(state, boxes),
31 | THCudaIntTensor_data(state, box_index),
32 | num_boxes, batch_size, image_height, image_width,
33 | crop_height, crop_width, depth, extrapolation_value,
34 | THCudaTensor_data(state, crops),
35 | stream
36 | );
37 | }
38 |
39 |
40 | void crop_and_resize_gpu_backward(
41 | THCudaTensor * grads,
42 | THCudaTensor * boxes, // [y1, x1, y2, x2]
43 | THCudaIntTensor * box_index, // range in [0, batch_size)
44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc]
45 | ) {
46 | // shape
47 | const int batch_size = THCudaTensor_size(state, grads_image, 0);
48 | const int depth = THCudaTensor_size(state, grads_image, 1);
49 | const int image_height = THCudaTensor_size(state, grads_image, 2);
50 | const int image_width = THCudaTensor_size(state, grads_image, 3);
51 |
52 | const int num_boxes = THCudaTensor_size(state, grads, 0);
53 | const int crop_height = THCudaTensor_size(state, grads, 2);
54 | const int crop_width = THCudaTensor_size(state, grads, 3);
55 |
56 | // init output space
57 | THCudaTensor_zero(state, grads_image);
58 |
59 | cudaStream_t stream = THCState_getCurrentStream(state);
60 | CropAndResizeBackpropImageLaucher(
61 | THCudaTensor_data(state, grads),
62 | THCudaTensor_data(state, boxes),
63 | THCudaIntTensor_data(state, box_index),
64 | num_boxes, batch_size, image_height, image_width,
65 | crop_height, crop_width, depth,
66 | THCudaTensor_data(state, grads_image),
67 | stream
68 | );
69 | }
--------------------------------------------------------------------------------
/lib/roi_align/src/crop_and_resize_gpu.h:
--------------------------------------------------------------------------------
1 | void crop_and_resize_gpu_forward(
2 | THCudaTensor * image,
3 | THCudaTensor * boxes, // [y1, x1, y2, x2]
4 | THCudaIntTensor * box_index, // range in [0, batch_size)
5 | const float extrapolation_value,
6 | const int crop_height,
7 | const int crop_width,
8 | THCudaTensor * crops
9 | );
10 |
11 | void crop_and_resize_gpu_backward(
12 | THCudaTensor * grads,
13 | THCudaTensor * boxes, // [y1, x1, y2, x2]
14 | THCudaIntTensor * box_index, // range in [0, batch_size)
15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc]
16 | );
--------------------------------------------------------------------------------
/lib/roi_align/src/cuda/crop_and_resize_kernel.cu:
--------------------------------------------------------------------------------
1 | #include <math.h>
2 | #include <stdio.h>
3 | #include "crop_and_resize_kernel.h"
4 |
5 | #define CUDA_1D_KERNEL_LOOP(i, n) \
6 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
7 | i += blockDim.x * gridDim.x)
8 |
9 |
10 | __global__
11 | void CropAndResizeKernel(
12 | const int nthreads, const float *image_ptr, const float *boxes_ptr,
13 | const int *box_ind_ptr, int num_boxes, int batch, int image_height,
14 | int image_width, int crop_height, int crop_width, int depth,
15 | float extrapolation_value, float *crops_ptr)
16 | {
17 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads)
18 | {
19 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b))
20 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b))
21 | int idx = out_idx;
22 | const int x = idx % crop_width;
23 | idx /= crop_width;
24 | const int y = idx % crop_height;
25 | idx /= crop_height;
26 | const int d = idx % depth;
27 | const int b = idx / depth;
28 |
29 | const float y1 = boxes_ptr[b * 4];
30 | const float x1 = boxes_ptr[b * 4 + 1];
31 | const float y2 = boxes_ptr[b * 4 + 2];
32 | const float x2 = boxes_ptr[b * 4 + 3];
33 |
34 | const int b_in = box_ind_ptr[b];
35 | if (b_in < 0 || b_in >= batch)
36 | {
37 | continue;
38 | }
39 |
40 | const float height_scale =
41 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1)
42 | : 0;
43 | const float width_scale =
44 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0;
45 |
46 | const float in_y = (crop_height > 1)
47 | ? y1 * (image_height - 1) + y * height_scale
48 | : 0.5 * (y1 + y2) * (image_height - 1);
49 | if (in_y < 0 || in_y > image_height - 1)
50 | {
51 | crops_ptr[out_idx] = extrapolation_value;
52 | continue;
53 | }
54 |
55 | const float in_x = (crop_width > 1)
56 | ? x1 * (image_width - 1) + x * width_scale
57 | : 0.5 * (x1 + x2) * (image_width - 1);
58 | if (in_x < 0 || in_x > image_width - 1)
59 | {
60 | crops_ptr[out_idx] = extrapolation_value;
61 | continue;
62 | }
63 |
64 | const int top_y_index = floorf(in_y);
65 | const int bottom_y_index = ceilf(in_y);
66 | const float y_lerp = in_y - top_y_index;
67 |
68 | const int left_x_index = floorf(in_x);
69 | const int right_x_index = ceilf(in_x);
70 | const float x_lerp = in_x - left_x_index;
71 |
72 | const float *pimage = image_ptr + (b_in * depth + d) * image_height * image_width;
73 | const float top_left = pimage[top_y_index * image_width + left_x_index];
74 | const float top_right = pimage[top_y_index * image_width + right_x_index];
75 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index];
76 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index];
77 |
78 | const float top = top_left + (top_right - top_left) * x_lerp;
79 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
80 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp;
81 | }
82 | }
83 |
84 | __global__
85 | void CropAndResizeBackpropImageKernel(
86 | const int nthreads, const float *grads_ptr, const float *boxes_ptr,
87 | const int *box_ind_ptr, int num_boxes, int batch, int image_height,
88 | int image_width, int crop_height, int crop_width, int depth,
89 | float *grads_image_ptr)
90 | {
91 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads)
92 | {
93 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b))
94 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b))
95 | int idx = out_idx;
96 | const int x = idx % crop_width;
97 | idx /= crop_width;
98 | const int y = idx % crop_height;
99 | idx /= crop_height;
100 | const int d = idx % depth;
101 | const int b = idx / depth;
102 |
103 | const float y1 = boxes_ptr[b * 4];
104 | const float x1 = boxes_ptr[b * 4 + 1];
105 | const float y2 = boxes_ptr[b * 4 + 2];
106 | const float x2 = boxes_ptr[b * 4 + 3];
107 |
108 | const int b_in = box_ind_ptr[b];
109 | if (b_in < 0 || b_in >= batch)
110 | {
111 | continue;
112 | }
113 |
114 | const float height_scale =
115 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1)
116 | : 0;
117 | const float width_scale =
118 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0;
119 |
120 | const float in_y = (crop_height > 1)
121 | ? y1 * (image_height - 1) + y * height_scale
122 | : 0.5 * (y1 + y2) * (image_height - 1);
123 | if (in_y < 0 || in_y > image_height - 1)
124 | {
125 | continue;
126 | }
127 |
128 | const float in_x = (crop_width > 1)
129 | ? x1 * (image_width - 1) + x * width_scale
130 | : 0.5 * (x1 + x2) * (image_width - 1);
131 | if (in_x < 0 || in_x > image_width - 1)
132 | {
133 | continue;
134 | }
135 |
136 | const int top_y_index = floorf(in_y);
137 | const int bottom_y_index = ceilf(in_y);
138 | const float y_lerp = in_y - top_y_index;
139 |
140 | const int left_x_index = floorf(in_x);
141 | const int right_x_index = ceilf(in_x);
142 | const float x_lerp = in_x - left_x_index;
143 |
144 | float *pimage = grads_image_ptr + (b_in * depth + d) * image_height * image_width;
145 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx];
146 | atomicAdd(
147 | pimage + top_y_index * image_width + left_x_index,
148 | (1 - x_lerp) * dtop
149 | );
150 | atomicAdd(
151 | pimage + top_y_index * image_width + right_x_index,
152 | x_lerp * dtop
153 | );
154 |
155 | const float dbottom = y_lerp * grads_ptr[out_idx];
156 | atomicAdd(
157 | pimage + bottom_y_index * image_width + left_x_index,
158 | (1 - x_lerp) * dbottom
159 | );
160 | atomicAdd(
161 | pimage + bottom_y_index * image_width + right_x_index,
162 | x_lerp * dbottom
163 | );
164 | }
165 | }
166 |
167 |
168 | void CropAndResizeLaucher(
169 | const float *image_ptr, const float *boxes_ptr,
170 | const int *box_ind_ptr, int num_boxes, int batch, int image_height,
171 | int image_width, int crop_height, int crop_width, int depth,
172 | float extrapolation_value, float *crops_ptr, cudaStream_t stream)
173 | {
174 | const int total_count = num_boxes * crop_height * crop_width * depth;
175 | const int thread_per_block = 1024;
176 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block;
177 | cudaError_t err;
178 |
179 | if (total_count > 0)
180 | {
181 | CropAndResizeKernel<<<block_count, thread_per_block, 0, stream>>>(
182 | total_count, image_ptr, boxes_ptr,
183 | box_ind_ptr, num_boxes, batch, image_height, image_width,
184 | crop_height, crop_width, depth, extrapolation_value, crops_ptr);
185 |
186 | err = cudaGetLastError();
187 | if (cudaSuccess != err)
188 | {
189 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
190 | exit(-1);
191 | }
192 | }
193 | }
194 |
195 |
196 | void CropAndResizeBackpropImageLaucher(
197 | const float *grads_ptr, const float *boxes_ptr,
198 | const int *box_ind_ptr, int num_boxes, int batch, int image_height,
199 | int image_width, int crop_height, int crop_width, int depth,
200 | float *grads_image_ptr, cudaStream_t stream)
201 | {
202 | const int total_count = num_boxes * crop_height * crop_width * depth;
203 | const int thread_per_block = 1024;
204 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block;
205 | cudaError_t err;
206 |
207 | if (total_count > 0)
208 | {
209 | CropAndResizeBackpropImageKernel<<<block_count, thread_per_block, 0, stream>>>(
210 | total_count, grads_ptr, boxes_ptr,
211 | box_ind_ptr, num_boxes, batch, image_height, image_width,
212 | crop_height, crop_width, depth, grads_image_ptr);
213 |
214 | err = cudaGetLastError();
215 | if (cudaSuccess != err)
216 | {
217 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
218 | exit(-1);
219 | }
220 | }
221 | }
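
Note: the `block_count` expression in both launchers is a ceiling division, so every output element is covered even when `total_count` is not a multiple of `thread_per_block`. A quick standalone check with illustrative numbers (not from the repository):

thread_per_block = 1024
for total_count in (1, 1024, 1025, 5000):
    # Ceiling division: smallest block count whose total thread count covers total_count.
    block_count = (total_count + thread_per_block - 1) // thread_per_block
    assert block_count * thread_per_block >= total_count
    print(total_count, block_count)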
--------------------------------------------------------------------------------
/lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o
--------------------------------------------------------------------------------
/lib/roi_align/src/cuda/crop_and_resize_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _CropAndResize_Kernel
2 | #define _CropAndResize_Kernel
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | void CropAndResizeLaucher(
9 | const float *image_ptr, const float *boxes_ptr,
10 | const int *box_ind_ptr, int num_boxes, int batch, int image_height,
11 | int image_width, int crop_height, int crop_width, int depth,
12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream);
13 |
14 | void CropAndResizeBackpropImageLaucher(
15 | const float *grads_ptr, const float *boxes_ptr,
16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height,
17 | int image_width, int crop_height, int crop_width, int depth,
18 | float *grads_image_ptr, cudaStream_t stream);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif
--------------------------------------------------------------------------------
/models/Model7.py:
--------------------------------------------------------------------------------
1 | """Model7 is for semantic embedding & attention. We replace the global classification with the semantic classification,
2 | thus applicable for textual grounding problem."""
3 |
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import absolute_import
7 | import sys
8 | sys.path.insert(0, '../')
9 | import torch
10 | import numpy as np
11 | import torch.nn as nn
12 | from lib.configure.config import Config
13 | from lib.resnet.resnet import resnet50
14 | from torch.autograd import Variable
15 | from lib.bilinear_pooling.CompactBilinearPooling import CompactBilinearPooling
16 |
17 |
18 | class Model7(nn.Module):
19 |
20 | def __init__(self, opts, body_pretrain=False):
21 | super(Model7, self).__init__()
22 |
23 | # Load pre-trained back-boned model
24 | print('==> Building backbone model...')
25 | config = Config()
26 | config.IMAGES_PER_GPU = opts.batch_size
27 | config.NUM_CLASSES = opts.class_num
28 |
29 | # Load Attribute module
30 | attr_branch = AttributeBranch(300)
31 | attr_res_net = resnet50(True, path='./checkpoint/AENet_clsfier_person_256d_4.pth', classnum=4)
32 |
33 | # Load semantic embeddings
34 | dictionary = {'man': [1, 0, 0.5, 0.5],
35 | 'woman': [0, 1, 0.5, 0.5],
36 | 'lady': [0, 1, 0.25, 0.75],
37 | 'female': [0, 1, 0.5, 0.5],
38 | 'boy': [1, 0, 1, 0],
39 | 'girl': [0, 1, 1, 0],
40 | 'kid': [0.5, 0.5, 1, 0],
41 | 'child': [0.5, 0.5, 1, 0],
42 | 'young': [0.5, 0.5, 1, 0],
43 | 'elderly': [0.5, 0.5, 0, 1]}
44 | for key in dictionary.keys():
45 | dictionary[key] = np.asarray(dictionary[key])
46 |
47 | # Freeze the attr-resnet model
48 | for param in attr_res_net.parameters():
49 | param.requires_grad = False
50 |
51 | for param in attr_res_net.fc.parameters():
52 | param.requires_grad = False
53 |
54 | # Freeze the attribute branch or not
55 | for param in attr_branch.parameters():
56 | param.requires_grad = True
57 |
58 | self.attr_branch = attr_branch
59 | self.opts = opts
60 | self.attr_res_net = attr_res_net
61 | self.pool = nn.AvgPool2d(kernel_size=64, stride=1)
62 | self.sigmoid = nn.Sigmoid()
63 | self.regressor = nn.Linear(256, 4)
64 | self.semantic_layer = SemanticLayer(dictionary)
65 |
66 | def forward(self, img, label, embeddings):
67 |
68 | # Attribute Branch
69 | conv_feat4, conv_feat = self.attr_res_net(img)
70 | attr_map, att_conv_feature = self.attr_branch(conv_feat, embeddings)
71 | feat = self.pool(att_conv_feature)
72 | feat = self.regressor(feat.view(feat.shape[0], feat.shape[1]))
73 | output = self.semantic_layer(feat, label)
74 | return output, attr_map, att_conv_feature
75 |
76 |
77 | class AttributeBranch(nn.Module):
78 |
79 | def __init__(self, attr_num):
80 | super(AttributeBranch, self).__init__()
81 |
82 | self.textual_emb = nn.Linear(attr_num, 256)
83 | self.conv = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=True)
84 | self.mcb_attr = CompactBilinearPooling(256, 256, 256).cuda()
85 | self.mcb_conv1_attr = nn.Conv2d(256, 32, kernel_size=1, stride=1, padding=0, bias=True)
86 | self.mcb_relu1_attr = nn.ReLU(inplace=True)
87 | self.mcb_conv2_attr = nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0, bias=True)
88 | self.mcb_sigmoid = nn.Sigmoid()
89 |
90 | def forward(self, entity_feature, attr_one_hot):
91 |
92 | feature = self.mcb_relu1_attr(entity_feature)
93 | # Reshape attribute one hot input
94 | attr_one_hot = self.textual_emb(attr_one_hot)
95 | attr_one_hot = attr_one_hot.view(attr_one_hot.shape[0], attr_one_hot.shape[1], 1, 1)
96 |
97 | # stack attention map generating for P3, P4, P5
98 | attr_one_hot = attr_one_hot.expand_as(feature)
99 |
100 | # Attribute attention generation and applied
101 | mcb_attr_feat = self.mcb_attr(self.conv(attr_one_hot), feature)
102 | attr_map = self.mcb_sigmoid(self.mcb_conv2_attr(self.mcb_relu1_attr(self.mcb_conv1_attr(mcb_attr_feat))))
103 | attr_feature = (torch.mul(attr_map, entity_feature))
104 |
105 | return attr_map, attr_feature
106 |
107 |
108 | class SemanticLayer(nn.Module):
109 | def __init__(self, dictionary):
110 | super(SemanticLayer, self).__init__()
111 |
112 | list_file = open('./others/low-level-attr.txt', 'r')
113 | entity_att = []
114 | for i in list_file.readlines():
115 | entity_att.append(i.replace('\n', ''))
116 |
117 | # Create semantic matrix
118 | s_matrix = torch.zeros(10, 4).cuda()
119 | for index, item in enumerate(entity_att):
120 | emb = torch.from_numpy(dictionary[item])
121 | s_matrix[index] = emb
122 | self.s_matrix = Variable(s_matrix)
123 |
124 | def forward(self, x, label):
125 | # x: (batch * 4)
126 | # label: (batch,)
127 | prob = Variable(torch.zeros(x.shape[0]))
128 | for index in range(x.shape[0]):
129 | lbl = label[index]
130 | prob[index] = torch.nn.functional.cosine_similarity(self.s_matrix, x[index].view(1, -1))[lbl]
131 | prob = prob.sum() / prob.shape[0]
132 | return 1-prob
133 |
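
A minimal, self-contained sketch (not part of the repository) of what SemanticLayer above computes: the regressed 4-d attribute vector is compared with every row of a fixed semantic matrix by cosine similarity, and the loss is one minus the similarity at the ground-truth row. The matrix rows and the semantic_loss name below are illustrative; the rows follow the same 4-d convention as the dictionary in Model7 (the first two entries read as gender, the last two as age).

import torch
import torch.nn.functional as F

# Illustrative semantic matrix; one row per attribute word.
s_matrix = torch.tensor([[1.0, 0.0, 0.5, 0.5],   # man
                         [0.0, 1.0, 0.5, 0.5],   # woman
                         [0.5, 0.5, 1.0, 0.0]])  # kid

def semantic_loss(pred, labels):
    # pred: (batch, 4) regressed attribute vectors; labels: (batch,) row indices.
    pred_n = F.normalize(pred, dim=1)
    mat_n = F.normalize(s_matrix, dim=1)
    sims = pred_n @ mat_n.t()                          # (batch, num_rows) cosine similarities
    picked = sims[torch.arange(pred.shape[0]), labels] # similarity at the ground-truth row
    return 1.0 - picked.mean()

pred = torch.randn(2, 4, requires_grad=True)
loss = semantic_loss(pred, torch.tensor([0, 2]))
loss.backward()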
--------------------------------------------------------------------------------
/models/__pycache__/Model7.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/models/__pycache__/Model7.cpython-35.pyc
--------------------------------------------------------------------------------
/others/README.md:
--------------------------------------------------------------------------------
1 | # coco_person_list.txt:
2 | The 12000 images we extracted from coco_train_2017 for person attribute grounding.
3 | # low-level-attr.txt:
4 | The attribute dictionary.
5 | # glove.6B.300d.txt:
6 | The word embedding dictionary we use; it comes from the GloVe 6B release, and each embedding is 300-dimensional.
7 | # dictionary_emb.pkl:
8 | Instead of loading the whole word-embedding file, we manually extract only the attribute word embeddings for faster dictionary loading.
9 |
10 |
11 |
12 |
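
For reference, a small sketch of how these files are consumed, mirroring load_dictionary in train_attr_attention_embedding.py; the assumption that each attribute word maps to a 300-d vector follows the embedding construction in the training script.

import pickle

# Read the attribute dictionary (one attribute word per line).
with open('./others/low-level-attr.txt', 'r') as f:
    entity_att = [line.strip() for line in f if line.strip()]

# Load the pre-extracted GloVe embeddings for just these attribute words.
with open('./others/dictionary_emb.pkl', 'rb') as f:
    embeddings_index = pickle.load(f)

# Each attribute word is expected to map to a 300-d GloVe vector.
for word in entity_att:
    assert word in embeddings_index, word
    assert len(embeddings_index[word]) == 300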
--------------------------------------------------------------------------------
/others/dictionary_emb.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/others/dictionary_emb.pkl
--------------------------------------------------------------------------------
/others/low-level-attr.txt:
--------------------------------------------------------------------------------
1 | man
2 | woman
3 | lady
4 | female
5 | boy
6 | girl
7 | kid
8 | child
9 | young
10 | elderly
11 |
--------------------------------------------------------------------------------
/parser.py:
--------------------------------------------------------------------------------
1 | '''Argument parser for attribute grounding training with PyTorch.'''
2 | from __future__ import print_function
3 |
4 | import torch
5 | import argparse
6 | import torch.optim as optim
7 |
8 |
9 | def parse_opts():
10 | parser = argparse.ArgumentParser(description='PyTorch Attribute Grounding Training')
11 | parser.add_argument('--msg', default=False, type=bool, help='display message')
12 | parser.add_argument('--use_gpu', default=torch.cuda.is_available(), type=bool, help='Use GPU or not')
13 | parser.add_argument('--multi_gpu', default=(torch.cuda.device_count() > 0), type=bool, help='Use multi-GPU or not')
14 | parser.add_argument('--gpu_id', default=-1, type=int, help='Use specific GPU.')
15 |
16 | parser.add_argument('--optimizer', default=optim.SGD, help='optimizer')
17 | parser.add_argument('--num_workers', default=2, type=int, help='num of fetching threads')
18 | parser.add_argument('--batch_size', default=12, type=int, help='batch size')
19 | parser.add_argument('--weight_decay', default=1e-3, type=float, help='weight decay')
20 | parser.add_argument('--seed', default=0, type=int, help='random seed')
21 | parser.add_argument('--result_path', default='./results', help='result path')
22 |
23 | # Define the training parameters
24 | parser.add_argument('--class_num', default=5, type=int, help='number of classes')
25 | parser.add_argument('--checkpoint_epoch', default=2, type=int, help='epochs between checkpoint saves')
26 | parser.add_argument('--lr_adjust_epoch', default=5, type=int, help='lr adjust epoch')
27 | parser.add_argument('--n_epoch', default=1000, type=int, help='training epochs')
28 | parser.add_argument('--lr', default=0.01, type=float, help='learning rate')
29 |
30 | # Define the checkpoint reloading path
31 | parser.add_argument('--resume', default='', help='checkpoint path to resume from')
32 |
33 | # Define the data_set path
34 | parser.add_argument('--img_path', default='/media/drive1/Data/coco17/train2017/', help='coco_train_2017 path')
35 | parser.add_argument('--annotation', default='/media/drive1/Data/coco17/annotations/'
36 | 'captions_train2017.json', help='coco_train_2017 annotation path')
37 | parser.add_argument('--dictionary', default='./others/low-level-attr.txt', help='dict of attributes')
38 | args = parser.parse_args()
39 |
40 | return args
41 |
42 |
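
A quick usage sketch of parse_opts (assuming it is run from the repository root so the local parser.py is imported, as the training script does); the paths and values below are placeholders, not the defaults above.

import sys
from parser import parse_opts

# Simulate a command line; flags match the add_argument definitions above.
sys.argv = ['train_attr_attention_embedding.py',
            '--batch_size', '8',
            '--lr', '0.005',
            '--img_path', '/path/to/coco17/train2017/',
            '--annotation', '/path/to/coco17/annotations/captions_train2017.json']
opts = parse_opts()
print(opts.batch_size, opts.lr, opts.class_num)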
--------------------------------------------------------------------------------
/results/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/results/architecture.png
--------------------------------------------------------------------------------
/results/test.log:
--------------------------------------------------------------------------------
1 | epoch time loss
2 |
--------------------------------------------------------------------------------
/results/train.log:
--------------------------------------------------------------------------------
1 | epoch time loss
2 |
--------------------------------------------------------------------------------
/results/train_batch.log:
--------------------------------------------------------------------------------
1 | epoch batch loss
2 | 1 2 0.1857217252254486
3 | 1 3 0.20769339799880981
4 | 1 4 0.2339950054883957
5 | 1 5 0.24028053283691406
6 | 1 6 0.24673599004745483
7 | 1 7 0.24947353771754674
8 | 1 8 0.24981582164764404
9 | 1 9 0.24003050724665323
10 | 1 10 0.23302733302116393
11 | 1 11 0.22751894864169034
12 | 1 12 0.22201103965441385
13 |
--------------------------------------------------------------------------------
/runs/Oct05_13-58-18_apg395-001/events.out.tfevents.1538773098.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_13-58-18_apg395-001/events.out.tfevents.1538773098.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-08-13_apg395-001/events.out.tfevents.1538773693.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-13_apg395-001/events.out.tfevents.1538773693.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-08-27_apg395-001/events.out.tfevents.1538773707.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-27_apg395-001/events.out.tfevents.1538773707.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-08-58_apg395-001/events.out.tfevents.1538773738.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-58_apg395-001/events.out.tfevents.1538773738.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-17-30_apg395-001/events.out.tfevents.1538774250.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-17-30_apg395-001/events.out.tfevents.1538774250.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-17-42_apg395-001/events.out.tfevents.1538774262.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-17-42_apg395-001/events.out.tfevents.1538774262.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-18-03_apg395-001/events.out.tfevents.1538774283.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-18-03_apg395-001/events.out.tfevents.1538774283.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-18-55_apg395-001/events.out.tfevents.1538774335.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-18-55_apg395-001/events.out.tfevents.1538774335.apg395-001
--------------------------------------------------------------------------------
/runs/Oct05_14-19-46_apg395-001/events.out.tfevents.1538774386.apg395-001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-19-46_apg395-001/events.out.tfevents.1538774386.apg395-001
--------------------------------------------------------------------------------
/train_attr_attention_embedding.py:
--------------------------------------------------------------------------------
1 | '''Train unsupervised entity grounding with an attention + pixel-classification mechanism.'''
2 | from __future__ import print_function
3 |
4 | import random
5 | import pickle
6 | from parser import *
7 | import matplotlib.pyplot as plt
8 | from models.Model7 import Model7
9 | from lib.configure.net_util import *
10 | from torchvision import transforms
11 | from tensorboardX import SummaryWriter
12 | from lib.dataset.coco_dataset import CocoCaptions
13 |
14 |
15 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152
16 | # os.environ["CUDA_VISIBLE_DEVICES"]="1"
17 |
18 | def l2_regulariza_loss(attr_map):
19 | # Mean activation of the attention map, used as an L2-style regularization term
20 | mean = torch.mean(attr_map.view(attr_map.shape[0], attr_map.shape[-2], attr_map.shape[-1]))
21 | return mean
22 |
23 |
24 | def load_dictionary(name):
25 | with open('./others/' + name + '.pkl', 'rb') as f:
26 | return pickle.load(f)
27 |
28 |
29 | # Randomly pick a label from multi one-hot label
30 | def random_pick(one_hot):
31 | # return a randomly selected label
32 | label = torch.zeros(one_hot.shape[0])
33 | one_hot_return = torch.zeros_like(one_hot)
34 |
35 | for i in range(one_hot.shape[0]):
36 | # all labels to save all the labels
37 | all_labels = []
38 | count = 0
39 | for j in range(one_hot.shape[1]):
40 | if one_hot[i][j] == 1.:
41 | all_labels.append(count)
42 | count += 1
43 | # randomly picking one label
44 | if len(all_labels) != 0:
45 | label[i] = random.choice(all_labels)
46 | else:
47 | label[i] = 2
48 | one_hot_return[i][int(label[i])] = 1
49 | return label, one_hot_return
50 |
51 |
52 | # Multi-Pixel embedding learning for multi-category picking
53 | def top_k_emb(visual_emb, model, label, single_attribute_label, K=100):
54 | # Given pixel-wise features, select top-k pixels with highest category prob out for
55 | # multi-cross entropy learning
56 | # Visual-features: (batch, emb #, pixel #)
57 | # Returning prob: (batch #, top-K, class_prob)
58 | # Returning feat: (batch #, top-K, feature_size)
59 | visual_emb = visual_emb.view((visual_emb.shape[0], visual_emb.shape[1], visual_emb.shape[2]*visual_emb.shape[3]))
60 |
61 | # i: batch number
62 | for i in range(visual_emb.shape[0]):
63 | sorting = np.zeros((visual_emb.shape[2]))
64 | # j: pixel numbers in feature maps
65 | for j in range(visual_emb.shape[2]):
66 | # extracting pixel features and reshape
67 | emb = visual_emb[i, :, j]
68 | # emb = F.relu(model.fc_p5(emb.contiguous().view(1, -1)))
69 | emb_ = (emb.contiguous().view(1, -1))
70 | output = model.attr_res_net.fc(emb_)
71 | prob = opts.criterion[0](output, single_attribute_label[i])
72 | opts.prob_set[j] = output[0]
73 | opts.features_set[j] = emb
74 | sorting[j] = prob.data.cpu().numpy()[0]
75 |
76 | # Arg-sort the per-pixel loss (ascending) and keep the K lowest-loss, i.e. highest-probability, pixels
77 | sorting = np.argsort(sorting)[0:K]
78 |
79 | # index: number of top-K
80 | for index in range(K):
81 | opts.return_prob[i, index] = opts.prob_set[int(sorting[index])]
82 | opts.return_feat[i, index] = opts.features_set[int(sorting[index])]
83 | return opts.return_feat
84 |
85 |
86 | def train_net(net, opts):
87 |
88 | print('training at epoch {}'.format(opts.epoch+1))
89 |
90 | if opts.use_gpu:
91 | net.cuda()
92 |
93 | net.train(True)
94 | train_loss = 0
95 | total_time = 0
96 | batch_idx = 0
97 | optimizer = opts.current_optimizer
98 | # back_bone_optimizer = opts.backbone_optimizer
99 | end_time = time.time()
100 | train_back_bone = True
101 | fig = plt.figure()
102 |
103 | # category: semantic labels for single selected label
104 | # s_entity_one_hot: randomly selected entity one-hot
105 | # s_entity_label: randomly selected entity label
106 | # att_emb: word2vec embedding for attributes
107 | # att_label: attributes pairs for margin loss learning
108 | # attr_one_hot: all attributes one-hot
109 | # textual_emb: phrase embedding
110 | # phrase/line: phrases/lines in NLP format
111 | # mask: ground truth annotations for object
112 | for batch_idx, (images, attr_one_hot, entity_one_hot) in enumerate(data_loader):
113 |
114 | # model.visual_net.config.IMAGES_PER_GPU = images.size(0)
115 | images = Variable(images).cuda()
116 |
117 | # Randomly pick one attribute per iteration
118 | single_attribute_label, single_attribute_one_hot = random_pick(attr_one_hot)
119 | attr_one_hot = Variable(single_attribute_one_hot).cuda().float()
120 | single_attribute_label = Variable(single_attribute_label).cuda().long()
121 |
122 | # Create embeddings input
123 | embeddings = Variable(torch.zeros(attr_one_hot.shape[0], 300))
124 | for index, item in enumerate(single_attribute_label):
125 | i = opts.entity_att[item.data.cpu().numpy()[0]]
126 | embeddings[index] = Variable(torch.from_numpy(opts.embeddings_index[i])).cuda()
127 |
128 | # Feed in network
129 | y, attr_map, att_conv_feature = net(images, single_attribute_label, embeddings)
130 |
131 | loss = y
132 |
133 | if train_back_bone:
134 | optimizer.zero_grad()
135 | train_loss += loss.data[0]
136 | loss.backward()
137 | optimizer.step()
138 |
139 | # Display the generated att_map and instant loss
140 | if batch_idx % 1 == 0:
141 | plt.ion()
142 | plt.show()
143 | rand_idx = random.randint(0, opts.batch_size - 1)
144 | if batch_idx % 1 == 0:
145 | # Print out the attribute labels
146 | # plt.suptitle(opts.entity_att[int(single_attribute_label[rand_idx])])
147 | plt.subplot(141)
148 | vis = torch.nn.functional.sigmoid((model.attr_res_net.fc.weight[0].view(-1, 1, 1)
149 | * att_conv_feature[rand_idx]).sum(0)).cpu().data.numpy()
150 | plt.imshow(vis)
151 |
152 | plt.subplot(142)
153 | vis = torch.nn.functional.sigmoid((model.attr_res_net.fc.weight[1].view(-1, 1, 1)
154 | * att_conv_feature[rand_idx]).sum(0)).cpu().data.numpy()
155 | plt.imshow(vis)
156 |
157 | plt.subplot(143)
158 | plt.imshow(attr_map[rand_idx, 0].data.cpu().numpy())
159 |
160 | plt.subplot(144)
161 | plt.imshow(images[rand_idx].permute(1, 2, 0).float().data.cpu())
162 | plt.pause(0.001)
163 | writer.add_scalar('Cross Entropy Loss', train_loss / (batch_idx+1), opts.iter_n)
164 | opts.iter_n += 1
165 |
166 | print('Overall Loss: %.8f'
167 | % (train_loss/(batch_idx+1)))
168 |
169 | total_time += (time.time() - end_time)
170 | end_time = time.time()
171 | batch_idx += 1
172 |
173 | opts.train_batch_logger.log({
174 | 'epoch': (opts.epoch+1),
175 | 'batch': batch_idx+1,
176 | 'loss': train_loss / (batch_idx+1),
177 | })
178 |
179 | if batch_idx % 100 == 0:
180 | print('100 batch.')
181 | # Save checkpoint.
182 | net_states = {
183 | 'state_dict': net.state_dict(),
184 | 'epoch': opts.epoch + 1,
185 | 'loss': opts.train_losses,
186 | 'optimizer': opts.current_optimizer.state_dict()
187 | }
188 | epo_batch = str(opts.epoch) + '-' + str(batch_idx)
189 | save_file_path = os.path.join(opts.checkpoint_path,
190 | 'Model7_exp1_{}.pth'.format(epo_batch))
191 | torch.save(net_states, save_file_path)
192 | opts.lr /= 2
193 | opts.regularization /= 2
194 | params = filter(lambda p: p.requires_grad, model.parameters())
195 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay)
196 | train_loss /= (batch_idx + 1)
197 |
198 | opts.train_epoch_logger.log({
199 | 'epoch': (opts.epoch+1),
200 | 'loss': train_loss,
201 | 'time': total_time,
202 | })
203 |
204 | opts.train_losses.append(train_loss)
205 |
206 | # Save checkpoint.
207 | net_states = {
208 | 'state_dict': net.state_dict(),
209 | 'epoch': opts.epoch + 1,
210 | 'loss': opts.train_losses,
211 | 'optimizer': opts.current_optimizer.state_dict()
212 | }
213 |
214 | if opts.epoch % opts.checkpoint_epoch == 0:
215 | save_file_path = os.path.join(opts.checkpoint_path, 'Model7_exp1_{}.pth'.format(opts.epoch))
216 | torch.save(net_states, save_file_path)
217 |
218 | print('Batch Loss: %.8f, elapsed time: %3.f seconds.' % (train_loss, total_time))
219 |
220 |
221 | if __name__ == '__main__':
222 |
223 | opts = parse_opts()
224 | writer = SummaryWriter()
225 |
226 | if opts.gpu_id >= 0:
227 | torch.cuda.set_device(opts.gpu_id)
228 | opts.multi_gpu = False
229 |
230 | torch.manual_seed(opts.seed)
231 | if opts.use_gpu:
232 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
233 | torch.cuda.manual_seed(opts.seed)
234 |
235 | # Loading Data
236 | print("Preparing Flickr data set...")
237 | opts.k = 600
238 | opts.ite = 0
239 | opts.regularization = 0.1
240 | size = (1024, 1024)
241 | feat_size = (64, 64)
242 | transform = transforms.Compose([transforms.Resize(size), transforms.ToTensor()])
243 | data_set = CocoCaptions(opts.img_path, opts.annotation, transform)
244 | data_loader = torch.utils.data.DataLoader(data_set, batch_size=opts.batch_size, shuffle=True)
245 |
246 | # Load dictionary
247 | list_file = open(opts.dictionary, 'r')
248 | entity_att = []
249 | for i in list_file.readlines():
250 | entity_att.append(i.replace('\n', ''))
251 | opts.entity_att = entity_att
252 |
253 | # Load semantic embeddings
254 | embeddings_index = load_dictionary('dictionary_emb')
255 | print('Dictionary loaded.')
256 | opts.embeddings_index = embeddings_index
257 |
258 | if not os.path.exists(opts.result_path):
259 | os.mkdir(opts.result_path)
260 |
261 | opts.train_epoch_logger = Logger(os.path.join(opts.result_path, 'train.log'),
262 | ['epoch', 'time', 'loss'])
263 | opts.train_batch_logger = Logger(os.path.join(opts.result_path, 'train_batch.log'),
264 | ['epoch', 'batch', 'loss'])
265 | opts.test_epoch_logger = Logger(os.path.join(opts.result_path, 'test.log'),
266 | ['epoch', 'time', 'loss'])
267 |
268 | # Model
269 | print('==> Building model...')
270 | model = Model7(opts)
271 |
272 | # Load Back bone Module
273 | if opts.resume:
274 | state_dict = torch.load(opts.resume)['state_dict']
275 | new_params = model.state_dict()
276 | new_params.update(state_dict)
277 | # Remove the extra keys
278 | model_keys = model.state_dict().keys()
279 | for name, param in list(new_params.items()):
280 | if name not in model_keys:
281 | del new_params[name]
282 | model.load_state_dict(new_params)
283 | start_epoch = 0
284 | print('==> model built.')
285 | opts.criterion = [torch.nn.CrossEntropyLoss(), torch.nn.MSELoss()]
286 |
287 | # Training
288 | parameters = filter(lambda p: p.requires_grad, model.parameters())
289 | params = sum([np.prod(p.size()) for p in parameters])
290 | print(params, 'trainable parameters in the network.')
291 | set_parameters(opts)
292 | opts.iter_n = 0
293 |
294 | for epoch in range(start_epoch, start_epoch+opts.n_epoch):
295 | opts.epoch = epoch
296 | if epoch == 0:
297 | params = filter(lambda p: p.requires_grad, model.parameters())
298 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay)
299 |
300 | elif (epoch % opts.lr_adjust_epoch) == 0 and epoch != 0:
301 | opts.lr /= 5
302 | params = filter(lambda p: p.requires_grad, model.parameters())
303 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay)
304 |
305 | train_net(model, opts)
306 |
307 | # export scalar data to JSON for external processing
308 | writer.export_scalars_to_json("./all_scalars.json")
309 | writer.close()
310 |
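
A small standalone check of what random_pick above does: given a multi-hot attribute vector, it returns one randomly chosen positive index per sample plus the corresponding one-hot, falling back to class 2 when no attribute is set. The toy tensors and the simplified re-statement below are illustrative only.

import random
import torch

def random_pick(one_hot):
    # Same logic as in the training script: pick one positive attribute index per row at random.
    label = torch.zeros(one_hot.shape[0])
    one_hot_return = torch.zeros_like(one_hot)
    for i in range(one_hot.shape[0]):
        positives = [j for j in range(one_hot.shape[1]) if one_hot[i][j] == 1.]
        label[i] = random.choice(positives) if positives else 2
        one_hot_return[i][int(label[i])] = 1
    return label, one_hot_return

attr = torch.tensor([[0., 1., 0., 1.],    # two attributes set: picks index 1 or 3
                     [0., 0., 0., 0.]])   # nothing set: falls back to index 2
label, picked = random_pick(attr)
print(label, picked)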
--------------------------------------------------------------------------------