├── .idea └── vcs.xml ├── Demo.ipynb ├── README.md ├── __pycache__ └── parser.cpython-35.pyc ├── checkpoint └── ReadMe ├── demo_attr.png ├── lib ├── __init__.py ├── __init__.pyc ├── __pycache__ │ ├── __init__.cpython-35.pyc │ └── nms_wrapper.cpython-35.pyc ├── bilinear_pooling │ ├── CompactBilinearPooling.py │ └── __pycache__ │ │ └── CompactBilinearPooling.cpython-35.pyc ├── configure │ ├── __pycache__ │ │ ├── config.cpython-35.pyc │ │ └── net_util.cpython-35.pyc │ ├── config.py │ └── net_util.py ├── dataset │ ├── __pycache__ │ │ └── coco_dataset.cpython-35.pyc │ └── coco_dataset.py ├── make.sh ├── nms │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-35.pyc │ │ └── pth_nms.cpython-35.pyc │ ├── _ext │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ └── nms │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ │ └── _nms.so │ ├── build.py │ ├── pth_nms.py │ ├── pth_nms.pyc │ └── src │ │ ├── cuda │ │ ├── nms_kernel.cu │ │ ├── nms_kernel.cu.o │ │ └── nms_kernel.h │ │ ├── nms.c │ │ ├── nms.h │ │ ├── nms_cuda.c │ │ └── nms_cuda.h ├── nms_wrapper.py ├── nms_wrapper.pyc ├── pytorch_fft │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-35.pyc │ ├── _ext │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ └── th_fft │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ └── __init__.cpython-35.pyc │ │ │ └── _th_fft.so │ ├── fft │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── autograd.cpython-35.pyc │ │ │ └── fft.cpython-35.pyc │ │ ├── autograd.py │ │ └── fft.py │ └── src │ │ ├── generic │ │ ├── helpers.c │ │ ├── th_fft_cuda.c │ │ ├── th_fft_cuda.h │ │ ├── th_irfft_cuda.c │ │ └── th_rfft_cuda.c │ │ ├── th_fft_cuda.c │ │ ├── th_fft_cuda.h │ │ ├── th_fft_generate_double.h │ │ ├── th_fft_generate_float.h │ │ └── th_fft_generate_helpers.h ├── resnet │ ├── __pycache__ │ │ └── resnet.cpython-35.pyc │ └── resnet.py └── roi_align │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ ├── __init__.cpython-35.pyc │ └── crop_and_resize.cpython-35.pyc │ ├── _ext │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ └── __init__.cpython-35.pyc │ └── crop_and_resize │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ └── __init__.cpython-35.pyc │ │ └── _crop_and_resize.so │ ├── build.py │ ├── crop_and_resize.py │ ├── crop_and_resize.pyc │ ├── roi_align.py │ ├── roi_align.pyc │ └── src │ ├── crop_and_resize.c │ ├── crop_and_resize.h │ ├── crop_and_resize_gpu.c │ ├── crop_and_resize_gpu.h │ └── cuda │ ├── crop_and_resize_kernel.cu │ ├── crop_and_resize_kernel.cu.o │ └── crop_and_resize_kernel.h ├── models ├── Model7.py └── __pycache__ │ └── Model7.cpython-35.pyc ├── others ├── README.md ├── coco_person_list.txt ├── dictionary_emb.pkl └── low-level-attr.txt ├── parser.py ├── results ├── architecture.png ├── test.log ├── train.log └── train_batch.log ├── runs ├── Oct05_13-58-18_apg395-001 │ └── events.out.tfevents.1538773098.apg395-001 ├── Oct05_14-08-13_apg395-001 │ └── events.out.tfevents.1538773693.apg395-001 ├── Oct05_14-08-27_apg395-001 │ └── events.out.tfevents.1538773707.apg395-001 ├── Oct05_14-08-58_apg395-001 │ └── events.out.tfevents.1538773738.apg395-001 ├── Oct05_14-17-30_apg395-001 │ └── events.out.tfevents.1538774250.apg395-001 ├── Oct05_14-17-42_apg395-001 │ └── events.out.tfevents.1538774262.apg395-001 ├── Oct05_14-18-03_apg395-001 │ └── events.out.tfevents.1538774283.apg395-001 ├── 
Oct05_14-18-55_apg395-001 │ └── events.out.tfevents.1538774335.apg395-001 └── Oct05_14-19-46_apg395-001 │ └── events.out.tfevents.1538774386.apg395-001 └── train_attr_attention_embedding.py /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Visual Cues Grounding Through Weak Supervision 2 | 3 | PyTorch implementation of **[Modularized Textual Grounding for Counterfactual Resilience 4 | ](http://openaccess.thecvf.com/content_CVPR_2019/papers/Fang_Modularized_Textual_Grounding_for_Counterfactual_Resilience_CVPR_2019_paper.pdf)**, CVPR 2019. 5 | 6 | Qualitative grounding results can be found on our **[webpage](http://www.public.asu.edu/~zfang29/textual_grounding_cvpr2019/website.html)**. 7 | 8 | ## Introduction 9 | We propose a cross-modal grounding method trained through weak supervision. 10 | 11 | ![architecture](./results/architecture.png "Ground Visual Cue Through a Top-down Guided Design.") 12 | 13 | A demonstration of how to load the model and ground attributes can be found in Demo.ipynb. 14 | 15 | Image --> 'Boy' Attribute --> 'Lady' Attribute 16 |

17 | ![demo](./demo_attr.png) 18 |

19 | 20 | ## Requirements 21 | 1. PyTorch 0.4. 22 | 2. Python 3.6. 23 | 3. FFT package (pytorch_fft, included under lib/pytorch_fft). 24 | 25 | ## Dataset 26 | Weakly trained on both COCO and Flickr 30k. 27 | 28 | ## Usage 29 | Training script for attribute grounding:
train_attr_attention_embedding.py
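For example, once the dataset paths in parser.py point to your local COCO data (see below), training can be launched from the repository root; a minimal invocation, with all further options left at the defaults defined in parser.py:

```bash
python3 train_attr_attention_embedding.py
```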
30 | 31 | Attention model for attribute grounding; it is built on a ResNet-50 network pre-trained for person gender/age classification: 32 |
/models/Model7.py
33 | 34 | `lib/` contains all the necessary dependencies for our framework; it consists of: 35 | 36 | 43 | 44 | In order to re-train our framework, several things may need to be modified: 45 |
 parser.py 
46 | 47 | In parser.py, the img_path and annotations entries need to be changed to point to your local coco_2017_train image directory and annotation file: 48 |
 /path/to/your/local/coco17/image path/annotations/ 
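Purely as an illustration (the actual argument names and defaults live in parser.py; the paths below are placeholders), the edited entries might look like:

```python
# Hypothetical sketch of the relevant parser.py settings -- adjust names and
# paths to match the argument definitions in your checkout.
parser.add_argument('--img_path', default='/path/to/coco17/train2017/')
parser.add_argument('--annotations', default='/path/to/coco17/annotations/captions_train2017.json')
```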
49 | 50 | Argument resume is for loading pre-trained overall model. 51 | 52 | ## Download 53 | To download the pre-trained unsupervised network: 54 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /__pycache__/parser.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/__pycache__/parser.cpython-35.pyc -------------------------------------------------------------------------------- /checkpoint/ReadMe: -------------------------------------------------------------------------------- 1 | This directory contains the pretrained model. 2 | -------------------------------------------------------------------------------- /demo_attr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/demo_attr.png -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__init__.py -------------------------------------------------------------------------------- /lib/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__init__.pyc -------------------------------------------------------------------------------- /lib/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/__pycache__/nms_wrapper.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/__pycache__/nms_wrapper.cpython-35.pyc -------------------------------------------------------------------------------- /lib/bilinear_pooling/CompactBilinearPooling.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '/../../') 3 | import numpy as np 4 | import torch 5 | from torch import nn 6 | from torch.autograd import Variable 7 | 8 | import lib.pytorch_fft.fft.autograd as afft 9 | 10 | 11 | class CompactBilinearPooling(nn.Module): 12 | """ 13 | Compute compact bilinear pooling over two bottom inputs. 14 | Args: 15 | output_dim: output dimension for compact bilinear pooling. 16 | sum_pool: (Optional) If True, sum the output along height and width 17 | dimensions and return output shape [batch_size, output_dim]. 18 | Otherwise return [batch_size, height, width, output_dim]. 19 | Default: True. 20 | rand_h_1: (Optional) an 1D numpy array containing indices in interval 21 | `[0, output_dim)`. Automatically generated from `seed_h_1` 22 | if is None. 23 | rand_s_1: (Optional) an 1D numpy array of 1 and -1, having the same shape 24 | as `rand_h_1`. Automatically generated from `seed_s_1` if is 25 | None. 26 | rand_h_2: (Optional) an 1D numpy array containing indices in interval 27 | `[0, output_dim)`. 
Automatically generated from `seed_h_2` 28 | if is None. 29 | rand_s_2: (Optional) an 1D numpy array of 1 and -1, having the same shape 30 | as `rand_h_2`. Automatically generated from `seed_s_2` if is 31 | None. 32 | """ 33 | 34 | def __init__(self, input_dim1, input_dim2, output_dim, 35 | sum_pool=False, cuda=True, 36 | rand_h_1=None, rand_s_1=None, rand_h_2=None, rand_s_2=None): 37 | super(CompactBilinearPooling, self).__init__() 38 | self.input_dim1 = input_dim1 39 | self.input_dim2 = input_dim2 40 | self.output_dim = output_dim 41 | self.sum_pool = sum_pool 42 | 43 | if rand_h_1 is None: 44 | np.random.seed(1) 45 | rand_h_1 = np.random.randint(output_dim, size=self.input_dim1) 46 | if rand_s_1 is None: 47 | np.random.seed(3) 48 | rand_s_1 = 2 * np.random.randint(2, size=self.input_dim1) - 1 49 | 50 | sparse_sketch_matrix1 = Variable(self.generate_sketch_matrix( 51 | rand_h_1, rand_s_1, self.output_dim)) 52 | 53 | if rand_h_2 is None: 54 | np.random.seed(5) 55 | rand_h_2 = np.random.randint(output_dim, size=self.input_dim2) 56 | if rand_s_2 is None: 57 | np.random.seed(7) 58 | rand_s_2 = 2 * np.random.randint(2, size=self.input_dim2) - 1 59 | 60 | sparse_sketch_matrix2 = Variable(self.generate_sketch_matrix( 61 | rand_h_2, rand_s_2, self.output_dim)) 62 | self.register_buffer("sparse_sketch_matrix1", sparse_sketch_matrix1) 63 | self.register_buffer("sparse_sketch_matrix2", sparse_sketch_matrix2) 64 | 65 | 66 | def forward(self, bottom1, bottom2): 67 | """ 68 | bottom1: 1st input, 4D Tensor of shape [batch_size, input_dim1, height, width]. 69 | bottom2: 2nd input, 4D Tensor of shape [batch_size, input_dim2, height, width]. 70 | """ 71 | assert bottom1.size(1) == self.input_dim1 and \ 72 | bottom2.size(1) == self.input_dim2 73 | 74 | 75 | batch_size, _, height, width = bottom1.size() 76 | 77 | bottom1_flat = bottom1.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim1) 78 | bottom2_flat = bottom2.permute(0, 2, 3, 1).contiguous().view(-1, self.input_dim2) 79 | 80 | sketch_1 = bottom1_flat.mm(self.sparse_sketch_matrix1) 81 | sketch_2 = bottom2_flat.mm(self.sparse_sketch_matrix2) 82 | 83 | fft1_real, fft1_imag = afft.Fft()(sketch_1, Variable(torch.zeros(sketch_1.size())).cuda()) 84 | fft2_real, fft2_imag = afft.Fft()(sketch_2, Variable(torch.zeros(sketch_2.size())).cuda()) 85 | 86 | fft_product_real, fft_product_imag = fft1_real.mul(fft2_real), fft1_imag.mul(fft2_imag) 87 | 88 | cbp_flat = afft.Ifft()(fft_product_real, fft_product_imag)[0] 89 | 90 | cbp = cbp_flat.view(batch_size, height, width, self.output_dim) 91 | 92 | if self.sum_pool: 93 | cbp = cbp.sum(dim=1).sum(dim=1) 94 | 95 | return cbp.permute(0, 3, 1, 2) 96 | 97 | @staticmethod 98 | def generate_sketch_matrix(rand_h, rand_s, output_dim): 99 | """ 100 | Return a sparse matrix used for tensor sketch operation in compact bilinear 101 | pooling 102 | Args: 103 | rand_h: an 1D numpy array containing indices in interval `[0, output_dim)`. 104 | rand_s: an 1D numpy array of 1 and -1, having the same shape as `rand_h`. 105 | output_dim: the output dimensions of compact bilinear pooling. 106 | Returns: 107 | a sparse matrix of shape [input_dim, output_dim] for tensor sketch. 
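            Example (illustrative): with rand_h = [0, 2, 1], rand_s = [1, -1, 1]
            and output_dim = 3, the returned dense matrix is
                [[ 1,  0,  0],
                 [ 0,  0, -1],
                 [ 0,  1,  0]],
            i.e. row i carries rand_s[i] in column rand_h[i], so x.mm(M) adds
            rand_s[i] * x[i] into output bucket rand_h[i] -- the count sketch of x.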
108 | """ 109 | 110 | # Generate a sparse matrix for tensor count sketch 111 | rand_h = rand_h.astype(np.int64) 112 | rand_s = rand_s.astype(np.float32) 113 | assert(rand_h.ndim == 1 and rand_s.ndim == 114 | 1 and len(rand_h) == len(rand_s)) 115 | assert(np.all(rand_h >= 0) and np.all(rand_h < output_dim)) 116 | 117 | input_dim = len(rand_h) 118 | indices = np.concatenate((np.arange(input_dim)[..., np.newaxis], 119 | rand_h[..., np.newaxis]), axis=1) 120 | indices = torch.from_numpy(indices) 121 | rand_s = torch.from_numpy(rand_s) 122 | sparse_sketch_matrix = torch.sparse.FloatTensor( 123 | indices.t(), rand_s, torch.Size([input_dim, output_dim])) 124 | return sparse_sketch_matrix.to_dense() 125 | 126 | 127 | if __name__ == '__main__': 128 | 129 | bottom1 = Variable(torch.randn(48, 2048, 7, 7)).cuda() 130 | bottom2 = Variable(torch.randn(48, 2048, 7, 7)).cuda() 131 | 132 | layer = CompactBilinearPooling(2048, 2048, 16000) 133 | layer.cuda() 134 | layer.train() 135 | out = layer(bottom1, bottom2) -------------------------------------------------------------------------------- /lib/bilinear_pooling/__pycache__/CompactBilinearPooling.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/bilinear_pooling/__pycache__/CompactBilinearPooling.cpython-35.pyc -------------------------------------------------------------------------------- /lib/configure/__pycache__/config.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/configure/__pycache__/config.cpython-35.pyc -------------------------------------------------------------------------------- /lib/configure/__pycache__/net_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/configure/__pycache__/net_util.cpython-35.pyc -------------------------------------------------------------------------------- /lib/configure/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | 12 | import numpy as np 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 17 | 18 | 19 | class Config(object): 20 | """Base configuration class. For custom configurations, create a 21 | sub-class that inherits from this one and override properties 22 | that need to be changed. 23 | """ 24 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 25 | # Useful if your code needs to do things differently depending on which 26 | # experiment is running. 27 | NAME = None # Override in sub-classes 28 | 29 | # NUMBER OF GPUs to use. For CPU training, use 1 30 | GPU_COUNT = 1 31 | 32 | # Number of images to train with on each GPU. A 12GB GPU can typically 33 | # handle 2 images of 1024x1024px. 34 | # Adjust based on your GPU memory and image sizes. Use the highest 35 | # number that your GPU can handle for best performance. 
36 | IMAGES_PER_GPU = 2 37 | 38 | # Number of training steps per epoch 39 | # This doesn't need to match the size of the training set. Tensorboard 40 | # updates are saved at the end of each epoch, so setting this to a 41 | # smaller number means getting more frequent TensorBoard updates. 42 | # Validation stats are also calculated at each epoch end and they 43 | # might take a while, so don't set this too small to avoid spending 44 | # a lot of time on validation stats. 45 | STEPS_PER_EPOCH = 1000 46 | 47 | # Number of validation steps to run at the end of every training epoch. 48 | # A bigger number improves accuracy of validation stats, but slows 49 | # down the training. 50 | VALIDATION_STEPS = 50 51 | 52 | # The strides of each layer of the FPN Pyramid. These values 53 | # are based on a Resnet101 backbone. 54 | BACKBONE_STRIDES = [4, 8, 16, 16, 16] 55 | 56 | # Number of classification classes (including background) 57 | NUM_CLASSES = 1 # Override in sub-classes 58 | 59 | # Length of square anchor side in pixels 60 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 256) 61 | 62 | # Ratios of anchors at each cell (width/height) 63 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 64 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 65 | 66 | # Anchor stride 67 | # If 1 then anchors are created for each cell in the backbone feature map. 68 | # If 2, then anchors are created for every other cell, and so on. 69 | RPN_ANCHOR_STRIDE = 1 70 | 71 | # Non-max suppression threshold to filter RPN proposals. 72 | # You can reduce this during training to generate more propsals. 73 | RPN_NMS_THRESHOLD = 0.7 74 | 75 | # How many anchors per image to use for RPN training 76 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 77 | 78 | # ROIs kept after non-maximum supression (training and inference) 79 | POST_NMS_ROIS_TRAINING = 500 80 | POST_NMS_ROIS_INFERENCE = 500 81 | 82 | # If enabled, re-sizes instance masks to a smaller size to reduce 83 | # memory load. Recommended when using high-resolution images. 84 | USE_MINI_MASK = True 85 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 86 | 87 | # Input image resizing 88 | # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and 89 | # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't 90 | # be satisfied together the IMAGE_MAX_DIM is enforced. 91 | IMAGE_MIN_DIM = 800 92 | IMAGE_MAX_DIM = 1024 93 | # If True, pad images with zeros such that they're (max_dim by max_dim) 94 | IMAGE_PADDING = True # currently, the False option is not supported 95 | 96 | # Image mean (RGB) 97 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 98 | 99 | # Number of ROIs per image to feed to classifier/mask heads1024 100 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 101 | # enough positive proposals to fill this and keep a positive:negative 102 | # ratio of 1:3. You can increase the number of proposals by adjusting 103 | # the RPN NMS threshold. 104 | TRAIN_ROIS_PER_IMAGE = 200 105 | 106 | # Percent of positive ROIs used to train classifier/mask heads 107 | ROI_POSITIVE_RATIO = 0.33 108 | 109 | # Pooled ROIs 110 | POOL_SIZE = 7 111 | MASK_POOL_SIZE = 14 112 | MASK_SHAPE = [128, 128] 113 | 114 | # Maximum number of ground truth instances to use in one image 115 | MAX_GT_INSTANCES = 100 116 | 117 | # Bounding box refinement standard deviation for RPN and final detections. 
118 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 119 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 120 | 121 | # Max number of final detections 122 | DETECTION_MAX_INSTANCES = 100 123 | 124 | # Minimum probability value to accept a detected instance 125 | # ROIs below this threshold are skipped 126 | DETECTION_MIN_CONFIDENCE = 0.7 127 | 128 | # Non-maximum suppression threshold for detection 129 | DETECTION_NMS_THRESHOLD = 0.3 130 | 131 | # Learning rate and momentum 132 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 133 | # weights to explode. Likely due to differences in optimzer 134 | # implementation. 135 | LEARNING_RATE = 0.001 136 | LEARNING_MOMENTUM = 0.9 137 | 138 | # Weight decay regularization 139 | WEIGHT_DECAY = 0.0001 140 | 141 | # Use RPN ROIs or externally generated ROIs for training 142 | # Keep this True for most situations. Set to False if you want to train 143 | # the head branches on ROI generated by code rather than the ROIs from 144 | # the RPN. For example, to debug the classifier head without having to 145 | # train the RPN. 146 | USE_RPN_ROIS = True 147 | 148 | def __init__(self): 149 | """Set values of computed attributes.""" 150 | # Effective batch size 151 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 152 | 153 | # Input image size 154 | self.IMAGE_SHAPE = np.array( 155 | [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 156 | 157 | # Compute backbone size from input image size 158 | self.BACKBONE_SHAPES = np.array( 159 | [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)), 160 | int(math.ceil(self.IMAGE_SHAPE[1] / stride))] 161 | for stride in self.BACKBONE_STRIDES]) 162 | 163 | def display(self): 164 | """Display Configuration values.""" 165 | print("\nConfigurations:") 166 | for a in dir(self): 167 | if not a.startswith("__") and not callable(getattr(self, a)): 168 | print("{:30} {}".format(a, getattr(self, a))) 169 | print("\n") 170 | -------------------------------------------------------------------------------- /lib/configure/net_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import time 5 | import csv 6 | import torch 7 | import numpy as np 8 | from random import randint 9 | from torch.autograd import Variable 10 | from torch.utils.data.sampler import SubsetRandomSampler 11 | 12 | 13 | def set_parameters(opts): 14 | ''' 15 | This function is called before training/testing to set parameters 16 | :param opts: 17 | :return opts: 18 | ''' 19 | 20 | if not opts.__contains__('train_losses'): 21 | opts.train_losses=[] 22 | 23 | if not opts.__contains__('train_accuracies'): 24 | opts.train_accuracies = [] 25 | 26 | if not opts.__contains__('valid_losses'): 27 | opts.valid_losses = [] 28 | if not opts.__contains__('valid_accuracies'): 29 | opts.valid_accuracies = [] 30 | 31 | if not opts.__contains__('test_losses'): 32 | opts.test_loss=[] 33 | 34 | if not opts.__contains__('test_accuracies'): 35 | opts.test_accuracies = [] 36 | 37 | if not opts.__contains__('best_acc'): 38 | opts.best_acc = 0.0 39 | 40 | if not opts.__contains__('lowest_loss'): 41 | opts.lowest_loss = 1e4 42 | 43 | if not opts.__contains__('checkpoint_path'): 44 | opts.checkpoint_path = 'checkpoint' 45 | 46 | if not os.path.exists(opts.checkpoint_path): 47 | os.mkdir(opts.checkpoint_path) 48 | 49 | if not opts.__contains__('checkpoint_epoch'): 50 | opts.checkpoint_epoch = 5 51 | 52 | if not opts.__contains__('valid_pearson_r'): 53 | opts.valid_pearson_r = [] 54 | 
55 | if not opts.__contains__('test_pearson_r'): 56 | opts.test_pearson_r = [] 57 | 58 | 59 | class Logger(object): 60 | def __init__(self, path, header): 61 | self.log_file = open(path, 'w') 62 | self.logger = csv.writer(self.log_file, delimiter='\t') 63 | 64 | self.logger.writerow(header) 65 | self.header = header 66 | 67 | def __del(self): 68 | self.log_file.close() 69 | 70 | def log(self, values): 71 | write_values = [] 72 | for col in self.header: 73 | assert col in values 74 | write_values.append(values[col]) 75 | 76 | self.logger.writerow(write_values) 77 | self.log_file.flush() 78 | 79 | 80 | -------------------------------------------------------------------------------- /lib/dataset/__pycache__/coco_dataset.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/dataset/__pycache__/coco_dataset.cpython-35.pyc -------------------------------------------------------------------------------- /lib/dataset/coco_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import nltk 3 | import os.path 4 | import numpy as np 5 | from PIL import Image 6 | import torch.utils.data as data 7 | from pycocotools.coco import COCO 8 | from nltk.stem import WordNetLemmatizer 9 | 10 | 11 | class CocoCaptions(data.Dataset): 12 | """`MS Coco Captions `_ Dataset. 13 | Args: 14 | root (string): Root directory where images are downloaded to. 15 | annFile (string): Path to json annotation file. 16 | transform (callable, optional): A function/transform that takes in an PIL image 17 | and returns a transformed version. E.g, ``transforms.ToTensor`` 18 | target_transform (callable, optional): A function/transform that takes in the 19 | target and transforms it. 20 | Example: 21 | .. 
code:: python 22 | import torchvision.datasets as dset 23 | import torchvision.transforms as transforms 24 | cap = dset.CocoCaptions(root = 'dir where images are', 25 | annFile = 'json annotation file', 26 | transform=transforms.ToTensor()) 27 | print('Number of samples: ', len(cap)) 28 | img, target = cap[3] # load 4th sample 29 | print("Image Size: ", img.size()) 30 | print(target) 31 | Output: :: 32 | Number of samples: 82783 33 | Image Size: (3L, 427L, 640L) 34 | [u'A plane emitting smoke stream flying over a mountain.', 35 | u'A plane darts across a bright blue sky behind a mountain covered in snow', 36 | u'A plane leaves a contrail above the snowy mountain top.', 37 | u'A mountain that has a plane flying overheard in the distance.', 38 | u'A mountain view with a plume of smoke in the background'] 39 | """ 40 | 41 | def __init__(self, root, annFile, transform=None, target_transform=None, embed=False): 42 | 43 | # Load COCO image IDs 44 | list_file = open('./others/coco_person_list.txt', 'r') 45 | ids = [] 46 | for i in list_file.readlines(): 47 | ids.append(int(i.replace('\n', ''))) 48 | 49 | # Load entity-attribute dictionary 50 | att_dict = [] 51 | ent_dict = [] 52 | list_file = open('./others/low-level-attr.txt', 'r') 53 | for i in list_file.readlines(): 54 | att_dict.append(i.replace('\n', '')) 55 | 56 | ent_dict = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 57 | 'bus', 'train', 'truck', 'boat', 'traffic light', 58 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 59 | 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 60 | 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 61 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 62 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 63 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 64 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 65 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 66 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 67 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 68 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 69 | 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 70 | 'teddy bear', 'hair drier', 'toothbrush'] 71 | 72 | self.ids = ids 73 | self.embed = embed 74 | self.coco = COCO(annFile) 75 | self.transform = transform 76 | self.att_dict = att_dict 77 | self.ent_dict = ent_dict 78 | self.root = os.path.expanduser(root) 79 | self.target_transform = target_transform 80 | 81 | def __getitem__(self, index): 82 | """ 83 | Args: 84 | index (int): Index 85 | Returns: 86 | tuple: Tuple (image, target). target is a list of captions for the image. 
87 | """ 88 | coco = self.coco 89 | img_id = self.ids[index] 90 | ann_ids = coco.getAnnIds(imgIds=img_id) 91 | anns = coco.loadAnns(ann_ids) 92 | target = [ann['caption'] for ann in anns] 93 | 94 | path = coco.loadImgs(img_id)[0]['file_name'] 95 | 96 | img = Image.open(os.path.join(self.root, path)).convert('RGB') 97 | if self.transform is not None: 98 | img = self.transform(img) 99 | 100 | if self.target_transform is not None: 101 | target = self.target_transform(target) 102 | 103 | img = np.asarray(img) 104 | att_lable = np.zeros(10) 105 | ent_lable = np.zeros(81) 106 | 107 | for sentence in target: 108 | words = nltk.pos_tag([item for item in sentence.replace('.', ' ').split(' ') if len(item) > 0]) 109 | for item in words: 110 | word = item[0].lower() 111 | word = WordNetLemmatizer().lemmatize(word) 112 | 113 | # att = item[1] 114 | if word in self.att_dict: 115 | att_id = self.att_dict.index(word) 116 | att_lable[att_id] = 1 117 | if word in self.ent_dict: 118 | ent_id = self.ent_dict.index(word) 119 | ent_lable[ent_id] = 1 120 | 121 | return img, att_lable, ent_lable 122 | 123 | def __len__(self): 124 | return len(self.ids) 125 | 126 | 127 | # if __name__ == '__main__': 128 | # size = (512, 512) 129 | # img_path = '/media/drive1/Data/coco17/train2017/' 130 | # json = '/media/drive1/Data/coco17/annotations/captions_train2017.json' 131 | # coco = COCO(json) 132 | # transform = transforms.Compose([transforms.Resize(size), transforms.ToTensor()]) 133 | # data_set = CocoCaptions(img_path, json, transform) 134 | # data_loader = torch.utils.data.DataLoader(data_set, batch_size=1, shuffle=False) 135 | # 136 | # img_ids = [] 137 | # count = 1 138 | # for index, (img, target) in enumerate(data_loader): 139 | # print(target) 140 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda 4 | 5 | echo "Compiling crop_and_resize kernels by nvcc..." 6 | cd roi_align/src/cuda 7 | $CUDA_PATH/bin/nvcc -c -o crop_and_resize_kernel.cu.o crop_and_resize_kernel.cu -x cu -Xcompiler -fPIC -arch=61 8 | 9 | cd ../../ 10 | python3 build.py 11 | 12 | cd ../ 13 | echo "Compiling nms kernels by nvcc..." 
14 | 15 | cd nms/src/cuda 16 | $CUDA_PATH/bin/nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 17 | 18 | cd ../../ 19 | python3 build.py 20 | 21 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__init__.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/__pycache__/pth_nms.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/__pycache__/pth_nms.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__init__.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/__init__.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__pycache__/__init__.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/nms/_ext/nms/_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/_ext/nms/_nms.so -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ._ext import nms 3 | import numpy as np 4 | 5 | def pth_nms(dets, thresh): 6 | """ 7 | dets has to be a tensor 8 | """ 9 | if not dets.is_cuda: 10 | x1 = dets[:, 0] 11 | y1 = dets[:, 1] 12 | x2 = dets[:, 2] 13 | y2 = dets[:, 3] 14 | scores = dets[:, 4] 15 | 16 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 17 | order = scores.sort(0, descending=True)[1] 18 | # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long() 19 | 20 | keep = torch.LongTensor(dets.size(0)) 21 | num_out = torch.LongTensor(1) 22 | nms.cpu_nms(keep, num_out, dets, order, areas, thresh) 23 | 24 | return keep[:num_out[0]] 25 | else: 26 | x1 = dets[:, 0] 27 | y1 = dets[:, 1] 28 | x2 = dets[:, 2] 29 | y2 = dets[:, 3] 30 | scores = dets[:, 4] 31 | 32 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 33 | order = scores.sort(0, descending=True)[1] 34 | 35 | dets = dets[order].contiguous() 36 | 37 | keep = torch.LongTensor(dets.size(0)) 38 | num_out = torch.LongTensor(1) 39 | 40 | nms.gpu_nms(keep, num_out, dets, thresh) 41 | 42 | return order[keep[:num_out[0]].cuda()].contiguous() 43 | 44 | -------------------------------------------------------------------------------- /lib/nms/pth_nms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/pth_nms.pyc -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The 
MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nms_kernel.h" 15 | 16 | __device__ inline float devIoU(float const * const a, float const * const b) { 17 | float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); 18 | float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); 19 | float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 5]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 5 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 42 | block_boxes[threadIdx.x * 5 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 44 | block_boxes[threadIdx.x * 5 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 46 | block_boxes[threadIdx.x * 5 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 48 | block_boxes[threadIdx.x * 5 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 50 | } 51 | __syncthreads(); 52 | 53 | if (threadIdx.x < row_size) { 54 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 55 | const float *cur_box = dev_boxes + cur_box_idx * 5; 56 | int i = 0; 57 | unsigned long long t = 0; 58 | int start = 0; 59 | if (row_start == col_start) { 60 | start = threadIdx.x + 1; 61 | } 62 | for (i = start; i < col_size; i++) { 63 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 64 | t |= 1ULL << i; 65 | } 66 | } 67 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 68 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 | } 70 | } 71 | 72 | 73 | void _nms(int boxes_num, float * boxes_dev, 74 | unsigned long long * mask_dev, float nms_overlap_thresh) { 75 | 76 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 77 | DIVUP(boxes_num, threadsPerBlock)); 78 | dim3 threads(threadsPerBlock); 79 | nms_kernel<<>>(boxes_num, 80 | nms_overlap_thresh, 81 | boxes_dev, 82 | mask_dev); 83 | } 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms/src/cuda/nms_kernel.cu.o -------------------------------------------------------------------------------- /lib/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define 
DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) { 5 | // boxes has to be sorted 6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous"); 7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous"); 8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous"); 9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous"); 10 | // Number of ROIs 11 | long boxes_num = THFloatTensor_size(boxes, 0); 12 | long boxes_dim = THFloatTensor_size(boxes, 1); 13 | 14 | long * keep_out_flat = THLongTensor_data(keep_out); 15 | float * boxes_flat = THFloatTensor_data(boxes); 16 | long * order_flat = THLongTensor_data(order); 17 | float * areas_flat = THFloatTensor_data(areas); 18 | 19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num); 20 | THByteTensor_fill(suppressed, 0); 21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed); 22 | 23 | // nominal indices 24 | int i, j; 25 | // sorted indices 26 | int _i, _j; 27 | // temp variables for box i's (the box currently under consideration) 28 | float ix1, iy1, ix2, iy2, iarea; 29 | // variables for computing overlap with box j (lower scoring box) 30 | float xx1, yy1, xx2, yy2; 31 | float w, h; 32 | float inter, ovr; 33 | 34 | long num_to_keep = 0; 35 | for (_i=0; _i < boxes_num; ++_i) { 36 | i = order_flat[_i]; 37 | if (suppressed_flat[i] == 1) { 38 | continue; 39 | } 40 | keep_out_flat[num_to_keep++] = i; 41 | ix1 = boxes_flat[i * boxes_dim]; 42 | iy1 = boxes_flat[i * boxes_dim + 1]; 43 | ix2 = boxes_flat[i * boxes_dim + 2]; 44 | iy2 = boxes_flat[i * boxes_dim + 3]; 45 | iarea = areas_flat[i]; 46 | for (_j = _i + 1; _j < boxes_num; ++_j) { 47 | j = order_flat[_j]; 48 | if (suppressed_flat[j] == 1) { 49 | continue; 50 | } 51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]); 52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]); 53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]); 54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]); 55 | w = fmaxf(0.0, xx2 - xx1 + 1); 56 | h = fmaxf(0.0, yy2 - yy1 + 1); 57 | inter = w * h; 58 | ovr = inter / (iarea + areas_flat[j] - inter); 59 | if (ovr >= nms_overlap_thresh) { 60 | suppressed_flat[j] = 1; 61 | } 62 | } 63 | } 64 | 65 | long *num_out_flat = THLongTensor_data(num_out); 66 | *num_out_flat = num_to_keep; 67 | THByteTensor_free(suppressed); 68 | return 1; 69 | } -------------------------------------------------------------------------------- /lib/nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | // 
------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 67 | } 68 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /lib/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from lib.nms.pth_nms import pth_nms 12 | 13 | 14 | def nms(dets, thresh): 15 | """Dispatch to either CPU or GPU NMS implementations. 
16 | Accept dets as tensor""" 17 | return pth_nms(dets, thresh) 18 | -------------------------------------------------------------------------------- /lib/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/nms_wrapper.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/__init__.py: -------------------------------------------------------------------------------- 1 | from . import fft -------------------------------------------------------------------------------- /lib/pytorch_fft/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/__init__.py -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/th_fft/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._th_fft import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/th_fft/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/th_fft/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/_ext/th_fft/_th_fft.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/_ext/th_fft/_th_fft.so -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__init__.py: -------------------------------------------------------------------------------- 1 | from .fft import * 2 | from .autograd import * -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__pycache__/autograd.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/autograd.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/__pycache__/fft.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/pytorch_fft/fft/__pycache__/fft.cpython-35.pyc -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/autograd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .fft import fft,ifft,fft2,ifft2,fft3,ifft3,rfft,irfft,rfft2,irfft2,rfft3,irfft3 3 | 4 | def make_contiguous(*Xs): 5 | return tuple(X if X.is_contiguous() else X.contiguous() for X in Xs) 6 | 7 | def contiguous_clone(X): 8 | if X.is_contiguous(): 9 | return X.clone() 10 | else: 11 | return X.contiguous() 12 | 13 | class Fft(torch.autograd.Function): 14 | def forward(self, X_re, X_im): 15 | X_re, X_im = make_contiguous(X_re, X_im) 16 | return fft(X_re, X_im) 17 | 18 | def backward(self, grad_output_re, grad_output_im): 19 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 20 | grad_output_im) 21 | gi, gr = fft(grad_output_im,grad_output_re) 22 | return gr,gi 23 | 24 | 25 | class Ifft(torch.autograd.Function): 26 | 27 | def forward(self, k_re, k_im): 28 | k_re, k_im = make_contiguous(k_re, k_im) 29 | return ifft(k_re, k_im) 30 | 31 | def backward(self, grad_output_re, grad_output_im): 32 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 33 | grad_output_im) 34 | gi, gr = ifft(grad_output_im,grad_output_re) 35 | return gr, gi 36 | 37 | 38 | class Fft2d(torch.autograd.Function): 39 | def forward(self, X_re, X_im): 40 | X_re, X_im = make_contiguous(X_re, X_im) 41 | return fft2(X_re, X_im) 42 | 43 | def backward(self, grad_output_re, grad_output_im): 44 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 45 | grad_output_im) 46 | gi, gr = fft2(grad_output_im,grad_output_re) 47 | return gr,gi 48 | 49 | 50 | class Ifft2d(torch.autograd.Function): 51 | 52 | def forward(self, k_re, k_im): 53 | k_re, k_im = make_contiguous(k_re, k_im) 54 | return ifft2(k_re, k_im) 55 | 56 | def backward(self, grad_output_re, grad_output_im): 57 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 58 | grad_output_im) 59 | gi, gr = ifft2(grad_output_im,grad_output_re) 60 | return gr, gi 61 | 62 | 63 | class Fft3d(torch.autograd.Function): 64 | def forward(self, X_re, X_im): 65 | X_re, X_im = make_contiguous(X_re, X_im) 66 | return fft3(X_re, X_im) 67 | 68 | def backward(self, grad_output_re, grad_output_im): 69 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 70 | grad_output_im) 71 | gi, gr = fft3(grad_output_im,grad_output_re) 72 | return gr,gi 73 | 74 | 75 | class Ifft3d(torch.autograd.Function): 76 | 77 | def forward(self, k_re, k_im): 78 | k_re, k_im = make_contiguous(k_re, k_im) 79 | return ifft3(k_re, k_im) 80 | 81 | def backward(self, 
grad_output_re, grad_output_im): 82 | grad_output_re, grad_output_im = make_contiguous(grad_output_re, 83 | grad_output_im) 84 | gi, gr = ifft3(grad_output_im,grad_output_re) 85 | return gr, gi 86 | 87 | 88 | class Rfft(torch.autograd.Function): 89 | def forward(self, X_re): 90 | X_re = X_re.contiguous() 91 | self._to_save_input_size = X_re.size(-1) 92 | return rfft(X_re) 93 | 94 | def backward(self, grad_output_re, grad_output_im): 95 | # Clone the array and make contiguous if needed 96 | grad_output_re = contiguous_clone(grad_output_re) 97 | grad_output_im = contiguous_clone(grad_output_im) 98 | 99 | if self._to_save_input_size & 1: 100 | grad_output_re[...,1:] /= 2 101 | else: 102 | grad_output_re[...,1:-1] /= 2 103 | 104 | if self._to_save_input_size & 1: 105 | grad_output_im[...,1:] /= 2 106 | else: 107 | grad_output_im[...,1:-1] /= 2 108 | 109 | gr = irfft(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False) 110 | return gr 111 | 112 | 113 | class Irfft(torch.autograd.Function): 114 | 115 | def forward(self, k_re, k_im): 116 | k_re, k_im = make_contiguous(k_re, k_im) 117 | return irfft(k_re, k_im) 118 | 119 | def backward(self, grad_output_re): 120 | grad_output_re = grad_output_re.contiguous() 121 | gr, gi = rfft(grad_output_re) 122 | 123 | N = grad_output_re.size(-1) 124 | gr[...,0] /= N 125 | gr[...,1:-1] /= N/2 126 | gr[...,-1] /= N 127 | 128 | gi[...,0] /= N 129 | gi[...,1:-1] /= N/2 130 | gi[...,-1] /= N 131 | return gr, gi 132 | 133 | 134 | class Rfft2d(torch.autograd.Function): 135 | def forward(self, X_re): 136 | X_re = X_re.contiguous() 137 | self._to_save_input_size = X_re.size(-1) 138 | return rfft2(X_re) 139 | 140 | def backward(self, grad_output_re, grad_output_im): 141 | # Clone the array and make contiguous if needed 142 | grad_output_re = contiguous_clone(grad_output_re) 143 | grad_output_im = contiguous_clone(grad_output_im) 144 | 145 | if self._to_save_input_size & 1: 146 | grad_output_re[...,1:] /= 2 147 | else: 148 | grad_output_re[...,1:-1] /= 2 149 | 150 | if self._to_save_input_size & 1: 151 | grad_output_im[...,1:] /= 2 152 | else: 153 | grad_output_im[...,1:-1] /= 2 154 | 155 | gr = irfft2(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False) 156 | return gr 157 | 158 | 159 | class Irfft2d(torch.autograd.Function): 160 | 161 | def forward(self, k_re, k_im): 162 | k_re, k_im = make_contiguous(k_re, k_im) 163 | return irfft2(k_re, k_im) 164 | 165 | def backward(self, grad_output_re): 166 | grad_output_re = grad_output_re.contiguous() 167 | gr, gi = rfft2(grad_output_re) 168 | 169 | N = grad_output_re.size(-1) * grad_output_re.size(-2) 170 | gr[...,0] /= N 171 | gr[...,1:-1] /= N/2 172 | gr[...,-1] /= N 173 | 174 | gi[...,0] /= N 175 | gi[...,1:-1] /= N/2 176 | gi[...,-1] /= N 177 | return gr, gi 178 | 179 | 180 | class Rfft3d(torch.autograd.Function): 181 | def forward(self, X_re): 182 | X_re = X_re.contiguous() 183 | self._to_save_input_size = X_re.size(-1) 184 | return rfft3(X_re) 185 | 186 | def backward(self, grad_output_re, grad_output_im): 187 | # Clone the array and make contiguous if needed 188 | grad_output_re = contiguous_clone(grad_output_re) 189 | grad_output_im = contiguous_clone(grad_output_im) 190 | 191 | if self._to_save_input_size & 1: 192 | grad_output_re[...,1:] /= 2 193 | else: 194 | grad_output_re[...,1:-1] /= 2 195 | 196 | if self._to_save_input_size & 1: 197 | grad_output_im[...,1:] /= 2 198 | else: 199 | grad_output_im[...,1:-1] /= 2 200 | 201 | gr = 
irfft3(grad_output_re,grad_output_im,self._to_save_input_size, normalize=False) 202 | return gr 203 | 204 | 205 | class Irfft3d(torch.autograd.Function): 206 | 207 | def forward(self, k_re, k_im): 208 | k_re, k_im = make_contiguous(k_re, k_im) 209 | return irfft3(k_re, k_im) 210 | 211 | def backward(self, grad_output_re): 212 | grad_output_re = grad_output_re.contiguous() 213 | gr, gi = rfft3(grad_output_re) 214 | 215 | N = grad_output_re.size(-1) * grad_output_re.size(-2) * grad_output_re.size(-3) 216 | gr[...,0] /= N 217 | gr[...,1:-1] /= N/2 218 | gr[...,-1] /= N 219 | 220 | gi[...,0] /= N 221 | gi[...,1:-1] /= N/2 222 | gi[...,-1] /= N 223 | return gr, gi 224 | 225 | -------------------------------------------------------------------------------- /lib/pytorch_fft/fft/fft.py: -------------------------------------------------------------------------------- 1 | # functions/fft.py 2 | import torch 3 | from .._ext import th_fft 4 | 5 | def _fft(X_re, X_im, f, rank): 6 | if not(X_re.size() == X_im.size()): 7 | raise ValueError("Real and imaginary tensors must have the same dimension.") 8 | if not(X_re.dim() >= rank+1 and X_im.dim() >= rank+1): 9 | raise ValueError("Inputs must have at least {} dimensions.".format(rank+1)) 10 | if not(X_re.is_cuda and X_im.is_cuda): 11 | raise ValueError("Input must be a CUDA tensor.") 12 | if not(X_re.is_contiguous() and X_im.is_contiguous()): 13 | raise ValueError("Input must be contiguous.") 14 | 15 | Y1, Y2 = tuple(X_re.new(*X_re.size()).zero_() for _ in range(2)) 16 | f(X_re, X_im, Y1, Y2) 17 | return (Y1, Y2) 18 | 19 | def fft(X_re, X_im): 20 | if 'Float' in type(X_re).__name__ : 21 | f = th_fft.th_Float_fft1 22 | elif 'Double' in type(X_re).__name__: 23 | f = th_fft.th_Double_fft1 24 | else: 25 | raise NotImplementedError 26 | return _fft(X_re, X_im, f, 1) 27 | 28 | def ifft(X_re, X_im): 29 | N = X_re.size(-1) 30 | if 'Float' in type(X_re).__name__ : 31 | f = th_fft.th_Float_ifft1 32 | elif 'Double' in type(X_re).__name__: 33 | f = th_fft.th_Double_ifft1 34 | else: 35 | raise NotImplementedError 36 | Y1, Y2 = _fft(X_re, X_im, f, 1) 37 | return (Y1/N, Y2/N) 38 | 39 | def fft2(X_re, X_im): 40 | if 'Float' in type(X_re).__name__ : 41 | f = th_fft.th_Float_fft2 42 | elif 'Double' in type(X_re).__name__: 43 | f = th_fft.th_Double_fft2 44 | else: 45 | raise NotImplementedError 46 | return _fft(X_re, X_im, f, 2) 47 | 48 | def ifft2(X_re, X_im): 49 | N = X_re.size(-1)*X_re.size(-2) 50 | if 'Float' in type(X_re).__name__ : 51 | f = th_fft.th_Float_ifft2 52 | elif 'Double' in type(X_re).__name__: 53 | f = th_fft.th_Double_ifft2 54 | else: 55 | raise NotImplementedError 56 | Y1, Y2 = _fft(X_re, X_im, f, 2) 57 | return (Y1/N, Y2/N) 58 | 59 | def fft3(X_re, X_im): 60 | if 'Float' in type(X_re).__name__ : 61 | f = th_fft.th_Float_fft3 62 | elif 'Double' in type(X_re).__name__: 63 | f = th_fft.th_Double_fft3 64 | else: 65 | raise NotImplementedError 66 | return _fft(X_re, X_im, f, 3) 67 | 68 | def ifft3(X_re, X_im): 69 | N = X_re.size(-1)*X_re.size(-2)*X_re.size(-3) 70 | if 'Float' in type(X_re).__name__ : 71 | f = th_fft.th_Float_ifft3 72 | elif 'Double' in type(X_re).__name__: 73 | f = th_fft.th_Double_ifft3 74 | else: 75 | raise NotImplementedError 76 | Y1, Y2 = _fft(X_re, X_im, f, 3) 77 | return (Y1/N, Y2/N) 78 | 79 | _s = slice(None, None, None) 80 | 81 | def _rfft(X, f, rank): 82 | if not(X.dim() >= rank+1): 83 | raise ValueError("Input must have at least {} dimensions.".format(rank+1)) 84 | if not(X.is_cuda): 85 | raise ValueError("Input must be a CUDA 
tensor.") 86 | if not(X.is_contiguous()): 87 | raise ValueError("Input must be contiguous.") 88 | 89 | new_size = tuple(X.size())[:-1] + (X.size(-1)//2 + 1,) 90 | # new_size = tuple(X.size()) 91 | Y1, Y2 = tuple(X.new(*new_size).zero_() for _ in range(2)) 92 | f(X, Y1, Y2) 93 | # i = tuple(_s for _ in range(X.dim()-1)) + (slice(None, X.size(-1)//2 + 1, ),) 94 | # print(Y1, i) 95 | # return (Y1[i], Y2[i]) 96 | return (Y1, Y2) 97 | 98 | def rfft(X): 99 | if 'Float' in type(X).__name__ : 100 | f = th_fft.th_Float_rfft1 101 | elif 'Double' in type(X).__name__: 102 | f = th_fft.th_Double_rfft1 103 | else: 104 | raise NotImplementedError 105 | return _rfft(X, f, 1) 106 | 107 | def rfft2(X): 108 | if 'Float' in type(X).__name__ : 109 | f = th_fft.th_Float_rfft2 110 | elif 'Double' in type(X).__name__: 111 | f = th_fft.th_Double_rfft2 112 | else: 113 | raise NotImplementedError 114 | return _rfft(X, f, 2) 115 | 116 | def rfft3(X): 117 | if 'Float' in type(X).__name__ : 118 | f = th_fft.th_Float_rfft3 119 | elif 'Double' in type(X).__name__: 120 | f = th_fft.th_Double_rfft3 121 | else: 122 | raise NotImplementedError 123 | return _rfft(X, f, 3) 124 | 125 | def _irfft(X_re, X_im, f, rank, N, normalize): 126 | if not(X_re.size() == X_im.size()): 127 | raise ValueError("Real and imaginary tensors must have the same dimension.") 128 | if not(X_re.dim() >= rank+1 and X_im.dim() >= rank+1): 129 | raise ValueError("Inputs must have at least {} dimensions.".format(rank+1)) 130 | if not(X_re.is_cuda and X_im.is_cuda): 131 | raise ValueError("Input must be a CUDA tensor.") 132 | if not(X_re.is_contiguous() and X_im.is_contiguous()): 133 | raise ValueError("Input must be contiguous.") 134 | 135 | input_size = X_re.size(-1) 136 | 137 | if N is not None: 138 | if input_size != int(N/2) + 1: 139 | raise ValueError("Input size must be equal to n/2 + 1") 140 | else: 141 | N = (X_re.size(-1) - 1)*2 142 | 143 | new_size = tuple(X_re.size())[:-1] + (N,) 144 | Y = X_re.new(*new_size).zero_() 145 | f(X_re, X_im, Y) 146 | 147 | if normalize: 148 | M = 1 149 | for i in range(rank): 150 | M *= new_size[-(i+1)] 151 | return Y/M 152 | else: 153 | return Y 154 | 155 | def irfft(X_re, X_im, n=None, normalize=True): 156 | if 'Float' in type(X_re).__name__ : 157 | f = th_fft.th_Float_irfft1 158 | elif 'Double' in type(X_re).__name__: 159 | f = th_fft.th_Double_irfft1 160 | else: 161 | raise NotImplementedError 162 | return _irfft(X_re, X_im, f, 1, n, normalize) 163 | 164 | def irfft2(X_re, X_im, n=None, normalize=True): 165 | if 'Float' in type(X_re).__name__ : 166 | f = th_fft.th_Float_irfft2 167 | elif 'Double' in type(X_re).__name__: 168 | f = th_fft.th_Double_irfft2 169 | else: 170 | raise NotImplementedError 171 | return _irfft(X_re, X_im, f, 2, n, normalize) 172 | 173 | def irfft3(X_re, X_im, n=None, normalize=True): 174 | if 'Float' in type(X_re).__name__ : 175 | f = th_fft.th_Float_irfft3 176 | elif 'Double' in type(X_re).__name__: 177 | f = th_fft.th_Double_irfft3 178 | else: 179 | raise NotImplementedError 180 | return _irfft(X_re, X_im, f, 3, n, normalize) 181 | 182 | def reverse(X, group_size=1): 183 | if not(X.is_cuda): 184 | raise ValueError("Input must be a CUDA tensor.") 185 | if not(X.is_contiguous()): 186 | raise ValueError("Input must be contiguous.") 187 | 188 | if 'Float' in type(X).__name__: 189 | f = th_fft.reverse_Float 190 | elif 'Double' in type(X).__name__: 191 | f = th_fft.reverse_Double 192 | else: 193 | raise NotImplementedError 194 | Y = X.new(*X.size()) 195 | f(X,Y, group_size) 196 | return Y 197 
| 198 | 199 | def expand(X, imag=False, odd=False): 200 | N1, N2 = X.size(-2), X.size(-1) 201 | N3 = (X.size(-1) - 1)*2 202 | if odd: 203 | N3 += 1 204 | new_size = tuple(X.size())[:-1] + (N3,) 205 | Y = X.new(*new_size).zero_() 206 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(None,N2, None),) 207 | Y[i] = X 208 | 209 | if odd: 210 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(-(N3-N2),None, None),) 211 | else: 212 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(-(1+N3-N2),-1, None),) 213 | X0 = X[i].contiguous() 214 | 215 | X0 = reverse(X0) 216 | i0 = (tuple(slice(None, None, None) for _ in range(X.dim() - 2)) + 217 | (slice(-1,None, None), slice(None, None, None))) 218 | i1 = (tuple(slice(None, None, None) for _ in range(X.dim() - 2)) + 219 | (slice(None, -1, None), slice(None, None, None))) 220 | X0 = torch.cat([X0[i0], X0[i1]], -2) 221 | X0 = reverse(X0, N1*(N3-N2)) 222 | 223 | i = tuple(slice(None, None, None) for _ in range(X.dim() - 1)) + (slice(N2, None, None),) 224 | if not imag: 225 | Y[i] = X0 226 | else: 227 | Y[i] = -X0 228 | return Y 229 | 230 | def roll_n(X, axis, n): 231 | f_idx = tuple(slice(None, None, None) if i != axis else slice(0,n,None) 232 | for i in range(X.dim())) 233 | b_idx = tuple(slice(None, None, None) if i != axis else slice(n,None,None) 234 | for i in range(X.dim())) 235 | front = X[f_idx] 236 | back = X[b_idx] 237 | return torch.cat([back, front],axis) 238 | -------------------------------------------------------------------------------- /lib/pytorch_fft/src/generic/helpers.c: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "generic/helpers.c" 3 | #else 4 | 5 | // helper to convert a pair of real arrays into a complex array 6 | void pair2complex(real *a, real *b, cufft_complex *c, int n) 7 | { 8 | real *c_tmp = (real*)c; 9 | cudaMemcpy2D(c_tmp, 2*sizeof(real), 10 | a, sizeof(real), 11 | sizeof(real), n, cudaMemcpyDeviceToDevice); 12 | cudaMemcpy2D(c_tmp+1, 2*sizeof(real), 13 | b, sizeof(real), 14 | sizeof(real), n, cudaMemcpyDeviceToDevice); 15 | } 16 | 17 | void complex2pair(cufft_complex *a, real *b, real *c, int n) 18 | { 19 | real *a_tmp = (real*)a; 20 | cudaMemcpy2D(b, sizeof(real), 21 | a_tmp, 2*sizeof(real), 22 | sizeof(real), n, cudaMemcpyDeviceToDevice); 23 | cudaMemcpy2D(c, sizeof(real), 24 | a_tmp+1, 2*sizeof(real), 25 | sizeof(real), n, cudaMemcpyDeviceToDevice); 26 | } 27 | 28 | void reverse_(THCTensor *input, THCTensor *output, int group_size) 29 | { 30 | real *input_data = THCTensor_(data)(state, input); 31 | real *output_data = THCTensor_(data)(state, output); 32 | int n = THCTensor_(nElement)(state, input); 33 | 34 | cudaMemcpy2D(output_data, sizeof(real)*group_size, 35 | input_data+n-group_size, -sizeof(real)*group_size, 36 | sizeof(real)*group_size, n/group_size, cudaMemcpyDeviceToDevice); 37 | } 38 | 39 | #endif -------------------------------------------------------------------------------- /lib/pytorch_fft/src/generic/th_fft_cuda.c: -------------------------------------------------------------------------------- 1 | #ifndef THC_GENERIC_FILE 2 | #define THC_GENERIC_FILE "generic/th_fft_cuda.c" 3 | #else 4 | 5 | int th_(THCTensor *input1, THCTensor *input2, THCTensor *output1, THCTensor *output2) 6 | { 7 | // Require that all tensors be of the same size. 
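// (Descriptive note: if any of the size checks below fails, this macro-generated wrapper returns 0 instead of running the transform; the Python caller in fft.py pre-allocates the output tensors with the same shape as the inputs, so in normal use these checks pass.)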
8 | if (!THCTensor_(isSameSizeAs)(state, input1, output1)) 9 | return 0; 10 | if (!THCTensor_(isSameSizeAs)(state, input1, output2)) 11 | return 0; 12 | if (!THCTensor_(isSameSizeAs)(state, input1, input2)) 13 | return 0; 14 | 15 | // Get the tensor dimensions (batchsize, rows, cols). 16 | int ndim = THCTensor_(nDimension)(state, input1); 17 | int batch = 1; 18 | int i, d; 19 | for(i=0; i 2 | #include 3 | #include 4 | #include 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | #define th_ TH_CONCAT_4(th_, Real, _, func_name) 9 | #define pair2complex TH_CONCAT_2(Real, 2complex) 10 | #define complex2pair TH_CONCAT_2(complex2, Real) 11 | #define reverse_ TH_CONCAT_2(reverse_, Real) 12 | 13 | #include "th_fft_generate_helpers.h" 14 | 15 | #define cufft_rank 1 16 | #include "th_fft_generate_float.h" 17 | #include "th_fft_generate_double.h" 18 | #undef cufft_rank 19 | 20 | #define cufft_rank 2 21 | #include "th_fft_generate_float.h" 22 | #include "th_fft_generate_double.h" 23 | #undef cufft_rank 24 | 25 | #define cufft_rank 3 26 | #include "th_fft_generate_float.h" 27 | #include "th_fft_generate_double.h" 28 | #undef cufft_rank 29 | -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_cuda.h: -------------------------------------------------------------------------------- 1 | int th_Float_fft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 2 | int th_Float_ifft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 3 | int th_Double_fft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 4 | int th_Double_ifft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 5 | 6 | int th_Float_fft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 7 | int th_Float_ifft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 8 | int th_Double_fft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 9 | int th_Double_ifft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 10 | 11 | int th_Float_fft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 12 | int th_Float_ifft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1, THCudaTensor *output2); 13 | int th_Double_fft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 14 | int th_Double_ifft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 15 | 16 | int th_Float_rfft1(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2); 17 | int th_Float_irfft1(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1); 18 | int th_Double_rfft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 19 | int th_Double_irfft1(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1); 20 | 21 | int th_Float_rfft2(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2); 22 | int th_Float_irfft2(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1); 23 | int 
th_Double_rfft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 24 | int th_Double_irfft2(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1); 25 | 26 | int th_Float_rfft3(THCudaTensor *input1, THCudaTensor *output1, THCudaTensor *output2); 27 | int th_Float_irfft3(THCudaTensor *input1, THCudaTensor *input2, THCudaTensor *output1); 28 | int th_Double_rfft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *output1, THCudaDoubleTensor *output2); 29 | int th_Double_irfft3(THCudaDoubleTensor *input1, THCudaDoubleTensor *input2, THCudaDoubleTensor *output1); 30 | 31 | void reverse_Float(THCudaTensor *input, THCudaTensor *output, int group_size); 32 | void reverse_Double(THCudaDoubleTensor *input, THCudaDoubleTensor *output, int group_size); 33 | 34 | // void expand_2D_Float(THCudaTensor *input, THCudaTensor *output); 35 | // void expand_2D_Double(THCudaDoubleTensor *input, THCudaDoubleTensor *output); -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_generate_double.h: -------------------------------------------------------------------------------- 1 | // Generate Double FFTs 2 | #define cufft_complex cufftDoubleComplex 3 | 4 | #define cufft_type CUFFT_Z2Z 5 | #define cufft_exec cufftExecZ2Z 6 | 7 | #define cufft_direction CUFFT_FORWARD 8 | #define func_name TH_CONCAT_2(fft, cufft_rank) 9 | 10 | #include "generic/th_fft_cuda.c" 11 | #include "THCGenerateDoubleType.h" 12 | 13 | #undef cufft_direction 14 | #undef func_name 15 | 16 | #define cufft_direction CUFFT_INVERSE 17 | #define func_name TH_CONCAT_2(ifft, cufft_rank) 18 | 19 | #include "generic/th_fft_cuda.c" 20 | #include "THCGenerateDoubleType.h" 21 | 22 | #undef cufft_direction 23 | #undef func_name 24 | 25 | #undef cufft_type 26 | #undef cufft_exec 27 | 28 | // Generate Double rFFTs 29 | #define cufft_type CUFFT_D2Z 30 | #define cufft_exec cufftExecD2Z 31 | #define func_name TH_CONCAT_2(rfft, cufft_rank) 32 | 33 | #include "generic/th_rfft_cuda.c" 34 | #include "THCGenerateDoubleType.h" 35 | 36 | #undef cufft_type 37 | #undef cufft_exec 38 | #undef func_name 39 | 40 | #define cufft_type CUFFT_Z2D 41 | #define cufft_exec cufftExecZ2D 42 | #define func_name TH_CONCAT_2(irfft, cufft_rank) 43 | 44 | #include "generic/th_irfft_cuda.c" 45 | #include "THCGenerateDoubleType.h" 46 | 47 | #undef cufft_type 48 | #undef cufft_exec 49 | #undef func_name 50 | 51 | #undef cufft_complex -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_generate_float.h: -------------------------------------------------------------------------------- 1 | // Generate float FFTs 2 | #define cufft_complex cufftComplex 3 | 4 | #define cufft_type CUFFT_C2C 5 | #define cufft_exec cufftExecC2C 6 | 7 | #define cufft_direction CUFFT_FORWARD 8 | #define func_name TH_CONCAT_2(fft, cufft_rank) 9 | 10 | #include "generic/th_fft_cuda.c" 11 | #include "THCGenerateFloatType.h" 12 | 13 | #undef func_name 14 | #undef cufft_direction 15 | 16 | #define cufft_direction CUFFT_INVERSE 17 | #define func_name TH_CONCAT_2(ifft, cufft_rank) 18 | 19 | #include "generic/th_fft_cuda.c" 20 | #include "THCGenerateFloatType.h" 21 | 22 | #undef func_name 23 | #undef cufft_direction 24 | 25 | 26 | #undef cufft_type 27 | #undef cufft_exec 28 | 29 | // Generate float rFFTs 30 | #define cufft_type CUFFT_R2C 31 | #define cufft_exec cufftExecR2C 32 | #define func_name TH_CONCAT_2(rfft, cufft_rank) 33 | 34 | #include 
"generic/th_rfft_cuda.c" 35 | #include "THCGenerateFloatType.h" 36 | 37 | #undef func_name 38 | #undef cufft_type 39 | #undef cufft_exec 40 | 41 | #define cufft_type CUFFT_C2R 42 | #define cufft_exec cufftExecC2R 43 | #define func_name TH_CONCAT_2(irfft, cufft_rank) 44 | 45 | #include "generic/th_irfft_cuda.c" 46 | #include "THCGenerateFloatType.h" 47 | 48 | #undef func_name 49 | #undef cufft_type 50 | #undef cufft_exec 51 | 52 | #undef cufft_complex -------------------------------------------------------------------------------- /lib/pytorch_fft/src/th_fft_generate_helpers.h: -------------------------------------------------------------------------------- 1 | // Generate float and double helpers 2 | #define cufft_complex cufftComplex 3 | 4 | #include "generic/helpers.c" 5 | #include "THCGenerateFloatType.h" 6 | 7 | #undef cufft_complex 8 | 9 | #define cufft_complex cufftDoubleComplex 10 | 11 | #include "generic/helpers.c" 12 | #include "THCGenerateDoubleType.h" 13 | 14 | #undef cufft_complex -------------------------------------------------------------------------------- /lib/resnet/__pycache__/resnet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/resnet/__pycache__/resnet.cpython-35.pyc -------------------------------------------------------------------------------- /lib/resnet/resnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import torch 4 | import torch.nn as nn 5 | from torchvision import transforms 6 | from torch.autograd import Variable 7 | import torch.utils.model_zoo as model_zoo 8 | 9 | 10 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 11 | 'resnet152'] 12 | 13 | 14 | model_urls = { 15 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 16 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 17 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 18 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 19 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 20 | } 21 | 22 | 23 | def conv3x3(in_planes, out_planes, stride=1): 24 | "3x3 convolution with padding" 25 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 26 | padding=1, bias=False) 27 | 28 | 29 | class BasicBlock(nn.Module): 30 | expansion = 1 31 | 32 | def __init__(self, inplanes, planes, stride=1, downsample=None): 33 | super(BasicBlock, self).__init__() 34 | self.conv1 = conv3x3(inplanes, planes, stride) 35 | self.bn1 = nn.BatchNorm2d(planes) 36 | self.relu = nn.ReLU(inplace=True) 37 | self.conv2 = conv3x3(planes, planes) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | 49 | out = self.conv2(out) 50 | out = self.bn2(out) 51 | 52 | if self.downsample is not None: 53 | residual = self.downsample(x) 54 | 55 | out += residual 56 | out = self.relu(out) 57 | 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None): 65 | super(Bottleneck, self).__init__() 66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 67 | self.bn1 = 
nn.BatchNorm2d(planes) 68 | if dilation == 1: 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | else: 72 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 73 | padding=dilation, dilation=dilation, bias=False) 74 | self.bn2 = nn.BatchNorm2d(planes) 75 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 76 | self.bn3 = nn.BatchNorm2d(planes * 4) 77 | self.relu = nn.ReLU(inplace=True) 78 | self.downsample = downsample 79 | self.stride = stride 80 | 81 | def forward(self, x): 82 | residual = x 83 | 84 | out = self.conv1(x) 85 | out = self.bn1(out) 86 | out = self.relu(out) 87 | 88 | out = self.conv2(out) 89 | out = self.bn2(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv3(out) 93 | out = self.bn3(out) 94 | 95 | if self.downsample is not None: 96 | residual = self.downsample(x) 97 | 98 | out += residual 99 | out = self.relu(out) 100 | 101 | return out 102 | 103 | 104 | # We hook up one more 1*1 conv layer in Res_block 5th, and modified the method for checkpoint loading 105 | # An attribute-entity grounding pre-trained classification ResNet was adopted 106 | class ResNet(nn.Module): 107 | 108 | def __init__(self, block, layers, num_classes=1000): 109 | self.inplanes = 64 110 | super(ResNet, self).__init__() 111 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 112 | bias=False) 113 | self.bn1 = nn.BatchNorm2d(64) 114 | self.relu = nn.ReLU(inplace=True) 115 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 116 | self.layer1 = self._make_layer(block, 64, layers[0]) 117 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 118 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 119 | self.layer4 = self._make_layer(block, 512, layers[3], dilation=4) 120 | self.avgpool = nn.AvgPool2d(32, stride=1) 121 | self.fc = nn.Linear(512 * block.expansion, num_classes) 122 | self.sigmoid = nn.Sigmoid() 123 | 124 | for m in self.modules(): 125 | if isinstance(m, nn.Conv2d): 126 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 127 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 128 | elif isinstance(m, nn.BatchNorm2d): 129 | m.weight.data.fill_(1) 130 | m.bias.data.zero_() 131 | 132 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 133 | downsample = None 134 | if stride != 1 or self.inplanes != planes * block.expansion: 135 | downsample = nn.Sequential( 136 | nn.Conv2d(self.inplanes, planes * block.expansion, 137 | kernel_size=1, stride=stride, bias=False), 138 | nn.BatchNorm2d(planes * block.expansion),) 139 | layers = [] 140 | layers.append(block(self.inplanes, planes, stride, dilation, downsample)) 141 | self.inplanes = planes * block.expansion 142 | for i in range(1, blocks): 143 | layers.append(block(self.inplanes, planes)) 144 | 145 | return nn.Sequential(*layers) 146 | 147 | def forward(self, x): 148 | x = self.conv1(x) 149 | x = self.bn1(x) 150 | x = self.relu(x) 151 | x = self.maxpool(x) 152 | 153 | conv_feat1 = self.layer1(x) 154 | conv_feat2 = self.layer2(conv_feat1) 155 | conv_feat3 = self.layer3(conv_feat2) 156 | conv_feat4 = self.layer4(conv_feat3) 157 | 158 | # Shrink the feature size and do classification 159 | conv_feat = self.shrink_conv(conv_feat4) 160 | # feat = self.avgpool(conv_feat) 161 | # y = self.sigmoid(self.fc(feat.view(feat.shape[0], feat.shape[1]))) 162 | 163 | return conv_feat4, conv_feat 164 | 165 | 166 | def resnet101(pretrained=False, path='', classnum=1000, **kwargs): 167 | """Constructs a ResNet-101 model. 168 | 169 | Args: 170 | pretrained (bool): If True, returns a model pre-trained on ImageNet 171 | 172 | Note that in this time we've pre-trained our modified ResNet on Flickr 30k 173 | for entity-attribute classification 174 | 175 | """ 176 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 177 | model.fc = torch.nn.Linear(256, 4) 178 | 179 | if pretrained: 180 | state_dict = torch.load(path)['state_dict'] 181 | new_params = model.state_dict() 182 | model_keys = model.state_dict().keys() 183 | for name, param in list(state_dict.items()): 184 | if name not in model_keys: 185 | del state_dict[name] 186 | 187 | new_params.update(state_dict) 188 | model.load_state_dict(new_params) 189 | 190 | else: 191 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 192 | return model 193 | 194 | 195 | def resnet50(pretrained=False, path='', classnum=1000, **kwargs): 196 | 197 | """Constructs a ResNet-50 model. 198 | 199 | Args: 200 | pretrained (bool): If True, returns a model pre-trained on ImageNet 201 | """ 202 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 203 | 204 | if pretrained: 205 | # In pre-trained model-gender the fc is 2, while in model-person it's 4 lasses. 
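# (Note: the 4-way head set up below is assumed to match the person-attribute checkpoint loaded by Model7, e.g. ./checkpoint/AENet_clsfier_person_256d_4.pth.)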
206 | model.fc = torch.nn.Linear(256, 4) 207 | model.shrink_conv = nn.Conv2d(2048, 256, kernel_size=1, bias=False) 208 | state_dict = torch.load(path)['state_dict'] 209 | new_params = model.state_dict() 210 | model_keys = model.state_dict().keys() 211 | for name, param in list(state_dict.items()): 212 | if name not in model_keys: 213 | del state_dict[name] 214 | 215 | new_params.update(state_dict) 216 | model.load_state_dict(new_params) 217 | 218 | else: 219 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 220 | model.shrink_conv = nn.Conv2d(2048, 256, kernel_size=1, bias=False) 221 | model.fc = torch.nn.Linear(256, classnum) 222 | 223 | return model 224 | -------------------------------------------------------------------------------- /lib/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/roi_align/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__init__.pyc -------------------------------------------------------------------------------- /lib/roi_align/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/__pycache__/crop_and_resize.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/__pycache__/crop_and_resize.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/roi_align/_ext/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__init__.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_and_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = 
_wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/__init__.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /lib/roi_align/_ext/crop_and_resize/_crop_and_resize.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/_ext/crop_and_resize/_crop_and_resize.so -------------------------------------------------------------------------------- /lib/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/crop_and_resize.c'] 7 | headers = ['src/crop_and_resize.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | extra_objects = [] 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/crop_and_resize_gpu.c'] 15 | headers += ['src/crop_and_resize_gpu.h'] 16 | defines += [('WITH_CUDA', None)] 17 | extra_objects += ['src/cuda/crop_and_resize_kernel.cu.o'] 18 | with_cuda = True 19 | 20 | extra_compile_args = ['-fopenmp', '-std=c99'] 21 | 22 | this_file = os.path.dirname(os.path.realpath(__file__)) 23 | print(this_file) 24 | sources = [os.path.join(this_file, fname) for fname in sources] 25 | headers = [os.path.join(this_file, fname) for fname in headers] 26 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 27 | 28 | ffi = create_extension( 29 | '_ext.crop_and_resize', 30 | headers=headers, 31 | sources=sources, 32 | define_macros=defines, 33 | relative_to=__file__, 34 | with_cuda=with_cuda, 35 | extra_objects=extra_objects, 36 | extra_compile_args=extra_compile_args 37 | ) 38 | 39 | if __name__ == '__main__': 40 | ffi.build() 41 | -------------------------------------------------------------------------------- /lib/roi_align/crop_and_resize.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Function 6 | 7 | from ._ext import crop_and_resize as _backend 8 | 9 | 10 | class CropAndResizeFunction(Function): 11 | 12 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 13 | self.crop_height = crop_height 14 | self.crop_width = crop_width 15 | self.extrapolation_value = extrapolation_value 16 | 17 | def forward(self, image, boxes, box_ind): 18 | crops = torch.zeros_like(image) 19 | 20 | if image.is_cuda: 21 | _backend.crop_and_resize_gpu_forward( 22 | image, boxes, box_ind, 23 | self.extrapolation_value, self.crop_height, 
self.crop_width, crops) 24 | else: 25 | _backend.crop_and_resize_forward( 26 | image, boxes, box_ind, 27 | self.extrapolation_value, self.crop_height, self.crop_width, crops) 28 | 29 | # save for backward 30 | self.im_size = image.size() 31 | self.save_for_backward(boxes, box_ind) 32 | 33 | return crops 34 | 35 | def backward(self, grad_outputs): 36 | boxes, box_ind = self.saved_tensors 37 | 38 | grad_outputs = grad_outputs.contiguous() 39 | grad_image = torch.zeros_like(grad_outputs).resize_(*self.im_size) 40 | 41 | if grad_outputs.is_cuda: 42 | _backend.crop_and_resize_gpu_backward( 43 | grad_outputs, boxes, box_ind, grad_image 44 | ) 45 | else: 46 | _backend.crop_and_resize_backward( 47 | grad_outputs, boxes, box_ind, grad_image 48 | ) 49 | 50 | return grad_image, None, None 51 | 52 | 53 | class CropAndResize(nn.Module): 54 | """ 55 | Crop and resize ported from tensorflow 56 | See more details on https://www.tensorflow.org/api_docs/python/tf/image/crop_and_resize 57 | """ 58 | 59 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 60 | super(CropAndResize, self).__init__() 61 | 62 | self.crop_height = crop_height 63 | self.crop_width = crop_width 64 | self.extrapolation_value = extrapolation_value 65 | 66 | def forward(self, image, boxes, box_ind): 67 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(image, boxes, box_ind) 68 | -------------------------------------------------------------------------------- /lib/roi_align/crop_and_resize.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/crop_and_resize.pyc -------------------------------------------------------------------------------- /lib/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .crop_and_resize import CropAndResize, CropAndResizeFunction 5 | 6 | class RoIAlign(nn.Module): 7 | 8 | def __init__(self, crop_height, crop_width, extrapolation_value=0): 9 | super(RoIAlign, self).__init__() 10 | 11 | self.crop_height = crop_height 12 | self.crop_width = crop_width 13 | self.extrapolation_value = extrapolation_value 14 | 15 | def forward(self, featuremap, boxes, box_ind): 16 | """ 17 | RoIAlign based on crop_and_resize. 
18 | See more details on https://github.com/ppwwyyxx/tensorpack/blob/6d5ba6a970710eaaa14b89d24aace179eb8ee1af/examples/FasterRCNN/model.py#L301 19 | :param featuremap: NxCxHxW 20 | :param boxes: Mx4 float box with (x1, y1, x2, y2) **without normalization** 21 | :param box_ind: M 22 | :return: MxCxoHxoW 23 | """ 24 | x1, y1, x2, y2 = torch.split(boxes, 1, dim=1) 25 | 26 | spacing_w = (x2 - x1) / float(self.crop_width) 27 | spacing_h = (y2 - y1) / float(self.crop_height) 28 | 29 | image_height, image_width = featuremap.size()[2:4] 30 | nx0 = (x1 + spacing_w / 2 - 0.5) / float(image_width - 1) 31 | ny0 = (y1 + spacing_h / 2 - 0.5) / float(image_height - 1) 32 | 33 | nw = spacing_w * float(self.crop_width - 1) / float(image_width - 1) 34 | nh = spacing_w * float(self.crop_height - 1) / float(image_height - 1) 35 | 36 | boxes = torch.cat((ny0, nx0, ny0 + nh, nx0 + nw), 1) 37 | 38 | return CropAndResizeFunction(self.crop_height, self.crop_width, self.extrapolation_value)(featuremap, boxes, box_ind) -------------------------------------------------------------------------------- /lib/roi_align/roi_align.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/roi_align.pyc -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | void CropAndResizePerBox( 7 | const float * image_data, 8 | const int batch_size, 9 | const int depth, 10 | const int image_height, 11 | const int image_width, 12 | 13 | const float * boxes_data, 14 | const int * box_index_data, 15 | const int start_box, 16 | const int limit_box, 17 | 18 | float * corps_data, 19 | const int crop_height, 20 | const int crop_width, 21 | const float extrapolation_value 22 | ) { 23 | const int image_channel_elements = image_height * image_width; 24 | const int image_elements = depth * image_channel_elements; 25 | 26 | const int channel_elements = crop_height * crop_width; 27 | const int crop_elements = depth * channel_elements; 28 | 29 | int b; 30 | #pragma omp parallel for 31 | for (b = start_box; b < limit_box; ++b) { 32 | const float * box = boxes_data + b * 4; 33 | const float y1 = box[0]; 34 | const float x1 = box[1]; 35 | const float y2 = box[2]; 36 | const float x2 = box[3]; 37 | 38 | const int b_in = box_index_data[b]; 39 | if (b_in < 0 || b_in >= batch_size) { 40 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 41 | exit(-1); 42 | } 43 | 44 | const float height_scale = 45 | (crop_height > 1) 46 | ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 47 | : 0; 48 | const float width_scale = 49 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 50 | : 0; 51 | 52 | for (int y = 0; y < crop_height; ++y) 53 | { 54 | const float in_y = (crop_height > 1) 55 | ? 
y1 * (image_height - 1) + y * height_scale 56 | : 0.5 * (y1 + y2) * (image_height - 1); 57 | 58 | if (in_y < 0 || in_y > image_height - 1) 59 | { 60 | for (int x = 0; x < crop_width; ++x) 61 | { 62 | for (int d = 0; d < depth; ++d) 63 | { 64 | // crops(b, y, x, d) = extrapolation_value; 65 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 66 | } 67 | } 68 | continue; 69 | } 70 | 71 | const int top_y_index = floorf(in_y); 72 | const int bottom_y_index = ceilf(in_y); 73 | const float y_lerp = in_y - top_y_index; 74 | 75 | for (int x = 0; x < crop_width; ++x) 76 | { 77 | const float in_x = (crop_width > 1) 78 | ? x1 * (image_width - 1) + x * width_scale 79 | : 0.5 * (x1 + x2) * (image_width - 1); 80 | if (in_x < 0 || in_x > image_width - 1) 81 | { 82 | for (int d = 0; d < depth; ++d) 83 | { 84 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = extrapolation_value; 85 | } 86 | continue; 87 | } 88 | 89 | const int left_x_index = floorf(in_x); 90 | const int right_x_index = ceilf(in_x); 91 | const float x_lerp = in_x - left_x_index; 92 | 93 | for (int d = 0; d < depth; ++d) 94 | { 95 | const float *pimage = image_data + b_in * image_elements + d * image_channel_elements; 96 | 97 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 98 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 99 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 100 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 101 | 102 | const float top = top_left + (top_right - top_left) * x_lerp; 103 | const float bottom = 104 | bottom_left + (bottom_right - bottom_left) * x_lerp; 105 | 106 | corps_data[crop_elements * b + channel_elements * d + y * crop_width + x] = top + (bottom - top) * y_lerp; 107 | } 108 | } // end for x 109 | } // end for y 110 | } // end for b 111 | 112 | } 113 | 114 | 115 | void crop_and_resize_forward( 116 | THFloatTensor * image, 117 | THFloatTensor * boxes, // [y1, x1, y2, x2] 118 | THIntTensor * box_index, // range in [0, batch_size) 119 | const float extrapolation_value, 120 | const int crop_height, 121 | const int crop_width, 122 | THFloatTensor * crops 123 | ) { 124 | const int batch_size = image->size[0]; 125 | const int depth = image->size[1]; 126 | const int image_height = image->size[2]; 127 | const int image_width = image->size[3]; 128 | 129 | const int num_boxes = boxes->size[0]; 130 | 131 | // init output space 132 | THFloatTensor_resize4d(crops, num_boxes, depth, crop_height, crop_width); 133 | THFloatTensor_zero(crops); 134 | 135 | // crop_and_resize for each box 136 | CropAndResizePerBox( 137 | THFloatTensor_data(image), 138 | batch_size, 139 | depth, 140 | image_height, 141 | image_width, 142 | 143 | THFloatTensor_data(boxes), 144 | THIntTensor_data(box_index), 145 | 0, 146 | num_boxes, 147 | 148 | THFloatTensor_data(crops), 149 | crop_height, 150 | crop_width, 151 | extrapolation_value 152 | ); 153 | 154 | } 155 | 156 | 157 | void crop_and_resize_backward( 158 | THFloatTensor * grads, 159 | THFloatTensor * boxes, // [y1, x1, y2, x2] 160 | THIntTensor * box_index, // range in [0, batch_size) 161 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 162 | ) 163 | { 164 | // shape 165 | const int batch_size = grads_image->size[0]; 166 | const int depth = grads_image->size[1]; 167 | const int image_height = grads_image->size[2]; 168 | const int image_width = grads_image->size[3]; 169 | 170 | const 
int num_boxes = grads->size[0]; 171 | const int crop_height = grads->size[2]; 172 | const int crop_width = grads->size[3]; 173 | 174 | // n_elements 175 | const int image_channel_elements = image_height * image_width; 176 | const int image_elements = depth * image_channel_elements; 177 | 178 | const int channel_elements = crop_height * crop_width; 179 | const int crop_elements = depth * channel_elements; 180 | 181 | // init output space 182 | THFloatTensor_zero(grads_image); 183 | 184 | // data pointer 185 | const float * grads_data = THFloatTensor_data(grads); 186 | const float * boxes_data = THFloatTensor_data(boxes); 187 | const int * box_index_data = THIntTensor_data(box_index); 188 | float * grads_image_data = THFloatTensor_data(grads_image); 189 | 190 | for (int b = 0; b < num_boxes; ++b) { 191 | const float * box = boxes_data + b * 4; 192 | const float y1 = box[0]; 193 | const float x1 = box[1]; 194 | const float y2 = box[2]; 195 | const float x2 = box[3]; 196 | 197 | const int b_in = box_index_data[b]; 198 | if (b_in < 0 || b_in >= batch_size) { 199 | printf("Error: batch_index %d out of range [0, %d)\n", b_in, batch_size); 200 | exit(-1); 201 | } 202 | 203 | const float height_scale = 204 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 205 | : 0; 206 | const float width_scale = 207 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) 208 | : 0; 209 | 210 | for (int y = 0; y < crop_height; ++y) 211 | { 212 | const float in_y = (crop_height > 1) 213 | ? y1 * (image_height - 1) + y * height_scale 214 | : 0.5 * (y1 + y2) * (image_height - 1); 215 | if (in_y < 0 || in_y > image_height - 1) 216 | { 217 | continue; 218 | } 219 | const int top_y_index = floorf(in_y); 220 | const int bottom_y_index = ceilf(in_y); 221 | const float y_lerp = in_y - top_y_index; 222 | 223 | for (int x = 0; x < crop_width; ++x) 224 | { 225 | const float in_x = (crop_width > 1) 226 | ? 
x1 * (image_width - 1) + x * width_scale 227 | : 0.5 * (x1 + x2) * (image_width - 1); 228 | if (in_x < 0 || in_x > image_width - 1) 229 | { 230 | continue; 231 | } 232 | const int left_x_index = floorf(in_x); 233 | const int right_x_index = ceilf(in_x); 234 | const float x_lerp = in_x - left_x_index; 235 | 236 | for (int d = 0; d < depth; ++d) 237 | { 238 | float *pimage = grads_image_data + b_in * image_elements + d * image_channel_elements; 239 | const float grad_val = grads_data[crop_elements * b + channel_elements * d + y * crop_width + x]; 240 | 241 | const float dtop = (1 - y_lerp) * grad_val; 242 | pimage[top_y_index * image_width + left_x_index] += (1 - x_lerp) * dtop; 243 | pimage[top_y_index * image_width + right_x_index] += x_lerp * dtop; 244 | 245 | const float dbottom = y_lerp * grad_val; 246 | pimage[bottom_y_index * image_width + left_x_index] += (1 - x_lerp) * dbottom; 247 | pimage[bottom_y_index * image_width + right_x_index] += x_lerp * dbottom; 248 | } // end d 249 | } // end x 250 | } // end y 251 | } // end b 252 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_forward( 2 | THFloatTensor * image, 3 | THFloatTensor * boxes, // [y1, x1, y2, x2] 4 | THIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THFloatTensor * crops 9 | ); 10 | 11 | void crop_and_resize_backward( 12 | THFloatTensor * grads, 13 | THFloatTensor * boxes, // [y1, x1, y2, x2] 14 | THIntTensor * box_index, // range in [0, batch_size) 15 | THFloatTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "cuda/crop_and_resize_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | 7 | void crop_and_resize_gpu_forward( 8 | THCudaTensor * image, 9 | THCudaTensor * boxes, // [y1, x1, y2, x2] 10 | THCudaIntTensor * box_index, // range in [0, batch_size) 11 | const float extrapolation_value, 12 | const int crop_height, 13 | const int crop_width, 14 | THCudaTensor * crops 15 | ) { 16 | const int batch_size = THCudaTensor_size(state, image, 0); 17 | const int depth = THCudaTensor_size(state, image, 1); 18 | const int image_height = THCudaTensor_size(state, image, 2); 19 | const int image_width = THCudaTensor_size(state, image, 3); 20 | 21 | const int num_boxes = THCudaTensor_size(state, boxes, 0); 22 | 23 | // init output space 24 | THCudaTensor_resize4d(state, crops, num_boxes, depth, crop_height, crop_width); 25 | THCudaTensor_zero(state, crops); 26 | 27 | cudaStream_t stream = THCState_getCurrentStream(state); 28 | CropAndResizeLaucher( 29 | THCudaTensor_data(state, image), 30 | THCudaTensor_data(state, boxes), 31 | THCudaIntTensor_data(state, box_index), 32 | num_boxes, batch_size, image_height, image_width, 33 | crop_height, crop_width, depth, extrapolation_value, 34 | THCudaTensor_data(state, crops), 35 | stream 36 | ); 37 | } 38 | 39 | 40 | void crop_and_resize_gpu_backward( 41 | THCudaTensor * grads, 42 | THCudaTensor * boxes, // [y1, x1, y2, x2] 43 | THCudaIntTensor * box_index, // range in [0, batch_size) 44 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 45 | ) { 46 | // shape 47 | const int 
batch_size = THCudaTensor_size(state, grads_image, 0); 48 | const int depth = THCudaTensor_size(state, grads_image, 1); 49 | const int image_height = THCudaTensor_size(state, grads_image, 2); 50 | const int image_width = THCudaTensor_size(state, grads_image, 3); 51 | 52 | const int num_boxes = THCudaTensor_size(state, grads, 0); 53 | const int crop_height = THCudaTensor_size(state, grads, 2); 54 | const int crop_width = THCudaTensor_size(state, grads, 3); 55 | 56 | // init output space 57 | THCudaTensor_zero(state, grads_image); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | CropAndResizeBackpropImageLaucher( 61 | THCudaTensor_data(state, grads), 62 | THCudaTensor_data(state, boxes), 63 | THCudaIntTensor_data(state, box_index), 64 | num_boxes, batch_size, image_height, image_width, 65 | crop_height, crop_width, depth, 66 | THCudaTensor_data(state, grads_image), 67 | stream 68 | ); 69 | } -------------------------------------------------------------------------------- /lib/roi_align/src/crop_and_resize_gpu.h: -------------------------------------------------------------------------------- 1 | void crop_and_resize_gpu_forward( 2 | THCudaTensor * image, 3 | THCudaTensor * boxes, // [y1, x1, y2, x2] 4 | THCudaIntTensor * box_index, // range in [0, batch_size) 5 | const float extrapolation_value, 6 | const int crop_height, 7 | const int crop_width, 8 | THCudaTensor * crops 9 | ); 10 | 11 | void crop_and_resize_gpu_backward( 12 | THCudaTensor * grads, 13 | THCudaTensor * boxes, // [y1, x1, y2, x2] 14 | THCudaIntTensor * box_index, // range in [0, batch_size) 15 | THCudaTensor * grads_image // resize to [bsize, c, hc, wc] 16 | ); -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "crop_and_resize_kernel.h" 4 | 5 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 6 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 7 | i += blockDim.x * gridDim.x) 8 | 9 | 10 | __global__ 11 | void CropAndResizeKernel( 12 | const int nthreads, const float *image_ptr, const float *boxes_ptr, 13 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 14 | int image_width, int crop_height, int crop_width, int depth, 15 | float extrapolation_value, float *crops_ptr) 16 | { 17 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 18 | { 19 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 20 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 21 | int idx = out_idx; 22 | const int x = idx % crop_width; 23 | idx /= crop_width; 24 | const int y = idx % crop_height; 25 | idx /= crop_height; 26 | const int d = idx % depth; 27 | const int b = idx / depth; 28 | 29 | const float y1 = boxes_ptr[b * 4]; 30 | const float x1 = boxes_ptr[b * 4 + 1]; 31 | const float y2 = boxes_ptr[b * 4 + 2]; 32 | const float x2 = boxes_ptr[b * 4 + 3]; 33 | 34 | const int b_in = box_ind_ptr[b]; 35 | if (b_in < 0 || b_in >= batch) 36 | { 37 | continue; 38 | } 39 | 40 | const float height_scale = 41 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 42 | : 0; 43 | const float width_scale = 44 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 45 | 46 | const float in_y = (crop_height > 1) 47 | ? 
y1 * (image_height - 1) + y * height_scale 48 | : 0.5 * (y1 + y2) * (image_height - 1); 49 | if (in_y < 0 || in_y > image_height - 1) 50 | { 51 | crops_ptr[out_idx] = extrapolation_value; 52 | continue; 53 | } 54 | 55 | const float in_x = (crop_width > 1) 56 | ? x1 * (image_width - 1) + x * width_scale 57 | : 0.5 * (x1 + x2) * (image_width - 1); 58 | if (in_x < 0 || in_x > image_width - 1) 59 | { 60 | crops_ptr[out_idx] = extrapolation_value; 61 | continue; 62 | } 63 | 64 | const int top_y_index = floorf(in_y); 65 | const int bottom_y_index = ceilf(in_y); 66 | const float y_lerp = in_y - top_y_index; 67 | 68 | const int left_x_index = floorf(in_x); 69 | const int right_x_index = ceilf(in_x); 70 | const float x_lerp = in_x - left_x_index; 71 | 72 | const float *pimage = image_ptr + (b_in * depth + d) * image_height * image_width; 73 | const float top_left = pimage[top_y_index * image_width + left_x_index]; 74 | const float top_right = pimage[top_y_index * image_width + right_x_index]; 75 | const float bottom_left = pimage[bottom_y_index * image_width + left_x_index]; 76 | const float bottom_right = pimage[bottom_y_index * image_width + right_x_index]; 77 | 78 | const float top = top_left + (top_right - top_left) * x_lerp; 79 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; 80 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp; 81 | } 82 | } 83 | 84 | __global__ 85 | void CropAndResizeBackpropImageKernel( 86 | const int nthreads, const float *grads_ptr, const float *boxes_ptr, 87 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 88 | int image_width, int crop_height, int crop_width, int depth, 89 | float *grads_image_ptr) 90 | { 91 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) 92 | { 93 | // NHWC: out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 94 | // NCHW: out_idx = w + crop_width * (h + crop_height * (d + depth * b)) 95 | int idx = out_idx; 96 | const int x = idx % crop_width; 97 | idx /= crop_width; 98 | const int y = idx % crop_height; 99 | idx /= crop_height; 100 | const int d = idx % depth; 101 | const int b = idx / depth; 102 | 103 | const float y1 = boxes_ptr[b * 4]; 104 | const float x1 = boxes_ptr[b * 4 + 1]; 105 | const float y2 = boxes_ptr[b * 4 + 2]; 106 | const float x2 = boxes_ptr[b * 4 + 3]; 107 | 108 | const int b_in = box_ind_ptr[b]; 109 | if (b_in < 0 || b_in >= batch) 110 | { 111 | continue; 112 | } 113 | 114 | const float height_scale = 115 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 116 | : 0; 117 | const float width_scale = 118 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 119 | 120 | const float in_y = (crop_height > 1) 121 | ? y1 * (image_height - 1) + y * height_scale 122 | : 0.5 * (y1 + y2) * (image_height - 1); 123 | if (in_y < 0 || in_y > image_height - 1) 124 | { 125 | continue; 126 | } 127 | 128 | const float in_x = (crop_width > 1) 129 | ? 
x1 * (image_width - 1) + x * width_scale 130 | : 0.5 * (x1 + x2) * (image_width - 1); 131 | if (in_x < 0 || in_x > image_width - 1) 132 | { 133 | continue; 134 | } 135 | 136 | const int top_y_index = floorf(in_y); 137 | const int bottom_y_index = ceilf(in_y); 138 | const float y_lerp = in_y - top_y_index; 139 | 140 | const int left_x_index = floorf(in_x); 141 | const int right_x_index = ceilf(in_x); 142 | const float x_lerp = in_x - left_x_index; 143 | 144 | float *pimage = grads_image_ptr + (b_in * depth + d) * image_height * image_width; 145 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx]; 146 | atomicAdd( 147 | pimage + top_y_index * image_width + left_x_index, 148 | (1 - x_lerp) * dtop 149 | ); 150 | atomicAdd( 151 | pimage + top_y_index * image_width + right_x_index, 152 | x_lerp * dtop 153 | ); 154 | 155 | const float dbottom = y_lerp * grads_ptr[out_idx]; 156 | atomicAdd( 157 | pimage + bottom_y_index * image_width + left_x_index, 158 | (1 - x_lerp) * dbottom 159 | ); 160 | atomicAdd( 161 | pimage + bottom_y_index * image_width + right_x_index, 162 | x_lerp * dbottom 163 | ); 164 | } 165 | } 166 | 167 | 168 | void CropAndResizeLaucher( 169 | const float *image_ptr, const float *boxes_ptr, 170 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 171 | int image_width, int crop_height, int crop_width, int depth, 172 | float extrapolation_value, float *crops_ptr, cudaStream_t stream) 173 | { 174 | const int total_count = num_boxes * crop_height * crop_width * depth; 175 | const int thread_per_block = 1024; 176 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 177 | cudaError_t err; 178 | 179 | if (total_count > 0) 180 | { 181 | CropAndResizeKernel<<>>( 182 | total_count, image_ptr, boxes_ptr, 183 | box_ind_ptr, num_boxes, batch, image_height, image_width, 184 | crop_height, crop_width, depth, extrapolation_value, crops_ptr); 185 | 186 | err = cudaGetLastError(); 187 | if (cudaSuccess != err) 188 | { 189 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 190 | exit(-1); 191 | } 192 | } 193 | } 194 | 195 | 196 | void CropAndResizeBackpropImageLaucher( 197 | const float *grads_ptr, const float *boxes_ptr, 198 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 199 | int image_width, int crop_height, int crop_width, int depth, 200 | float *grads_image_ptr, cudaStream_t stream) 201 | { 202 | const int total_count = num_boxes * crop_height * crop_width * depth; 203 | const int thread_per_block = 1024; 204 | const int block_count = (total_count + thread_per_block - 1) / thread_per_block; 205 | cudaError_t err; 206 | 207 | if (total_count > 0) 208 | { 209 | CropAndResizeBackpropImageKernel<<>>( 210 | total_count, grads_ptr, boxes_ptr, 211 | box_ind_ptr, num_boxes, batch, image_height, image_width, 212 | crop_height, crop_width, depth, grads_image_ptr); 213 | 214 | err = cudaGetLastError(); 215 | if (cudaSuccess != err) 216 | { 217 | fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); 218 | exit(-1); 219 | } 220 | } 221 | } -------------------------------------------------------------------------------- /lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/lib/roi_align/src/cuda/crop_and_resize_kernel.cu.o -------------------------------------------------------------------------------- 
/lib/roi_align/src/cuda/crop_and_resize_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CropAndResize_Kernel 2 | #define _CropAndResize_Kernel 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void CropAndResizeLaucher( 9 | const float *image_ptr, const float *boxes_ptr, 10 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 11 | int image_width, int crop_height, int crop_width, int depth, 12 | float extrapolation_value, float *crops_ptr, cudaStream_t stream); 13 | 14 | void CropAndResizeBackpropImageLaucher( 15 | const float *grads_ptr, const float *boxes_ptr, 16 | const int *box_ind_ptr, int num_boxes, int batch, int image_height, 17 | int image_width, int crop_height, int crop_width, int depth, 18 | float *grads_image_ptr, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif -------------------------------------------------------------------------------- /models/Model7.py: -------------------------------------------------------------------------------- 1 | """Model7 is for semantic embedding & attention. We replace the global classification with the semantic classification, 2 | thus applicable for textual grounding problem.""" 3 | 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import absolute_import 7 | import sys 8 | sys.path.insert(0, '/../') 9 | import torch 10 | import numpy as np 11 | import torch.nn as nn 12 | from lib.configure.config import Config 13 | from lib.resnet.resnet import resnet50 14 | from torch.autograd import Variable 15 | from lib.bilinear_pooling.CompactBilinearPooling import CompactBilinearPooling 16 | 17 | 18 | class Model7(nn.Module): 19 | 20 | def __init__(self, opts, body_pretrain=False): 21 | super(Model7, self).__init__() 22 | 23 | # Load pre-trained back-boned model 24 | print('==> Building backbone model...') 25 | config = Config() 26 | config.IMAGES_PER_GPU = opts.batch_size 27 | config.NUM_CLASSES = opts.class_num 28 | 29 | # Load Attribute module 30 | attr_branch = AttributeBranch(300) 31 | attr_res_net = resnet50(True, path='./checkpoint/AENet_clsfier_person_256d_4.pth', classnum=4) 32 | 33 | # Load semantic embeddings 34 | dictionary = {'man': [1, 0, 0.5, 0.5], 35 | 'woman': [0, 1, 0.5, 0.5], 36 | 'lady': [0, 1, 0.25, 0.75], 37 | 'female': [0, 1, 0.5, 0.5], 38 | 'boy': [1, 0, 1, 0], 39 | 'girl': [0, 1, 1, 0], 40 | 'kid': [0.5, 0.5, 1, 0], 41 | 'child': [0.5, 0.5, 1, 0], 42 | 'young': [0.5, 0.5, 1, 0], 43 | 'elderly': [0.5, 0.5, 0, 1]} 44 | for key in dictionary.keys(): 45 | dictionary[key] = np.asarray(dictionary[key]) 46 | 47 | # Freeze the attr-resnet model 48 | for param in attr_res_net.parameters(): 49 | param.requires_grad = False 50 | 51 | for param in attr_res_net.fc.parameters(): 52 | param.requires_grad = False 53 | 54 | # Freeze the attribute branch or not 55 | for param in attr_branch.parameters(): 56 | param.requires_grad = True 57 | 58 | self.attr_branch = attr_branch 59 | self.opts = opts 60 | self.attr_res_net = attr_res_net 61 | self.pool = nn.AvgPool2d(kernel_size=64, stride=1) 62 | self.sigmoid = nn.Sigmoid() 63 | self.regressor = nn.Linear(256, 4) 64 | self.semantic_layer = SemanticLayer(dictionary) 65 | 66 | def forward(self, img, label, embeddings): 67 | 68 | # Attribute Branch 69 | conv_feat4, conv_feat = self.attr_res_net(img) 70 | attr_map, att_conv_feature = self.attr_branch(conv_feat, embeddings) 71 | feat = self.pool(att_conv_feature) 72 | feat = 
self.regressor(feat.view(feat.shape[0], feat.shape[1])) 73 | output = self.semantic_layer(feat, label) 74 | return output, attr_map, att_conv_feature 75 | 76 | 77 | class AttributeBranch(nn.Module): 78 | 79 | def __init__(self, attr_num): 80 | super(AttributeBranch, self).__init__() 81 | 82 | self.textual_emb = nn.Linear(attr_num, 256) 83 | self.conv = nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=True) 84 | self.mcb_attr = CompactBilinearPooling(256, 256, 256).cuda() 85 | self.mcb_conv1_attr = nn.Conv2d(256, 32, kernel_size=1, stride=1, padding=0, bias=True) 86 | self.mcb_relu1_attr = nn.ReLU(inplace=True) 87 | self.mcb_conv2_attr = nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0, bias=True) 88 | self.mcb_sigmoid = nn.Sigmoid() 89 | 90 | def forward(self, entity_feature, attr_one_hot): 91 | 92 | feature = self.mcb_relu1_attr(entity_feature) 93 | # Reshape the attribute one-hot input 94 | attr_one_hot = self.textual_emb(attr_one_hot) 95 | attr_one_hot = attr_one_hot.view(attr_one_hot.shape[0], attr_one_hot.shape[1], 1, 1) 96 | 97 | # Stacked attention-map generation for P3, P4, P5 98 | attr_one_hot = attr_one_hot.expand_as(feature) 99 | 100 | # Generate the attribute attention map and apply it to the entity features 101 | mcb_attr_feat = self.mcb_attr(self.conv(attr_one_hot), feature) 102 | attr_map = self.mcb_sigmoid(self.mcb_conv2_attr(self.mcb_relu1_attr(self.mcb_conv1_attr(mcb_attr_feat)))) 103 | attr_feature = (torch.mul(attr_map, entity_feature)) 104 | 105 | return attr_map, attr_feature 106 | 107 | 108 | class SemanticLayer(nn.Module): 109 | def __init__(self, dictionary): 110 | super(SemanticLayer, self).__init__() 111 | 112 | list_file = open('./others/low-level-attr.txt', 'r') 113 | entity_att = [] 114 | for i in list_file.readlines(): 115 | entity_att.append(i.replace('\n', '')) 116 | 117 | # Create the semantic matrix 118 | s_matrix = torch.zeros(10, 4).cuda() 119 | for index, item in enumerate(entity_att): 120 | emb = torch.from_numpy(dictionary[item]) 121 | s_matrix[index] = emb 122 | self.s_matrix = Variable(s_matrix) 123 | 124 | def forward(self, x, label): 125 | # x: (batch, 4) 126 | # label: (batch,) 127 | prob = Variable(torch.zeros(x.shape[0])) 128 | for index in range(x.shape[0]): 129 | lbl = label[index] 130 | prob[index] = torch.nn.functional.cosine_similarity(self.s_matrix, x[index].view(1, -1))[lbl] 131 | prob = prob.sum() / prob.shape[0] 132 | return 1-prob 133 | -------------------------------------------------------------------------------- /models/__pycache__/Model7.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/models/__pycache__/Model7.cpython-35.pyc -------------------------------------------------------------------------------- /others/README.md: -------------------------------------------------------------------------------- 1 | # coco_person_list.txt: 2 | The list of 12,000 images we extracted from coco_train_2017 for person attribute grounding. 3 | # low-level-attr.txt: 4 | The attribute dictionary. 5 | # glove.6B.300d.txt: 6 | The word embedding dictionary we use; it is the GloVe 6B version, and each embedding is 300-dimensional. 7 | # dictionary_emb.pkl: 8 | Instead of loading the whole word embedding file, we manually extract just the attribute word embeddings for faster embedding dictionary loading.
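For reference, the SemanticLayer above scores the regressed 4-d vector against a 10 x 4 semantic matrix whose rows follow others/low-level-attr.txt and whose entries come from the hand-written dictionary in Model7; the loss is one minus the average cosine similarity with the row of the sampled attribute. The following is a small, self-contained sketch of that loss with the matrix hard-coded. Reading the four columns as male/female/young/old scores is our interpretation, not something the code states, and the sketch is illustrative only, not a drop-in replacement for the module.

```python
import torch
import torch.nn.functional as F

# Rows follow others/low-level-attr.txt; values follow the dictionary in Model7.
ATTRS = ['man', 'woman', 'lady', 'female', 'boy', 'girl', 'kid', 'child', 'young', 'elderly']
S_MATRIX = torch.tensor([
    [1.0, 0.0, 0.5, 0.5],    # man
    [0.0, 1.0, 0.5, 0.5],    # woman
    [0.0, 1.0, 0.25, 0.75],  # lady
    [0.0, 1.0, 0.5, 0.5],    # female
    [1.0, 0.0, 1.0, 0.0],    # boy
    [0.0, 1.0, 1.0, 0.0],    # girl
    [0.5, 0.5, 1.0, 0.0],    # kid
    [0.5, 0.5, 1.0, 0.0],    # child
    [0.5, 0.5, 1.0, 0.0],    # young
    [0.5, 0.5, 0.0, 1.0],    # elderly
])


def semantic_loss(pred, labels, s_matrix=S_MATRIX):
    """pred: (batch, 4) regressed vectors; labels: (batch,) attribute indices.
    Returns 1 - mean cosine similarity to the matching dictionary rows."""
    target = s_matrix[labels]                        # (batch, 4)
    sims = F.cosine_similarity(pred, target, dim=1)  # (batch,)
    return 1.0 - sims.mean()


# Example: a prediction close to the 'boy' row gives a small loss.
pred = torch.tensor([[0.9, 0.1, 0.8, 0.1]])
print(semantic_loss(pred, torch.tensor([4])))  # index 4 == 'boy'
```

Because cosine similarity ignores magnitude, only the direction of the regressed 4-d vector matters for this loss.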
9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /others/dictionary_emb.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/others/dictionary_emb.pkl -------------------------------------------------------------------------------- /others/low-level-attr.txt: -------------------------------------------------------------------------------- 1 | man 2 | woman 3 | lady 4 | female 5 | boy 6 | girl 7 | kid 8 | child 9 | young 10 | elderly 11 | -------------------------------------------------------------------------------- /parser.py: -------------------------------------------------------------------------------- 1 | '''Train Sun Attribute with PyTorch.''' 2 | from __future__ import print_function 3 | 4 | import torch 5 | import argparse 6 | import torch.optim as optim 7 | 8 | 9 | def parse_opts(): 10 | parser = argparse.ArgumentParser(description='PyTorch Attribute Grounding Training') 11 | parser.add_argument('--msg', default=False, type=bool, help='display message') 12 | parser.add_argument('--use_gpu', default=torch.cuda.is_available(), type=bool, help='Use GPU or not') 13 | parser.add_argument('--multi_gpu', default=(torch.cuda.device_count() > 0), type=bool, help='Use multi-GPU or not') 14 | parser.add_argument('--gpu_id', default=-1, type=int, help='Use specific GPU.') 15 | 16 | parser.add_argument('--optimizer', default=optim.SGD, help='optimizer') 17 | parser.add_argument('--num_workers', default=2, type=int, help='num of fetching threads') 18 | parser.add_argument('--batch_size', default=12, type=int, help='batch size') 19 | parser.add_argument('--weight_decay', default=1e-3, type=float, help='weight decay') 20 | parser.add_argument('--seed', default=0, type=int, help='random seed') 21 | parser.add_argument('--result_path', default='./results', help='result path') 22 | 23 | # Define the training parameters 24 | parser.add_argument('--class_num', default=5, type=int, help='number of classes') 25 | parser.add_argument('--checkpoint_epoch', default=2, type=int, help='epochs between checkpoint saves') 26 | parser.add_argument('--lr_adjust_epoch', default=5, type=int, help='epochs between lr adjustments') 27 | parser.add_argument('--n_epoch', default=1000, type=int, help='training epochs') 28 | parser.add_argument('--lr', default=0.01, type=float, help='learning rate') 29 | 30 | # Define the checkpoint reloading path 31 | parser.add_argument('--resume', default='', help='path of the checkpoint to resume from') 32 | 33 | # Define the data_set path 34 | parser.add_argument('--img_path', default='/media/drive1/Data/coco17/train2017/', help='coco_train_2017 path') 35 | parser.add_argument('--annotation', default='/media/drive1/Data/coco17/annotations/' 36 | 'captions_train2017.json', help='coco_train_2017 annotation path') 37 | parser.add_argument('--dictionary', default='./others/low-level-attr.txt', help='dict of attributes') 38 | args = parser.parse_args() 39 | 40 | return args 41 | 42 | -------------------------------------------------------------------------------- /results/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/results/architecture.png -------------------------------------------------------------------------------- /results/test.log:
-------------------------------------------------------------------------------- 1 | epoch time loss 2 | -------------------------------------------------------------------------------- /results/train.log: -------------------------------------------------------------------------------- 1 | epoch time loss 2 | -------------------------------------------------------------------------------- /results/train_batch.log: -------------------------------------------------------------------------------- 1 | epoch batch loss 2 | 1 2 0.1857217252254486 3 | 1 3 0.20769339799880981 4 | 1 4 0.2339950054883957 5 | 1 5 0.24028053283691406 6 | 1 6 0.24673599004745483 7 | 1 7 0.24947353771754674 8 | 1 8 0.24981582164764404 9 | 1 9 0.24003050724665323 10 | 1 10 0.23302733302116393 11 | 1 11 0.22751894864169034 12 | 1 12 0.22201103965441385 13 | -------------------------------------------------------------------------------- /runs/Oct05_13-58-18_apg395-001/events.out.tfevents.1538773098.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_13-58-18_apg395-001/events.out.tfevents.1538773098.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-08-13_apg395-001/events.out.tfevents.1538773693.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-13_apg395-001/events.out.tfevents.1538773693.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-08-27_apg395-001/events.out.tfevents.1538773707.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-27_apg395-001/events.out.tfevents.1538773707.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-08-58_apg395-001/events.out.tfevents.1538773738.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-08-58_apg395-001/events.out.tfevents.1538773738.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-17-30_apg395-001/events.out.tfevents.1538774250.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-17-30_apg395-001/events.out.tfevents.1538774250.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-17-42_apg395-001/events.out.tfevents.1538774262.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-17-42_apg395-001/events.out.tfevents.1538774262.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-18-03_apg395-001/events.out.tfevents.1538774283.apg395-001: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-18-03_apg395-001/events.out.tfevents.1538774283.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-18-55_apg395-001/events.out.tfevents.1538774335.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-18-55_apg395-001/events.out.tfevents.1538774335.apg395-001 -------------------------------------------------------------------------------- /runs/Oct05_14-19-46_apg395-001/events.out.tfevents.1538774386.apg395-001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jacobswan1/MTG-pytorch/0b9429f0715ccfc27037b6366e79443a76fef439/runs/Oct05_14-19-46_apg395-001/events.out.tfevents.1538774386.apg395-001 -------------------------------------------------------------------------------- /train_attr_attention_embedding.py: -------------------------------------------------------------------------------- 1 | '''Train unsuperwised entity grounding by attention+pixel classification mechanism.''' 2 | from __future__ import print_function 3 | 4 | import random 5 | import pickle 6 | from parser import * 7 | import matplotlib.pyplot as plt 8 | from models.Model7 import Model7 9 | from lib.configure.net_util import * 10 | from torchvision import transforms 11 | from tensorboardX import SummaryWriter 12 | from lib.dataset.coco_dataset import CocoCaptions 13 | 14 | 15 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152 16 | # os.environ["CUDA_VISIBLE_DEVICES"]="1" 17 | 18 | def l2_regulariza_loss(map): 19 | # return torch.mean(map.view(map.shape[0], map.shape[-2], map.shape[-1])) 20 | mean = torch.mean(map.view(map.shape[0], map.shape[-2], map.shape[-1])) 21 | return mean 22 | 23 | 24 | def load_dictionary(name): 25 | with open('./others/' + name + '.pkl', 'rb') as f: 26 | return pickle.load(f) 27 | 28 | 29 | # Randomly pick a label from multi one-hot label 30 | def random_pick(one_hot): 31 | # return a randomly selected label 32 | label = torch.zeros(one_hot.shape[0]) 33 | one_hot_return = torch.zeros_like(one_hot) 34 | 35 | for i in range(one_hot.shape[0]): 36 | # all labels to save all the labels 37 | all_labels = [] 38 | count = 0 39 | for j in range(one_hot.shape[1]): 40 | if one_hot[i][j] == 1.: 41 | all_labels.append(count) 42 | count += 1 43 | # randomly picking one label 44 | if len(all_labels) != 0: 45 | label[i] = random.choice(all_labels) 46 | else: 47 | label[i] = 2 48 | one_hot_return[i][int(label[i])] = 1 49 | return label, one_hot_return 50 | 51 | 52 | # Multi-Pixel embedding learning for multi-category picking 53 | def top_k_emb(visual_emb, model, label, single_attribute_label, K=100): 54 | # Given pixel-wise features, select top-k pixels with highest category prob out for 55 | # multi-cross entropy learning 56 | # Visual-features: (batch, emb #, pixel #) 57 | # Returning prob: (batch #, top-K, class_prob) 58 | # Returning feat: (batch #, top-K, feature_size) 59 | visual_emb = visual_emb.view((visual_emb.shape[0], visual_emb.shape[1], visual_emb.shape[2]*visual_emb.shape[3])) 60 | 61 | # i: batch number 62 | for i in range(visual_emb.shape[0]): 63 | sorting = np.zeros((visual_emb.shape[2])) 64 | # j: pixel numbers in feature maps 65 | for j in range(visual_emb.shape[2]): 66 | # extracting pixel features 
and reshape 67 | emb = visual_emb[i, :, j] 68 | # emb = F.relu(model.fc_p5(emb.contiguous().view(1, -1))) 69 | emb_ = (emb.contiguous().view(1, -1)) 70 | output = model.attr_res_net.fc(emb_) 71 | prob = opts.criterion[0](output, single_attribute_label[i]) 72 | opts.prob_set[j] = output[0] 73 | opts.features_set[j] = emb 74 | sorting[j] = prob.data.cpu().numpy()[0] 75 | 76 | # Arg-sort the probability (and inverse the order) 77 | sorting = np.argsort(sorting)[0:K] 78 | 79 | # index: number of top-K 80 | for index in range(K): 81 | opts.return_prob[i, index] = opts.prob_set[int(sorting[index])] 82 | opts.return_feat[i, index] = opts.features_set[int(sorting[index])] 83 | return opts.return_feat 84 | 85 | 86 | def train_net(net, opts): 87 | 88 | print('training at epoch {}'.format(opts.epoch+1)) 89 | 90 | if opts.use_gpu: 91 | net.cuda() 92 | 93 | net.train(True) 94 | train_loss = 0 95 | total_time = 0 96 | batch_idx = 0 97 | optimizer = opts.current_optimizer 98 | # back_bone_optimizer = opts.backbone_optimizer 99 | end_time = time.time() 100 | train_back_bone = True 101 | fig = plt.figure() 102 | 103 | # category: semantic labels for single selected label 104 | # s_entity_one_hot: randomly selected entity one-hot 105 | # s_entity_label: randomly selected entity label 106 | # att_emb: word2vec embedding for attributes 107 | # att_label: attributes pairs for margin loss learning 108 | # attr_one_hot: all attributes one-hot 109 | # textual_emb: phrase embedding 110 | # phrase/line: phrases/lines in NLP format 111 | # mask: ground truth annotations for object 112 | for batch_idx, (images, attr_one_hot, entity_one_hot) in enumerate(data_loader): 113 | 114 | # model.visual_net.config.IMAGES_PER_GPU = images.size(0) 115 | images = Variable(images).cuda() 116 | 117 | # Randomly pick one attribute per iteration 118 | single_attribute_label, single_attribute_one_hot = random_pick(attr_one_hot) 119 | attr_one_hot = Variable(single_attribute_one_hot).cuda().float() 120 | single_attribute_label = Variable(single_attribute_label).cuda().long() 121 | 122 | # Create embeddings input 123 | embeddings = Variable(torch.zeros(attr_one_hot.shape[0], 300)) 124 | for index, item in enumerate(single_attribute_label): 125 | i = opts.entity_att[item.data.cpu().numpy()[0]] 126 | embeddings[index] = Variable(torch.from_numpy(opts.embeddings_index[i])).cuda() 127 | 128 | # Feed in network 129 | y, attr_map, att_conv_feature = net(images, single_attribute_label, embeddings) 130 | 131 | loss = y 132 | 133 | if train_back_bone: 134 | optimizer.zero_grad() 135 | train_loss += loss.data[0] 136 | loss.backward() 137 | optimizer.step() 138 | 139 | # Display the generated att_map and instant loss 140 | if batch_idx % 1 == 0: 141 | plt.ion() 142 | plt.show() 143 | random = randint(0, opts.batch_size - 1) 144 | if batch_idx % 1 == 0: 145 | # Print out the attribute labels 146 | # plt.suptitle(opts.entity_att[int(single_attribute_label[random])]) 147 | plt.subplot(141) 148 | vis = torch.nn.functional.sigmoid((model.attr_res_net.fc.weight[0].view(-1, 1, 1) 149 | * att_conv_feature[random]).sum(0)).cpu().data.numpy() 150 | plt.imshow(vis) 151 | 152 | plt.subplot(142) 153 | vis = torch.nn.functional.sigmoid((model.attr_res_net.fc.weight[1].view(-1, 1, 1) 154 | * att_conv_feature[random]).sum(0)).cpu().data.numpy() 155 | plt.imshow(vis) 156 | 157 | plt.subplot(143) 158 | plt.imshow(attr_map[random, 0].data.cpu().numpy()) 159 | 160 | plt.subplot(144) 161 | plt.imshow(images[random].permute(1, 2, 0).float().data.cpu()) 162 | 
plt.pause(0.001) 163 | writer.add_scalar('Cross Entropy Loss', train_loss / (batch_idx+1), opts.iter_n) 164 | opts.iter_n += 1 165 | 166 | print('Overall Loss: %.8f' 167 | % (train_loss/(batch_idx+1))) 168 | 169 | total_time += (time.time() - end_time) 170 | end_time = time.time() 171 | batch_idx += 1 172 | 173 | opts.train_batch_logger.log({ 174 | 'epoch': (opts.epoch+1), 175 | 'batch': batch_idx+1, 176 | 'loss': train_loss / (batch_idx+1), 177 | }) 178 | 179 | if batch_idx % 100 == 0: 180 | print('100 batch.') 181 | # Save checkpoint. 182 | net_states = { 183 | 'state_dict': net.state_dict(), 184 | 'epoch': opts.epoch + 1, 185 | 'loss': opts.train_losses, 186 | 'optimizer': opts.current_optimizer.state_dict() 187 | } 188 | epo_batch = str(opts.epoch) + '-' + str(batch_idx) 189 | save_file_path = os.path.join(opts.checkpoint_path, 190 | 'Model7_exp1_{}.pth'.format(epo_batch)) 191 | torch.save(net_states, save_file_path) 192 | opts.lr /= 2 193 | opts.regularization /= 2 194 | params = filter(lambda p: p.requires_grad, model.parameters()) 195 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay) 196 | train_loss /= (batch_idx + 1) 197 | 198 | opts.train_epoch_logger.log({ 199 | 'epoch': (opts.epoch+1), 200 | 'loss': train_loss, 201 | 'time': total_time, 202 | }) 203 | 204 | opts.train_losses.append(train_loss) 205 | 206 | # Save checkpoint. 207 | net_states = { 208 | 'state_dict': net.state_dict(), 209 | 'epoch': opts.epoch + 1, 210 | 'loss': opts.train_losses, 211 | 'optimizer': opts.current_optimizer.state_dict() 212 | } 213 | 214 | if opts.epoch % opts.checkpoint_epoch == 0: 215 | save_file_path = os.path.join(opts.checkpoint_path, 'Model7_exp1_{}.pth'.format(opts.epoch)) 216 | torch.save(net_states, save_file_path) 217 | 218 | print('Batch Loss: %.8f, elapsed time: %3.f seconds.' 
% (train_loss, total_time)) 219 | 220 | 221 | if __name__ == '__main__': 222 | 223 | opts = parse_opts() 224 | writer = SummaryWriter() 225 | 226 | if opts.gpu_id >= 0: 227 | torch.cuda.set_device(opts.gpu_id) 228 | opts.multi_gpu = False 229 | 230 | torch.manual_seed(opts.seed) 231 | if opts.use_gpu: 232 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 233 | torch.cuda.manual_seed(opts.seed) 234 | 235 | # Loading Data 236 | print("Preparing Flickr data set...") 237 | opts.k = 600 238 | opts.ite = 0 239 | opts.regularization = 0.1 240 | size = (1024, 1024) 241 | feat_size = (64, 64) 242 | transform = transforms.Compose([transforms.Resize(size), transforms.ToTensor()]) 243 | data_set = CocoCaptions(opts.img_path, opts.annotation, transform) 244 | data_loader = torch.utils.data.DataLoader(data_set, batch_size=opts.batch_size, shuffle=True) 245 | 246 | # Load dictionary 247 | list_file = open(opts.dictionary, 'r') 248 | entity_att = [] 249 | for i in list_file.readlines(): 250 | entity_att.append(i.replace('\n', '')) 251 | opts.entity_att = entity_att 252 | 253 | # Load semantic embeddings 254 | embeddings_index = load_dictionary('dictionary_emb') 255 | print('Dictionary loaded.') 256 | opts.embeddings_index = embeddings_index 257 | 258 | if not os.path.exists(opts.result_path): 259 | os.mkdir(opts.result_path) 260 | 261 | opts.train_epoch_logger = Logger(os.path.join(opts.result_path, 'train.log'), 262 | ['epoch', 'time', 'loss']) 263 | opts.train_batch_logger = Logger(os.path.join(opts.result_path, 'train_batch.log'), 264 | ['epoch', 'batch', 'loss']) 265 | opts.test_epoch_logger = Logger(os.path.join(opts.result_path, 'test.log'), 266 | ['epoch', 'time', 'loss']) 267 | 268 | # Model 269 | print('==> Building model...') 270 | model = Model7(opts) 271 | 272 | # Load Back bone Module 273 | if opts.resume: 274 | state_dict = torch.load(opts.resume)['state_dict'] 275 | new_params = model.state_dict() 276 | new_params.update(state_dict) 277 | # Remove the extra keys 278 | model_keys = model.state_dict().keys() 279 | for name, param in list(new_params.items()): 280 | if name not in model_keys: 281 | del new_params[name] 282 | model.load_state_dict(new_params) 283 | start_epoch = 0 284 | print('==> model built.') 285 | opts.criterion = [torch.nn.CrossEntropyLoss(), torch.nn.MSELoss()] 286 | 287 | # Training 288 | parameters = filter(lambda p: p.requires_grad, model.parameters()) 289 | params = sum([np.prod(p.size()) for p in parameters]) 290 | print(params, 'trainable parameters in the network.') 291 | set_parameters(opts) 292 | opts.iter_n = 0 293 | 294 | for epoch in range(start_epoch, start_epoch+opts.n_epoch): 295 | opts.epoch = epoch 296 | if epoch is 0: 297 | params = filter(lambda p: p.requires_grad, model.parameters()) 298 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay) 299 | 300 | elif (epoch % opts.lr_adjust_epoch) == 0 and epoch is not 0: 301 | opts.lr /= 5 302 | params = filter(lambda p: p.requires_grad, model.parameters()) 303 | opts.current_optimizer = opts.optimizer(params, lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay) 304 | 305 | train_net(model, opts) 306 | 307 | # export scalar data to JSON for external processing 308 | writer.export_scalars_to_json("./all_scalars.json") 309 | writer.close() 310 | --------------------------------------------------------------------------------
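Two pieces of train_attr_attention_embedding.py are worth restating compactly. First, random_pick draws one active attribute per image from the multi-hot vector and falls back to index 2 when no attribute is set. A tensor-based sketch of the same behaviour is shown below; it targets a newer PyTorch API than this repository uses, so treat it as illustrative rather than a replacement.

```python
import torch


def random_pick_compact(one_hot):
    """Sample one active attribute index per row of a multi-hot (batch, num_attrs)
    tensor. Rows with no active attribute fall back to index 2, as in random_pick."""
    labels = torch.zeros(one_hot.shape[0], dtype=torch.long)
    selected = torch.zeros_like(one_hot)
    for i in range(one_hot.shape[0]):
        active = torch.nonzero(one_hot[i] == 1.).view(-1)
        if active.numel() > 0:
            labels[i] = active[torch.randint(active.numel(), (1,))].item()
        else:
            labels[i] = 2
        selected[i, labels[i]] = 1
    return labels, selected
```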
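Second, the --resume branch keeps only the checkpoint entries whose keys exist in the current model before calling load_state_dict, which allows partially matching checkpoints to be reloaded. A stand-alone sketch of that pattern follows; it assumes the checkpoint layout {'state_dict': ...} saved by train_net, and the helper name load_partial_checkpoint is ours.

```python
import torch


def load_partial_checkpoint(model, checkpoint_path):
    """Load a checkpoint saved as {'state_dict': ...} and keep only the keys
    present in the current model, mirroring the --resume handling above."""
    state_dict = torch.load(checkpoint_path, map_location='cpu')['state_dict']
    merged = model.state_dict()
    merged.update({k: v for k, v in state_dict.items() if k in merged})
    model.load_state_dict(merged)
    return model
```

The same idea also covers the reverse case, where the checkpoint contains extra keys (for example from a larger backbone) that the current model does not define.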