├── .gitignore ├── README.md ├── __init__.py ├── lib ├── Makefile ├── __init__.py ├── datasets │ ├── VRD_loader.py │ ├── __init__.py │ ├── sVG_loader.py │ └── visual_genome_loader.py ├── fast_rcnn │ ├── __init__.py │ ├── bbox_transform.py │ ├── config.py │ ├── config.py.FN │ ├── config.py.hdn │ ├── config2.py │ └── nms_wrapper.py ├── layer_utils │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── csrc │ │ ├── ROIAlign.h │ │ ├── ROIPool.h │ │ ├── cpu │ │ │ ├── ROIAlign_cpu.cpp │ │ │ ├── nms_cpu.cpp │ │ │ └── vision.h │ │ ├── cuda │ │ │ ├── ROIAlign_cuda.cu │ │ │ ├── ROIPool_cuda.cu │ │ │ ├── nms.cu │ │ │ └── vision.h │ │ ├── nms.h │ │ └── vision.cpp │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ ├── proposal_top_layer.py │ ├── roi_layers │ │ ├── __init__.py │ │ ├── nms.py │ │ ├── roi_align.py │ │ └── roi_pool.py │ └── snippets.py ├── network.py ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── nms │ │ │ └── __init__.py │ ├── build.py │ ├── make.sh │ ├── nms_gpu.py │ ├── nms_kernel.cu │ ├── nms_retain_all.pyx │ └── src │ │ ├── nms_cuda.c │ │ ├── nms_cuda.h │ │ ├── nms_cuda_kernel.cu │ │ └── nms_cuda_kernel.h ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── rpn_msr │ ├── __init__.py │ ├── anchor_target_layer.py │ ├── generate.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ ├── proposal_target_layer_FN.py │ ├── proposal_target_layer_hdn_v0.py │ ├── proposal_target_layer_hdn_v1.py │ └── proposal_target_layer_hdn_v2.py ├── setup_cuda.py ├── setup_cython.py ├── utils │ ├── .gitignore │ ├── FN_utils.py │ ├── HDN_utils.py │ ├── __init__.py │ ├── bbox.pyx │ ├── blob.py │ ├── boxes_grid.py │ ├── general_utils.py │ ├── logger.py │ ├── metrics.py │ ├── nms.py │ ├── nms.pyx │ ├── proposal_target_layer_v0.py │ ├── proposal_target_layer_v1.py │ ├── proposal_target_layer_v2.py │ ├── proposal_target_layer_v3.py │ ├── timer.py │ └── voc_eval.py └── visualize_graph │ ├── __init__.py │ ├── vis_utils.py │ └── visualize.py ├── models ├── HDN_v2 │ ├── __init__.py │ ├── criteria.py │ ├── engines_v1.py │ ├── factorizable_network_v4.py │ ├── factorizable_network_v4s.py │ └── utils.py ├── RPN │ ├── RPN.py │ ├── RPN_region.py │ ├── __init__.py │ └── utils.py ├── __init__.py ├── modules │ ├── NMS.py │ ├── __init__.py │ ├── dataParallel.py │ ├── factor_updating_structure.py │ ├── factor_updating_structure_v3.py │ ├── factor_updating_structure_v3r.py │ ├── geometry_transform.py │ ├── phrase_inference_structure.py │ └── relation_module.py └── utils │ ├── __init__.py │ └── vgg16.py ├── options ├── RPN │ ├── RPN_FN.yaml │ ├── RPN_FN_VRD.yaml │ └── RPN_FN_svg.yaml ├── __init__.py ├── config_FN.py ├── data.yaml ├── data_VRD.yaml ├── data_sVG.yaml └── models │ ├── VG-DR-Net.yaml │ ├── VG-MSDN.yaml │ └── VRD.yaml ├── requirements.txt ├── scripts ├── collect_samples.py ├── preprocessing_data.py ├── setup_eval.sh ├── voc_converter.py └── voc_converter_vg.py ├── train.sh ├── train_FN.py ├── train_rpn.py ├── train_rpn_VRD.py ├── visualize_graph.py └── visualize_gt_graphs.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | demo.py 10 | *.o 11 | *.so 12 | 13 | # 
Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo *.pot # Django stuff: *.log 53 | local_settings.py 54 | 55 | # Flask stuff: 56 | instance/ 57 | .webassets-cache 58 | 59 | # Scrapy stuff: 60 | .scrapy 61 | 62 | # Sphinx documentation 63 | docs/_build/ 64 | 65 | # PyBuilder 66 | target/ 67 | 68 | # Jupyter Notebook 69 | .ipynb_checkpoints 70 | 71 | # pyenv 72 | .python-version 73 | 74 | # celery beat schedule file 75 | celerybeat-schedule 76 | 77 | # dotenv 78 | .env 79 | 80 | # virtualenv 81 | .venv/ 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | 91 | .idea 92 | extension-ffi 93 | demo_mot.py 94 | 95 | 96 | # defined by yikang 97 | model/ 98 | data/ 99 | data 100 | model 101 | output 102 | output/ 103 | log/ 104 | *.mat 105 | Debug_Code.ipynb 106 | eval/*.json 107 | *.DS_Store 108 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/__init__.py -------------------------------------------------------------------------------- /lib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup_cuda.py build develop 3 | python setup_cython.py build_ext --inplace 4 | 5 | clean: 6 | rm -rf nms/*.c nms/*.cpp nms/*.so 7 | rm -rf utils/*.c utils/*.cpp utils/*.so 8 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/lib/__init__.py -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # TODO: make this fold self-contained, only depends on utils package 9 | 10 | from .VRD_loader import VRD 11 | from .visual_genome_loader import visual_genome 12 | from .sVG_loader import sVG 13 | -------------------------------------------------------------------------------- /lib/datasets/sVG_loader.py: -------------------------------------------------------------------------------- 1 | from .VRD_loader import VRD 2 | import os.path as osp 3 | 4 | class sVG(VRD): 5 | def __init__(self, opts, image_set='train', batch_size=1, dataset_option=None, 
use_region=False): 6 | image_set = image_set + '_' + dataset_option 7 | super(sVG, self).__init__(opts, image_set, batch_size, dataset_option, use_region) 8 | self._data_path = osp.join(self.opts['dir'], 'images') 9 | -------------------------------------------------------------------------------- /lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from . import config 9 | from . import nms_wrapper 10 | # from nms_wrapper import nms -------------------------------------------------------------------------------- /lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | from sympy.physics.paulialgebra import delta 10 | from config import cfg 11 | 12 | np.seterr(all='warn') 13 | 14 | def bbox_transform(ex_rois, gt_rois): 15 | 16 | # print 'ex_rois', ex_rois 17 | # print 'gt_rois', gt_rois 18 | 19 | 20 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 21 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 22 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 23 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 24 | 25 | # print 'ex_widths', ex_widths 26 | # print 'ex_heights', ex_heights 27 | # print 'ex_ctr_x', ex_ctr_x 28 | # print 'ex_ctr_y', ex_ctr_y 29 | 30 | 31 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 32 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 33 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 34 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 35 | 36 | 37 | # print 'gt_widths', gt_widths 38 | # print 'gt_heights', gt_heights 39 | # print 'gt_ctr_x', gt_ctr_x 40 | # print 'gt_ctr_y', gt_ctr_y 41 | 42 | 43 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 44 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 45 | targets_dw = np.log(gt_widths / ex_widths) 46 | targets_dh = np.log(gt_heights / ex_heights) 47 | 48 | # print 'targets_dx', targets_dx.mean(), targets_dx.std() 49 | # print 'targets_dy', targets_dy.mean(), targets_dy.std() 50 | # print 'targets_dw', targets_dw.mean(), targets_dw.std() 51 | # print 'targets_dh', targets_dh.mean(), targets_dh.std() 52 | 53 | 54 | targets = np.vstack( 55 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 56 | 57 | 58 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 59 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 60 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 61 | 62 | # print 'targets_dx(normalized)', targets[:, 0].mean(), targets[:, 0].std() 63 | # print 'targets_dy(normalized)', targets[:, 1].mean(), targets[:, 1].std() 64 | # print 'targets_dw(normalized)', targets[:, 2].mean(), targets[:, 2].std() 65 | # print 'targets_dh(normalized)', targets[:, 3].mean(), targets[:, 3].std() 66 | 67 | return targets 68 | 69 | 70 | def bbox_transform_inv(boxes, deltas): 71 | return bbox_transform_inv_hdn(boxes, deltas) 72 | 73 | 74 | def bbox_transform_inv_hdn(boxes, deltas): 75 | if boxes.shape[0] == 0: 76 | return np.zeros((0,), 
dtype=deltas.dtype) 77 | 78 | boxes = boxes.astype(deltas.dtype, copy=False) 79 | 80 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 81 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 82 | ctr_x = boxes[:, 0] + 0.5 * widths 83 | ctr_y = boxes[:, 1] + 0.5 * heights 84 | 85 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 86 | deltas = deltas * np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS) + np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS) 87 | 88 | dx = deltas[:, 0::4] 89 | dy = deltas[:, 1::4] 90 | dw = deltas[:, 2::4] 91 | dh = deltas[:, 3::4] 92 | 93 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 94 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 95 | pred_w = np.exp(dw) * widths[:, np.newaxis] 96 | pred_h = np.exp(dh) * heights[:, np.newaxis] 97 | 98 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 99 | # x1 100 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 101 | # y1 102 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 103 | # x2 104 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1.0 105 | # y2 106 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1.0 107 | 108 | return pred_boxes 109 | 110 | 111 | def clip_boxes(boxes, im_shape): 112 | """ 113 | Clip boxes to image boundaries. 114 | """ 115 | if boxes.shape[0] == 0: 116 | return boxes 117 | 118 | # x1 >= 0 119 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 120 | # y1 >= 0 121 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 122 | # x2 < im_shape[1] 123 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 124 | # y2 < im_shape[0] 125 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 126 | return boxes 127 | -------------------------------------------------------------------------------- /lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | from lib.layer_utils.roi_layers import nms as nms_gpu 9 | from lib.nms.nms_retain_all import nms_retain_all 10 | import torch 11 | # from ..nms import cpu_nms 12 | # from ..nms import gpu_nms 13 | 14 | def nms(dets, thresh, retain_all=False): 15 | """Dispatch to either CPU or GPU NMS implementations.""" 16 | 17 | if dets.shape[0] == 0: 18 | return [] 19 | # ---numpy version--- 20 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 21 | # ---pytorch version--- 22 | if retain_all: 23 | return nms_retain_all(dets, thresh) 24 | else: 25 | dets = torch.Tensor(dets).cuda() 26 | return nms_gpu(dets[:, :4], dets[:, 4], thresh).cpu().numpy() -------------------------------------------------------------------------------- /lib/layer_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/lib/layer_utils/__init__.py -------------------------------------------------------------------------------- /lib/layer_utils/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE 
for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from model.config import cfg 13 | import numpy as np 14 | import numpy.random as npr 15 | from utils.bbox import bbox_overlaps 16 | from model.bbox_transform import bbox_transform 17 | import torch 18 | 19 | 20 | def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, 21 | all_anchors, num_anchors): 22 | """Same as the anchor target layer in original Fast/er RCNN """ 23 | A = num_anchors 24 | total_anchors = all_anchors.shape[0] 25 | K = total_anchors / num_anchors 26 | 27 | # allow boxes to sit over the edge by a small amount 28 | _allowed_border = 0 29 | 30 | # map of shape (..., H, W) 31 | height, width = rpn_cls_score.shape[1:3] 32 | 33 | # only keep anchors inside the image 34 | inds_inside = np.where( 35 | (all_anchors[:, 0] >= -_allowed_border) & 36 | (all_anchors[:, 1] >= -_allowed_border) & 37 | (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width 38 | (all_anchors[:, 3] < im_info[0] + _allowed_border) # height 39 | )[0] 40 | 41 | # keep only inside anchors 42 | anchors = all_anchors[inds_inside, :] 43 | 44 | # label: 1 is positive, 0 is negative, -1 is dont care 45 | labels = np.empty((len(inds_inside), ), dtype=np.float32) 46 | labels.fill(-1) 47 | 48 | # overlaps between the anchors and the gt boxes 49 | # overlaps (ex, gt) 50 | overlaps = bbox_overlaps( 51 | np.ascontiguousarray(anchors, dtype=np.float), 52 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 53 | argmax_overlaps = overlaps.argmax(axis=1) 54 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 55 | gt_argmax_overlaps = overlaps.argmax(axis=0) 56 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 57 | np.arange(overlaps.shape[1])] 58 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 59 | 60 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 61 | # assign bg labels first so that positive labels can clobber them 62 | # first set the negatives 63 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 64 | 65 | # fg label: for each gt, anchor with highest overlap 66 | labels[gt_argmax_overlaps] = 1 67 | 68 | # fg label: above threshold IOU 69 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 70 | 71 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 72 | # assign bg labels last so that negative labels can clobber positives 73 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 74 | 75 | # subsample positive labels if we have too many 76 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 77 | fg_inds = np.where(labels == 1)[0] 78 | if len(fg_inds) > num_fg: 79 | disable_inds = npr.choice( 80 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 81 | labels[disable_inds] = -1 82 | 83 | # subsample negative labels if we have too many 84 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 85 | bg_inds = np.where(labels == 0)[0] 86 | if len(bg_inds) > num_bg: 87 | disable_inds = npr.choice( 88 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 89 | labels[disable_inds] = -1 90 | 91 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 92 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 93 | 94 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 95 | # only the positive ones have regression targets 96 | 
bbox_inside_weights[labels == 1, :] = np.array( 97 | cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 98 | 99 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 100 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 101 | # uniform weighting of examples (given non-uniform sampling) 102 | num_examples = np.sum(labels >= 0) 103 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 104 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 105 | else: 106 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 107 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 108 | positive_weights = ( 109 | cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1)) 110 | negative_weights = ( 111 | (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0)) 112 | bbox_outside_weights[labels == 1, :] = positive_weights 113 | bbox_outside_weights[labels == 0, :] = negative_weights 114 | 115 | # map up to original set of anchors 116 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 117 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 118 | bbox_inside_weights = _unmap( 119 | bbox_inside_weights, total_anchors, inds_inside, fill=0) 120 | bbox_outside_weights = _unmap( 121 | bbox_outside_weights, total_anchors, inds_inside, fill=0) 122 | 123 | # labels 124 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 125 | labels = labels.reshape((1, 1, A * height, width)) 126 | rpn_labels = labels 127 | 128 | # bbox_targets 129 | bbox_targets = bbox_targets \ 130 | .reshape((1, height, width, A * 4)) 131 | 132 | rpn_bbox_targets = bbox_targets 133 | # bbox_inside_weights 134 | bbox_inside_weights = bbox_inside_weights \ 135 | .reshape((1, height, width, A * 4)) 136 | 137 | rpn_bbox_inside_weights = bbox_inside_weights 138 | 139 | # bbox_outside_weights 140 | bbox_outside_weights = bbox_outside_weights \ 141 | .reshape((1, height, width, A * 4)) 142 | 143 | rpn_bbox_outside_weights = bbox_outside_weights 144 | return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights 145 | 146 | 147 | def _unmap(data, count, inds, fill=0): 148 | """ Unmap a subset of item (data) back to the original set of items (of 149 | size count) """ 150 | if len(data.shape) == 1: 151 | ret = np.empty((count, ), dtype=np.float32) 152 | ret.fill(fill) 153 | ret[inds] = data 154 | else: 155 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 156 | ret.fill(fill) 157 | ret[inds, :] = data 158 | return ret 159 | 160 | 161 | def _compute_targets(ex_rois, gt_rois): 162 | """Compute bounding-box regression targets for an image.""" 163 | 164 | assert ex_rois.shape[0] == gt_rois.shape[0] 165 | assert ex_rois.shape[1] == 4 166 | assert gt_rois.shape[1] == 5 167 | 168 | return bbox_transform( 169 | torch.from_numpy(ex_rois), torch.from_numpy(gt_rois[:, :4])).numpy() -------------------------------------------------------------------------------- /lib/layer_utils/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
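// Dispatch summary: ROIAlign_forward routes to the CUDA implementation when
// the input tensor lives on the GPU and the extension was built WITH_CUDA,
// falling back to ROIAlign_forward_cpu otherwise; the backward pass is
// implemented only for CUDA and raises AT_ERROR when called with CPU tensors.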
2 | #pragma once
3 | 
4 | #include "cpu/vision.h"
5 | 
6 | #ifdef WITH_CUDA
7 | #include "cuda/vision.h"
8 | #endif
9 | 
10 | // Interface for Python
11 | at::Tensor ROIAlign_forward(const at::Tensor& input,
12 |                             const at::Tensor& rois,
13 |                             const float spatial_scale,
14 |                             const int pooled_height,
15 |                             const int pooled_width,
16 |                             const int sampling_ratio) {
17 |   if (input.type().is_cuda()) {
18 | #ifdef WITH_CUDA
19 |     return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
20 | #else
21 |     AT_ERROR("Not compiled with GPU support");
22 | #endif
23 |   }
24 |   return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
25 | }
26 | 
27 | at::Tensor ROIAlign_backward(const at::Tensor& grad,
28 |                              const at::Tensor& rois,
29 |                              const float spatial_scale,
30 |                              const int pooled_height,
31 |                              const int pooled_width,
32 |                              const int batch_size,
33 |                              const int channels,
34 |                              const int height,
35 |                              const int width,
36 |                              const int sampling_ratio) {
37 |   if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
47 | 
-------------------------------------------------------------------------------- /lib/layer_utils/csrc/ROIPool.h: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | 
4 | #include "cpu/vision.h"
5 | 
6 | #ifdef WITH_CUDA
7 | #include "cuda/vision.h"
8 | #endif
9 | 
10 | 
11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,
12 |                                                    const at::Tensor& rois,
13 |                                                    const float spatial_scale,
14 |                                                    const int pooled_height,
15 |                                                    const int pooled_width) {
16 |   if (input.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 |     return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
19 | #else
20 |     AT_ERROR("Not compiled with GPU support");
21 | #endif
22 |   }
23 |   AT_ERROR("Not implemented on the CPU");
24 | }
25 | 
26 | at::Tensor ROIPool_backward(const at::Tensor& grad,
27 |                             const at::Tensor& input,
28 |                             const at::Tensor& rois,
29 |                             const at::Tensor& argmax,
30 |                             const float spatial_scale,
31 |                             const int pooled_height,
32 |                             const int pooled_width,
33 |                             const int batch_size,
34 |                             const int channels,
35 |                             const int height,
36 |                             const int width) {
37 |   if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
47 | 
48 | 
49 | 
-------------------------------------------------------------------------------- /lib/layer_utils/csrc/cpu/nms_cpu.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
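// Greedy CPU NMS: boxes are visited in descending score order, and each
// surviving box suppresses every later box whose IoU with it reaches the
// threshold; the returned tensor holds the indices of the kept boxes.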
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /lib/layer_utils/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /lib/layer_utils/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include <ATen/ATen.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | 
5 | #include <THC/THC.h>
6 | #include <THC/THCDeviceUtils.cuh>
7 | 
8 | #include <vector>
9 | #include <iostream>
10 | 
11 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
12 | 
13 | __device__ inline float devIoU(float const * const a, float const * const b) {
14 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
15 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
16 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
17 |   float interS = width * height;
18 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
19 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
20 |   return interS / (Sa + Sb - interS);
21 | }
22 | 
23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
24 |                            const float *dev_boxes, unsigned long long *dev_mask) {
25 |   const int row_start = blockIdx.y;
26 |   const int col_start = blockIdx.x;
27 | 
28 |   // if (row_start > col_start) return;
29 | 
30 |   const int row_size =
31 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
32 |   const int col_size =
33 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
34 | 
35 |   __shared__ float block_boxes[threadsPerBlock * 5];
36 |   if (threadIdx.x < col_size) {
37 |     block_boxes[threadIdx.x * 5 + 0] =
38 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
39 |     block_boxes[threadIdx.x * 5 + 1] =
40 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
41 |     block_boxes[threadIdx.x * 5 + 2] =
42 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
43 |     block_boxes[threadIdx.x * 5 + 3] =
44 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
45 |     block_boxes[threadIdx.x * 5 + 4] =
46 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
47 |   }
48 |   __syncthreads();
49 | 
50 |   if (threadIdx.x < row_size) {
51 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
52 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
53 |     int i = 0;
54 |     unsigned long long t = 0;
55 |     int start = 0;
56 |     if (row_start == col_start) {
57 |       start = threadIdx.x + 1;
58 |     }
59 |     for (i = start; i < col_size; i++) {
60 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
61 |         t |= 1ULL << i;
62 |       }
63 |     }
64 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
65 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
66 |   }
67 | }
68 | 
69 | // boxes is a N x 5 tensor
70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
71 |   using scalar_t = float;
72 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
73 |   auto scores = boxes.select(1, 4);
74 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
75 |   auto boxes_sorted = boxes.index_select(0, order_t);
76 | 
77 |   int boxes_num = boxes.size(0);
78 | 
79 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
80 | 
81 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
82 | 
83 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
84 | 
85 |   unsigned long long* mask_dev = NULL;
86 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
87 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
88 | 
89 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
90 | 
91 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
92 |               THCCeilDiv(boxes_num, threadsPerBlock));
93 |   dim3 threads(threadsPerBlock);
94 |   nms_kernel<<<blocks, threads>>>(boxes_num,
95 |                                   nms_overlap_thresh,
96 |                                   boxes_dev,
97 |                                   mask_dev);
98 | 
99 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
100 |   THCudaCheck(cudaMemcpy(&mask_host[0],
101 |                         mask_dev,
102 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
103 |                         cudaMemcpyDeviceToHost));
104 | 
105 |   std::vector<unsigned long long> remv(col_blocks);
106 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
107 | 
108 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
109 |   int64_t* keep_out = keep.data<int64_t>();
110 | 
111 |   int num_to_keep = 0;
112 |   for (int i = 0; i < boxes_num; i++) {
113 |     int nblock = i / threadsPerBlock;
114 |     int inblock = i % threadsPerBlock;
115 | 
116 |     if (!(remv[nblock] & (1ULL << inblock))) {
117 |       keep_out[num_to_keep++] = i;
118 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
119 |       for (int j = nblock; j < col_blocks; j++) {
120 |         remv[j] |= p[j];
121 |       }
122 |     }
123 |   }
124 | 
125 |   THCudaFree(state, mask_dev);
126 |   // TODO improve this part
127 |   return std::get<0>(order_t.index({
128 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
129 |                          order_t.device(), keep.scalar_type())
130 |                      }).sort(0, false));
131 | }
-------------------------------------------------------------------------------- /lib/layer_utils/csrc/cuda/vision.h: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include <torch/extension.h>
4 | 
5 | 
6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input,
7 |                                  const at::Tensor& rois,
8 |                                  const float spatial_scale,
9 |                                  const int pooled_height,
10 |                                  const int pooled_width,
11 |                                  const int sampling_ratio);
12 | 
13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad,
14 |                                   const at::Tensor& rois,
15 |                                   const float spatial_scale,
16 |                                   const int pooled_height,
17 |                                   const int pooled_width,
18 |                                   const int batch_size,
19 |                                   const int channels,
20 |                                   const int height,
21 |                                   const int width,
22 |                                   const int sampling_ratio);
23 | 
24 | 
25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input,
26 |                                                         const at::Tensor& rois,
27 |                                                         const float spatial_scale,
28 |                                                         const int pooled_height,
29 |                                                         const int pooled_width);
30 | 
31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad,
32 |                                  const at::Tensor& input,
33 |                                  const at::Tensor& rois,
34 |                                  const at::Tensor& argmax,
35 |                                  const float spatial_scale,
36 |                                  const int pooled_height,
37 |                                  const int pooled_width,
38 |                                  const int batch_size,
39 |                                  const int channels,
40 |                                  const int height,
41 |                                  const int width);
42 | 
43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
44 | 
45 | 
46 | at::Tensor compute_flow_cuda(const at::Tensor& boxes,
47 |                              const int height,
48 |                              const int width);
49 | 
-------------------------------------------------------------------------------- /lib/layer_utils/csrc/nms.h: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
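// Unified NMS entry point exported to Python through vision.cpp: for CUDA
// tensors the scores are concatenated onto the boxes to form the N x 5
// layout that nms_cuda expects, while CPU tensors go through nms_cpu.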
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /lib/layer_utils/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("nms", &nms, "non-maximum suppression"); 9 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 10 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 11 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 12 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 13 | } 14 | -------------------------------------------------------------------------------- /lib/layer_utils/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 14 | # 15 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 16 | # >> anchors 17 | # 18 | # anchors = 19 | # 20 | # -83 -39 100 56 21 | # -175 -87 192 104 22 | # -359 -183 376 200 23 | # -55 -55 72 72 24 | # -119 -119 136 136 25 | # -247 -247 264 264 26 | # -35 -79 52 96 27 | # -79 -167 96 184 28 | # -167 -343 184 360 29 | 30 | # array([[ -83., -39., 100., 56.], 31 | # [-175., -87., 192., 104.], 32 | # [-359., -183., 376., 200.], 33 | # [ -55., -55., 72., 72.], 34 | # [-119., -119., 136., 136.], 35 | # [-247., -247., 264., 264.], 36 | # [ -35., -79., 52., 96.], 37 | # [ -79., -167., 96., 184.], 38 | # [-167., -343., 184., 360.]]) 39 | 40 | 41 | def generate_anchors(base_size=16, 42 | ratios=[0.5, 1, 2], 43 | scales=2**np.arange(3, 6)): 44 | """ 45 | Generate anchor (reference) windows by enumerating aspect ratios X 46 | scales wrt a reference (0, 0, 15, 15) window. 47 | """ 48 | 49 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 50 | ratio_anchors = _ratio_enum(base_anchor, ratios) 51 | anchors = np.vstack([ 52 | _scale_enum(ratio_anchors[i, :], scales) 53 | for i in range(ratio_anchors.shape[0]) 54 | ]) 55 | return anchors 56 | 57 | 58 | def _whctrs(anchor): 59 | """ 60 | Return width, height, x center, and y center for an anchor (window). 
61 | """ 62 | 63 | w = anchor[2] - anchor[0] + 1 64 | h = anchor[3] - anchor[1] + 1 65 | x_ctr = anchor[0] + 0.5 * (w - 1) 66 | y_ctr = anchor[1] + 0.5 * (h - 1) 67 | return w, h, x_ctr, y_ctr 68 | 69 | 70 | def _mkanchors(ws, hs, x_ctr, y_ctr): 71 | """ 72 | Given a vector of widths (ws) and heights (hs) around a center 73 | (x_ctr, y_ctr), output a set of anchors (windows). 74 | """ 75 | 76 | ws = ws[:, np.newaxis] 77 | hs = hs[:, np.newaxis] 78 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 79 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1))) 80 | return anchors 81 | 82 | 83 | def _ratio_enum(anchor, ratios): 84 | """ 85 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 86 | """ 87 | 88 | w, h, x_ctr, y_ctr = _whctrs(anchor) 89 | size = w * h 90 | size_ratios = size / ratios 91 | ws = np.round(np.sqrt(size_ratios)) 92 | hs = np.round(ws * ratios) 93 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 94 | return anchors 95 | 96 | 97 | def _scale_enum(anchor, scales): 98 | """ 99 | Enumerate a set of anchors for each scale wrt an anchor. 100 | """ 101 | 102 | w, h, x_ctr, y_ctr = _whctrs(anchor) 103 | ws = w * scales 104 | hs = h * scales 105 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 106 | return anchors 107 | 108 | 109 | if __name__ == '__main__': 110 | import time 111 | 112 | t = time.time() 113 | a = generate_anchors() 114 | print(time.time() - t) 115 | print(a) 116 | from IPython import embed 117 | 118 | embed() 119 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick and Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | from layer_utils.roi_layers import nms 14 | 15 | import torch 16 | 17 | 18 | def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, 19 | anchors, num_anchors): 20 | """A simplified version compared to fast/er RCNN 21 | For details please see the technical report 22 | """ 23 | if type(cfg_key) == bytes: 24 | cfg_key = cfg_key.decode('utf-8') 25 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 26 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 27 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 28 | 29 | # Get the scores and bounding boxes 30 | scores = rpn_cls_prob[:, :, :, num_anchors:] 31 | rpn_bbox_pred = rpn_bbox_pred.view((-1, 4)) 32 | scores = scores.contiguous().view(-1, 1) 33 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 34 | proposals = clip_boxes(proposals, im_info[:2]) 35 | 36 | # Pick the top region proposals 37 | scores, order = scores.view(-1).sort(descending=True) 38 | if pre_nms_topN > 0: 39 | order = order[:pre_nms_topN] 40 | scores = scores[:pre_nms_topN].view(-1, 1) 41 | proposals = proposals[order.data, :] 42 | 43 | # Non-maximal suppression 44 | keep = nms(proposals, scores.squeeze(1), nms_thresh) 45 | 46 | # Pick th top region proposals after NMS 47 | if post_nms_topN > 0: 48 | keep = keep[:post_nms_topN] 49 | proposals = proposals[keep, :] 50 | scores = scores[keep, ] 51 | 52 | # Only 
support single image as input 53 | batch_inds = proposals.new_zeros(proposals.size(0), 1) 54 | blob = torch.cat((batch_inds, proposals), 1) 55 | 56 | return blob, scores 57 | -------------------------------------------------------------------------------- /lib/layer_utils/proposal_top_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from model.config import cfg 12 | from model.bbox_transform import bbox_transform_inv, clip_boxes 13 | import numpy.random as npr 14 | 15 | import torch 16 | 17 | 18 | def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, 19 | anchors, num_anchors): 20 | """A layer that just selects the top region proposals 21 | without using non-maximal suppression, 22 | For details please see the technical report 23 | """ 24 | rpn_top_n = cfg.TEST.RPN_TOP_N 25 | 26 | scores = rpn_cls_prob[:, :, :, num_anchors:] 27 | 28 | rpn_bbox_pred = rpn_bbox_pred.view(-1, 4) 29 | scores = scores.contiguous().view(-1, 1) 30 | 31 | length = scores.size(0) 32 | if length < rpn_top_n: 33 | # Random selection, maybe unnecessary and loses good proposals 34 | # But such case rarely happens 35 | top_inds = torch.from_numpy( 36 | npr.choice(length, size=rpn_top_n, 37 | replace=True)).long().to(anchors.device) 38 | else: 39 | top_inds = scores.sort(0, descending=True)[1] 40 | top_inds = top_inds[:rpn_top_n] 41 | top_inds = top_inds.view(rpn_top_n) 42 | 43 | # Do the selection here 44 | anchors = anchors[top_inds, :].contiguous() 45 | rpn_bbox_pred = rpn_bbox_pred[top_inds, :].contiguous() 46 | scores = scores[top_inds].contiguous() 47 | 48 | # Convert anchors into proposals via bbox transformations 49 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 50 | 51 | # Clip predicted boxes to image 52 | proposals = clip_boxes(proposals, im_info[:2]) 53 | 54 | # Output rois blob 55 | # Our RPN implementation only supports a single input image, so all 56 | # batch inds are 0 57 | batch_inds = proposals.new_zeros(proposals.size(0), 1) 58 | blob = torch.cat([batch_inds, proposals], 1) 59 | return blob, scores 60 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from .nms import nms 4 | from .roi_align import ROIAlign 5 | from .roi_align import roi_align 6 | from .roi_pool import ROIPool 7 | from .roi_pool import roi_pool 8 | 9 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool"] 10 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
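# `nms` is re-exported from the compiled C++/CUDA extension `layer_utils._C`
# (declared in lib/layer_utils/csrc/nms.h and built by `make` in lib/).
# Usage, matching the call in lib/fast_rcnn/nms_wrapper.py:
#     keep = nms(boxes, scores, iou_threshold)
# where `boxes` is an (N, 4) tensor of (x1, y1, x2, y2) corners and `keep`
# holds the indices of the retained boxes, in descending score order.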
2 | # from ._utils import _C 3 | from layer_utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | 9 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from layer_utils import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward(input, roi, spatial_scale, 20 | output_size[0], output_size[1], 21 | sampling_ratio) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | 50 | class ROIAlign(nn.Module): 51 | def __init__(self, output_size, spatial_scale, sampling_ratio): 52 | super(ROIAlign, self).__init__() 53 | self.output_size = output_size 54 | self.spatial_scale = spatial_scale 55 | self.sampling_ratio = sampling_ratio 56 | 57 | def forward(self, input, rois): 58 | return roi_align(input, rois, self.output_size, self.spatial_scale, 59 | self.sampling_ratio) 60 | 61 | def __repr__(self): 62 | tmpstr = self.__class__.__name__ + "(" 63 | tmpstr += "output_size=" + str(self.output_size) 64 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 65 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 66 | tmpstr += ")" 67 | return tmpstr 68 | -------------------------------------------------------------------------------- /lib/layer_utils/roi_layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
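# Autograd wrapper around the compiled ROIPool kernels: the forward pass
# stores the argmax locations chosen by max pooling so that the backward
# pass can route each output gradient to the single input cell that
# produced it.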
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from layer_utils import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward(input, roi, spatial_scale, 18 | output_size[0], output_size[1]) 19 | ctx.save_for_backward(input, roi, argmax) 20 | return output 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, grad_output): 25 | input, rois, argmax = ctx.saved_tensors 26 | output_size = ctx.output_size 27 | spatial_scale = ctx.spatial_scale 28 | bs, ch, h, w = ctx.input_shape 29 | grad_input = _C.roi_pool_backward( 30 | grad_output, 31 | input, 32 | rois, 33 | argmax, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | ) 42 | return grad_input, None, None, None 43 | 44 | 45 | roi_pool = _ROIPool.apply 46 | 47 | 48 | class ROIPool(nn.Module): 49 | def __init__(self, output_size, spatial_scale): 50 | super(ROIPool, self).__init__() 51 | self.output_size = output_size 52 | self.spatial_scale = spatial_scale 53 | 54 | def forward(self, input, rois): 55 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 56 | 57 | def __repr__(self): 58 | tmpstr = self.__class__.__name__ + "(" 59 | tmpstr += "output_size=" + str(self.output_size) 60 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 61 | tmpstr += ")" 62 | return tmpstr 63 | -------------------------------------------------------------------------------- /lib/layer_utils/snippets.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from layer_utils.generate_anchors import generate_anchors 12 | 13 | 14 | def generate_anchors_pre(height, 15 | width, 16 | feat_stride, 17 | anchor_scales=(8, 16, 32), 18 | anchor_ratios=(0.5, 1, 2)): 19 | """ A wrapper function to generate anchors given different scales 20 | Also return the number of anchors in variable 'length' 21 | """ 22 | anchors = generate_anchors( 23 | ratios=np.array(anchor_ratios), scales=np.array(anchor_scales)) 24 | A = anchors.shape[0] 25 | shift_x = np.arange(0, width) * feat_stride 26 | shift_y = np.arange(0, height) * feat_stride 27 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 28 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), 29 | shift_y.ravel())).transpose() 30 | K = shifts.shape[0] 31 | # width changes faster, so here it is H, W, C 32 | anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose( 33 | (1, 0, 2)) 34 | anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 35 | length = np.int32(anchors.shape[0]) 36 | 37 | return anchors, length 38 | -------------------------------------------------------------------------------- /lib/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | 
*.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/lib/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | #this_file = os.path.dirname(__file__) 6 | 7 | sources = [] 8 | headers = [] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/nms_cuda.c'] 15 | headers += ['src/nms_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | print(extra_objects) 24 | 25 | ffi = create_extension( 26 | '_ext.nms', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling stnm kernels by nvcc..." 
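# Note: -arch=sm_52 below targets Maxwell GPUs; adjust the compute
# capability (e.g. -arch=sm_70 for Volta) to match your hardware.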
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
8 | 
9 | cd ../
10 | python build.py
-------------------------------------------------------------------------------- /lib/nms/nms_gpu.py: --------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from _ext import nms
4 | import pdb
5 | 
6 | def nms_gpu(dets, thresh):
7 |     dets = torch.Tensor(dets).cuda()
8 |     keep = dets.new(dets.size(0), 1).zero_().int()
9 |     num_out = dets.new(1).zero_().int()
10 |     nms.nms_cuda(keep, dets, num_out, thresh)
11 |     keep = keep[:num_out[0]]
12 |     return keep.view(-1)
-------------------------------------------------------------------------------- /lib/nms/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | 
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 | 
12 | #define CUDA_CHECK(condition) \
13 |   /* Code block avoids redefinition of cudaError_t error */ \
14 |   do { \
15 |     cudaError_t error = condition; \
16 |     if (error != cudaSuccess) { \
17 |       std::cout << cudaGetErrorString(error) << std::endl; \
18 |     } \
19 |   } while (0)
20 | 
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 | 
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 |   float interS = width * height;
29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 |   return interS / (Sa + Sb - interS);
32 | }
33 | 
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
36 |   const int row_start = blockIdx.y;
37 |   const int col_start = blockIdx.x;
38 | 
39 |   // if (row_start > col_start) return;
40 | 
41 |   const int row_size =
42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 |   const int col_size =
44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 | 
46 |   __shared__ float block_boxes[threadsPerBlock * 5];
47 |   if (threadIdx.x < col_size) {
48 |     block_boxes[threadIdx.x * 5 + 0] =
49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 |     block_boxes[threadIdx.x * 5 + 1] =
51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 |     block_boxes[threadIdx.x * 5 + 2] =
53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 |     block_boxes[threadIdx.x * 5 + 3] =
55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 |     block_boxes[threadIdx.x * 5 + 4] =
57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 |   }
59 |   __syncthreads();
60 | 
61 |   if (threadIdx.x < row_size) {
62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
64 |     int i = 0;
65 |     unsigned long long t = 0;
66 |     int start = 0;
67 |     if (row_start == col_start) {
68 |       start = threadIdx.x + 1;
69 |     }
70 |     for (i = start; i < col_size; i++) {
71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 |         t |= 1ULL << i;
73 |       }
74 |     }
75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 |   }
78 | }
79 | 
80 | void _set_device(int device_id) {
81 |   int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 |   if (current_device == device_id) {
84 |     return;
85 |   }
86 |   // The call to cudaSetDevice must come before any calls to Get, which
87 |   // may perform initialization using the GPU.
88 |   CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 | 
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
93 |   _set_device(device_id);
94 | 
95 |   float* boxes_dev = NULL;
96 |   unsigned long long* mask_dev = NULL;
97 | 
98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 
-------------------------------------------------------------------------------- /lib/nms/nms_retain_all.pyx: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import numpy as np
9 | cimport numpy as np
10 | 
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 |     return a if a >= b else b
13 | 
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 |     return a if a <= b else b
16 | 
17 | def nms_retain_all(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 |     cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 | 
24 |     cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
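    # Unlike standard NMS, every box gets an entry in `keep`: keep[i] = i
    # when box i survives, and keep[j] = i when box j is suppressed by the
    # higher-scoring box i, so each detection maps to its surviving
    # representative.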
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | cdef np.ndarray[np.int_t, ndim=1] keep = \ 32 | np.zeros((ndets), dtype=np.int) 33 | 34 | # nominal indices 35 | cdef int _i, _j 36 | # sorted indices 37 | cdef int i, j 38 | # temp variables for box i's (the box currently under consideration) 39 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 40 | # variables for computing overlap with box j (lower scoring box) 41 | cdef np.float32_t xx1, yy1, xx2, yy2 42 | cdef np.float32_t w, h 43 | cdef np.float32_t inter, ovr 44 | 45 | for _i in range(ndets): 46 | i = order[_i] 47 | if suppressed[i] == 1: 48 | continue 49 | keep[i] = i 50 | ix1 = x1[i] 51 | iy1 = y1[i] 52 | ix2 = x2[i] 53 | iy2 = y2[i] 54 | iarea = areas[i] 55 | for _j in range(_i + 1, ndets): 56 | j = order[_j] 57 | if suppressed[j] == 1: 58 | continue 59 | xx1 = max(ix1, x1[j]) 60 | yy1 = max(iy1, y1[j]) 61 | xx2 = min(ix2, x2[j]) 62 | yy2 = min(iy2, y2[j]) 63 | w = max(0.0, xx2 - xx1 + 1) 64 | h = max(0.0, yy2 - yy1 + 1) 65 | inter = w * h 66 | ovr = inter / (iarea + areas[j] - inter) 67 | if ovr >= thresh: 68 | keep[j] = i 69 | suppressed[j] = 1 70 | 71 | return keep -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdio.h> 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | boxes_host->size[0], 15 | boxes_host->size[1], 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include <stdio.h> 9 | #include <string.h> 10 | #include <vector> 11 | #include <iostream> 12 | #include "nms_cuda_kernel.h" 13 | 14 | #define CUDA_WARN(XXX) \ 15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \ 16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \ 17 | << std::endl; cudaDeviceSynchronize(); } while (0) 18 | 19 | #define CUDA_CHECK(condition) \ 20 | /* Code block avoids redefinition of cudaError_t error */ \ 21 | do { \ 22 | cudaError_t error = condition; \ 23 | if (error != cudaSuccess) { \ 24 | std::cout << cudaGetErrorString(error) <<
std::endl; \ 25 | } \ 26 | } while (0) 27 | 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 30 | 31 | __device__ inline float devIoU(float const * const a, float const * const b) { 32 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 35 | float interS = width * height; 36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 38 | return interS / (Sa + Sb - interS); 39 | } 40 | 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh, 42 | float *dev_boxes, unsigned long long *dev_mask) { 43 | const int row_start = blockIdx.y; 44 | const int col_start = blockIdx.x; 45 | 46 | // if (row_start > col_start) return; 47 | 48 | const int row_size = 49 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 50 | const int col_size = 51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 52 | 53 | __shared__ float block_boxes[threadsPerBlock * 5]; 54 | if (threadIdx.x < col_size) { 55 | block_boxes[threadIdx.x * 5 + 0] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 57 | block_boxes[threadIdx.x * 5 + 1] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 59 | block_boxes[threadIdx.x * 5 + 2] = 60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 61 | block_boxes[threadIdx.x * 5 + 3] = 62 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 63 | block_boxes[threadIdx.x * 5 + 4] = 64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 65 | } 66 | __syncthreads(); 67 | 68 | if (threadIdx.x < row_size) { 69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 70 | const float *cur_box = dev_boxes + cur_box_idx * 5; 71 | int i = 0; 72 | unsigned long long t = 0; 73 | int start = 0; 74 | if (row_start == col_start) { 75 | start = threadIdx.x + 1; 76 | } 77 | for (i = start; i < col_size; i++) { 78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 79 | t |= 1ULL << i; 80 | } 81 | } 82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 83 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 84 | } 85 | } 86 | 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 88 | int boxes_dim, float nms_overlap_thresh) { 89 | 90 | float* boxes_dev = NULL; 91 | unsigned long long* mask_dev = NULL; 92 | 93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 94 | 95 | CUDA_CHECK(cudaMalloc(&boxes_dev, 96 | boxes_num * boxes_dim * sizeof(float))); 97 | CUDA_CHECK(cudaMemcpy(boxes_dev, 98 | boxes_host, 99 | boxes_num * boxes_dim * sizeof(float), 100 | cudaMemcpyHostToDevice)); 101 | 102 | CUDA_CHECK(cudaMalloc(&mask_dev, 103 | boxes_num * col_blocks * sizeof(unsigned long long))); 104 | 105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 106 | DIVUP(boxes_num, threadsPerBlock)); 107 | dim3 threads(threadsPerBlock); 108 | 109 | // printf("i am at line %d\n", boxes_num); 110 | // printf("i am at line %d\n", boxes_dim); 111 | 112 | nms_kernel<<<blocks, threads>>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector<unsigned long long>
remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | // we need to allocate memory for keep_out on the CPU, 127 | // otherwise the following code cannot run 128 | 129 | int* keep_out_cpu = new int[boxes_num]; 130 | 131 | int num_to_keep = 0; 132 | for (int i = 0; i < boxes_num; i++) { 133 | int nblock = i / threadsPerBlock; 134 | int inblock = i % threadsPerBlock; 135 | 136 | if (!(remv[nblock] & (1ULL << inblock))) { 137 | // original: keep_out[num_to_keep++] = i; 138 | keep_out_cpu[num_to_keep++] = i; 139 | unsigned long long *p = &mask_host[0] + i * col_blocks; 140 | for (int j = nblock; j < col_blocks; j++) { 141 | remv[j] |= p[j]; 142 | } 143 | } 144 | } 145 | 146 | // copy keep_out_cpu to keep_out on gpu 147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice)); 148 | 149 | // *num_out = num_to_keep; 150 | 151 | // original: *num_out = num_to_keep; 152 | // copy num_to_keep to num_out on gpu 153 | 154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice)); 155 | 156 | // release cuda memory 157 | CUDA_CHECK(cudaFree(boxes_dev)); 158 | CUDA_CHECK(cudaFree(mask_dev)); 159 | // release cpu memory 160 | delete []keep_out_cpu; 161 | } 162 | -------------------------------------------------------------------------------- /lib/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
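# A minimal pure-Python sketch of the counts computation described above
# (illustrative only, not part of the pycocotools API; the LEB128-style
# string compression is omitted):
#
#   def rle_counts(m):  # m: flat list of 0/1 pixel values
#       counts, prev, run = [], 0, 0
#       for v in m:
#           if v == prev:
#               run += 1
#           else:
#               counts.append(run)
#               prev, run = v, 1
#       counts.append(run)
#       return counts
#
#   rle_counts([0, 0, 1, 1, 1, 0, 1]) == [2, 3, 1, 1]
#   rle_counts([1, 1, 1, 1, 1, 1, 0]) == [0, 6, 1]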
38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criterion. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criterion above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks.
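// (Here M receives the merge of the n RLEs in R: their union when intersect is
// false, their intersection when intersect is true.)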
31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/rpn_msr/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/rpn_msr/generate.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import cv2 10 | import matplotlib.pyplot as plt 11 | 12 | from ..utils.blob import im_list_to_blob 13 | from ..utils.timer import Timer 14 | 15 | # TODO: make fast_rcnn irrelevant 16 | # >>>> obsolete, because it depends on sth outside of this project 17 | from ..fast_rcnn.config import cfg 18 | # <<<< obsolete 19 | 20 | 21 | def _vis_proposals(im, dets, thresh=0.5): 22 | """Draw detected bounding boxes.""" 23 | inds = np.where(dets[:, -1] >= thresh)[0] 24 | if len(inds) == 0: 25 | return 26 | 27 | class_name = 'obj' 28 | im = im[:, :, (2, 1, 0)] 29 | fig, ax = plt.subplots(figsize=(12, 12)) 30 | ax.imshow(im, aspect='equal') 31 | for i in inds: 32 | bbox = dets[i, :4] 33 | score = dets[i, -1] 34 | 35 | ax.add_patch( 36 | plt.Rectangle((bbox[0], bbox[1]), 37 | bbox[2] - bbox[0], 38 | bbox[3] - bbox[1], fill=False, 39 | edgecolor='red', linewidth=3.5) 40 | ) 41 | ax.text(bbox[0], bbox[1] - 2, 42 | '{:s} {:.3f}'.format(class_name, score), 43 | bbox=dict(facecolor='blue', alpha=0.5), 44 | fontsize=14, color='white') 45 | 46 | ax.set_title(('{} detections with ' 47 | 'p({} | box) >= {:.1f}').format(class_name, class_name, 48 | thresh), 49 | fontsize=14) 50 | plt.axis('off') 51 | plt.tight_layout() 52 | plt.draw() 53 | 54 | def _get_image_blob(im): 55 | """Converts an image into a network input. 
56 | 57 | Arguments: 58 | im (ndarray): a color image in BGR order 59 | 60 | Returns: 61 | blob (ndarray): a data blob holding an image pyramid 62 | im_scale_factors (list): list of image scales (relative to im) used 63 | in the image pyramid 64 | """ 65 | im_orig = im.astype(np.float32, copy=True) 66 | im_orig -= cfg.PIXEL_MEANS 67 | 68 | processed_ims = [] 69 | 70 | assert len(cfg.TEST.SCALES_BASE) == 1 71 | im_scale = cfg.TRAIN.SCALES_BASE[0] 72 | 73 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 74 | interpolation=cv2.INTER_LINEAR) 75 | im_info = np.hstack((im.shape[:2], im_scale))[np.newaxis, :] 76 | processed_ims.append(im) 77 | 78 | # Create a blob to hold the input images 79 | blob = im_list_to_blob(processed_ims) 80 | 81 | return blob, im_info 82 | 83 | def im_proposals(net, im): 84 | """Generate RPN proposals on a single image.""" 85 | blobs = {} 86 | blobs['data'], blobs['im_info'] = _get_image_blob(im) 87 | net.blobs['data'].reshape(*(blobs['data'].shape)) 88 | net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) 89 | blobs_out = net.forward( 90 | data=blobs['data'].astype(np.float32, copy=False), 91 | im_info=blobs['im_info'].astype(np.float32, copy=False)) 92 | 93 | scale = blobs['im_info'][0, 2] 94 | boxes = blobs_out['rois'][:, 1:].copy() / scale 95 | scores = blobs_out['scores'].copy() 96 | return boxes, scores 97 | 98 | def imdb_proposals(net, imdb): 99 | """Generate RPN proposals on all images in an imdb.""" 100 | 101 | _t = Timer() 102 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 103 | for i in xrange(imdb.num_images): 104 | im = cv2.imread(imdb.image_path_at(i)) 105 | _t.tic() 106 | imdb_boxes[i], scores = im_proposals(net, im) 107 | _t.toc() 108 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 109 | .format(i + 1, imdb.num_images, _t.average_time) 110 | if 0: 111 | dets = np.hstack((imdb_boxes[i], scores)) 112 | # from IPython import embed; embed() 113 | _vis_proposals(im, dets[:3, :], thresh=0.9) 114 | plt.show() 115 | 116 | return imdb_boxes 117 | 118 | def imdb_proposals_det(net, imdb): 119 | """Generate RPN proposals on all images in an imdb.""" 120 | 121 | _t = Timer() 122 | imdb_boxes = [[] for _ in xrange(imdb.num_images)] 123 | for i in xrange(imdb.num_images): 124 | im = cv2.imread(imdb.image_path_at(i)) 125 | _t.tic() 126 | boxes, scores = im_proposals(net, im) 127 | _t.toc() 128 | print 'im_proposals: {:d}/{:d} {:.3f}s' \ 129 | .format(i + 1, imdb.num_images, _t.average_time) 130 | dets = np.hstack((boxes, scores)) 131 | imdb_boxes[i] = dets 132 | 133 | if 0: 134 | # from IPython import embed; embed() 135 | _vis_proposals(im, dets[:3, :], thresh=0.9) 136 | plt.show() 137 | 138 | return imdb_boxes 139 | -------------------------------------------------------------------------------- /lib/rpn_msr/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 11 | # 12 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 13 | # >> anchors 14 | # 15 | # anchors = 16 | # 17 | # -83 -39 100 56 18 | # -175 -87 192 104 19 | # -359 -183 376 200 20 | # -55 -55 72 72 21 | # -119 -119 
136 136 22 | # -247 -247 264 264 23 | # -35 -79 52 96 24 | # -79 -167 96 184 25 | # -167 -343 184 360 26 | 27 | #array([[ -83., -39., 100., 56.], 28 | # [-175., -87., 192., 104.], 29 | # [-359., -183., 376., 200.], 30 | # [ -55., -55., 72., 72.], 31 | # [-119., -119., 136., 136.], 32 | # [-247., -247., 264., 264.], 33 | # [ -35., -79., 52., 96.], 34 | # [ -79., -167., 96., 184.], 35 | # [-167., -343., 184., 360.]]) 36 | 37 | def generate_anchors_bak(ratios=[0.5, 1, 2], 38 | scales=2**np.arange(3, 6), base_size=16): 39 | """ 40 | Generate anchor (reference) windows by enumerating aspect ratios X 41 | scales wrt a reference (0, 0, 15, 15) window. 42 | """ 43 | ratios = np.array(ratios) 44 | scales = np.array(scales) 45 | 46 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 47 | ratio_anchors = _ratio_enum(base_anchor, ratios) 48 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 49 | for i in xrange(ratio_anchors.shape[0])]) 50 | return anchors 51 | 52 | 53 | 54 | def generate_anchors(ratios, scales, base_size=16): 55 | # print 'ratios', ratios 56 | # print 'scales', scales 57 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 58 | w, h, x_ctr, y_ctr = _whctrs(base_anchor) 59 | size = w * h 60 | size_ratios = size / ratios 61 | ws = np.round(np.sqrt(size_ratios)) 62 | hs = np.round(ws * ratios) 63 | ws = ws * np.array(scales) 64 | hs = hs * np.array(scales) 65 | # print 'ws', ws 66 | # print 'hs', hs 67 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 68 | # print 'anchors', anchors 69 | return anchors 70 | 71 | 72 | 73 | def _whctrs(anchor): 74 | """ 75 | Return width, height, x center, and y center for an anchor (window). 76 | """ 77 | 78 | w = anchor[2] - anchor[0] + 1 79 | h = anchor[3] - anchor[1] + 1 80 | x_ctr = anchor[0] + 0.5 * (w - 1) 81 | y_ctr = anchor[1] + 0.5 * (h - 1) 82 | return w, h, x_ctr, y_ctr 83 | 84 | def _mkanchors(ws, hs, x_ctr, y_ctr): 85 | """ 86 | Given a vector of widths (ws) and heights (hs) around a center 87 | (x_ctr, y_ctr), output a set of anchors (windows). 88 | """ 89 | # print 'ws', ws 90 | ws = ws[:, np.newaxis] 91 | hs = hs[:, np.newaxis] 92 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 93 | y_ctr - 0.5 * (hs - 1), 94 | x_ctr + 0.5 * (ws - 1), 95 | y_ctr + 0.5 * (hs - 1))) 96 | return anchors 97 | 98 | def _ratio_enum(anchor, ratios): 99 | """ 100 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 101 | """ 102 | 103 | w, h, x_ctr, y_ctr = _whctrs(anchor) 104 | size = w * h 105 | size_ratios = size / ratios 106 | ws = np.round(np.sqrt(size_ratios)) 107 | hs = np.round(ws * ratios) 108 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 109 | return anchors 110 | 111 | def _scale_enum(anchor, scales): 112 | """ 113 | Enumerate a set of anchors for each scale wrt an anchor.
114 | """ 115 | 116 | w, h, x_ctr, y_ctr = _whctrs(anchor) 117 | ws = w * scales 118 | hs = h * scales 119 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 120 | return anchors 121 | 122 | if __name__ == '__main__': 123 | import time 124 | t = time.time() 125 | a = generate_anchors() 126 | print time.time() - t 127 | print a 128 | from IPython import embed; embed() 129 | -------------------------------------------------------------------------------- /lib/rpn_msr/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | from lib.fast_rcnn.nms_wrapper import nms 10 | 11 | from lib.fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 12 | from generate_anchors import generate_anchors 13 | 14 | import pdb 15 | 16 | 17 | DEBUG = False 18 | """ 19 | Outputs object detection proposals by applying estimated bounding-box 20 | transformations to a set of regular boxes (called "anchors"). 21 | """ 22 | 23 | 24 | def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_infos, 25 | _feat_stride, opts, anchor_scales, anchor_ratios, 26 | mappings): 27 | # Algorithm: 28 | # 29 | # for each (H, W) location i 30 | # generate A anchor boxes centered on cell i 31 | # apply predicted bbox deltas at cell i to each of the A anchors 32 | # clip predicted boxes to image 33 | # remove predicted boxes with either height or width < threshold 34 | # sort all (proposal, score) pairs by score from highest to lowest 35 | # take top pre_nms_topN proposals before NMS 36 | # apply NMS with threshold 0.7 to remaining proposals 37 | # take after_nms_topN proposals after NMS 38 | # return the top proposals (-> RoIs top, scores top) 39 | # layer_params = yaml.load(self.param_str_) 40 | batch_size = rpn_cls_prob_reshape.shape[0] 41 | _anchors = generate_anchors(scales=anchor_scales, ratios=anchor_ratios) 42 | _num_anchors = _anchors.shape[0] 43 | pre_nms_topN = opts['num_box_pre_NMS'] 44 | post_nms_topN = opts['num_box_post_NMS'] 45 | nms_thres = opts['nms_thres'] 46 | min_size = opts['min_size'] 47 | 48 | blob = [] 49 | 50 | for i in range(batch_size): 51 | im_info = im_infos[i] 52 | # the first set of _num_anchors channels are bg probs 53 | # the second set are the fg probs, which we want 54 | height = mappings[int(im_info[0])] 55 | width = mappings[int(im_info[1])] 56 | scores = rpn_cls_prob_reshape[i, _num_anchors:, :height, :width] 57 | bbox_deltas = rpn_bbox_pred[i, :, :height, :width] 58 | 59 | if DEBUG: 60 | print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 61 | print 'scale: {}'.format(im_info[2]) 62 | if DEBUG: 63 | print 'score map size: {}'.format(scores.shape) 64 | 65 | # Enumerate all shifts 66 | shift_x = np.arange(0, width) * _feat_stride 67 | shift_y = np.arange(0, height) * _feat_stride 68 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 69 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 70 | shift_x.ravel(), shift_y.ravel())).transpose() 71 | 72 | # Enumerate all shifted anchors: 73 | # 74 | # add A anchors (1, A, 4) to 75 | # cell K shifts (K, 1, 4) to get 76 | # shift anchors (K, A, 4) 77 | # reshape to (K*A, 4) shifted anchors 78 | A = _num_anchors 79 | K = shifts.shape[0] 80 | anchors = _anchors.reshape((1, A, 4)) + \ 81 | 
shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 82 | anchors = anchors.reshape((K * A, 4)) 83 | 84 | # Transpose and reshape predicted bbox transformations to get them 85 | # into the same order as the anchors: 86 | # 87 | # bbox deltas will be (1, 4 * A, H, W) format 88 | # transpose to (1, H, W, 4 * A) 89 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 90 | # in slowest to fastest order 91 | bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4)) 92 | 93 | # Same story for the scores: 94 | # 95 | # scores are (1, A, H, W) format 96 | # transpose to (1, H, W, A) 97 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 98 | scores = scores.transpose((1, 2, 0)).reshape((-1, 1)) 99 | 100 | # Convert anchors into proposals via bbox transformations 101 | proposals = bbox_transform_inv(anchors, bbox_deltas) 102 | 103 | # 2. clip predicted boxes to image 104 | if opts['dropout_box_runoff_image']: 105 | _allowed_border = 16 106 | inds_inside = np.where( 107 | (proposals[:, 0] >= -_allowed_border) & 108 | (proposals[:, 1] >= -_allowed_border) & 109 | (proposals[:, 2] < im_info[1] + _allowed_border) & # width 110 | (proposals[:, 3] < im_info[0] + _allowed_border) # height 111 | )[0] 112 | proposals = proposals[inds_inside, :] 113 | proposals = clip_boxes(proposals, im_info[:2]) 114 | 115 | # 3. remove predicted boxes with either height or width < threshold 116 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 117 | keep = _filter_boxes(proposals, min_size * im_info[2]) 118 | proposals = proposals[keep, :] 119 | scores = scores[keep] 120 | 121 | # 4. sort all (proposal, score) pairs by score from highest to lowest 122 | # 5. take top pre_nms_topN (e.g. 6000) 123 | order = scores.ravel().argsort()[::-1] 124 | if pre_nms_topN > 0: 125 | order = order[:pre_nms_topN] 126 | proposals = proposals[order, :] 127 | scores = scores[order] 128 | 129 | # 6. apply nms (e.g. threshold = 0.7) 130 | # 7. take after_nms_topN (e.g. 300) 131 | # 8. return the top proposals (-> RoIs top) 132 | # print 'proposals', proposals 133 | # print 'scores', scores 134 | keep = nms(np.hstack((proposals, scores)).astype(np.float32), nms_thres) 135 | if post_nms_topN > 0: 136 | keep = keep[:post_nms_topN] 137 | proposals = proposals[keep, :] 138 | scores = scores[keep] 139 | # Output rois blob 140 | # Our RPN implementation only supports a single input image, so all 141 | # batch inds are 0 142 | batch_inds = np.ones((proposals.shape[0], 1), dtype=np.float32) * i 143 | blob.append(np.hstack((batch_inds, proposals.astype(np.float32, copy=False), scores.astype(np.float32, copy=False)))) 144 | 145 | return np.concatenate(blob, axis=0) 146 | 147 | 148 | def _filter_boxes(boxes, min_size): 149 | """Remove all boxes with any side smaller than min_size.""" 150 | ws = boxes[:, 2] - boxes[:, 0] + 1 151 | hs = boxes[:, 3] - boxes[:, 1] + 1 152 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 153 | return keep 154 | -------------------------------------------------------------------------------- /lib/setup_cuda.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
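# Build script for the layer_utils._C extension: get_extensions() below picks up
# the C++ sources under layer_utils/csrc and, only when torch.cuda.is_available()
# and CUDA_HOME is set, also compiles the cuda/ sources with WITH_CUDA defined;
# otherwise it falls back to a CPU-only CppExtension.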
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "layer_utils", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "layer_utils._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="faster_rcnn", 61 | version="0.1", 62 | description="object detection in pytorch", 63 | packages=find_packages(exclude=("configs", "tests",)), 64 | # install_requires=requirements, 65 | ext_modules=get_extensions(), 66 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 67 | ) -------------------------------------------------------------------------------- /lib/setup_cython.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | from os.path import join as pjoin 10 | import numpy as np 11 | from distutils.core import setup 12 | from distutils.extension import Extension 13 | from Cython.Distutils import build_ext 14 | 15 | 16 | def find_in_path(name, path): 17 | "Find a file in a search path" 18 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 19 | for dir in path.split(os.pathsep): 20 | binpath = pjoin(dir, name) 21 | if os.path.exists(binpath): 22 | return os.path.abspath(binpath) 23 | return None 24 | 25 | 26 | # def locate_cuda(): 27 | # """Locate the CUDA environment on the system 28 | # 29 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 30 | # and values giving the absolute path to each directory. 31 | # 32 | # Starts by looking for the CUDAHOME env variable. If not found, everything 33 | # is based on finding 'nvcc' in the PATH. 
34 | # """ 35 | # 36 | # # first check if the CUDAHOME env variable is in use 37 | # if 'CUDAHOME' in os.environ: 38 | # home = os.environ['CUDAHOME'] 39 | # nvcc = pjoin(home, 'bin', 'nvcc') 40 | # else: 41 | # # otherwise, search the PATH for NVCC 42 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 43 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 44 | # if nvcc is None: 45 | # raise EnvironmentError('The nvcc binary could not be ' 46 | # 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 47 | # home = os.path.dirname(os.path.dirname(nvcc)) 48 | # 49 | # cudaconfig = {'home': home, 'nvcc': nvcc, 50 | # 'include': pjoin(home, 'include'), 51 | # 'lib64': pjoin(home, 'lib64')} 52 | # for k, v in cudaconfig.iteritems(): 53 | # if not os.path.exists(v): 54 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 55 | # 56 | # return cudaconfig 57 | 58 | 59 | # CUDA = locate_cuda() 60 | 61 | # Obtain the numpy include directory. This logic works across numpy versions. 62 | try: 63 | numpy_include = np.get_include() 64 | except AttributeError: 65 | numpy_include = np.get_numpy_include() 66 | 67 | 68 | def customize_compiler_for_nvcc(self): 69 | """inject deep into distutils to customize how the dispatch 70 | to gcc/nvcc works. 71 | If you subclass UnixCCompiler, it's not trivial to get your subclass 72 | injected in, and still have the right customizations (i.e. 73 | distutils.sysconfig.customize_compiler) run on it. So instead of going 74 | the OO route, I have this. Note, it's kindof like a wierd functional 75 | subclassing going on.""" 76 | 77 | # tell the compiler it can processes .cu 78 | self.src_extensions.append('.cu') 79 | 80 | # save references to the default compiler_so and _comple methods 81 | default_compiler_so = self.compiler_so 82 | super = self._compile 83 | 84 | # now redefine the _compile method. This gets executed for each 85 | # object but distutils doesn't have the ability to change compilers 86 | # based on source extension: we add it. 
87 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 88 | print extra_postargs 89 | if os.path.splitext(src)[1] == '.cu': 90 | # use the cuda for .cu files 91 | self.set_executable('compiler_so', CUDA['nvcc']) 92 | # use only a subset of the extra_postargs, which are 1-1 translated 93 | # from the extra_compile_args in the Extension class 94 | postargs = extra_postargs['nvcc'] 95 | else: 96 | postargs = extra_postargs['gcc'] 97 | 98 | super(obj, src, ext, cc_args, postargs, pp_opts) 99 | # reset the default compiler_so, which we might have changed for cuda 100 | self.compiler_so = default_compiler_so 101 | 102 | # inject our redefined _compile method into the class 103 | self._compile = _compile 104 | 105 | 106 | # run the customize_compiler 107 | class custom_build_ext(build_ext): 108 | def build_extensions(self): 109 | customize_compiler_for_nvcc(self.compiler) 110 | build_ext.build_extensions(self) 111 | 112 | 113 | ext_modules = [ 114 | Extension( 115 | "utils.cython_bbox", 116 | ["utils/bbox.pyx"], 117 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 118 | include_dirs=[numpy_include] 119 | ), 120 | Extension( 121 | "utils.cython_nms", 122 | ["utils/nms.pyx"], 123 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 124 | include_dirs=[numpy_include] 125 | ), 126 | Extension( 127 | "nms.nms_retain_all", 128 | ["nms/nms_retain_all.pyx"], 129 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 130 | include_dirs=[numpy_include] 131 | ), 132 | Extension( 133 | 'pycocotools._mask', 134 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 135 | include_dirs=[numpy_include, 'pycocotools'], 136 | extra_compile_args={ 137 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 138 | ), 139 | ] 140 | 141 | setup( 142 | name='faster_rcnn', 143 | ext_modules=ext_modules, 144 | # inject our custom trigger 145 | cmdclass={'build_ext': custom_build_ext}, 146 | ) -------------------------------------------------------------------------------- /lib/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/utils/HDN_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | import pdb 6 | from .cython_bbox import bbox_overlaps, bbox_intersections 7 | 8 | 9 | def get_model_name(arguments): 10 | 11 | 12 | if arguments.nesterov: 13 | arguments.model_name += '_nesterov' 14 | 15 | if arguments.MPS_iter < 0: 16 | print 'Using random MPS iterations to training' 17 | arguments.model_name += '_rand_iters' 18 | else: 19 | arguments.model_name += '_{}_iters'.format(arguments.MPS_iter) 20 | 21 | 22 | if arguments.use_kernel_function: 23 | arguments.model_name += '_with_kernel' 24 | if arguments.load_RPN or arguments.resume_training: 25 | arguments.model_name += '_alt' 26 | else: 27 | arguments.model_name += '_end2end' 28 | if arguments.dropout: 29 | arguments.model_name += '_dropout' 30 | arguments.model_name += '_{}'.format(arguments.dataset_option) 31 | if arguments.disable_language_model: 32 | arguments.model_name += '_no_caption' 33 | else: 34 | if arguments.rnn_type == 'LSTM_im': 35 | arguments.model_name += '_H_LSTM' 36 | elif arguments.rnn_type == 'LSTM_normal': 37 | arguments.model_name += '_I_LSTM' 38 | elif arguments.rnn_type == 
'LSTM_baseline': 39 | arguments.model_name += '_B_LSTM' 40 | else: 41 | raise Exception('Error in RNN type') 42 | if arguments.caption_use_bias: 43 | arguments.model_name += '_with_bias' 44 | else: 45 | arguments.model_name += '_no_bias' 46 | if arguments.caption_use_dropout > 0: 47 | arguments.model_name += '_with_dropout_{}'.format(arguments.caption_use_dropout).replace('.', '_') 48 | else: 49 | arguments.model_name += '_no_dropout' 50 | arguments.model_name += '_nembed_{}'.format(arguments.nembedding) 51 | arguments.model_name += '_nhidden_{}'.format(arguments.nhidden_caption) 52 | 53 | if arguments.region_bbox_reg: 54 | arguments.model_name += '_with_region_regression' 55 | 56 | if arguments.resume_training: 57 | arguments.model_name += '_resume' 58 | 59 | if arguments.finetune_language_model: 60 | arguments.model_name += '_finetune' 61 | if arguments.optimizer == 0: 62 | arguments.model_name += '_SGD' 63 | arguments.solver = 'SGD' 64 | elif arguments.optimizer == 1: 65 | arguments.model_name += '_Adam' 66 | arguments.solver = 'Adam' 67 | elif arguments.optimizer == 2: 68 | arguments.model_name += '_Adagrad' 69 | arguments.solver = 'Adagrad' 70 | else: 71 | raise Exception('Unrecognized optimization algorithm specified!') 72 | 73 | return arguments 74 | 75 | 76 | def group_features(net_): 77 | vgg_features_fix = list(net_.rpn.features.parameters())[:8] 78 | vgg_features_var = list(net_.rpn.features.parameters())[8:] 79 | vgg_feature_len = len(list(net_.rpn.features.parameters())) 80 | rpn_feature_len = len(list(net_.rpn.parameters())) - vgg_feature_len 81 | rpn_features = list(net_.rpn.parameters())[vgg_feature_len:] 82 | language_features = list(net_.caption_prediction.parameters()) 83 | language_feature_len = len(language_features) 84 | hdn_features = list(net_.parameters())[(rpn_feature_len + vgg_feature_len):(-1 * language_feature_len)] 85 | print 'vgg feature length:', vgg_feature_len 86 | print 'rpn feature length:', rpn_feature_len 87 | print 'HDN feature length:', len(hdn_features) 88 | print 'language_feature_len:', language_feature_len 89 | return vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features 90 | 91 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . import cython_nms 8 | from . 
import cython_bbox 9 | import blob 10 | import nms 11 | import timer -------------------------------------------------------------------------------- /lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | 58 | def bbox_intersections( 59 | np.ndarray[DTYPE_t, ndim=2] boxes, 60 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 61 | """ 62 | For each query box compute the intersection ratio covered by boxes 63 | ---------- 64 | Parameters 65 | ---------- 66 | boxes: (N, 4) ndarray of float 67 | query_boxes: (K, 4) ndarray of float 68 | Returns 69 | ------- 70 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 71 | """ 72 | cdef unsigned int N = boxes.shape[0] 73 | cdef unsigned int K = query_boxes.shape[0] 74 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 75 | cdef DTYPE_t iw, ih, box_area 76 | cdef DTYPE_t ua 77 | cdef unsigned int k, n 78 | for k in range(K): 79 | box_area = ( 80 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 81 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 82 | ) 83 | for n in range(N): 84 | iw = ( 85 | min(boxes[n, 2], query_boxes[k, 2]) - 86 | max(boxes[n, 0], query_boxes[k, 0]) + 1 87 | ) 88 | if iw > 0: 89 | ih = ( 90 | min(boxes[n, 3], query_boxes[k, 3]) - 91 | max(boxes[n, 1], query_boxes[k, 1]) + 1 92 | ) 93 | if ih > 0: 94 | intersec[n, k] = iw * ih / box_area 95 | return intersec -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | 
import cv2 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in xrange(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | 26 | return blob 27 | 28 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 29 | """Mean subtract and scale an image for use in a blob.""" 30 | im = im.astype(np.float32, copy=False) 31 | im -= pixel_means 32 | im_shape = im.shape 33 | im_size_min = np.min(im_shape[0:2]) 34 | im_size_max = np.max(im_shape[0:2]) 35 | im_scale = float(target_size) / float(im_size_min) 36 | # Prevent the biggest axis from being more than MAX_SIZE 37 | if np.round(im_scale * im_size_max) > max_size: 38 | im_scale = float(max_size) / float(im_size_max) 39 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 40 | interpolation=cv2.INTER_LINEAR) 41 | 42 | return im, im_scale 43 | -------------------------------------------------------------------------------- /lib/utils/boxes_grid.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Subcategory CNN 3 | # Copyright (c) 2015 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | # TODO: make fast_rcnn irrelevant 11 | # >>>> obsolete, because it depends on sth outside of this project 12 | from ..fast_rcnn.config import cfg 13 | # <<<< obsolete 14 | 15 | def get_boxes_grid(image_height, image_width): 16 | """ 17 | Return the boxes on the image grid. 18 | Call this function when cfg.IS_MULTISCALE is True; otherwise, call rdl_roidb.prepare_roidb(imdb) instead. 19 | """ 20 | 21 | # fixed a bug: changed cfg.TRAIN.SCALES to cfg.TRAIN.SCALES_BASE, 22 | # because a ratio around 1.0 is needed here, not the actual size. 23 | # height and width of the feature map 24 | if cfg.NET_NAME == 'CaffeNet': 25 | height = np.floor((image_height * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) 26 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 27 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 28 | 29 | width = np.floor((image_width * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) 30 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 31 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 32 | elif cfg.NET_NAME == 'VGGnet': 33 | height = np.floor(image_height * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) 34 | height = np.floor(height / 2.0 + 0.5) 35 | height = np.floor(height / 2.0 + 0.5) 36 | height = np.floor(height / 2.0 + 0.5) 37 | 38 | width = np.floor(image_width * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) 39 | width = np.floor(width / 2.0 + 0.5) 40 | width = np.floor(width / 2.0 + 0.5) 41 | width = np.floor(width / 2.0 + 0.5) 42 | else: 43 | assert False, 'The network architecture is not supported in utils.get_boxes_grid!'
44 | 45 | # compute the grid box centers 46 | h = np.arange(height) 47 | w = np.arange(width) 48 | y, x = np.meshgrid(h, w, indexing='ij') 49 | centers = np.dstack((x, y)) 50 | centers = np.reshape(centers, (-1, 2)) 51 | num = centers.shape[0] 52 | 53 | # compute width and height of grid box 54 | area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE 55 | aspect = cfg.TRAIN.ASPECTS # height / width 56 | num_aspect = len(aspect) 57 | widths = np.zeros((1, num_aspect), dtype=np.float32) 58 | heights = np.zeros((1, num_aspect), dtype=np.float32) 59 | for i in xrange(num_aspect): 60 | widths[0,i] = math.sqrt(area / aspect[i]) 61 | heights[0,i] = widths[0,i] * aspect[i] 62 | 63 | # construct grid boxes 64 | centers = np.repeat(centers, num_aspect, axis=0) 65 | widths = np.tile(widths, num).transpose() 66 | heights = np.tile(heights, num).transpose() 67 | 68 | x1 = np.reshape(centers[:,0], (-1, 1)) - widths * 0.5 69 | x2 = np.reshape(centers[:,0], (-1, 1)) + widths * 0.5 70 | y1 = np.reshape(centers[:,1], (-1, 1)) - heights * 0.5 71 | y2 = np.reshape(centers[:,1], (-1, 1)) + heights * 0.5 72 | 73 | boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE 74 | 75 | return boxes_grid, centers[:,0], centers[:,1] 76 | -------------------------------------------------------------------------------- /lib/utils/general_utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import collections 3 | import torch 4 | import numpy as np 5 | 6 | 7 | def update_values(dict_from, dict_to): 8 | for key, value in dict_from.items(): 9 | if isinstance(value, dict): 10 | update_values(dict_from[key], dict_to[key]) 11 | elif value is not None: 12 | dict_to[key] = dict_from[key] 13 | 14 | return dict_to 15 | 16 | 17 | def params_count(model): 18 | count = 0 19 | for p in model.parameters(): 20 | c = 1 21 | for i in range(p.dim()): 22 | c *= p.size(i) 23 | count += c 24 | return count -------------------------------------------------------------------------------- /lib/utils/logger.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import time 3 | import json 4 | import numpy as np 5 | import os 6 | from collections import defaultdict 7 | 8 | class Experiment(object): 9 | 10 | def __init__(self, name, options=dict()): 11 | """ Create an experiment 12 | """ 13 | super(Experiment, self).__init__() 14 | 15 | self.name = name 16 | self.options = options 17 | self.date_and_time = time.strftime('%d-%m-%Y--%H-%M-%S') 18 | 19 | self.info = defaultdict(dict) 20 | self.logged = defaultdict(dict) 21 | self.meters = defaultdict(dict) 22 | 23 | def add_meters(self, tag, meters_dict): 24 | assert tag not in (self.meters.keys()) 25 | for name, meter in meters_dict.items(): 26 | self.add_meter(tag, name, meter) 27 | 28 | def add_meter(self, tag, name, meter): 29 | assert name not in list(self.meters[tag].keys()), \ 30 | "meter with tag {} and name {} already exists".format(tag, name) 31 | self.meters[tag][name] = meter 32 | 33 | def update_options(self, options_dict): 34 | self.options.update(options_dict) 35 | 36 | def log_meter(self, tag, name, n=1): 37 | meter = self.get_meter(tag, name) 38 | if name not in self.logged[tag]: 39 | self.logged[tag][name] = {} 40 | self.logged[tag][name][n] = meter.value() 41 | 42 | def log_meters(self, tag, n=1): 43 | for name, meter in self.get_meters(tag).items(): 44 | self.log_meter(tag, name, n=n) 45 | 46 | def reset_meters(self, tag): 47 | meters = self.get_meters(tag) 48 | 
for name, meter in meters.items(): 49 | meter.reset() 50 | return meters 51 | 52 | def get_meters(self, tag): 53 | assert tag in list(self.meters.keys()) 54 | return self.meters[tag] 55 | 56 | def get_meter(self, tag, name): 57 | assert tag in list(self.meters.keys()) 58 | assert name in list(self.meters[tag].keys()) 59 | return self.meters[tag][name] 60 | 61 | def to_json(self, filename): 62 | os.system('mkdir -p ' + os.path.dirname(filename)) 63 | var_dict = copy.copy(vars(self)) 64 | var_dict.pop('meters') 65 | for key in ('viz', 'viz_dict'): 66 | if key in list(var_dict.keys()): 67 | var_dict.pop(key) 68 | with open(filename, 'w') as f: 69 | json.dump(var_dict, f) 70 | 71 | def from_json(filename): 72 | with open(filename, 'r') as f: 73 | var_dict = json.load(f) 74 | xp = Experiment('') 75 | xp.date_and_time = var_dict['date_and_time'] 76 | xp.logged = var_dict['logged'] 77 | # TODO: Remove 78 | if 'info' in var_dict: 79 | xp.info = var_dict['info'] 80 | xp.options = var_dict['options'] 81 | xp.name = var_dict['name'] 82 | return xp 83 | 84 | 85 | class AvgMeter(object): 86 | """Computes and stores the average and current value""" 87 | def __init__(self): 88 | self.reset() 89 | 90 | def reset(self): 91 | self.val = 0 92 | self.avg = 0 93 | self.sum = 0 94 | self.count = 0 95 | 96 | def update(self, val, n=1): 97 | self.val = val 98 | self.sum += val * n 99 | self.count += n 100 | self.avg = self.sum / self.count 101 | 102 | def value(self): 103 | return self.avg 104 | 105 | 106 | class SumMeter(object): 107 | """Computes and stores the sum and current value""" 108 | def __init__(self): 109 | self.reset() 110 | 111 | def reset(self): 112 | self.val = 0 113 | self.sum = 0 114 | self.count = 0 115 | 116 | def update(self, val, n=1): 117 | self.val = val 118 | self.sum += val * n 119 | self.count += n 120 | 121 | def value(self): 122 | return self.sum 123 | 124 | 125 | class ValueMeter(object): 126 | """Computes and stores the average and current value""" 127 | def __init__(self): 128 | self.reset() 129 | 130 | def reset(self): 131 | self.val = 0 132 | 133 | def update(self, val): 134 | self.val = val 135 | 136 | def value(self): 137 | return self.val -------------------------------------------------------------------------------- /lib/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | import pdb 6 | from lib.visualize_graph.vis_utils import expand_relationships_mat, expand_relationships_list 7 | from .cython_bbox import bbox_overlaps, bbox_intersections 8 | 9 | def recall(rois, gt_objects, top_N, thres): 10 | overlaps = bbox_overlaps( 11 | np.ascontiguousarray(rois[:top_N, 1:5], dtype=np.float), 12 | np.ascontiguousarray(gt_objects[:, :4], dtype=np.float)) 13 | 14 | overlap_gt = np.amax(overlaps, axis=0) 15 | correct_cnt = np.sum(overlap_gt >= thres) 16 | total_cnt = overlap_gt.size 17 | return correct_cnt, total_cnt 18 | 19 | def check_recall(rois, gt_objects, top_N, thres=0.5): 20 | 21 | rois = rois.cpu().data.numpy() 22 | if isinstance(gt_objects, list): 23 | correct_cnt, total_cnt = 0, 0 24 | for i, gt in enumerate(gt_objects): 25 | im_rois = rois[np.where(rois[:, 0] == i)[0]] 26 | r = recall(im_rois, gt, top_N, thres) 27 | correct_cnt += r[0] 28 | total_cnt += r[1] 29 | return correct_cnt, total_cnt 30 | else: 31 | return recall(rois, gt_objects, top_N, thres) 32 | 33 | def get_phrase_boxes(sub_boxes, obj_boxes): 34 | 
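# The phrase box is the tightest box enclosing both subject and object boxes:
# elementwise min over the top-left corners, max over the bottom-right corners.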
phrase_boxes = [np.minimum(sub_boxes[:, 0], obj_boxes[:, 0]), 35 | np.minimum(sub_boxes[:, 1], obj_boxes[:, 1]), 36 | np.maximum(sub_boxes[:, 2], obj_boxes[:, 2]), 37 | np.maximum(sub_boxes[:, 3], obj_boxes[:, 3])] 38 | phrase_boxes = np.stack(phrase_boxes, axis=1) 39 | return phrase_boxes 40 | 41 | def check_phrase_recall(gt_objects, gt_relationships, 42 | subject_inds, object_inds, predicate_inds, 43 | subject_boxes, object_boxes, top_Ns, thres=0.5): 44 | # rearrange the ground truth 45 | gt_rel_sub_idx, gt_rel_obj_idx = np.where(gt_relationships > 0) # ground truth number 46 | gt_sub = gt_objects[gt_rel_sub_idx, :5] 47 | gt_obj = gt_objects[gt_rel_obj_idx, :5] 48 | gt_rel = gt_relationships[gt_rel_sub_idx, gt_rel_obj_idx] 49 | 50 | rel_cnt = len(gt_rel) 51 | rel_correct_cnt = np.zeros(len(top_Ns)) 52 | max_topN = max(top_Ns) 53 | 54 | # compute the overlap 55 | try: 56 | phrase_overlaps = bbox_overlaps( 57 | np.ascontiguousarray( 58 | get_phrase_boxes(subject_boxes[:max_topN], object_boxes[:max_topN]), dtype=np.float), 59 | np.ascontiguousarray( 60 | get_phrase_boxes(gt_sub[:, :4], gt_obj[:, :4]), dtype=np.float)) 61 | except: 62 | print('[Warning] No relationship remaining.') 63 | return rel_cnt, rel_correct_cnt 64 | 65 | 66 | for idx, top_N in enumerate(top_Ns): 67 | for gt_id in xrange(rel_cnt): 68 | fg_candidate = np.where(phrase_overlaps[:top_N, gt_id] >= thres)[0] 69 | 70 | for candidate_id in fg_candidate: 71 | if predicate_inds[candidate_id] == gt_rel[gt_id] and \ 72 | subject_inds[candidate_id] == gt_sub[gt_id, 4] and \ 73 | object_inds[candidate_id] == gt_obj[gt_id, 4]: 74 | rel_correct_cnt[idx] += 1 75 | break 76 | return rel_cnt, rel_correct_cnt 77 | 78 | 79 | def check_relationship_recall(gt_objects, gt_relationships, 80 | subject_inds, object_inds, predicate_inds, 81 | subject_boxes, object_boxes, top_Ns, thres=0.5): 82 | # rearrange the ground truth 83 | gt_rel_sub_idx, gt_rel_obj_idx = np.where(gt_relationships > 0) # ground truth number 84 | gt_sub = gt_objects[gt_rel_sub_idx, :5] 85 | gt_obj = gt_objects[gt_rel_obj_idx, :5] 86 | gt_rel = gt_relationships[gt_rel_sub_idx, gt_rel_obj_idx] 87 | 88 | rel_cnt = len(gt_rel) 89 | pred_correct_cnt = np.zeros(len(top_Ns)) 90 | rel_correct_cnt = np.zeros(len(top_Ns)) 91 | max_topN = max(top_Ns) 92 | 93 | # compute the overlap 94 | try: 95 | sub_overlaps = bbox_overlaps( 96 | np.ascontiguousarray(subject_boxes[:max_topN], dtype=np.float), 97 | np.ascontiguousarray(gt_sub[:, :4], dtype=np.float)) 98 | obj_overlaps = bbox_overlaps( 99 | np.ascontiguousarray(object_boxes[:max_topN], dtype=np.float), 100 | np.ascontiguousarray(gt_obj[:, :4], dtype=np.float)) 101 | except: 102 | print('[Warning] No relationship remaining.') 103 | return rel_cnt, rel_correct_cnt, pred_correct_cnt 104 | 105 | 106 | for idx, top_N in enumerate(top_Ns): 107 | for gt_id in xrange(rel_cnt): 108 | fg_candidate = np.where(np.logical_and( 109 | sub_overlaps[:top_N, gt_id] >= thres, 110 | obj_overlaps[:top_N, gt_id] >= thres))[0] 111 | 112 | pred_flag = 1 113 | for candidate_id in fg_candidate: 114 | if predicate_inds[candidate_id] == gt_rel[gt_id]: 115 | pred_correct_cnt[idx] += pred_flag 116 | pred_flag = 0 # only add once 117 | if subject_inds[candidate_id] == gt_sub[gt_id, 4] and \ 118 | object_inds[candidate_id] == gt_obj[gt_id, 4]: 119 | 120 | rel_correct_cnt[idx] += 1 121 | break 122 | return rel_cnt, rel_correct_cnt, pred_correct_cnt 123 | 124 | 125 | def check_hit_detections(gt_objects, gt_relationships, 126 | pred_objects, pred_relationships, 
thres=0.5): 127 | 128 | 129 | # rearrange the ground truth 130 | gt_sub, gt_obj, gt_rel,_, _ = expand_relationships_mat(gt_objects, gt_relationships) 131 | pred_sub, pred_obj, pred_rel = expand_relationships_list(pred_objects, pred_relationships) 132 | hit_pred = np.zeros_like(pred_rel) 133 | assigned_gt = np.ones_like(gt_rel) 134 | # compute the overlap 135 | try: 136 | sub_overlaps = bbox_overlaps( 137 | np.ascontiguousarray(pred_sub[:, :4], dtype=np.float), 138 | np.ascontiguousarray(gt_sub[:, :4], dtype=np.float)) 139 | obj_overlaps = bbox_overlaps( 140 | np.ascontiguousarray(pred_obj[:, :4], dtype=np.float), 141 | np.ascontiguousarray(gt_obj[:, :4], dtype=np.float)) 142 | except: 143 | print('[Warning] No relationship remaining.') 144 | return hit_pred 145 | 146 | 147 | 148 | for pred_id in xrange(pred_rel.shape[0]): 149 | 150 | fg_candidate = np.where(np.logical_and( 151 | sub_overlaps[pred_id] >= thres, 152 | obj_overlaps[pred_id] >= thres))[0] 153 | for candidate_id in fg_candidate: 154 | if pred_rel[pred_id] == gt_rel[candidate_id] and \ 155 | pred_sub[pred_id, 4] == gt_sub[candidate_id, 4] and \ 156 | pred_obj[pred_id, 4] == gt_obj[candidate_id, 4] and assigned_gt[candidate_id]: 157 | 158 | hit_pred[pred_id] = 1 159 | assigned_gt[candidate_id] = 0 160 | break 161 | 162 | return hit_pred 163 | 164 | -------------------------------------------------------------------------------- /lib/utils/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import pdb 10 | 11 | def nms(dets, thresh): 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | 40 | def unary_nms(dets, classes, thresh): 41 | x1 = dets[:, 0] 42 | y1 = dets[:, 1] 43 | x2 = dets[:, 2] 44 | y2 = dets[:, 3] 45 | scores = dets[:, 4] 46 | 47 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 48 | order = scores.argsort()[::-1] 49 | 50 | keep = [] 51 | while order.size > 0: 52 | i = order[0] 53 | keep.append(i) 54 | xx1 = np.maximum(x1[i], x1[order[1:]]) 55 | yy1 = np.maximum(y1[i], y1[order[1:]]) 56 | xx2 = np.minimum(x2[i], x2[order[1:]]) 57 | yy2 = np.minimum(y2[i], y2[order[1:]]) 58 | 59 | w = np.maximum(0.0, xx2 - xx1 + 1) 60 | h = np.maximum(0.0, yy2 - yy1 + 1) 61 | inter = w * h 62 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 63 | 64 | inds = np.where((ovr <= thresh) | (classes[i] != classes[order[1:]]))[0] 65 | order = order[inds + 1] 66 | 67 | return keep 68 | 69 | def triplet_nms(sub_ids, obj_ids, pred_ids, sub_boxes, obj_boxes, thresh): 70 | #print('before: {}'.format(len(sub_ids))), 71 | sub_x1 = sub_boxes[:, 0] 72 | sub_y1 = sub_boxes[:, 
1] 73 | sub_x2 = sub_boxes[:, 2] 74 | sub_y2 = sub_boxes[:, 3] 75 | obj_x1 = obj_boxes[:, 0] 76 | obj_y1 = obj_boxes[:, 1] 77 | obj_x2 = obj_boxes[:, 2] 78 | obj_y2 = obj_boxes[:, 3] 79 | 80 | 81 | sub_areas = (sub_x2 - sub_x1 + 1) * (sub_y2 - sub_y1 + 1) 82 | obj_areas = (obj_x2 - obj_x1 + 1) * (obj_y2 - obj_y1 + 1) 83 | order = np.array(range(len(sub_ids))) 84 | 85 | keep = [] 86 | while order.size > 0: 87 | i = order[0] 88 | keep.append(i) 89 | sub_xx1 = np.maximum(sub_x1[i], sub_x1[order[1:]]) 90 | sub_yy1 = np.maximum(sub_y1[i], sub_y1[order[1:]]) 91 | sub_xx2 = np.minimum(sub_x2[i], sub_x2[order[1:]]) 92 | sub_yy2 = np.minimum(sub_y2[i], sub_y2[order[1:]]) 93 | sub_id = sub_ids[i] 94 | obj_xx1 = np.maximum(obj_x1[i], obj_x1[order[1:]]) 95 | obj_yy1 = np.maximum(obj_y1[i], obj_y1[order[1:]]) 96 | obj_xx2 = np.minimum(obj_x2[i], obj_x2[order[1:]]) 97 | obj_yy2 = np.minimum(obj_y2[i], obj_y2[order[1:]]) 98 | obj_id = obj_ids[i] 99 | pred_id = pred_ids[i] 100 | 101 | w = np.maximum(0.0, sub_xx2 - sub_xx1 + 1) 102 | h = np.maximum(0.0, sub_yy2 - sub_yy1 + 1) 103 | inter = w * h 104 | sub_ovr = inter / (sub_areas[i] + sub_areas[order[1:]] - inter) 105 | 106 | w = np.maximum(0.0, obj_xx2 - obj_xx1 + 1) 107 | h = np.maximum(0.0, obj_yy2 - obj_yy1 + 1) 108 | inter = w * h 109 | obj_ovr = inter / (obj_areas[i] + obj_areas[order[1:]] - inter) 110 | inds = np.where( (sub_ovr <= thresh) | 111 | (obj_ovr <= thresh) | 112 | (sub_ids[order[1:]] != sub_id) | 113 | (obj_ids[order[1:]] != obj_id) | 114 | (pred_ids[order[1:]] != pred_id) )[0] 115 | order = order[inds + 1] 116 | #print(' After: {}'.format(len(keep))) 117 | return sub_ids[keep], obj_ids[keep], pred_ids[keep], sub_boxes[keep], obj_boxes[keep], keep 118 | -------------------------------------------------------------------------------- /lib/utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = 
order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 
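# Typical tic()/toc() usage (illustrative sketch; run_step is a
# hypothetical workload, not part of this repo):
#   timer = Timer()
#   timer.tic()
#   run_step()
#   avg = timer.toc()  # running average over all timed intervals so far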
18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /lib/visualize_graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/lib/visualize_graph/__init__.py -------------------------------------------------------------------------------- /lib/visualize_graph/vis_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pdb 3 | from ..utils.cython_bbox import bbox_overlaps 4 | 5 | def _compute_gt_target(pred_boxes, gt_boxes): 6 | """ 7 | compute which gt gets mapped to each predicted box 8 | [Modified from Danfei's implementation. 9 | Directly use top-1-score boxes. 10 | In Danfei's implementation, per-class-boxes 11 | are used.] 12 | """ 13 | 14 | num_boxes = pred_boxes.shape[0] 15 | # map predicted boxes to ground-truth 16 | gt_targets = np.zeros(num_boxes).astype(np.int32) 17 | gt_target_iou = np.zeros(num_boxes) 18 | gt_target_iou.fill(-1) 19 | 20 | for j in xrange(num_boxes): 21 | # prepare inputs 22 | bb = pred_boxes[j].astype(float) 23 | # # compute max IoU over classes 24 | # # for c in xrange(1, num_classes): 25 | # for c in xrange(pred_class_scores.shape[1]): 26 | # bb = bbox[4*c:4*(c+1)] 27 | if gt_boxes.size > 0: 28 | # compute overlaps 29 | # intersection 30 | ixmin = np.maximum(gt_boxes[:, 0], bb[0]) 31 | iymin = np.maximum(gt_boxes[:, 1], bb[1]) 32 | ixmax = np.minimum(gt_boxes[:, 2], bb[2]) 33 | iymax = np.minimum(gt_boxes[:, 3], bb[3]) 34 | iw = np.maximum(ixmax - ixmin + 1., 0.) 35 | ih = np.maximum(iymax - iymin + 1., 0.) 36 | inters = iw * ih 37 | 38 | # union 39 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 40 | (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * 41 | (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.) 
- inters) 42 | 43 | overlaps = inters / uni 44 | max_iou_class = np.max(overlaps) 45 | max_k_class = np.argmax(overlaps) 46 | 47 | # select max over classes 48 | if max_iou_class > gt_target_iou[j]: 49 | gt_target_iou[j] = max_iou_class 50 | gt_targets[j] = max_k_class 51 | 52 | return gt_targets, gt_target_iou 53 | 54 | 55 | def ground_predictions(boxes, gt_boxes, ovthresh=0.5): 56 | """ 57 | ground graph predictions onto ground truth annotations 58 | boxes: predicted boxes 59 | """ 60 | 61 | # get predictions 62 | num_boxes = boxes.shape[0] 63 | 64 | 65 | # compute which gt index each roi gets mapped to 66 | gt_targets, gt_target_iou = _compute_gt_target(boxes, gt_boxes) 67 | 68 | # filter out predictions with low IoUs 69 | filter_inds = np.where(gt_target_iou > ovthresh)[0] 70 | 71 | # make sure each gt box is referenced only once 72 | # if referenced more than once, use the one that 73 | # has the maximum IoU 74 | gt_to_pred = {} # {gt_ind: pred_ind} 75 | for j in xrange(num_boxes): 76 | gti = gt_targets[j] # referenced gt ind 77 | if gti in gt_to_pred: 78 | pred_ind = gt_to_pred[gti] 79 | if gt_target_iou[j] > gt_target_iou[pred_ind]: 80 | gt_to_pred[gti] = j 81 | elif j in filter_inds: # also must survive filtering 82 | gt_to_pred[gti] = j 83 | 84 | return gt_to_pred 85 | 86 | def expand_relationships_mat(objects, relationships): 87 | rel_sub_idx, rel_obj_idx = np.where(relationships > 0) # ground truth number 88 | sub = objects[rel_sub_idx, :5] 89 | obj = objects[rel_obj_idx, :5] 90 | rel = relationships[rel_sub_idx, rel_obj_idx] 91 | return sub, obj, rel, rel_sub_idx, rel_obj_idx 92 | def expand_relationships_list(objects, relationships): 93 | relationships = np.array(relationships, dtype=np.int) 94 | sub = objects[relationships[:, 0]][:, :5] 95 | obj = objects[relationships[:, 1]][:, :5] 96 | rel = relationships[:, 2] 97 | return sub, obj, rel 98 | 99 | 100 | def check_recalled_graph(gt_objects, gt_relationships, 101 | pred_objects, pred_relationships, thres=0.5): 102 | # rearrange the ground truth 103 | gt_sub, gt_obj, gt_rel,gt_sub_assign, gt_obj_assign = expand_relationships_mat(gt_objects, gt_relationships) 104 | pred_sub, pred_obj, pred_rel,_, _ = expand_relationships_mat(pred_objects, pred_relationships) 105 | rec_rel = np.zeros_like(gt_relationships) 106 | # compute the overlap 107 | try: 108 | sub_overlaps = bbox_overlaps( 109 | np.ascontiguousarray(pred_sub[:, :4], dtype=np.float), 110 | np.ascontiguousarray(gt_sub[:, :4], dtype=np.float)) 111 | obj_overlaps = bbox_overlaps( 112 | np.ascontiguousarray(pred_obj[:, :4], dtype=np.float), 113 | np.ascontiguousarray(gt_obj[:, :4], dtype=np.float)) 114 | except: 115 | print('[Warning] No relationship remaining.') 116 | return gt_objects, gt_relationships 117 | 118 | 119 | for gt_id in xrange(gt_sub.shape[0]): 120 | fg_candidate = np.where(np.logical_and( 121 | sub_overlaps[:, gt_id] >= thres, 122 | obj_overlaps[:, gt_id] >= thres))[0] 123 | 124 | for candidate_id in fg_candidate: 125 | if pred_rel[candidate_id] == gt_rel[gt_id] and \ 126 | pred_sub[candidate_id, 4] == gt_sub[gt_id, 4] and \ 127 | pred_obj[candidate_id, 4] == gt_obj[gt_id, 4]: 128 | 129 | rec_rel[gt_sub_assign[gt_id], gt_obj_assign[gt_id]] = gt_rel[gt_id] 130 | break 131 | 132 | rec_sub, rec_obj = np.where(rec_rel > 0) 133 | rec_objects = np.union1d(rec_sub, rec_obj) 134 | 135 | return gt_objects[rec_objects], rec_rel[rec_objects][:,rec_objects] 136 | -------------------------------------------------------------------------------- 
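A minimal sketch of how ground_predictions from vis_utils.py above can be exercised (illustrative only: the box arrays are made-up placeholders, the import assumes this repo's package layout, and the xrange calls mean the module targets Python 2):

    import numpy as np
    from lib.visualize_graph.vis_utils import ground_predictions

    pred_boxes = np.array([[10., 10., 50., 50.],
                           [40., 40., 90., 90.]])
    gt_boxes = np.array([[12., 11., 52., 49.]])
    # {gt_index: index of the best-IoU prediction above the 0.5 threshold}
    gt_to_pred = ground_predictions(pred_boxes, gt_boxes, ovthresh=0.5)
    print(gt_to_pred)  # expected: {0: 0}
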
/lib/visualize_graph/visualize.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Scene Graph Generation by Iterative Message Passing 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Danfei Xu 5 | # -------------------------------------------------------- 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | from graphviz import Digraph 10 | import cv2 11 | 12 | import pdb 13 | 14 | """ 15 | Utility for visualizing a scene graph 16 | """ 17 | 18 | 19 | 20 | 21 | def draw_scene_graph(labels, inds, rels, ind_to_class, ind_to_predicate, filename): 22 | """ 23 | draw a graphviz graph of the scene graph topology 24 | """ 25 | viz_labels = labels[inds] 26 | viz_rels = None 27 | if rels is not None: 28 | viz_rels = [] 29 | for rel in rels: 30 | if rel[0] in inds and rel[1] in inds : 31 | sub_idx = np.where(inds == rel[0])[0][0] 32 | obj_idx = np.where(inds == rel[1])[0][0] 33 | viz_rels.append([sub_idx, obj_idx, rel[2]]) 34 | return draw_graph(viz_labels, viz_rels, ind_to_class, ind_to_predicate, filename) 35 | 36 | 37 | def draw_graph(labels, rels, ind_to_class, ind_to_predicate, filename): 38 | u = Digraph('sg', filename=filename) 39 | u.body.append('size="6,6"') 40 | u.body.append('rankdir="LR"') 41 | u.node_attr.update(style='filled') 42 | 43 | out_dict = {'ind_to_class': ind_to_class, 'ind_to_predicate': ind_to_predicate} 44 | out_dict['labels'] = labels.tolist() 45 | out_dict['relations'] = rels 46 | 47 | rels = np.array(rels) 48 | rel_inds = rels[:,:2].ravel().tolist() 49 | name_list = [] 50 | for i, l in enumerate(labels): 51 | if i in rel_inds: 52 | name = ind_to_class[l] 53 | name_suffix = 1 54 | obj_name = name 55 | while obj_name in name_list: 56 | obj_name = name + '_' + str(name_suffix) 57 | name_suffix += 1 58 | name_list.append(obj_name) 59 | u.node(str(i), label=obj_name, color='lightblue2') 60 | 61 | for rel in rels: 62 | edge_key = '%s_%s' % (rel[0], rel[1]) 63 | u.node(edge_key, label=ind_to_predicate[rel[2]], color='red') 64 | 65 | u.edge(str(rel[0]), edge_key) 66 | u.edge(edge_key, str(rel[1])) 67 | 68 | u.render(cleanup=True) # save the graph to file and remove the source 69 | return out_dict 70 | 71 | 72 | def viz_scene_graph(im, rois, labels, ind_to_class, ind_to_predicate, inds=None, rels=None, filename=None): 73 | """ 74 | visualize a scene graph on an image 75 | """ 76 | if inds is None: 77 | inds = np.arange(rois.shape[0]) 78 | viz_rois = rois[inds] 79 | viz_labels = labels[inds] 80 | viz_rels = None 81 | if rels is not None: 82 | viz_rels = [] 83 | for rel in rels: 84 | if rel[0] in inds and rel[1] in inds : 85 | sub_idx = np.where(inds == rel[0])[0][0] 86 | obj_idx = np.where(inds == rel[1])[0][0] 87 | viz_rels.append([sub_idx, obj_idx, rel[2]]) 88 | viz_rels = np.array(viz_rels) 89 | return _viz_scene_graph(im, viz_rois, viz_labels, ind_to_class, ind_to_predicate, viz_rels, filename) 90 | 91 | 92 | def _viz_scene_graph(im, rois, labels, ind_to_class, ind_to_predicate, rels, filename): 93 | fig, ax = plt.subplots(figsize=(12, 12)) 94 | ax.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB), aspect='equal') 95 | if rels.size > 0: 96 | rel_inds = rels[:,:2].ravel().tolist() 97 | else: 98 | rel_inds = [] 99 | # draw bounding boxes 100 | for i, bbox in enumerate(rois): 101 | if int(labels[i]) == 0 and i not in rel_inds: 102 | continue 103 | ax.add_patch( 104 | plt.Rectangle((bbox[0], bbox[1]), 105 | bbox[2] - bbox[0], 106 | bbox[3] - 
bbox[1], fill=False, 107 | edgecolor='red', linewidth=3.5) 108 | ) 109 | label_str = ind_to_class[int(labels[i])] 110 | ax.text(bbox[0], bbox[1] - 2, 111 | label_str, 112 | bbox=dict(facecolor='blue', alpha=0.5), 113 | fontsize=14, color='white') 114 | 115 | # draw relations 116 | for i, rel in enumerate(rels): 117 | if rel[2] == 0: # ignore bachground 118 | continue 119 | sub_box = rois[rel[0], :] 120 | obj_box = rois[rel[1], :] 121 | obj_ctr = [obj_box[0], obj_box[1] - 2] 122 | sub_ctr = [sub_box[0], sub_box[1] - 2] 123 | line_ctr = [(sub_ctr[0] + obj_ctr[0]) / 2, (sub_ctr[1] + obj_ctr[1]) / 2] 124 | predicate = ind_to_predicate[int(rel[2])] 125 | ax.arrow(sub_ctr[0], sub_ctr[1], obj_ctr[0]-sub_ctr[0], obj_ctr[1]-sub_ctr[1], color='green') 126 | 127 | ax.text(line_ctr[0], line_ctr[1], predicate, 128 | bbox=dict(facecolor='green', alpha=0.5), 129 | fontsize=14, color='white') 130 | 131 | ax.set_title('Scene Graph Visualization', fontsize=14) 132 | ax.axis('off') 133 | fig.tight_layout() 134 | if filename is not None: 135 | fig.savefig(filename + '.png') 136 | plt.close(fig) 137 | -------------------------------------------------------------------------------- /models/HDN_v2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/models/HDN_v2/__init__.py -------------------------------------------------------------------------------- /models/HDN_v2/criteria.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .utils import build_loss_bbox, build_loss_cls 4 | 5 | import lib.network as network 6 | 7 | 8 | def loss_FN_v1(pred_obj, pred_rel, roi_data_object, roi_data_predicate, 9 | obj_loss_weight, rel_loss_weight): 10 | roi_data_object = [network.np_to_variable(roi_data_object[0], is_cuda=True, dtype=torch.LongTensor), 11 | network.np_to_variable(roi_data_object[1], is_cuda=True), 12 | network.np_to_variable(roi_data_object[2], is_cuda=True), 13 | network.np_to_variable(roi_data_object[3], is_cuda=True), ] 14 | roi_data_predicate = [network.np_to_variable(roi_data_predicate[0], is_cuda=True, dtype=torch.LongTensor)] 15 | # object cls loss 16 | loss_cls_obj, acc_obj = build_loss_cls(pred_obj[0], roi_data_object[0], obj_loss_weight) 17 | loss_reg_obj= build_loss_bbox(pred_obj[1], roi_data_object, acc_obj[2]) 18 | loss_cls_rel, acc_rel= build_loss_cls(pred_rel[0], roi_data_predicate[0], rel_loss_weight) 19 | 20 | return loss_cls_obj, loss_reg_obj, loss_cls_rel -------------------------------------------------------------------------------- /models/RPN/RPN.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os.path as osp 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from lib.utils.timer import Timer 9 | from lib.utils.blob import im_list_to_blob 10 | from lib.rpn_msr.proposal_layer import proposal_layer as proposal_layer_py 11 | from lib.rpn_msr.anchor_target_layer import anchor_target_layer as anchor_target_layer_py 12 | from lib.fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 13 | 14 | from lib import network 15 | from lib.network import Conv2d, FC 16 | import torchvision.models as models 17 | import math 18 | import json 19 | import yaml 20 | import pdb 21 | 22 | from .utils import nms_detections, build_loss, reshape_layer, generate_output_mapping 23 | 24 | 
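# Construction sketch (illustrative, not part of the original file; opts
# normally comes from an options/RPN/*.yaml such as options/RPN/RPN_FN.yaml
# shown later in this repo, and the forward call mirrors the signature below):
#   import yaml
#   with open('options/RPN/RPN_FN.yaml') as f:
#       opts = yaml.safe_load(f)
#   rpn = RPN(opts)
#   features, rois, losses = rpn(im_data, im_info, rpn_data=rpn_data)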
DEBUG = False 25 | 26 | 27 | 28 | class RPN(nn.Module): 29 | _feat_stride = 16 30 | 31 | anchor_scales_normal = [2, 4, 8, 16, 32, 64] 32 | anchor_ratios_normal = [0.25, 0.5, 1, 2, 4] 33 | anchor_scales_normal_region = [4, 8, 16, 32, 64] 34 | anchor_ratios_normal_region = [0.25, 0.5, 1, 2, 4] 35 | 36 | def __init__(self, opts): 37 | super(RPN, self).__init__() 38 | 39 | # loading RPN configs 40 | self.opts = opts 41 | if self.opts['kmeans_anchors']: 42 | # Loading k-means anchors 43 | kmeans_anchors_file = osp.join(self.opts['anchor_dir'], 'kmeans_anchors.json') 44 | print 'using k-means anchors: {}'.format(kmeans_anchors_file) 45 | anchors = json.load(open(kmeans_anchors_file)) 46 | if 'scale' not in self.opts: 47 | print('No RPN scale is given, default [600] is set') 48 | self.opts['object']['anchor_scales'] = list(np.array(anchors['anchor_scales_kmeans']) / 600.0 * self.opts.get('scale', 600.)) 49 | self.opts['object']['anchor_ratios'] = anchors['anchor_ratios_kmeans'] 50 | else: 51 | print 'using normal anchors' 52 | anchor_scales, anchor_ratios = \ 53 | np.meshgrid(self.anchor_scales_normal, self.anchor_ratios_normal, indexing='ij') 54 | self.opts['object']['anchor_scales'] = anchor_scales.reshape(-1) 55 | self.opts['object']['anchor_ratios'] = anchor_ratios.reshape(-1) 56 | 57 | self.anchor_num = len(self.opts['object']['anchor_scales']) 58 | 59 | self.features = models.vgg16(pretrained=True).features 60 | self.features.__delattr__('30') # to delete the max pooling 61 | # by default, fix the first four layers 62 | network.set_trainable_param(list(self.features.parameters())[:8], requires_grad=False) 63 | 64 | # self.features = models.vgg16().features 65 | self.conv1 = Conv2d(512, 512, 3, same_padding=True) 66 | self.score_conv = Conv2d(512, self.anchor_num * 2, 1, relu=False, same_padding=False) 67 | self.bbox_conv = Conv2d(512, self.anchor_num * 4, 1, relu=False, same_padding=False) 68 | 69 | # initialize the parameters 70 | self.initialize_parameters() 71 | self.opts['mappings'] = generate_output_mapping(osp.join(self.opts['anchor_dir'], 'vgg16_mappings.json'), 72 | self.features) 73 | 74 | def initialize_parameters(self, normal_method='normal'): 75 | 76 | if normal_method == 'normal': 77 | normal_fun = network.weights_normal_init 78 | elif normal_method == 'MSRA': 79 | normal_fun = network.weights_MSRA_init 80 | else: 81 | raise(Exception('Cannot recognize the normal method:'.format(normal_method))) 82 | 83 | normal_fun(self.conv1, 0.025) 84 | normal_fun(self.score_conv, 0.025) 85 | normal_fun(self.bbox_conv, 0.01) 86 | 87 | 88 | # @property 89 | # def loss(self): 90 | # return self.loss_cls + self.loss_box * 0.2 91 | 92 | def forward(self, im_data, im_info, gt_objects=None, dontcare_areas=None, rpn_data=None): 93 | 94 | features = self.features(im_data) 95 | # print 'features.std()', features.data.std() 96 | rpn_conv1 = self.conv1(features) 97 | # print 'rpn_conv1.std()', rpn_conv1.data.std() 98 | # object proposal score 99 | rpn_cls_score = self.score_conv(rpn_conv1) 100 | # print 'rpn_cls_score.std()', rpn_cls_score.data.std() 101 | rpn_cls_score_reshape = reshape_layer(rpn_cls_score, 2) 102 | rpn_cls_prob = F.softmax(rpn_cls_score_reshape, dim=1) 103 | rpn_cls_prob_reshape = reshape_layer(rpn_cls_prob, self.anchor_num*2) 104 | # rpn boxes 105 | rpn_bbox_pred = self.bbox_conv(rpn_conv1) 106 | # print 'rpn_bbox_pred.std()', rpn_bbox_pred.data.std() * 4 107 | 108 | 109 | # proposal layer 110 | cfg_key = 'train' if self.training else 'test' 111 | rois = 
self.proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, 112 | self._feat_stride, self.opts['object'][cfg_key], 113 | self.opts['object']['anchor_scales'], 114 | self.opts['object']['anchor_ratios'], 115 | mappings=self.opts['mappings']) 116 | 117 | # generating training labels and build the rpn loss 118 | losses = {} 119 | if self.training and rpn_data is not None: 120 | loss_cls, loss_box, accs = build_loss(rpn_cls_score_reshape, rpn_bbox_pred, rpn_data) 121 | tp, tf, fg_cnt, bg_cnt = accs 122 | losses = { 123 | 'loss_cls': loss_cls, 124 | 'loss_box': loss_box, 125 | 'loss': loss_cls + loss_box * 0.2, 126 | 'tp': tp, 127 | 'tf': tf, 128 | 'fg_cnt': fg_cnt, 129 | 'bg_cnt': bg_cnt, 130 | 131 | } 132 | return features, rois, losses 133 | 134 | 135 | @staticmethod 136 | def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, 137 | _feat_stride, opts, anchor_scales, anchor_ratios, mappings): 138 | rpn_cls_prob_reshape = rpn_cls_prob_reshape.data.cpu().numpy() 139 | rpn_bbox_pred = rpn_bbox_pred.data.cpu().numpy() 140 | x = proposal_layer_py(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, 141 | _feat_stride, opts, anchor_scales, anchor_ratios, mappings) 142 | x = network.np_to_variable(x, is_cuda=True) 143 | return x.view(-1, 6) 144 | -------------------------------------------------------------------------------- /models/RPN/__init__.py: -------------------------------------------------------------------------------- 1 | from .RPN import RPN 2 | from .RPN_region import RPN as RPN_region -------------------------------------------------------------------------------- /models/RPN/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import os.path as osp 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | import numpy as np 9 | import json 10 | import shutil 11 | 12 | from lib.fast_rcnn.nms_wrapper import nms 13 | from lib import network 14 | 15 | import pdb 16 | 17 | def save_checkpoint(filename, model, epoch, is_best): 18 | model_name = '{}_epoch_{}.h5'.format(filename, epoch) 19 | model_name_best = '{}_best.h5'.format(filename) 20 | info_name = '{}_epoch_{}_info.json'.format(filename, epoch) 21 | info_name_best = '{}_best_info.json'.format(filename) 22 | network.save_net(model_name, model) 23 | with open(info_name, 'w') as f: 24 | json.dump(model.opts, f) 25 | print('save model: {}'.format(model_name)) 26 | if is_best: 27 | shutil.copyfile(model_name, model_name_best) 28 | shutil.copyfile(info_name, info_name_best) 29 | 30 | def load_checkpoint(filename, model): 31 | model_name = '{}.h5'.format(filename) 32 | info_name = '{}_info.json'.format(filename) 33 | network.load_net(model_name, model) 34 | if False: # disable info loading #osp.isfile(info_name): 35 | with open(info_name, 'r') as f: 36 | model.opts = json.load(f) 37 | else: 38 | print('Info file missed, using the default options') 39 | 40 | 41 | 42 | def reshape_layer(x, d): 43 | input_shape = x.size() 44 | # x = x.permute(0, 3, 1, 2) 45 | # b c w h 46 | x = x.view( 47 | input_shape[0], 48 | int(d), 49 | int(float(input_shape[1] * input_shape[2]) / float(d)), 50 | input_shape[3] 51 | ) 52 | 53 | return x 54 | # x = x.permute(0, 2, 3, 1) 55 | 56 | def nms_detections(pred_boxes, scores, nms_thresh, inds=None): 57 | dets = np.hstack((pred_boxes, 58 | scores[:, np.newaxis])).astype(np.float32) 59 | keep = nms(dets, nms_thresh) 60 | if inds is None: 61 | return 
pred_boxes[keep], scores[keep] 62 | return pred_boxes[keep], scores[keep], inds[keep] 63 | 64 | def build_loss(rpn_cls_score_reshape, rpn_bbox_pred, rpn_data): 65 | # classification loss 66 | rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2) 67 | rpn_label = rpn_data[0].view(-1) 68 | # print rpn_label.size(), rpn_cls_score.size() 69 | rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()) 70 | rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep) 71 | rpn_label = torch.index_select(rpn_label, 0, rpn_keep) 72 | 73 | fg_cnt = torch.sum(rpn_label.data.ne(0)) 74 | bg_cnt = rpn_label.data.numel() - fg_cnt 75 | 76 | _, predict = torch.max(rpn_cls_score.data, 1) 77 | error = torch.sum(torch.abs(predict - rpn_label.data)) 78 | # try: 79 | if fg_cnt == 0: 80 | tp = 0. 81 | tf = tf = torch.sum(predict.eq(rpn_label.data)) 82 | else: 83 | tp = torch.sum(predict[:fg_cnt].eq(rpn_label.data[:fg_cnt])) 84 | tf = torch.sum(predict[fg_cnt:].eq(rpn_label.data[fg_cnt:])) 85 | fg_cnt = fg_cnt 86 | bg_cnt = bg_cnt 87 | # print 'accuracy: %2.2f%%' % ((self.tp + self.tf) / float(fg_cnt + bg_cnt) * 100) 88 | rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label) 89 | # print rpn_cross_entropy 90 | 91 | # box loss 92 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:] 93 | rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights) 94 | rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights) 95 | rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4) 96 | 97 | return rpn_cross_entropy, rpn_loss_box, (tp, tf, fg_cnt, bg_cnt) 98 | 99 | 100 | def generate_output_mapping(mapping_file, conv_layers, min_size=16, max_size=1001): 101 | if osp.isfile(mapping_file): 102 | with open(mapping_file, 'r') as f: 103 | mappings = json.load(f) 104 | 105 | mappings = {int(k):int(v) for k,v in mappings.items()} 106 | return mappings 107 | else: 108 | conv_layers.cuda() 109 | print('Generating input/output size mappings') 110 | mappings = {} 111 | for i in range(min_size, max_size): 112 | t_in = Variable(torch.zeros(1, 3, i, i).cuda()) 113 | t_out = conv_layers(t_in) 114 | mappings[i] = t_out.size(2) 115 | 116 | with open(mapping_file, 'w') as f: 117 | json.dump(mappings, f) 118 | print('Done') 119 | return mappings 120 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from HDN_v2.factorizable_network_v4 import Factorizable_network as FN_v4 2 | from HDN_v2.factorizable_network_v4s import Factorizable_network as FN_v4s 3 | -------------------------------------------------------------------------------- /models/modules/NMS.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from torch.nn import Parameter 7 | import pdb 8 | import relation_module 9 | #from options.config_FN import cfg 10 | 11 | class Dumplicate_Removal(nn.Module): 12 | def __init__(self, opts): 13 | super(Dumplicate_Removal, self).__init__() 14 | self.opts = opts 15 | self.relation_transform = relation_module.Relation_Module( 16 | self.opts['dim_mm'], 17 | self.opts['dim_mm'], 18 | self.opts['dim_mm'] // 2, 19 | geometry_trans=self.opts.get('geometry', 'Geometry_Transform_v2') 20 | ) 21 | 
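# Learned duplicate removal in the spirit of "Relation Networks for Object
# Detection": boxes are ranked by class score, a rank embedding is added to
# the transformed appearance feature, the relation module mixes information
# across boxes, and a final linear layer emits a per-box rescoring factor.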
self.transform_visual = nn.Linear(self.opts['dim_ho'], self.opts['dim_mm']) 22 | self.rank_embeddings = nn.Embedding(256, self.opts['dim_mm']) # cfg.TRAIN.BATCH_SIZE, self.opts['dim_mm']) 23 | self.transform_rescore = nn.Linear(self.opts['dim_mm'], 1) 24 | 25 | 26 | def forward(self, feature_obj, highest_prob, rois_obj): 27 | ''' 28 | Training stage: object probability is that of the assigned ground truth label 29 | Testing stage: object probability is the one with highest probability 30 | ''' 31 | assert highest_prob.size(0) <= self.rank_embeddings.num_embeddings 32 | if isinstance(highest_prob, Variable): 33 | highest_prob = highest_prob.data 34 | _, rank = torch.sort(highest_prob, descending=True, dim=0) 35 | rank = Variable(rank) 36 | feature_rank = self.rank_embeddings(rank) 37 | feature_obj = self.transform_visual(feature_obj) 38 | feature_visual = feature_rank + feature_obj 39 | feature_visual = self.relation_transform(feature_visual, rois_obj) 40 | reranked_score = self.transform_rescore(F.relu(feature_visual, inplace=True)) 41 | reranked_score = torch.sigmoid(reranked_score) 42 | 43 | return reranked_score 44 | 45 | 46 | 47 | 48 | 49 | if __name__ == '__main__': 50 | opts = { 51 | 'dim_mm': 6, 52 | 'dim_ho': 4, 53 | } 54 | nms_module = Dumplicate_Removal(opts) 55 | visual_features = Variable(torch.normal(torch.zeros(10, 4))) 56 | rois = Variable(torch.cat((torch.zeros(10, 1), (torch.rand(10, 4) + torch.FloatTensor([[0, 1, 2, 3], ])) * 100 ), dim=1)) 57 | duplicate_labels = Variable(torch.ones(5, 1)).type(torch.LongTensor) 58 | cls_prob_object = Variable(torch.rand(10, 20)) 59 | 60 | mask = torch.zeros_like(cls_prob_object[:duplicate_labels.size(0)]).type(torch.ByteTensor) 61 | for i in range(duplicate_labels.size(0)): 62 | mask[i, duplicate_labels.data[i][0]] = 1 63 | selected_prob = torch.masked_select(cls_prob_object[:duplicate_labels.size(0)], mask) 64 | reranked_score = nms_module(visual_features[:duplicate_labels.size(0)], selected_prob, rois[:duplicate_labels.size(0)]) 65 | selected_prob = selected_prob.unsqueeze(1) * reranked_score 66 | 67 | loss = F.binary_cross_entropy(selected_prob, duplicate_labels.float()) 68 | loss.backward() 69 | print(nms_module.transform_rescore.weight.grad) 70 | -------------------------------------------------------------------------------- /models/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .factor_updating_structure_v3 import factor_updating_structure as factor_updating_structure_v3 2 | from .factor_updating_structure_v3r import factor_updating_structure as factor_updating_structure_v3r 3 | from .NMS import Dumplicate_Removal 4 | -------------------------------------------------------------------------------- /models/modules/dataParallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn import DataParallel as DataParallel_raw 5 | import numpy as np 6 | 7 | 8 | class DataParallel(DataParallel_raw): 9 | """ 10 | we do the scatter outside of the DataPrallel. 11 | input: Scattered Inputs without kwargs. 
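    Each keyword argument must already be a per-device list: forward()
    routes kwargs[key][i] to self.device_ids[i] ('im_data' and
    'rpn_anchor_targets*' tensors are moved with .to(device); every other
    value is wrapped in a singleton list).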
12 | """ 13 | 14 | def __init__(self, module): 15 | # Disable all the other parameters 16 | super(DataParallel, self).__init__(module) 17 | 18 | 19 | def forward(self, *inputs, **kwargs): 20 | assert len(inputs) == 0, "Only support arguments like [variable_name = xxx]" 21 | new_inputs = [{} for _ in self.device_ids] 22 | for key in kwargs: 23 | if key == 'im_data': 24 | for i, device in enumerate(self.device_ids): 25 | new_inputs[i][key] = kwargs[key][i].to(device) 26 | elif key.startswith("rpn_anchor_targets"): 27 | for i, device in enumerate(self.device_ids): 28 | new_inputs[i][key] = [item.to(device) for item in kwargs[key][i]] 29 | 30 | else: 31 | assert isinstance(kwargs[key], list) 32 | for i in range(len(self.device_ids)): 33 | new_inputs[i][key] = [kwargs[key][i], ] 34 | nones = [[] for _ in self.device_ids] 35 | replicas = self.replicate(self.module, self.device_ids) 36 | outputs = self.parallel_apply(replicas, nones, new_inputs) 37 | return self.gather(outputs, self.output_device) -------------------------------------------------------------------------------- /models/modules/factor_updating_structure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from torch.nn import Parameter 7 | from lib.utils.timer import Timer 8 | import pdb 9 | 10 | 11 | VISUALIZE_RESULTS = False 12 | 13 | class Kernel_Attention_Module(nn.Module): 14 | def __init__(self, dim_source, dim_target, dim_mm): 15 | super(Kernel_Attention_Module, self).__init__() 16 | self.ws = nn.Linear(dim_source, dim_mm, bias=False) 17 | self.wt = nn.Linear(dim_target, dim_mm, bias=False) 18 | 19 | def forward(self, source_feat, target_feat, return_gate_value=False): 20 | # print '[unary_term, pair_term]', [unary_term, pair_term] 21 | gate = torch.sigmoid(torch.mean((self.ws(source_feat) * self.wt(target_feat)), 1, keepdim=True)) 22 | # print 'gate', gate 23 | output = source_feat * gate.expand(gate.size(0), source_feat.size(1)) 24 | if return_gate_value: 25 | return output, gate 26 | else: 27 | return output 28 | 29 | class Attention_Module(nn.Module): 30 | def __init__(self, dim_source, dim_target, filter_size = 128): 31 | super(Attention_Module, self).__init__() 32 | self.filter_size = filter_size 33 | if filter_size > 0: 34 | self.w = nn.Linear(dim_source+dim_target, filter_size, bias=True) 35 | 36 | def forward(self, source_feat, target_feat, return_gate_value=False): 37 | 38 | if self.filter_size > 0: 39 | gate = torch.cat([source_feat, target_feat], 1) 40 | gate = F.relu(gate) 41 | gate = torch.mean(torch.sigmoid(self.w(gate)), 1, keepdim=True) 42 | # print 'gate', gate 43 | output = source_feat * gate.expand_as(source_feat) 44 | if return_gate_value: 45 | return output, gate 46 | else: 47 | return output 48 | else: 49 | return source_feat 50 | 51 | 52 | class factor_updating_structure(nn.Module): 53 | def __init__(self, opts): 54 | super(factor_updating_structure, self).__init__() 55 | 56 | # Attention modules 57 | if opts['use_attention']: 58 | if opts['use_kernel']: 59 | self.gate_object2region = Kernel_Attention_Module(opts['dim_ho'], opts['dim_hr'], opts['dim_mm']) 60 | self.gate_region2object = Kernel_Attention_Module(opts['dim_hr'], opts['dim_ho'], opts['dim_mm']) 61 | else: 62 | self.gate_object2region = Attention_Module(opts['dim_ho'], opts['dim_hr'], opts['gate_width']) 63 | self.gate_region2object = 
Attention_Module(opts['dim_hr'], opts['dim_ho'], opts['gate_width']) 64 | else: 65 | self.gate_object2region = None 66 | self.gate_region2object = None 67 | # To transform the attentioned features 68 | self.transform_object2region = nn.Sequential( 69 | nn.ReLU(), 70 | nn.Linear(opts['dim_ho'], opts['dim_hr'], bias=opts['use_bias'])) 71 | self.transform_region2object = nn.Sequential( 72 | nn.ReLU(), 73 | nn.Linear(opts['dim_hr'], opts['dim_ho'], bias=opts['use_bias'])) 74 | 75 | self.use_average = opts['mps_use_average'] 76 | 77 | 78 | 79 | def forward(self, feature_obj, feature_region, mat_object, mat_region): 80 | 81 | feature_region2object = self.prepare_message(feature_obj, feature_region, mat_object, self.gate_region2object) 82 | # Transform the features 83 | out_feature_object = feature_obj + self.transform_region2object(feature_region2object) 84 | # gather the attentioned features 85 | feature_object2region = self.prepare_message(feature_region, feature_obj, mat_region, self.gate_object2region) 86 | # Transform the features 87 | out_feature_region = feature_region + self.transform_object2region(feature_object2region) 88 | 89 | return out_feature_object, out_feature_region 90 | 91 | def prepare_message(self, target_features, source_features, select_mat, attend_module=None): 92 | feature_data = [] 93 | transfer_list = np.where(select_mat > 0) 94 | 95 | for f_id in range(target_features.size(0)): 96 | if len(np.where(select_mat[f_id, :] > 0)[0]) > 0: 97 | source_indices = transfer_list[1][transfer_list[0] == f_id] 98 | source_indices = Variable(torch.from_numpy(source_indices).type(torch.LongTensor)).cuda() 99 | features = torch.index_select(source_features, 0, source_indices) 100 | if attend_module is not None: 101 | target_f = target_features[f_id].view(1, -1).expand(features.size(0), -1) 102 | features = attend_module(features, target_f) 103 | if self.use_average: 104 | features = features.mean(0) 105 | else: 106 | features = features.sum(0) 107 | feature_data.append(features) 108 | else: 109 | temp = Variable(torch.zeros(target_features.size()[1:]), requires_grad=False).type(torch.FloatTensor).cuda() 110 | feature_data.append(temp) 111 | return torch.stack(feature_data, 0) 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /models/modules/factor_updating_structure_v3r.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from torch.nn import Parameter 7 | from lib.utils.timer import Timer 8 | import pdb 9 | from lib.network import GroupDropout 10 | 11 | from .factor_updating_structure_v3 import factor_updating_structure as FS_v3 12 | from .relation_module import Relation_Module 13 | 14 | 15 | VISUALIZE_RESULTS = False 16 | TIME_IT = False 17 | 18 | 19 | class factor_updating_structure(FS_v3): 20 | def __init__(self, opts): 21 | super(factor_updating_structure, self).__init__(opts) 22 | 23 | kernel_size = opts.get('kernel_size', 1) 24 | assert kernel_size % 2, 'Odd kernel size required.' 
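# An odd kernel keeps the window centred, so SAME padding is simply
# (kernel_size - 1) // 2 and the spatial size of the feature map is preserved.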
25 | padding = (kernel_size - 1) // 2 26 | # To transform the attentioned features 27 | self.transform_object2object = Relation_Module(opts['dim_ho'], opts['dim_ho'], opts['dim_ho'] // 2, 28 | geometry_trans=self.opts.get('geometry', 'Geometry_Transform_v2')) 29 | 30 | 31 | 32 | def forward(self, feature_obj, feature_region, mat_object, mat_region, object_rois, region_rois): 33 | 34 | self.timer_r2o.tic() 35 | feature_region2object = self.region_to_object(feature_obj, feature_region, mat_object) 36 | # Transform the features 37 | out_feature_object = feature_obj + self.transform_region2object(feature_region2object) \ 38 | + self.transform_object2object(feature_obj, object_rois) 39 | self.timer_r2o.toc() 40 | 41 | 42 | self.timer_o2r.tic() 43 | # gather the attentioned features 44 | feature_object2region = self.object_to_region(feature_region, feature_obj, mat_region) 45 | # Transform the features 46 | out_feature_region = feature_region + self.transform_object2region(feature_object2region) 47 | self.timer_o2r.toc() 48 | 49 | if TIME_IT: 50 | print('[MPS Timing:]') 51 | print('\t[R2O]: {0:.3f} s'.format(self.timer_r2o.average_time)) 52 | print('\t[O2R]: {0:.3f} s'.format(self.timer_o2r.average_time)) 53 | 54 | return out_feature_object, out_feature_region 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /models/modules/geometry_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | import pdb 7 | 8 | 9 | def geometry_transform(rois_keys, rois_queries=None): 10 | if rois_queries is None: 11 | rois_queries = rois_keys 12 | if isinstance(rois_keys, Variable): # transform to Tensor 13 | rois_keys = rois_keys.data 14 | rois_queries = rois_queries.data 15 | if rois_keys.size(1) == 5: # Remove the ID 16 | rois_keys = rois_keys[:, 1:] 17 | rois_queries = rois_queries[:, 1:] 18 | 19 | assert rois_keys.size(1) == 4 20 | # keys 21 | w_keys = (rois_keys[:, 2] - rois_keys[:, 0] + 1e-10).unsqueeze(1) 22 | h_keys = (rois_keys[:, 3] - rois_keys[:, 1] + 1e-10).unsqueeze(1) 23 | x_keys = ((rois_keys[:, 2] + rois_keys[:, 0]) / 2).unsqueeze(1) 24 | y_keys = ((rois_keys[:, 3] + rois_keys[:, 1]) / 2).unsqueeze(1) 25 | # queries 26 | w_queries = (rois_queries[:, 2] - rois_queries[:, 0] + 1e-10).unsqueeze(0) 27 | h_queries = (rois_queries[:, 3] - rois_queries[:, 1] + 1e-10).unsqueeze(0) 28 | x_queries = ((rois_queries[:, 2] + rois_queries[:, 0]) / 2).unsqueeze(0) 29 | y_queries = ((rois_queries[:, 3] + rois_queries[:, 1]) / 2).unsqueeze(0) 30 | 31 | # slightly different from [Relation Networks for Object Detection] 32 | geometry_feature = torch.stack( 33 | [ (x_keys - x_queries).abs() / w_keys, 34 | (y_keys - y_queries).abs() / h_keys, 35 | w_keys / w_queries, 36 | h_keys / h_queries,], dim=2) 37 | 38 | geometry_log = geometry_feature.log() 39 | geometry_log[geometry_feature == 0] = 0 40 | 41 | return geometry_log 42 | 43 | def positional_encoding(position_mat, dim_output, wave_length=1000): 44 | '''Sinusoidal Positional_Encoding. 
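    Each of the 4 relative-geometry channels is expanded over
    dim_output / 8 wavelengths and given both a sin and a cos component,
    so the concatenated embedding has exactly dim_output features.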
45 | Returns: 46 | Sinusoidal Positional embedding of different objects 47 | ''' 48 | # position_mat: [num_keys, num_queries, 4] 49 | assert dim_output % 8 == 0, "[dim_output] is expected to be an integral multiple of 8" 50 | position_enc = torch.Tensor([np.power(wave_length, 8.*i/dim_output) for i in range(dim_output / 8)]).view(1, 1, 1, -1).type_as(position_mat) 51 | # position_enc: [num_keys, num_queries, 4, dim_output / 8] 52 | position_enc = position_mat.unsqueeze(-1) * 100 / position_enc 53 | # Second part, apply the cosine to even columns and sin to odds. 54 | # position_enc: [num_keys, num_queries, 4, dim_output / 4] 55 | position_enc = torch.cat([torch.sin(position_enc), torch.cos(position_enc)], dim=3) 56 | position_enc = position_enc.view(position_enc.size(0), position_enc.size(1), -1) 57 | 58 | return position_enc 59 | 60 | class Geometry_Transform_v1(nn.Module): 61 | def __init__(self, dim_mm): 62 | super(Geometry_Transform_v1, self).__init__() 63 | self.transform_geometry = nn.Sequential( 64 | nn.Linear(4, dim_mm), 65 | nn.ReLU(), 66 | nn.Linear(dim_mm, 1), 67 | nn.ReLU(),) 68 | 69 | def forward(self, rois_keys, rois_queries=None): 70 | position_mat = Variable(geometry_transform(rois_keys, rois_queries), requires_grad=True) 71 | geometry_weight = self.transform_geometry(position_mat).squeeze(2) 72 | return geometry_weight 73 | 74 | 75 | class Geometry_Transform_v2(nn.Module): 76 | ''' 77 | expand the geometry features 78 | ''' 79 | def __init__(self, dim_mm): 80 | super(Geometry_Transform_v2, self).__init__() 81 | self.transform_geometry = nn.Sequential( 82 | nn.Linear(dim_mm, 1), 83 | nn.ReLU(),) 84 | self.dim_mm = dim_mm 85 | 86 | def forward(self, rois_keys, rois_queries=None): 87 | position_mat = geometry_transform(rois_keys, rois_queries) 88 | geometry_weight = positional_encoding(position_mat, self.dim_mm) 89 | geometry_weight = Variable(geometry_weight, requires_grad=True) 90 | geometry_weight = self.transform_geometry(geometry_weight).squeeze(2) 91 | return geometry_weight -------------------------------------------------------------------------------- /models/modules/phrase_inference_structure.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | from torch.nn import Parameter 7 | from lib.utils.timer import Timer 8 | import pdb 9 | from lib.network import GroupDropout 10 | from copy import deepcopy 11 | 12 | class Abstract_Phrase_Inference_Structure(nn.Module): 13 | def __init__(self, opts): 14 | super(Abstract_Phrase_Inference_Structure, self).__init__() 15 | self.opts = opts 16 | 17 | def forward(self, feature_obj, feature_region, mat_predicate): 18 | 19 | raise NotImplementedError 20 | 21 | 22 | class Basic_Phrase_Inference_Structure(Abstract_Phrase_Inference_Structure): 23 | def __init__(self, opts): 24 | super(Basic_Phrase_Inference_Structure, self).__init__(opts) 25 | self.opts = opts 26 | #self.w_object = Parameter() 27 | 28 | # To transform the attentioned features 29 | self.transform_subject = nn.Sequential( 30 | nn.ReLU(), 31 | #nn.BatchNorm1d(opts['dim_ho'], eps=0.001, momentum=0, affine=True), 32 | nn.Linear(opts['dim_ho'], opts['dim_mm'], bias=opts['use_bias'])) 33 | self.transform_object = nn.Sequential( 34 | nn.ReLU(), 35 | #nn.BatchNorm1d(opts['dim_ho'], eps=0.001, momentum=0, affine=True), 36 | nn.Linear(opts['dim_ho'], opts['dim_mm'], bias=opts['use_bias'])) 37 | 
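# transform_region stays None in this base class; concrete subclasses
# (e.g. PI_v5 below) replace it with a 1x1 conv so the pooled region
# feature map can be fused with the subject/object feature vectors.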
self.transform_region = None 38 | 39 | def _fusion(self, transformed_feat_sub, transformed_feat_obj, transformed_feat_region): 40 | raise NotImplementedError 41 | 42 | def _prepare(self, feature_obj, feature_region, indices_sub, indices_obj, indices_region): 43 | raise NotImplementedError 44 | 45 | def forward(self, feature_obj, feature_region, mat_predicate): 46 | indices_sub = Variable(torch.from_numpy(mat_predicate[:, 0]).type(torch.LongTensor)).cuda().detach() 47 | indices_obj = Variable(torch.from_numpy(mat_predicate[:, 1]).type(torch.LongTensor)).cuda().detach() 48 | indices_region = Variable(torch.from_numpy(mat_predicate[:, 2]).type(torch.LongTensor)).cuda().detach() 49 | transformed_feat_sub, transformed_feat_obj, transformed_feat_region = self._prepare( 50 | feature_obj, feature_region, indices_sub, indices_obj, indices_region) 51 | # y = x_[p] + W_[s,p] * x_[s] + W_[o,p] * x_[o] 52 | out_feature_phrase = self._fusion(transformed_feat_sub, transformed_feat_obj, transformed_feat_region) 53 | return out_feature_phrase 54 | 55 | 56 | class PI_v5(Basic_Phrase_Inference_Structure): 57 | ''' 58 | sub/obj feature vector --> feature map --> merge with region 59 | --> Full connection for inference 60 | ''' 61 | def __init__(self, opts): 62 | super(PI_v5, self).__init__(opts) 63 | self.transform_region = nn.Sequential( 64 | nn.ReLU(), 65 | #nn.BatchNorm2d(opts['dim_hr'], eps=0.001, momentum=0, affine=True), 66 | nn.Conv2d(opts['dim_hr'], opts['dim_mm'], kernel_size=1, bias=opts['use_bias']), 67 | GroupDropout(p=opts['dropout'], inplace=True),) 68 | if opts.get('bottleneck', False): 69 | print('Bottleneck enabled.') 70 | self.predicate_feat_pre = nn.Sequential( 71 | nn.ReLU(), 72 | nn.Conv2d(opts['dim_mm'], opts['dim_mm'] // 2, kernel_size=1, bias=opts['use_bias']), 73 | GroupDropout(p=opts['dropout'], inplace=True), 74 | nn.ReLU(),) 75 | #nn.BatchNorm2d(opts['dim_mm'], eps=0.001, momentum=0, affine=True),) 76 | self.predicate_feat_fc = nn.Sequential( 77 | nn.Linear((opts['dim_mm'] // 2)* opts['pool_size'] * opts['pool_size'] , 78 | opts['dim_hp'], bias=opts['use_bias']), 79 | GroupDropout(p=opts['dropout'], inplace=True),) 80 | else: 81 | print('Bottleneck disabled.') 82 | self.predicate_feat_pre = nn.Sequential( 83 | nn.ReLU(),) 84 | self.predicate_feat_fc = nn.Sequential( 85 | nn.Linear(opts['dim_mm'] * opts['pool_size'] * opts['pool_size'] , 86 | opts['dim_hp'], bias=opts['use_bias']), 87 | GroupDropout(p=opts['dropout'], inplace=True),) 88 | 89 | 90 | 91 | 92 | 93 | def _prepare(self, feature_obj, feature_region, indices_sub, indices_obj, indices_region): 94 | transformed_feat_sub = self.transform_subject(feature_obj) 95 | transformed_feat_sub = torch.index_select(transformed_feat_sub, 0, indices_sub) 96 | transformed_feat_obj = self.transform_object(feature_obj) 97 | transformed_feat_obj = torch.index_select(transformed_feat_obj, 0, indices_obj) 98 | transformed_feat_region = self.transform_region(feature_region) 99 | transformed_feat_region = torch.index_select(transformed_feat_region, 0, indices_region) 100 | return transformed_feat_sub, transformed_feat_obj, transformed_feat_region 101 | 102 | # @staticmethod 103 | # def _attention_merge(reference, query): 104 | # B, C, H, W = reference.size() 105 | # similarity = torch.sum(query * reference, dim=1, keepdim=True) 106 | # prob = F.sigmoid(similarity) # use sigmoid to retain scale of feature 107 | # weighted_feature = query * prob 108 | # return weighted_feature 109 | 110 | 111 | def _fusion(self, transformed_feat_sub, 
transformed_feat_obj, transformed_feat_region): 112 | batch_size = transformed_feat_sub.size(0) 113 | transformed_feat_sub = transformed_feat_sub.view(batch_size, -1, 1, 1) 114 | transformed_feat_obj = transformed_feat_obj.view(batch_size, -1, 1, 1) 115 | op = self.opts.get('op', 'Sum') 116 | if op == 'Sum': 117 | output_feature = transformed_feat_region + transformed_feat_sub + transformed_feat_obj 118 | elif op == 'Prod': 119 | output_feature = transformed_feat_region * transformed_feat_sub * transformed_feat_obj 120 | elif op == 'Sum_Prod': 121 | output_feature = transformed_feat_region * (transformed_feat_sub + transformed_feat_obj) 122 | output_feature = self.predicate_feat_pre(output_feature).view(batch_size, -1) 123 | output_feature = self.predicate_feat_fc(output_feature) 124 | return output_feature 125 | -------------------------------------------------------------------------------- /models/modules/relation_module.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | import pdb 7 | 8 | import geometry_transform 9 | 10 | 11 | class Relation_Module(nn.Module): 12 | def __init__(self, dim_v, dim_o, dim_mm, geometry_trans='Geometry_Transform_v2'): 13 | super(Relation_Module, self).__init__() 14 | self.dim_key = dim_mm 15 | self.transform_key = nn.Linear(dim_v, dim_mm) 16 | self.transform_query = nn.Linear(dim_v, dim_mm) 17 | self.transform_visual = nn.Linear(dim_v, dim_o) 18 | self.transform_geometry = getattr(geometry_transform, geometry_trans)(dim_mm) 19 | 20 | 21 | def forward(self, feature_visual, rois): 22 | ''' 23 | The Relation Module operates on pre-activation features (ReLU is applied internally) 24 | ''' 25 | feature_visual = nn.functional.relu(feature_visual) 26 | feature_key = self.transform_key(feature_visual) 27 | feature_query = self.transform_query(feature_visual) 28 | feature_visual = self.transform_visual(feature_visual) 29 | 30 | visual_weight = (feature_query.unsqueeze(0) * feature_key.unsqueeze(1)).sum(dim=2, keepdim=False) / np.sqrt(self.dim_key) 31 | geometry_weight = self.transform_geometry(rois) 32 | 33 | attention = visual_weight.exp() * geometry_weight 34 | for i in range(attention.size(0)): 35 | attention[i, i] = 0 36 | attention = attention / (attention.sum(dim=1, keepdim=True) + 1e-10) 37 | feature_out = torch.sum(attention.unsqueeze(2) * feature_visual.unsqueeze(0), dim=1, keepdim=False) 38 | 39 | return feature_out 40 | 41 | if __name__ == '__main__': 42 | relation_module = Relation_Module(4, 5, 8) 43 | visual_features = Variable(torch.normal(torch.zeros(10, 4))) 44 | rois = Variable(torch.cat((torch.zeros(10, 1), (torch.rand(10, 4) + torch.FloatTensor([[0, 1, 2, 3], ])) * 100 ), dim=1)) 45 | feature_out = relation_module(visual_features, rois) 46 | 47 | print(feature_out) 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/models/utils/__init__.py -------------------------------------------------------------------------------- /models/utils/vgg16.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from
torch.autograd import Variable 6 | 7 | from utils.blob import im_list_to_blob 8 | from network import Conv2d 9 | import network 10 | 11 | 12 | class VGG16(nn.Module): 13 | def __init__(self, bn=False): 14 | super(VGG16, self).__init__() 15 | 16 | self.conv1 = nn.Sequential(Conv2d(3, 64, 3, same_padding=True, bn=bn), 17 | Conv2d(64, 64, 3, same_padding=True, bn=bn), 18 | nn.MaxPool2d(2)) 19 | self.conv2 = nn.Sequential(Conv2d(64, 128, 3, same_padding=True, bn=bn), 20 | Conv2d(128, 128, 3, same_padding=True, bn=bn), 21 | nn.MaxPool2d(2)) 22 | network.set_trainable(self.conv1, requires_grad=False) 23 | network.set_trainable(self.conv2, requires_grad=False) 24 | 25 | self.conv3 = nn.Sequential(Conv2d(128, 256, 3, same_padding=True, bn=bn), 26 | Conv2d(256, 256, 3, same_padding=True, bn=bn), 27 | Conv2d(256, 256, 3, same_padding=True, bn=bn), 28 | nn.MaxPool2d(2)) 29 | self.conv4 = nn.Sequential(Conv2d(256, 512, 3, same_padding=True, bn=bn), 30 | Conv2d(512, 512, 3, same_padding=True, bn=bn), 31 | Conv2d(512, 512, 3, same_padding=True, bn=bn), 32 | nn.MaxPool2d(2)) 33 | self.conv5 = nn.Sequential(Conv2d(512, 512, 3, same_padding=True, bn=bn), 34 | Conv2d(512, 512, 3, same_padding=True, bn=bn), 35 | Conv2d(512, 512, 3, same_padding=True, bn=bn)) 36 | 37 | def forward(self, im_data): 38 | # im_data, im_scales = get_blobs(image) 39 | # im_info = np.array( 40 | # [[im_data.shape[1], im_data.shape[2], im_scales[0]]], 41 | # dtype=np.float32) 42 | # data = Variable(torch.from_numpy(im_data)).cuda() 43 | # x = data.permute(0, 3, 1, 2) 44 | 45 | x = self.conv1(im_data) 46 | x = self.conv2(x) 47 | x = self.conv3(x) 48 | x = self.conv4(x) 49 | x = self.conv5(x) 50 | return x 51 | 52 | def load_from_npz(self, params): 53 | # params = np.load(npz_file) 54 | own_dict = self.state_dict() 55 | for name, val in own_dict.items(): 56 | i, j = int(name[4]), int(name[6]) + 1 57 | ptype = 'weights' if name[-1] == 't' else 'biases' 58 | key = 'conv{}_{}/{}:0'.format(i, j, ptype) 59 | param = torch.from_numpy(params[key]) 60 | if ptype == 'weights': 61 | param = param.permute(3, 2, 0, 1) 62 | val.copy_(param) 63 | 64 | def load_from_npy_file(self, fname): 65 | own_dict = self.state_dict() 66 | params = np.load(fname).item() 67 | for name, val in own_dict.items(): 68 | # # print name 69 | # # print val.size() 70 | # # print param.size() 71 | # if name.find('bn.') >= 0: 72 | # continue 73 | 74 | i, j = int(name[4]), int(name[6]) + 1 75 | ptype = 'weights' if name[-1] == 't' else 'biases' 76 | key = 'conv{}_{}'.format(i, j) 77 | param = torch.from_numpy(params[key][ptype]) 78 | 79 | if ptype == 'weights': 80 | param = param.permute(3, 2, 0, 1) 81 | 82 | val.copy_(param) 83 | 84 | 85 | if __name__ == '__main__': 86 | vgg = VGG16() 87 | vgg.load_from_npy_file('/media/longc/Data/models/VGG_imagenet.npy') -------------------------------------------------------------------------------- /options/RPN/RPN_FN.yaml: -------------------------------------------------------------------------------- 1 | # Training Settings 2 | 3 | kmeans_anchors: True 4 | anchor_dir: data/visual_genome # set 5 | 6 | object: 7 | train: 8 | num_box_pre_NMS: 12000 9 | num_box_post_NMS: 2000 10 | nms_thres: 0.7 11 | min_size: 16 12 | dropout_box_runoff_image: False 13 | allowed_border: 128 14 | clobber_positives: False 15 | negative_overlap: 0.35 16 | positive_overlap: 0.5 17 | dontcare_area_intersection_hi: 0.5 18 | fg_fraction: 0.5 19 | batch_size: 512 20 | BBOX_INSIDE_WEIGHTS: [1.0, 1.0, 1.0, 1.0] 21 | POSITIVE_WEIGHT: -1.0 22 | test: 23 | 
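    # The test-time numbers below follow the standard Faster R-CNN RPN filtering
    # pipeline (a sketch of the assumed behaviour): proposals are ranked by
    # objectness score, the top num_box_pre_NMS are kept, NMS is applied at
    # nms_thres, and only the top num_box_post_NMS survivors are forwarded;
    # min_size drops proposals smaller than 16 px on a side at the input scale.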
num_box_pre_NMS: 12000 24 | num_box_post_NMS: 300 25 | nms_thres: 0.6 26 | min_size: 16 27 | dropout_box_runoff_image: False 28 | allowed_border: 128 29 | -------------------------------------------------------------------------------- /options/RPN/RPN_FN_VRD.yaml: -------------------------------------------------------------------------------- 1 | # Training Settings 2 | 3 | kmeans_anchors: True 4 | anchor_dir: data/VRD # set 5 | 6 | object: 7 | train: 8 | num_box_pre_NMS: 12000 9 | num_box_post_NMS: 2000 10 | nms_thres: 0.7 11 | min_size: 16 12 | dropout_box_runoff_image: False 13 | allowed_border: 128 14 | clobber_positives: False 15 | negative_overlap: 0.3 16 | positive_overlap: 0.7 17 | dontcare_area_intersection_hi: 0.5 18 | fg_fraction: 0.5 19 | batch_size: 512 20 | BBOX_INSIDE_WEIGHTS: [1.0, 1.0, 1.0, 1.0] 21 | POSITIVE_WEIGHT: -1.0 22 | test: 23 | num_box_pre_NMS: 12000 24 | num_box_post_NMS: 300 25 | nms_thres: 0.6 26 | min_size: 16 27 | dropout_box_runoff_image: False 28 | allowed_border: 128 29 | 30 | 31 | -------------------------------------------------------------------------------- /options/RPN/RPN_FN_svg.yaml: -------------------------------------------------------------------------------- 1 | # Training Settings 2 | 3 | kmeans_anchors: True 4 | anchor_dir: data/svg # set 5 | 6 | object: 7 | train: 8 | num_box_pre_NMS: 12000 9 | num_box_post_NMS: 2000 10 | nms_thres: 0.7 11 | min_size: 16 12 | dropout_box_runoff_image: False 13 | allowed_border: 128 14 | clobber_positives: False 15 | negative_overlap: 0.3 16 | positive_overlap: 0.7 17 | dontcare_area_intersection_hi: 0.5 18 | fg_fraction: 0.5 19 | batch_size: 512 20 | BBOX_INSIDE_WEIGHTS: [1.0, 1.0, 1.0, 1.0] 21 | POSITIVE_WEIGHT: -1.0 22 | test: 23 | num_box_pre_NMS: 12000 24 | num_box_post_NMS: 300 25 | nms_thres: 0.6 26 | min_size: 16 27 | dropout_box_runoff_image: False 28 | allowed_border: 128 29 | 30 | -------------------------------------------------------------------------------- /options/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yikang-li/FactorizableNet/1e3e1813630d8d647ab0f01bce7782198272e2e8/options/__init__.py -------------------------------------------------------------------------------- /options/config_FN.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Fast R-CNN config system. 9 | 10 | This file specifies default config options for Fast R-CNN. You should not 11 | change values in this file. Instead, you should write a config file (in yaml) 12 | and use cfg_from_file(yaml_file) to load it and override the default options. 13 | 14 | Most tools in $ROOT/tools take a --cfg option to specify an override file. 
15 | - See tools/{train,test}_net.py for example code that uses cfg_from_file() 16 | - See experiments/cfgs/*.yml for example YAML config override files 17 | """ 18 | 19 | import os 20 | import os.path as osp 21 | import numpy as np 22 | from time import strftime, localtime 23 | from easydict import EasyDict as edict 24 | 25 | __C = edict() 26 | # Consumers can get config by: 27 | # from fast_rcnn_config import cfg 28 | cfg = __C 29 | 30 | ## Training settings 31 | __C.TRAIN = edict() 32 | __C.TRAIN.BATCH_SIZE = 256 33 | __C.TRAIN.BATCH_SIZE_RELATIONSHIP = 512 34 | __C.TRAIN.BATCH_SIZE_CAPTION = 128 35 | 36 | __C.TRAIN.FG_FRACTION = 0.5 # [pending] higher fraction may be different from the inference case (since we introduce message passing) 37 | __C.TRAIN.FG_FRACTION_RELATIONSHIP = 0.5 38 | __C.TRAIN.FG_FRACTION_CAPTION = 0.5 39 | 40 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 41 | __C.TRAIN.FG_THRESH = 0.5 # change to 0.5 from [Feb 2], previously 0.6 42 | __C.TRAIN.FG_THRESH_REGION = 0.5 43 | 44 | # used for assigning weights for each coords (x1, y1, w, h) 45 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 46 | 47 | # Overlap threshold for a ROI to be considered background (class = 0 if 48 | # overlap in [LO, HI)) 49 | __C.TRAIN.BG_THRESH_HI = 0.4 50 | __C.TRAIN.BG_THRESH_LO = 0.0 # in Faster R-CNN by Shaoqing Ren, it is set to 0.1 51 | __C.TRAIN.BG_THRESH_HI_REGION = 0.4 52 | __C.TRAIN.BG_THRESH_LO_REGION = 0.0 53 | 54 | 55 | 56 | # Config for ROI-merging 57 | __C.TRAIN.REGION_NMS_THRES =0.5 58 | __C.TRAIN.CAPTION_COVERAGE_THRES =0.8 59 | 60 | 61 | ## Testing settings 62 | __C.TEST = edict() 63 | __C.TEST.BBOX_NUM = 200 64 | __C.TEST.REGION_NUM = 128 65 | 66 | # Config for ROI-merging 67 | __C.TEST.CAPTION_COVERAGE_THRES =0.8 68 | __C.TEST.REGION_NMS_THRES = 0.5 69 | 70 | 71 | 72 | def get_output_dir(imdb, weights_filename): 73 | """Return the directory where experimental artifacts are placed. 74 | If the directory does not exist, it is created. 75 | 76 | A canonical path is built using the name from an imdb and a network 77 | (if not None). 78 | """ 79 | outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) 80 | if weights_filename is not None: 81 | outdir = osp.join(outdir, weights_filename) 82 | if not os.path.exists(outdir): 83 | os.makedirs(outdir) 84 | return outdir 85 | 86 | 87 | def get_log_dir(imdb): 88 | """Return the directory where experimental artifacts are placed. 89 | If the directory does not exist, it is created. 90 | A canonical path is built using the name from an imdb and a network 91 | (if not None). 92 | """ 93 | log_dir = osp.abspath( \ 94 | osp.join(__C.ROOT_DIR, 'logs', __C.LOG_DIR, imdb.name, strftime("%Y-%m-%d-%H-%M-%S", localtime()))) 95 | if not os.path.exists(log_dir): 96 | os.makedirs(log_dir) 97 | return log_dir 98 | 99 | 100 | def _merge_a_into_b(a, b): 101 | """Merge config dictionary a into config dictionary b, clobbering the 102 | options in b whenever they are also specified in a. 103 | """ 104 | if type(a) is not edict: 105 | return 106 | 107 | for k, v in a.iteritems(): 108 | # a must specify keys that are in b 109 | if not b.has_key(k): 110 | raise KeyError('{} is not a valid config key'.format(k)) 111 | 112 | # the types must match, too 113 | old_type = type(b[k]) 114 | if old_type is not type(v): 115 | if isinstance(b[k], np.ndarray): 116 | v = np.array(v, dtype=b[k].dtype) 117 | else: 118 | raise ValueError(('Type mismatch ({} vs. 
{}) ' 119 | 'for config key: {}').format(type(b[k]), 120 | type(v), k)) 121 | 122 | # recursively merge dicts 123 | if type(v) is edict: 124 | try: 125 | _merge_a_into_b(a[k], b[k]) 126 | except: 127 | print('Error under config key: {}'.format(k)) 128 | raise 129 | else: 130 | b[k] = v 131 | 132 | 133 | def cfg_from_file(filename): 134 | """Load a config file and merge it into the default options.""" 135 | import yaml 136 | with open(filename, 'r') as f: 137 | yaml_cfg = edict(yaml.load(f)) 138 | 139 | _merge_a_into_b(yaml_cfg, __C) 140 | 141 | 142 | def cfg_from_list(cfg_list): 143 | """Set config keys via list (e.g., from command line).""" 144 | from ast import literal_eval 145 | assert len(cfg_list) % 2 == 0 146 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 147 | key_list = k.split('.') 148 | d = __C 149 | for subkey in key_list[:-1]: 150 | assert d.has_key(subkey) 151 | d = d[subkey] 152 | subkey = key_list[-1] 153 | assert d.has_key(subkey) 154 | try: 155 | value = literal_eval(v) 156 | except: 157 | # handle the case when v is a string literal 158 | value = v 159 | assert type(value) == type(d[subkey]), \ 160 | 'type {} does not match original type {}'.format( 161 | type(value), type(d[subkey])) 162 | d[subkey] = value 163 | -------------------------------------------------------------------------------- /options/data.yaml: -------------------------------------------------------------------------------- 1 | dir: data/visual_genome 2 | dataset_version: top_150_50_new 3 | train: 4 | SCALES: [500, 550, 600, 600, 650, 700] # more probability to 600 5 | MAX_SIZE: 1000 6 | test: 7 | SCALES: [600,] 8 | MAX_SIZE: 1000 9 | -------------------------------------------------------------------------------- /options/data_VRD.yaml: -------------------------------------------------------------------------------- 1 | dir: data/VRD 2 | train: 3 | SCALES: [450, 500, 550, 600, 600, 650, 700, 750]# more probability to 600 4 | MAX_SIZE: 1000 5 | test: 6 | SCALES: [600,] 7 | MAX_SIZE: 1000 8 | -------------------------------------------------------------------------------- /options/data_sVG.yaml: -------------------------------------------------------------------------------- 1 | dir: data/svg 2 | train: 3 | SCALES: [450, 500, 550, 600, 600, 650, 700, 750] # more probability to 600 4 | MAX_SIZE: 1000 5 | test: 6 | SCALES: [600,] 7 | MAX_SIZE: 1000 8 | -------------------------------------------------------------------------------- /options/models/VG-DR-Net.yaml: -------------------------------------------------------------------------------- 1 | logs: 2 | dir_logs: output/ 3 | model_name: FN_VG-DR-Net 4 | data: 5 | dataset: sVG 6 | opts: options/data_sVG.yaml 7 | use_region: false 8 | batch_size: 1 9 | model: 10 | arch: FN_v4s 11 | rpn_opts: options/RPN/RPN_FN_svg.yaml 12 | # feature vector size 13 | dim_hr: 512 14 | dim_ho: 512 15 | dim_hp: 512 16 | pool_size: 5 17 | op: Sum 18 | # for both kernel-based attention and Mutan 19 | dim_mm: 256 20 | activation: relu 21 | # Iters for message passing, 0 means disable that 22 | MPS_iter: 1 23 | geometry: Geometry_Transform_v1 24 | # settings for attention gate 25 | use_bias: True 26 | dropout: 0. 27 | # Settings for inference part 28 | fusion: PI_v5 # PI is short for [Predicate Inference] 29 | # loss_weight 30 | cls_obj: 1. 31 | cls_pred: 2. 
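  # cls_obj / cls_pred (above) and reg_obj (below) presumably scale the terms of the
  # total training loss, roughly L = cls_obj*L_obj_cls + cls_pred*L_pred_cls + reg_obj*L_box_reg
  # (combined in models/HDN_v2/criteria.py); predicate classification gets the largest weight.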
32 | reg_obj: 0.5 33 | 34 | optim: 35 | lr: 0.01 36 | lr_decay: 0.1 37 | lr_decay_epoch: 3 38 | epochs: 15 39 | optimizer: 0 # [0: SGD | 1: Adam | 2: Adagrad] 40 | nesterov: True 41 | weight_decay: 0.00001 42 | momentum: 0.9 43 | clip_gradient: True 44 | -------------------------------------------------------------------------------- /options/models/VG-MSDN.yaml: -------------------------------------------------------------------------------- 1 | logs: 2 | dir_logs: output/ 3 | model_name: FN_VG-MSDN 4 | data: 5 | dataset: visual_genome 6 | opts: options/data.yaml 7 | dataset_option: small # (small | normal | fat) 8 | batch_size: 1 9 | use_region: False 10 | model: 11 | arch: FN_v4 12 | rpn_opts: options/RPN/RPN_FN.yaml 13 | # feature vector size 14 | dim_hr: 512 15 | dim_ho: 512 16 | dim_hp: 512 17 | use_shortcut: True 18 | pool_size: 7 19 | op: Sum 20 | # for both kernel-based attention and Mutan 21 | dim_mm: 256 22 | activation: relu 23 | # Iters for message passing, 0 means disable that 24 | MPS_iter: 1 25 | # settings for attention gate 26 | use_bias: True 27 | dropout: 0. 28 | # Settings for inference part 29 | fusion: PI_v5 # PI is short for [Predicate Inference] 30 | bottleneck: True 31 | # loss_weight 32 | cls_obj: 1. 33 | cls_pred: 2. 34 | reg_obj: 0.5 35 | 36 | optim: 37 | lr: 0.01 38 | lr_decay: 0.1 39 | lr_decay_epoch: 2 40 | epochs: 10 41 | optimizer: 0 # [0: SGD | 1: Adam | 2: Adagrad] 42 | nesterov: True 43 | weight_decay: 0.00001 44 | momentum: 0.9 45 | clip_gradient: True 46 | 47 | -------------------------------------------------------------------------------- /options/models/VRD.yaml: -------------------------------------------------------------------------------- 1 | logs: 2 | dir_logs: output/ 3 | model_name: FN_VRD 4 | data: 5 | dataset: VRD 6 | opts: options/data_VRD.yaml 7 | use_region: false 8 | batch_size: 1 9 | model: 10 | arch: FN_v4s 11 | rpn_opts: options/RPN/RPN_FN_VRD.yaml 12 | # feature vector size 13 | dim_hr: 512 14 | dim_ho: 512 15 | dim_hp: 512 16 | pool_size: 5 17 | op: Sum 18 | # for both kernel-based attention and Mutan 19 | dim_mm: 256 20 | activation: relu 21 | # Iters for message passing, 0 means disable that 22 | MPS_iter: 1 23 | # settings for attention gate 24 | use_bias: True 25 | dropout: 0. 26 | # Settings for inference part 27 | fusion: PI_v5 # PI is short for [Predicate Inference] 28 | bottleneck: True 29 | # loss_weight 30 | cls_obj: 1. 31 | cls_pred: 2. 
32 | reg_obj: 0.5 33 | 34 | optim: 35 | lr: 0.01 36 | lr_decay: 0.1 37 | lr_decay_epoch: 3 38 | epochs: 15 39 | optimizer: 0 # [0: SGD | 1: Adam | 2: Adagrad] 40 | nesterov: True 41 | weight_decay: 0.00001 42 | momentum: 0.9 43 | clip_gradient: True 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | cffi 3 | opencv-python 4 | scipy 5 | easydict 6 | matplotlib 7 | pyyaml 8 | -------------------------------------------------------------------------------- /scripts/collect_samples.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import os 3 | import os.path as osp 4 | 5 | import argparse 6 | import pdb 7 | 8 | 9 | parser = argparse.ArgumentParser('Options') 10 | 11 | parser.add_argument('--path_files', default='output/graph_top_100/high_recall_cases.txt', type=str, 12 | help='path to a data file') 13 | parser.add_argument('--output_dir', default='output/graph_top_100/high_recall_cases', type=str) 14 | parser.add_argument('--base_dir', default='output/graph_top_100') 15 | 16 | 17 | args = parser.parse_args() 18 | 19 | def main(): 20 | global args 21 | 22 | if osp.isdir(args.output_dir): 23 | shutil.rmtree(args.output_dir) 24 | 25 | os.makedirs(args.output_dir) 26 | 27 | 28 | with open(args.path_files, 'r') as f: 29 | data = f.readlines() 30 | data = [v.strip('\n') for v in data] 31 | for f in data: 32 | try: 33 | shutil.copyfile(osp.join(args.base_dir, f+'.png'), 34 | osp.join(args.output_dir, f+'.png')) 35 | shutil.copyfile(osp.join(args.base_dir, f+'.pdf'), 36 | osp.join(args.output_dir, f+'.pdf')) 37 | except: 38 | continue 39 | print('Done.') 40 | 41 | 42 | 43 | if __name__ == '__main__': 44 | main() -------------------------------------------------------------------------------- /scripts/preprocessing_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import nltk 4 | import json 5 | import enchant 6 | from nltk.corpus import wordnet as wn 7 | 8 | current_dir = os.getcwd() 9 | os.chdir('../') 10 | 11 | 12 | ## Loading data 13 | image_data = json.load(open('image_data.json')) 14 | print('image data length: ' + str(len(image_data))) 15 | relationships_data = json.load(open('relationships.json')) 16 | print('relationship data length: ' + str(len(relationships_data))) 17 | 18 | ## The subject and object should be nouns 19 | en_dict = enchant.Dict("en_US") 20 | nouns = {x.name().split('.', 1)[0] for x in wn.all_synsets('n')} 21 | 22 | 23 | 24 | relationship_count = 0 25 | predicate_dataset = {} 26 | 27 | spelling_error_counter = 0 28 | length_matching_counter = 0 29 | # word_mismatch_counter = 0 30 | 31 | relationships = {} 32 | 33 | for d_id,rs in enumerate(relationships_data): 34 | im_relationships = {} 35 | for r_id,r in enumerate(rs['relationships']): 36 | try: 37 | normalized_predicate = '_'.join([nltk.stem.WordNetLemmatizer().lemmatize(x, 'v') for x in 38 | r['predicate'].strip('.').strip(',').encode('ascii', 'replace').split()]) 39 | normalized_subject = '_'.join([nltk.stem.WordNetLemmatizer().lemmatize(x, 'n') for x in 40 | r['subject']['name'].strip('.').strip(',').encode('ascii', 'replace').split()]) 41 | normalized_object = '_'.join([nltk.stem.WordNetLemmatizer().lemmatize(x, 'n') for x in 42 | r['object']['name'].strip('.').strip(',').encode('ascii', 'replace').split()]) 43 | 44 | if (not
en_dict.check(normalized_predicate.replace('_', '-'))) or \ 45 | (not en_dict.check(normalized_subject.replace('_', '-'))) or \ 46 | (not en_dict.check(normalized_object.replace('_', '-'))): 47 | spelling_error_counter += 1 48 | # print('Wrong spelling({}):{}-{}-{}\n'.format(spelling_error_counter, normalized_subject, normalized_predicate, normalized_object)); 49 | continue 50 | 51 | normalized_predicate = normalized_predicate.lower().replace('-', '_') 52 | normalized_subject = normalized_subject.lower().replace('-', '_') 53 | normalized_object = normalized_object.lower().replace('-', '_') 54 | 55 | if len(normalized_predicate) <= 1 or len(normalized_subject) <=1 or len(normalized_object) <=1: 56 | length_matching_counter += 1 57 | # print('length not matched:{}-{}-{}\n'.format(r['subject']['name'], r['predicate'], r['object']['name'])) 58 | continue 59 | 60 | # if normalized_object not in nouns or normalized_subject not in nouns: 61 | # # print('Subject or Object no in Nouns:{}-{}-{}\n'.format(r['subject']['name'], r['predicate'], r['object']['name'])) 62 | # word_mismatch_counter += 1 63 | # continue 64 | relationship_item = {} 65 | relationship_item['object'] = normalized_object 66 | relationship_item['subject'] = normalized_subject 67 | relationship_item['sub_box'] = \ 68 | (r['subject']['x'], r['subject']['y'], r['subject']['x'] + r['subject']['w'], \ 69 | r['subject']['y'] + r['subject']['h']) 70 | relationship_item['obj_box'] = \ 71 | (r['object']['x'], r['object']['y'], r['object']['x'] + r['object']['w'], \ 72 | r['object']['y'] + r['object']['h']) 73 | relationship_item['predicate'] = normalized_predicate 74 | if 'relationships' not in im_relationships.keys(): 75 | im_relationships['relationships'] = [relationship_item] 76 | else: 77 | im_relationships['relationships'].append(relationship_item) 78 | relationship_count += 1 79 | except Exception as inst: 80 | print inst 81 | print d_id 82 | print r_id 83 | # raw_input("Press Enter to continue...") 84 | print('({}, {}): [{}]-[{}]-[{}]\n'.format(d_id, r_id, r['subject']['name'], r['predicate'], r['object']['name'])) 85 | print('Error: [{}]-[{}]-[{}]\n'.format(r['subject']['name'].encode('ascii', 'replace'), r['predicate'].encode('ascii', 'replace'), r['object']['name'].encode('ascii', 'replace'))) 86 | # raw_input('Press Enter to continue...') 87 | pass 88 | if d_id%5000 == 0: 89 | print(str(d_id) + ' images processed, ' + str(relationship_count) + ' relationships') 90 | 91 | if 'relationships' in im_relationships.keys(): 92 | im_relationships['path'] = str(image_data[d_id]['image_id']) + '.jpg' 93 | im_relationships['width'] = image_data[d_id]['width'] 94 | im_relationships['height'] = image_data[d_id]['height'] 95 | relationships[d_id] = im_relationships 96 | 97 | del relationships_data 98 | print('Currently, we have ' + str(relationship_count) + ' relationship tuples and {} images'.format(len(relationships.keys()))) 99 | print('Spelling error: {}'.format(spelling_error_counter)) 100 | print('Length matching error: {}'.format(length_matching_counter)) 101 | # print('word mismatch error: {}'.format(word_mismatch_counter)) 102 | 103 | if __name__ == "__main__": 104 | def output_annoatation(output_path, relationships): 105 | with open(output_path, 'w') as f: 106 | output_counter = 0 107 | for item_key in relationships: 108 | im_item = relationships[item_key] 109 | f.write('# {}\n'.format(item_key)) # to output the item key 110 | f.write(im_item['path'] + '\n') # to output the image path 111 | 
f.write('{}\n{}\n'.format(im_item['height'], im_item['width'])) # output the height and width 112 | f.write('{}\n'.format(len(im_item['relationships']))) 113 | for r in im_item['relationships']: # output the relationship item [subject]-[predicate]-[object]-[sub_box]-[obj_box] 114 | f.write(r['subject'].replace(' ', '_')) 115 | f.write(' ' + r['predicate'].replace(' ', '_')) 116 | f.write(' ' + r['object'].replace(' ', '_')) 117 | for item in r['sub_box']: 118 | f.write(' ' + str(item)) 119 | for item in r['obj_box']: 120 | f.write(' ' + str(item)) 121 | f.write('\n') 122 | output_counter += 1 123 | if output_counter % 1000 == 0: 124 | print('{}/{} images processed'.format(output_counter, len(relationships.keys()))) 125 | 126 | print('Result output to: {}'.format(output_path)) 127 | 128 | 129 | os.chdir(current_dir) 130 | output_annoatation('output/filtered_relationship.txt', relationships) 131 | 132 | 133 | 134 | os.chdir(current_dir) 135 | -------------------------------------------------------------------------------- /scripts/setup_eval.sh: -------------------------------------------------------------------------------- 1 | cd eval 2 | wget http://www.cs.cmu.edu/~alavie/METEOR/download/meteor-1.5.tar.gz 3 | tar -xzvf meteor-1.5.tar.gz 4 | cp meteor-1.5/meteor-1.5.jar . 5 | mkdir data 6 | cp meteor-1.5/data/paraphrase-en.gz data/ 7 | rm -r meteor-1.5 8 | rm meteor-1.5.tar.gz 9 | cd .. 10 | -------------------------------------------------------------------------------- /scripts/voc_converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ''' 3 | This file is a tool to parse json file and generate voc format xml file. 4 | ''' 5 | import json 6 | import xml.etree.ElementTree as ET 7 | import cv2 8 | import os 9 | import os.path as osp 10 | 11 | 12 | def main(): 13 | base_data_dir = '/DATA/ykli/workspace/scene_generation/data/VRD' 14 | out_xml_path = osp.join(base_data_dir, "object_xml") 15 | image_dir = osp.join(base_data_dir, 'images', 'sg_test_images') 16 | 17 | if not osp.isdir(out_xml_path): 18 | os.makedirs(out_xml_path) 19 | 20 | annotations = json.load(open(osp.join(base_data_dir, "test.json"))) 21 | classes_object = json.load(open(osp.join(base_data_dir, "objects.json"))) 22 | 23 | #jpg files folder 24 | counter = 0 25 | 26 | for i in range(len(annotations)): 27 | jpg_name = annotations[i]['path'] 28 | xml_file_name = os.path.splitext(jpg_name)[0] + ".xml" 29 | 30 | img_path = osp.join(image_dir, jpg_name) 31 | image = cv2.imread(img_path) 32 | im_height = image.shape[0] 33 | im_width = image.shape[1] 34 | im_ch = image.shape[2] 35 | counter += 1 36 | 37 | #create a xml 38 | out = ET.Element('annotation') 39 | #folder 40 | folder = ET.SubElement(out,"folder") 41 | folder.text = "VOC2007" 42 | #filename 43 | filename = ET.SubElement(out,"filename") 44 | filename.text = jpg_name 45 | #filesource 46 | file_source = ET.SubElement(out,"source") 47 | database = ET.SubElement(file_source,"database") 48 | database.text = "VRD Database" 49 | annotation = ET.SubElement(file_source,"annotation") 50 | annotation.text = "VRD" 51 | image = ET.SubElement(file_source,"image") 52 | image.text = "flickr" 53 | flickid = ET.SubElement(file_source,"flickrid") 54 | flickid.text = "Yikang" 55 | 56 | #file owner 57 | owner = ET.SubElement(out,"owner") 58 | flickid = ET.SubElement(owner,"flickrid") 59 | flickid.text = "Yikang" 60 | name = ET.SubElement(owner,"name") 61 | name.text = "Yikang" 62 | 63 | #file size 64 | file_size = 
ET.SubElement(out,"size") 65 | file_width = ET.SubElement(file_size,"width") 66 | file_width.text = str(im_height) 67 | file_height = ET.SubElement(file_size,"height") 68 | file_height.text = str(im_width) 69 | file_depth = ET.SubElement(file_size,"depth") 70 | file_depth.text = str(im_ch) 71 | 72 | #file segmented 73 | file_segmented = ET.SubElement(out,"segmented") 74 | file_segmented.text = "0" 75 | 76 | for obj in annotations[i]['objects']: 77 | idx = obj['class'] 78 | bbox_x1 = obj['bbox'][0] 79 | bbox_y1 = obj['bbox'][1] 80 | bbox_x2 = obj['bbox'][2] 81 | bbox_y2 = obj['bbox'][3] 82 | #create a car obj 83 | obj = ET.SubElement(out,'object') 84 | obj_name = ET.SubElement(obj,"name") 85 | obj_name.text = classes_object[idx-1] 86 | 87 | obj_pose = ET.SubElement(obj,"pose") 88 | obj_pose.text = "Unspecified" 89 | 90 | obj_truncated = ET.SubElement(obj,"truncated") 91 | obj_truncated.text = "1" 92 | 93 | obj_difficult = ET.SubElement(obj,"difficult") 94 | obj_difficult.text = "0" 95 | 96 | #create boundingbox 97 | bndbox = ET.SubElement(obj,"bndbox") 98 | xmin = ET.SubElement(bndbox,'xmin') 99 | xmin.text = str(bbox_x1) 100 | 101 | ymin = ET.SubElement(bndbox,'ymin') 102 | ymin.text = str(bbox_y1) 103 | 104 | xmax = ET.SubElement(bndbox,'xmax') 105 | xmax.text = str(bbox_x2) 106 | 107 | ymax = ET.SubElement(bndbox,'ymax') 108 | ymax.text = str(bbox_y2) 109 | 110 | out_tree = ET.ElementTree(out) 111 | out_tree.write(out_xml_path + xml_file_name) 112 | 113 | if (i+1) % 100 == 0: 114 | print('{} / {} images processed'.format(i+1, len(annotations))) 115 | 116 | print "Process done" 117 | 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /scripts/voc_converter_vg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | ''' 3 | This file is a tool to parse json file and generate voc format xml file. 
4 | ''' 5 | import json 6 | import xml.etree.ElementTree as ET 7 | import cv2 8 | import os 9 | import os.path as osp 10 | import pdb 11 | 12 | 13 | def main(): 14 | base_data_dir = '/DATA/ykli/workspace/scene_generation/data/visual_genome/vg_cleansing/output/top_150_50_new' 15 | out_xml_path = osp.join(base_data_dir, "object_xml") 16 | 17 | if not osp.isdir(out_xml_path): 18 | os.makedirs(out_xml_path) 19 | 20 | annotations = json.load(open(osp.join(base_data_dir, "test.json"))) 21 | 22 | #jpg files folder 23 | counter = 0 24 | 25 | for i in range(len(annotations)): 26 | jpg_name = annotations[i]['path'] 27 | xml_file_name = os.path.splitext(jpg_name)[0] + ".xml" 28 | im_height = annotations[i]['height'] 29 | im_width = annotations[i]['width'] 30 | im_ch = 3 31 | counter += 1 32 | 33 | #create a xml 34 | out = ET.Element('annotation') 35 | #folder 36 | folder = ET.SubElement(out,"folder") 37 | folder.text = "VOC2007" 38 | #filename 39 | filename = ET.SubElement(out,"filename") 40 | filename.text = jpg_name 41 | #filesource 42 | file_source = ET.SubElement(out,"source") 43 | database = ET.SubElement(file_source,"database") 44 | database.text = "VRD Database" 45 | annotation = ET.SubElement(file_source,"annotation") 46 | annotation.text = "VRD" 47 | image = ET.SubElement(file_source,"image") 48 | image.text = "flickr" 49 | flickid = ET.SubElement(file_source,"flickrid") 50 | flickid.text = "Yikang" 51 | 52 | #file owner 53 | owner = ET.SubElement(out,"owner") 54 | flickid = ET.SubElement(owner,"flickrid") 55 | flickid.text = "Yikang" 56 | name = ET.SubElement(owner,"name") 57 | name.text = "Yikang" 58 | 59 | #file size 60 | file_size = ET.SubElement(out,"size") 61 | file_width = ET.SubElement(file_size,"width") 62 | file_width.text = str(im_width) 63 | file_height = ET.SubElement(file_size,"height") 64 | file_height.text = str(im_height) 65 | file_depth = ET.SubElement(file_size,"depth") 66 | file_depth.text = str(im_ch) 67 | 68 | #file segmented 69 | file_segmented = ET.SubElement(out,"segmented") 70 | file_segmented.text = "0" 71 | 72 | for obj in annotations[i]['objects']: 73 | bbox_x1 = obj['box'][0] 74 | bbox_y1 = obj['box'][1] 75 | bbox_x2 = obj['box'][2] 76 | bbox_y2 = obj['box'][3] 77 | obj_class = obj['class'] 78 | #create an object node 79 | obj = ET.SubElement(out,'object') 80 | obj_name = ET.SubElement(obj,"name") 81 | obj_name.text = obj_class 82 | 83 | obj_pose = ET.SubElement(obj,"pose") 84 | obj_pose.text = "Unspecified" 85 | 86 | obj_truncated = ET.SubElement(obj,"truncated") 87 | obj_truncated.text = "1" 88 | 89 | obj_difficult = ET.SubElement(obj,"difficult") 90 | obj_difficult.text = "0" 91 | 92 | #create boundingbox 93 | bndbox = ET.SubElement(obj,"bndbox") 94 | xmin = ET.SubElement(bndbox,'xmin') 95 | xmin.text = str(bbox_x1) 96 | 97 | ymin = ET.SubElement(bndbox,'ymin') 98 | ymin.text = str(bbox_y1) 99 | 100 | xmax = ET.SubElement(bndbox,'xmax') 101 | xmax.text = str(bbox_x2) 102 | 103 | ymax = ET.SubElement(bndbox,'ymax') 104 | ymax.text = str(bbox_y2) 105 | 106 | out_tree = ET.ElementTree(out) 107 | out_tree.write(osp.join(out_xml_path, xml_file_name)) 108 | 109 | if (i+1) % 100 == 0: 110 | print('{} / {} images processed'.format(i+1, len(annotations))) 111 | 112 | print "Process done" 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 |
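# train_FN.py fine-tunes the full Factorizable Net on top of a pre-trained RPN:
# --path_opt picks the model/data config and --rpn points at the RPN weights.
# A hypothetical VRD run (the weight filename is an assumption) would look like:
# CUDA_VISIBLE_DEVICES=0 python train_FN.py --path_opt options/models/VRD.yaml --rpn output/RPN_VRD.h5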
CUDA_VISIBLE_DEVICES=0 python train_FN.py --dataset_option=normal --path_opt options/models/VG-MSDN.yaml --rpn output/RPN.h5 4 | -------------------------------------------------------------------------------- /visualize_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import torch 4 | import numpy as np 5 | import random 6 | import numpy.random as npr 7 | import json 8 | import cPickle as pickle 9 | import yaml 10 | import cv2 11 | 12 | from pprint import pprint 13 | 14 | # from faster_rcnn.datasets.factory import get_imdb 15 | import lib.datasets as datasets 16 | from lib.visualize_graph.vis_utils import ground_predictions 17 | from lib.visualize_graph.visualize import viz_scene_graph, draw_scene_graph 18 | 19 | 20 | import argparse 21 | import pdb 22 | 23 | from PIL import Image 24 | 25 | from eval.evaluator import DenseCaptioningEvaluator 26 | 27 | 28 | parser = argparse.ArgumentParser('Options for scene graph visualization') 29 | 30 | parser.add_argument('--path_data_opts', default='options/data_VRD.yaml', type=str, 31 | help='path to a data file') 32 | parser.add_argument('--path_result', default='output/testing_result.pkl', type=str, 33 | help='path to the evaluation result file') 34 | parser.add_argument('--output_dir', default='output/graph_results/VRD', type=str, 35 | help='directory to save the generated scene graphs') 36 | parser.add_argument('--dataset_option', default='small', type=str, 37 | help='dataset option for visual_genome (small | normal | fat)') 38 | parser.add_argument('--dataset', default='VRD', type=str, 39 | help='dataset name (VRD | visual_genome)') 40 | 41 | args = parser.parse_args() 42 | 43 | if args.dataset != 'visual_genome': 44 | args.dataset_option = None 45 | 46 | # def prepare_rel_matrix(relationships, object_num): 47 | # rel_mat = np.zeros() 48 | # for rel in len(relationships): 49 | # rel_mat[rel[0], rel[1]] = rel_cls[i] 50 | # return rel_mat 51 | 52 | 53 | def visualize(): 54 | 55 | global args 56 | print('=========== Visualizing Scene Graph =========') 57 | 58 | 59 | print('Loading dataset...'), 60 | with open(args.path_data_opts, 'r') as handle: 61 | options = yaml.load(handle) 62 | test_set = getattr(datasets, args.dataset)(options, 'test', 63 | dataset_option=args.dataset_option, 64 | use_region=True) 65 | test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, 66 | shuffle=False, num_workers=4, 67 | pin_memory=True, 68 | collate_fn=getattr(datasets, args.dataset).collate) 69 | print('Done Loading') 70 | 71 | with open(args.path_result, 'rb') as f: 72 | print('Loading result....'), 73 | result = pickle.load(f) 74 | print('Done') 75 | print('Total: {} images'.format(len(result))) 76 | 77 | for i, sample in enumerate(test_loader): # (im_data, im_info, gt_objects, gt_relationships) 78 | objects = result[i]['objects'] 79 | relationships = result[i]['relationships'] 80 | gt_boxes = sample['objects'][0][:, :4] / sample['image_info'][0][2] 81 | gt_relations = sample['relations'][0] 82 | gt_relations = zip(*np.where(gt_relations > 0)) 83 | gt_to_pred = ground_predictions(objects['bbox'], gt_boxes, 0.5) 84 | assert sample['path'][0] == result[i]['path'], 'Image mismatch.'
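        # ground_predictions (lib/visualize_graph/vis_utils.py) presumably matches each
        # ground-truth box to a predicted box at IoU >= 0.5 (the threshold passed above),
        # so gt_to_pred maps GT box indices to predicted box indices; draw_graph_pred
        # below only keeps predicted relations whose endpoints ground to annotated objects.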
85 | im = cv2.imread(osp.join(test_set._data_path, sample['path'][0])) 86 | image_name = sample['path'][0].split('/')[-1].split('.')[0] 87 | image_name = osp.join(args.output_dir, image_name) 88 | draw_graph_pred(im, objects['bbox'], objects['class'], relationships, 89 | gt_to_pred, gt_relations, test_set._object_classes, 90 | test_set._predicate_classes, filename=image_name) 91 | 92 | print('Done generating scene graphs.') 93 | 94 | 95 | def draw_graph_pred(im, boxes, obj_ids, pred_relationships, gt_to_pred, 96 | gt_relations, ind_to_class, ind_to_predicate, filename): 97 | """ 98 | Draw a predicted scene graph. To keep the graph interpretable, only draw 99 | the node and edge predictions that have corresponding ground truth 100 | labels. 101 | args: 102 | im: image 103 | boxes: predicted boxes 104 | obj_ids: object id list 105 | pred_relationships: predicted relation triplets 106 | gt_to_pred: a mapping from ground truth box indices to predicted box indices 107 | filename: output path prefix for saving 108 | gt_relations: gt_relationships 109 | """ 110 | rel_pred = [] 111 | all_rels = [] 112 | 113 | for pred_rel in pred_relationships: 114 | for rel in gt_relations: 115 | if rel[0] not in gt_to_pred or rel[1] not in gt_to_pred: 116 | continue 117 | 118 | # discard duplicate grounding 119 | if pred_rel[0] == gt_to_pred[rel[0]] and pred_rel[1] == gt_to_pred[rel[1]]: 120 | rel_pred.append(pred_rel) 121 | all_rels.append([pred_rel[0], pred_rel[1]]) 122 | break 123 | # rel_pred = pred_relationships[:5] # uncomment to visualize top-5 relationships 124 | rel_pred = np.array(rel_pred) 125 | if rel_pred.size < 4: 126 | print('Image Skipped.') 127 | return 128 | # indices of predicted boxes 129 | pred_inds = rel_pred[:, :2].ravel() 130 | 131 | # draw graph predictions 132 | graph_dict = draw_scene_graph(obj_ids, pred_inds, rel_pred, ind_to_class, ind_to_predicate, filename=filename) 133 | viz_scene_graph(im, boxes, obj_ids, ind_to_class, ind_to_predicate, pred_inds, rel_pred, filename=filename) 134 | """ 135 | out_boxes = [] 136 | for box, cls in zip(boxes[pred_inds], cls_pred[pred_inds]): 137 | out_boxes.append(box[cls*4:(cls+1)*4].tolist()) 138 | 139 | graph_dict['boxes'] = out_boxes 140 | 141 | if do_save == 'y': 142 | scipy.misc.imsave('cherry/im_%i.png' % idx, im) 143 | fn = open('cherry/graph_%i.json' % idx, 'w+') 144 | json.dump(graph_dict, fn) 145 | print(idx) 146 | """ 147 | 148 | 149 | if __name__ == '__main__': 150 | visualize() 151 | 152 | 153 | --------------------------------------------------------------------------------
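For reference, here is a minimal, self-contained sketch of the sinusoidal geometry encoding implemented in models/modules/geometry_transform.py above. It is written in current PyTorch style (no torch.autograd.Variable, unlike the repo itself), and the wave_length default of 1000 is an assumption, since the function signature is truncated in this dump:

import numpy as np
import torch

def positional_encoding_sketch(position_mat, dim_output, wave_length=1000):
    # position_mat: [num_keys, num_queries, 4] relative-geometry features
    assert dim_output % 8 == 0, "[dim_output] is expected to be an integer multiple of 8"
    freqs = torch.tensor([np.power(wave_length, 8. * i / dim_output)
                          for i in range(dim_output // 8)], dtype=torch.float32)
    # broadcast each of the 4 geometry channels against dim_output / 8 frequencies
    enc = position_mat.unsqueeze(-1) * 100 / freqs.view(1, 1, 1, -1)
    # sin on the first half of the new channel axis, cos on the second half
    enc = torch.cat([torch.sin(enc), torch.cos(enc)], dim=3)
    return enc.view(enc.size(0), enc.size(1), -1)  # [num_keys, num_queries, dim_output]

position_mat = torch.rand(3, 5, 4)  # made-up geometry features for 3 keys x 5 queries
print(positional_encoding_sketch(position_mat, 256).shape)  # torch.Size([3, 5, 256])

With dim_mm: 256 as in the model YAMLs, Geometry_Transform_v2 then feeds this 256-d encoding through Linear(dim_mm, 1) + ReLU to obtain one non-negative geometry weight per key/query pair, which Relation_Module multiplies into the exponentiated visual attention logits.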