├── lib ├── core │ ├── __init__.py │ ├── test_rel.py │ └── test_engine_rel.py ├── datasets_rel │ ├── __init__.py │ ├── pytorch_misc.py │ ├── dataset_catalog_rel.py │ ├── ap_eval_rel.py │ ├── task_evaluation_vg_and_vrd.py │ ├── roidb_rel.py │ └── task_evaluation_sg.py ├── modeling_rel │ ├── __init__.py │ ├── sparse_targets_rel.py │ ├── generate_rel_proposal_labels.py │ ├── VGG16.py │ ├── rel_pyramid_module.py │ ├── get_dataset_counts_rel.py │ ├── relpn_heads.py │ └── fast_rcnn_heads.py ├── roi_data_rel │ ├── __init__.py │ ├── minibatch_rel.py │ ├── loader_rel.py │ └── fast_rcnn_rel.py ├── utils_rel │ ├── __init__.py │ ├── cython_bbox_rel.pyx │ ├── net_rel.py │ ├── logging_rel.py │ ├── boxes_rel.py │ ├── subprocess_rel.py │ └── training_stats_rel.py ├── make.sh └── setup.py ├── Examples.PNG ├── Loss_illustration.PNG ├── .gitmodules ├── docker └── Dockerfile ├── tools ├── _init_paths.py ├── rename_vrd_with_numbers.py ├── convert_vrd_anno_to_coco_format.py └── test_net_rel.py ├── configs ├── vg │ ├── e2e_faster_rcnn_VGG16_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_no_spt.yaml │ └── e2e_faster_rcnn_X-101-64x4d-FPN_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml ├── vrd │ ├── e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_IN_pretrained.yaml │ └── e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_COCO_pretrained.yaml ├── oi_rel_mini │ └── e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_mini_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml └── oi_rel │ └── e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml ├── LICENSE └── README.md /lib/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/datasets_rel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/modeling_rel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/roi_data_rel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/utils_rel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Examples.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/ContrastiveLosses4VRD/master/Examples.PNG -------------------------------------------------------------------------------- /Loss_illustration.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/ContrastiveLosses4VRD/master/Loss_illustration.PNG -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Detectron_pytorch"] 2 | path = Detectron_pytorch 3 | 
url = https://github.com/roytseng-tw/Detectron.pytorch 4 | -------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | CUDA_PATH=/usr/local/cuda/ 5 | 6 | python3 setup.py build_ext --inplace 7 | rm -rf build 8 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:0.4-cuda9-cudnn7-devel 2 | RUN apt-get update --fix-missing 3 | RUN apt-get install -y software-properties-common 4 | RUN apt-get install -y libsm6 libxext6 libxrender1 libfontconfig1 5 | RUN pip install --upgrade pip 6 | RUN pip install Cython matplotlib numpy scipy pyyaml packaging tensorboardX scikit-image pillow tqdm gensim 7 | RUN pip install pycocotools 8 | RUN conda install opencv -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # Based on Detectron.pytorch/tools/_init_paths.py by Roy Tseng 2 | # modified for this project by Ji Zhang 3 | 4 | """Add {PROJECT_ROOT}/lib. to PYTHONPATH 5 | 6 | Usage: 7 | import this module before import any modules under lib/ 8 | e.g 9 | import _init_paths 10 | from core.config import cfg 11 | """ 12 | 13 | import os.path as osp 14 | import sys 15 | 16 | 17 | def add_path(path): 18 | if path not in sys.path: 19 | sys.path.insert(0, path) 20 | 21 | this_dir = osp.abspath(osp.dirname(osp.dirname(__file__))) 22 | 23 | # add Detectron.PyTorch/lib 24 | detectron_path = osp.join(this_dir, 'Detectron_pytorch', 'lib') 25 | add_path(detectron_path) 26 | 27 | # Add lib to PYTHONPATH 28 | lib_path = osp.join(this_dir, 'lib') 29 | add_path(lib_path) 30 | 31 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # Based on: 2 | # Detectron.pytorch/lib/setup.py 3 | # and modified for this project 4 | # Original source license text: 5 | # -------------------------------------------------------- 6 | # Fast R-CNN 7 | # Copyright (c) 2015 Microsoft 8 | # Licensed under The MIT License [see LICENSE for details] 9 | # Written by Ross Girshick 10 | # -------------------------------------------------------- 11 | 12 | from __future__ import print_function 13 | 14 | from Cython.Build import cythonize 15 | from Cython.Distutils import build_ext 16 | from setuptools import Extension 17 | from setuptools import setup 18 | 19 | import numpy as np 20 | 21 | 22 | # Obtain the numpy include directory. This logic works across numpy versions. 
23 | try: 24 | numpy_include = np.get_include() 25 | except AttributeError: 26 | numpy_include = np.get_numpy_include() 27 | 28 | 29 | ext_modules = [ 30 | Extension( 31 | name='utils_rel.cython_bbox_rel', 32 | sources=['utils_rel/cython_bbox_rel.pyx'], 33 | extra_compile_args=['-Wno-cpp'], 34 | include_dirs=[numpy_include] 35 | ) 36 | ] 37 | 38 | setup( 39 | name='mask_rcnn_rel', 40 | ext_modules=cythonize(ext_modules) 41 | ) 42 | 43 | -------------------------------------------------------------------------------- /lib/datasets_rel/pytorch_misc.py: -------------------------------------------------------------------------------- 1 | 2 | # This file is from https://github.com/rowanz/neural-motifs/blob/master/lib/pytorch_misc.py 3 | # Unused imports and functions are deleted 4 | 5 | """ 6 | Miscellaneous functions that might be useful for pytorch 7 | """ 8 | 9 | import numpy as np 10 | 11 | 12 | def intersect_2d(x1, x2): 13 | """ 14 | Given two arrays [m1, n], [m2,n], returns a [m1, m2] array where each entry is True if those 15 | rows match. 16 | :param x1: [m1, n] numpy array 17 | :param x2: [m2, n] numpy array 18 | :return: [m1, m2] bool array of the intersections 19 | """ 20 | if x1.shape[1] != x2.shape[1]: 21 | raise ValueError("Input arrays must have same #columns") 22 | 23 | # This performs a matrix multiplication-esque thing between the two arrays 24 | # Instead of summing, we want the equality, so we reduce in that way 25 | res = (x1[..., None] == x2.T[None, ...]).all(1) 26 | return res 27 | 28 | def argsort_desc(scores): 29 | """ 30 | Returns the indices that sort scores descending in a smart way 31 | :param scores: Numpy array of arbitrary size 32 | :return: an array of size [numel(scores), dim(scores)] where each row is the index you'd 33 | need to get the score. 
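Example (illustrative)::

    >>> argsort_desc(np.array([[0.1, 0.9], [0.5, 0.2]]))
    array([[0, 1],
           [1, 0],
           [1, 1],
           [0, 0]])

i.e. the first row indexes the largest score (0.9) and the last row the smallest (0.1).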
34 | """ 35 | return np.column_stack(np.unravel_index(np.argsort(-scores.ravel()), scores.shape)) -------------------------------------------------------------------------------- /configs/vg/e2e_faster_rcnn_VGG16_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_no_spt.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | SUBTYPE: 3 4 | USE_OVLP_FILTER: True 5 | USE_FREQ_BIAS: True 6 | NO_FC7_RELU: True 7 | USE_SPATIAL_FEAT: False 8 | ADD_SO_SCORES: True 9 | ADD_SCORES_ALL: True 10 | USE_BG: True 11 | CONV_BODY: VGG16.VGG16_conv_body 12 | USE_NODE_CONTRASTIVE_LOSS: True 13 | NODE_CONTRASTIVE_MARGIN: 0.2 14 | USE_NODE_CONTRASTIVE_SO_AWARE_LOSS: True 15 | NODE_CONTRASTIVE_SO_AWARE_MARGIN: 0.2 16 | NODE_CONTRASTIVE_SO_AWARE_WEIGHT: 0.5 17 | USE_NODE_CONTRASTIVE_P_AWARE_LOSS: True 18 | NODE_CONTRASTIVE_P_AWARE_MARGIN: 0.2 19 | NODE_CONTRASTIVE_P_AWARE_WEIGHT: 0.1 20 | NODE_SAMPLE_SIZE: 128 21 | FASTER_RCNN: True 22 | NUM_GPUS: 8 23 | SOLVER: 24 | WEIGHT_DECAY: 0.0001 25 | LR_POLICY: steps_with_decay 26 | BASE_LR: 0.01 27 | GAMMA: 0.1 28 | MAX_ITER: 62723 # 62723 images 29 | STEPS: [0, 41815, 55754] 30 | VGG16: 31 | VG_PRETRAINED_WEIGHTS: 'detection_models/vg/VGG16/model_step479999.pth' 32 | VG_PRD_PRETRAINED_WEIGHTS: 'detection_models/vg/VGG16/model_step479999.pth' 33 | FAST_RCNN: 34 | ROI_BOX_HEAD: VGG16.VGG16_roi_conv5_head 35 | ROI_XFORM_METHOD: RoIAlign 36 | RPN: 37 | SIZES: (32, 64, 128, 256, 512) 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | FORCE_JSON_DATASET_EVAL: True 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 6000 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/vrd/e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_IN_pretrained.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | SUBTYPE: 3 4 | USE_OVLP_FILTER: True 5 | USE_FREQ_BIAS: True 6 | NO_FC7_RELU: True 7 | USE_SPATIAL_FEAT: True 8 | ADD_SO_SCORES: True 9 | ADD_SCORES_ALL: True 10 | USE_BG: True 11 | CONV_BODY: VGG16.VGG16_conv_body 12 | USE_NODE_CONTRASTIVE_LOSS: True 13 | NODE_CONTRASTIVE_MARGIN: 0.2 14 | USE_NODE_CONTRASTIVE_SO_AWARE_LOSS: True 15 | NODE_CONTRASTIVE_SO_AWARE_MARGIN: 0.2 16 | NODE_CONTRASTIVE_SO_AWARE_WEIGHT: 0.5 17 | USE_NODE_CONTRASTIVE_P_AWARE_LOSS: True 18 | NODE_CONTRASTIVE_P_AWARE_MARGIN: 0.2 19 | NODE_CONTRASTIVE_P_AWARE_WEIGHT: 0.1 20 | NODE_SAMPLE_SIZE: 128 21 | FASTER_RCNN: True 22 | NUM_GPUS: 8 23 | SOLVER: 24 | WEIGHT_DECAY: 0.0001 25 | LR_POLICY: steps_with_decay 26 | BASE_LR: 0.01 27 | GAMMA: 0.1 28 | MAX_ITER: 7560 # 7560 roidbs 29 | STEPS: [0, 5040, 6720] 30 | VGG16: 31 | VRD_PRETRAINED_WEIGHTS: 'detection_models/vrd/VGG16/IN_pretrained/model_step8999.pth' 32 | VRD_PRD_PRETRAINED_WEIGHTS: 'detection_models/vrd/VGG16/IN_pretrained/model_step8999.pth' 33 | FAST_RCNN: 34 | ROI_BOX_HEAD: VGG16.VGG16_roi_conv5_head 35 | ROI_XFORM_METHOD: RoIAlign 36 | RPN: 37 | SIZES: (32, 64, 128, 256, 512) 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | FORCE_JSON_DATASET_EVAL: True 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 6000 49 | RPN_POST_NMS_TOP_N: 1000 50 | 
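A note on the SOLVER blocks in these training YAMLs: with one image per GPU (IMS_PER_BATCH: 1), MAX_ITER is set to roughly (number of training images * epochs) / (NUM_GPUS * IMS_PER_BATCH), and the two learning-rate decay STEPS sit at about 2/3 and 8/9 of MAX_ITER (see the inline comments such as "62723 images" and "53953 images * 12 / 8 = 80929.5"). A minimal sketch of that arithmetic; the helper below is illustrative and not part of the repository, and the exact rounding differs by an iteration or two in some configs:

def solver_schedule(num_images, epochs, num_gpus=8, ims_per_batch=1):
    # Effective batch size is num_gpus * ims_per_batch images per iteration.
    max_iter = int(round(num_images * epochs / (num_gpus * ims_per_batch)))
    # LR is multiplied by GAMMA (0.1) at roughly 2/3 and 8/9 of training.
    steps = [0, int(round(max_iter * 2.0 / 3.0)), int(round(max_iter * 8.0 / 9.0))]
    return max_iter, steps

# Visual Genome VGG16 config above: 62723 images, 8 epochs, 8 GPUs
# -> (62723, [0, 41815, 55754]), matching its MAX_ITER and STEPS.
print(solver_schedule(62723, 8))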
-------------------------------------------------------------------------------- /configs/vrd/e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_COCO_pretrained.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | SUBTYPE: 3 4 | USE_OVLP_FILTER: True 5 | USE_FREQ_BIAS: True 6 | NO_FC7_RELU: True 7 | USE_SPATIAL_FEAT: True 8 | ADD_SO_SCORES: True 9 | ADD_SCORES_ALL: True 10 | USE_BG: True 11 | CONV_BODY: VGG16.VGG16_conv_body 12 | USE_NODE_CONTRASTIVE_LOSS: True 13 | NODE_CONTRASTIVE_MARGIN: 0.2 14 | USE_NODE_CONTRASTIVE_SO_AWARE_LOSS: True 15 | NODE_CONTRASTIVE_SO_AWARE_MARGIN: 0.2 16 | NODE_CONTRASTIVE_SO_AWARE_WEIGHT: 0.5 17 | USE_NODE_CONTRASTIVE_P_AWARE_LOSS: True 18 | NODE_CONTRASTIVE_P_AWARE_MARGIN: 0.2 19 | NODE_CONTRASTIVE_P_AWARE_WEIGHT: 0.1 20 | NODE_SAMPLE_SIZE: 128 21 | FASTER_RCNN: True 22 | NUM_GPUS: 8 23 | SOLVER: 24 | WEIGHT_DECAY: 0.0001 25 | LR_POLICY: steps_with_decay 26 | BASE_LR: 0.01 27 | GAMMA: 0.1 28 | MAX_ITER: 7560 # 7560 roidbs 29 | STEPS: [0, 5040, 6720] 30 | VGG16: 31 | VRD_PRETRAINED_WEIGHTS: 'detection_models/vrd/VGG16/COCO_pretrained/model_step4499.pth' 32 | VRD_PRD_PRETRAINED_WEIGHTS: 'detection_models/vrd/VGG16/COCO_pretrained/model_step4499.pth' 33 | FAST_RCNN: 34 | ROI_BOX_HEAD: VGG16.VGG16_roi_conv5_head 35 | ROI_XFORM_METHOD: RoIAlign 36 | RPN: 37 | SIZES: (32, 64, 128, 256, 512) 38 | TRAIN: 39 | SCALES: (800,) 40 | MAX_SIZE: 1333 41 | IMS_PER_BATCH: 1 42 | BATCH_SIZE_PER_IM: 512 43 | TEST: 44 | FORCE_JSON_DATASET_EVAL: True 45 | SCALE: 800 46 | MAX_SIZE: 1333 47 | NMS: 0.5 48 | RPN_PRE_NMS_TOP_N: 6000 49 | RPN_POST_NMS_TOP_N: 1000 50 | -------------------------------------------------------------------------------- /configs/vg/e2e_faster_rcnn_X-101-64x4d-FPN_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | SUBTYPE: 3 4 | USE_OVLP_FILTER: True 5 | USE_FREQ_BIAS: True 6 | NO_FC7_RELU: True 7 | USE_SPATIAL_FEAT: True 8 | ADD_SO_SCORES: True 9 | ADD_SCORES_ALL: True 10 | USE_BG: True 11 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 12 | USE_NODE_CONTRASTIVE_LOSS: True 13 | NODE_CONTRASTIVE_MARGIN: 0.2 14 | USE_NODE_CONTRASTIVE_SO_AWARE_LOSS: True 15 | NODE_CONTRASTIVE_SO_AWARE_MARGIN: 0.2 16 | NODE_CONTRASTIVE_SO_AWARE_WEIGHT: 0.5 17 | USE_NODE_CONTRASTIVE_P_AWARE_LOSS: True 18 | NODE_CONTRASTIVE_P_AWARE_MARGIN: 0.2 19 | NODE_CONTRASTIVE_P_AWARE_WEIGHT: 0.1 20 | NODE_SAMPLE_SIZE: 128 21 | FASTER_RCNN: True 22 | NUM_GPUS: 8 23 | SOLVER: 24 | WEIGHT_DECAY: 0.0001 25 | LR_POLICY: steps_with_decay 26 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 27 | BASE_LR: 0.01 28 | GAMMA: 0.1 29 | MAX_ITER: 62723 # 62723 images 30 | STEPS: [0, 41815, 55754] 31 | FPN: 32 | FPN_ON: True 33 | MULTILEVEL_ROIS: True 34 | MULTILEVEL_RPN: True 35 | RESNETS: 36 | VG_PRETRAINED_WEIGHTS: 'detection_models/vg/X-101-64x4d-FPN/model_step119999.pth' 37 | VG_PRD_PRETRAINED_WEIGHTS: 'detection_models/vg/X-101-64x4d-FPN/model_step119999.pth' 38 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 39 | TRANS_FUNC: bottleneck_transformation 40 | NUM_GROUPS: 64 41 | WIDTH_PER_GROUP: 4 42 | FAST_RCNN: 43 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 44 | ROI_XFORM_METHOD: RoIAlign 45 | ROI_XFORM_RESOLUTION: 7 46 | ROI_XFORM_SAMPLING_RATIO: 2 47 | TRAIN: 48 | SCALES: (800,) 
49 | MAX_SIZE: 1333 50 | IMS_PER_BATCH: 1 51 | BATCH_SIZE_PER_IM: 512 52 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 53 | TEST: 54 | FORCE_JSON_DATASET_EVAL: True 55 | SCALE: 800 56 | MAX_SIZE: 1333 57 | NMS: 0.5 58 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 59 | RPN_POST_NMS_TOP_N: 1000 60 | -------------------------------------------------------------------------------- /configs/oi_rel_mini/e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_mini_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | SUBTYPE: 3 4 | USE_OVLP_FILTER: True 5 | USE_FREQ_BIAS: True 6 | NO_FC7_RELU: True 7 | USE_SPATIAL_FEAT: True 8 | ADD_SO_SCORES: True 9 | ADD_SCORES_ALL: True 10 | USE_BG: True 11 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 12 | USE_NODE_CONTRASTIVE_LOSS: True 13 | NODE_CONTRASTIVE_MARGIN: 0.2 14 | USE_NODE_CONTRASTIVE_SO_AWARE_LOSS: True 15 | NODE_CONTRASTIVE_SO_AWARE_MARGIN: 0.2 16 | NODE_CONTRASTIVE_SO_AWARE_WEIGHT: 0.5 17 | USE_NODE_CONTRASTIVE_P_AWARE_LOSS: True 18 | NODE_CONTRASTIVE_P_AWARE_MARGIN: 0.2 19 | NODE_CONTRASTIVE_P_AWARE_WEIGHT: 0.1 20 | NODE_SAMPLE_SIZE: 128 21 | FASTER_RCNN: True 22 | NUM_GPUS: 8 23 | SOLVER: 24 | WEIGHT_DECAY: 0.0001 25 | LR_POLICY: steps_with_decay 26 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 27 | BASE_LR: 0.01 28 | GAMMA: 0.1 29 | MAX_ITER: 6750 # 4500 images 30 | STEPS: [0, 4500, 6000] 31 | FPN: 32 | FPN_ON: True 33 | MULTILEVEL_ROIS: True 34 | MULTILEVEL_RPN: True 35 | RESNETS: 36 | OI_REL_PRETRAINED_WEIGHTS: 'detection_models/oi_rel/X-101-64x4d-FPN/model_step599999.pth' 37 | OI_REL_PRD_PRETRAINED_WEIGHTS: 'detection_models/oi_rel/X-101-64x4d-FPN/model_step599999.pth' 38 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 39 | TRANS_FUNC: bottleneck_transformation 40 | NUM_GROUPS: 64 41 | WIDTH_PER_GROUP: 4 42 | FAST_RCNN: 43 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 44 | ROI_XFORM_METHOD: RoIAlign 45 | ROI_XFORM_RESOLUTION: 7 46 | ROI_XFORM_SAMPLING_RATIO: 2 47 | TRAIN: 48 | SCALES: (800,) 49 | MAX_SIZE: 1333 50 | IMS_PER_BATCH: 1 51 | BATCH_SIZE_PER_IM: 512 52 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 53 | TEST: 54 | FORCE_JSON_DATASET_EVAL: True 55 | SCALE: 800 56 | MAX_SIZE: 1333 57 | NMS: 0.5 58 | # DETECTIONS_PER_IM: 100 59 | # SCORE_THRESH: 0.05 # sometimes the number of sbj_rois is 0 if SCORE_THRESH is 0.05(default) 60 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 61 | RPN_POST_NMS_TOP_N: 1000 62 | PRD_Ks: (1, 10) 63 | -------------------------------------------------------------------------------- /lib/modeling_rel/sparse_targets_rel.py: -------------------------------------------------------------------------------- 1 | """ 2 | Some functions are adapted from Rowan Zellers: 3 | https://github.com/rowanz/neural-motifs 4 | """ 5 | import os 6 | import torch.nn as nn 7 | import torch 8 | from torch.autograd import Variable 9 | import numpy as np 10 | import logging 11 | from six.moves import cPickle as pickle 12 | 13 | from core.config import cfg 14 | from modeling_rel.get_dataset_counts_rel import get_rel_counts 15 | 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | # This module is adapted from Rowan Zellers: 21 | # https://github.com/rowanz/neural-motifs/blob/master/lib/sparse_targets.py 22 | # Modified for this project 23 | class FrequencyBias(nn.Module): 24 | """ 25 | The goal of this is to provide a simplified way of computing 26 | P(predicate | obj1, obj2, img). 
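The smoothed log-distribution is flattened into an nn.Embedding of shape
(num_objs * num_objs, num_prd_classes + 1), so the row for a (subject, object)
class pair is fetched with the flat index sbj_label * num_objs + obj_label
(see rel_index_with_labels below). Illustrative example: with num_objs = 3,
the pair (sbj=2, obj=1) maps to row 2 * 3 + 1 = 7.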
27 | """ 28 | 29 | def __init__(self, ds_name, eps=1e-3): 30 | super(FrequencyBias, self).__init__() 31 | 32 | if ds_name.find('vg') >= 0: 33 | ds_name = 'vg' 34 | elif ds_name.find('oi') >= 0: 35 | ds_name = 'oi' 36 | elif ds_name.find('vrd') >= 0: 37 | ds_name = 'vrd' 38 | else: 39 | raise NotImplementedError 40 | 41 | if cfg.MODEL.USE_OVLP_FILTER: 42 | must_overlap = True 43 | else: 44 | must_overlap = False 45 | fg_matrix, bg_matrix = get_rel_counts(ds_name, must_overlap=must_overlap) 46 | bg_matrix += 1 47 | fg_matrix[:, :, 0] = bg_matrix 48 | 49 | pred_dist = np.log(fg_matrix / (fg_matrix.sum(2)[:, :, None] + 1e-08) + eps) 50 | 51 | self.num_objs = pred_dist.shape[0] 52 | pred_dist = torch.FloatTensor(pred_dist).view(-1, pred_dist.shape[2]) 53 | 54 | self.rel_baseline = nn.Embedding(pred_dist.size(0), pred_dist.size(1)) 55 | self.rel_baseline.weight.data = pred_dist 56 | 57 | logger.info('Frequency bias tables loaded.') 58 | 59 | def rel_index_with_labels(self, labels): 60 | """ 61 | :param labels: [batch_size, 2] 62 | :return: 63 | """ 64 | return self.rel_baseline(labels[:, 0] * self.num_objs + labels[:, 1]) 65 | -------------------------------------------------------------------------------- /configs/oi_rel/e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | TYPE: generalized_rcnn 3 | SUBTYPE: 3 4 | USE_OVLP_FILTER: True 5 | USE_FREQ_BIAS: True 6 | NO_FC7_RELU: True 7 | USE_SPATIAL_FEAT: True 8 | ADD_SO_SCORES: True 9 | ADD_SCORES_ALL: True 10 | USE_BG: True 11 | CONV_BODY: FPN.fpn_ResNet101_conv5_body 12 | USE_NODE_CONTRASTIVE_LOSS: True 13 | NODE_CONTRASTIVE_MARGIN: 0.2 14 | USE_NODE_CONTRASTIVE_SO_AWARE_LOSS: True 15 | NODE_CONTRASTIVE_SO_AWARE_MARGIN: 0.2 16 | NODE_CONTRASTIVE_SO_AWARE_WEIGHT: 0.5 17 | USE_NODE_CONTRASTIVE_P_AWARE_LOSS: True 18 | NODE_CONTRASTIVE_P_AWARE_MARGIN: 0.2 19 | NODE_CONTRASTIVE_P_AWARE_WEIGHT: 0.1 20 | NODE_SAMPLE_SIZE: 128 21 | FASTER_RCNN: True 22 | NUM_GPUS: 8 23 | SOLVER: 24 | WEIGHT_DECAY: 0.0001 25 | LR_POLICY: steps_with_decay 26 | # 1x schedule (note TRAIN.IMS_PER_BATCH: 1) 27 | BASE_LR: 0.01 28 | GAMMA: 0.1 29 | MAX_ITER: 80930 # 53953 images * 12 / 8 = 80929.5 30 | STEPS: [0, 53954, 71937] 31 | FPN: 32 | FPN_ON: True 33 | MULTILEVEL_ROIS: True 34 | MULTILEVEL_RPN: True 35 | RESNETS: 36 | OI_REL_PRETRAINED_WEIGHTS: 'detection_models/oi_rel/X-101-64x4d-FPN/model_step599999.pth' 37 | OI_REL_PRD_PRETRAINED_WEIGHTS: 'detection_models/oi_rel/X-101-64x4d-FPN/model_step599999.pth' 38 | STRIDE_1X1: False # default True for MSRA; False for C2 or Torch models 39 | TRANS_FUNC: bottleneck_transformation 40 | NUM_GROUPS: 64 41 | WIDTH_PER_GROUP: 4 42 | FAST_RCNN: 43 | ROI_BOX_HEAD: fast_rcnn_heads.roi_2mlp_head 44 | ROI_XFORM_METHOD: RoIAlign 45 | ROI_XFORM_RESOLUTION: 7 46 | ROI_XFORM_SAMPLING_RATIO: 2 47 | TRAIN: 48 | SCALES: (800,) 49 | MAX_SIZE: 1333 50 | IMS_PER_BATCH: 1 51 | BATCH_SIZE_PER_IM: 512 52 | RPN_PRE_NMS_TOP_N: 2000 # Per FPN level 53 | TEST: 54 | FORCE_JSON_DATASET_EVAL: True 55 | SCALE: 800 56 | MAX_SIZE: 1333 57 | NMS: 0.5 58 | # DETECTIONS_PER_IM: 100 59 | # SCORE_THRESH: 0.05 # sometimes the number of sbj_rois is 0 if SCORE_THRESH is 0.05(default) 60 | RPN_PRE_NMS_TOP_N: 1000 # Per FPN level 61 | RPN_POST_NMS_TOP_N: 1000 62 | PRD_Ks: (1, 10) 63 | -------------------------------------------------------------------------------- /tools/rename_vrd_with_numbers.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # In[23]: 5 | 6 | 7 | import json 8 | import numpy as np 9 | import os 10 | from PIL import Image 11 | from tqdm import tqdm 12 | import copy 13 | from shutil import copyfile 14 | 15 | 16 | # take the images from the sg_dataset folder and rename them 17 | # Also converts the gif and png images into jpg 18 | 19 | def process_vrd_split(in_split, out_split): 20 | vrd_dir = 'data/vrd/sg_dataset/sg_' + in_split + '_images/' 21 | new_dir = 'data/vrd/'+ out_split + '_images/' 22 | os.mkdir(new_dir) 23 | 24 | cnt = 1 25 | name_map = {} 26 | for f in tqdm(sorted(os.listdir(vrd_dir))): 27 | # for f in os.listdir(vrd_dir): 28 | ext = f.split('.')[1] 29 | if ext.find('png') >= 0 or ext.find('gif') >= 0: 30 | img = Image.open(vrd_dir + f).convert('RGB') 31 | else: 32 | copyfile(vrd_dir + f, new_dir + '{:012d}'.format(cnt) + '.jpg') 33 | 34 | 35 | if ext.find('gif') >= 0: 36 | img.save(new_dir + '{:012d}'.format(cnt) + '.jpg') 37 | elif ext.find('png') >= 0: 38 | img.save(new_dir + '{:012d}'.format(cnt) + '.jpg') 39 | name_map[f] = cnt 40 | cnt += 1 41 | 42 | print(len(name_map)) 43 | 44 | 45 | # store the filename mappings here 46 | name_map_fname = 'data/vrd/%s_fname_mapping.json' %(out_split) 47 | with open(name_map_fname, 'w') as f: 48 | json.dump(name_map, f, sort_keys=True, indent=4) 49 | f.close() 50 | 51 | # load the original annotations 52 | with open('data/vrd/annotations_' + in_split + '.json', 'r') as f: 53 | vrd_anns = json.load(f) 54 | f.close() 55 | new_anns = {} 56 | for k, v in tqdm(vrd_anns.items()): 57 | # apparently this gif file has been renamed in the original annotations 58 | if k == '4392556686_44d71ff5a0_o.jpg': 59 | k = '4392556686_44d71ff5a0_o.gif' 60 | new_k = '{:012d}'.format(name_map[k]) + '.jpg' 61 | 62 | new_anns[new_k] = v 63 | 64 | 65 | # create the new annotations 66 | with open('data/vrd/new_annotations_' + out_split + '.json', 'w') as outfile: 67 | json.dump(new_anns, outfile) 68 | 69 | 70 | if __name__ == '__main__': 71 | 72 | # using the test split as our val. 
We won't have a true test split for VRD 73 | process_vrd_split('test', 'val') 74 | 75 | process_vrd_split('train', 'train') 76 | -------------------------------------------------------------------------------- /tools/convert_vrd_anno_to_coco_format.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | 5 | 6 | 7 | import json 8 | import numpy as np 9 | from PIL import Image 10 | from tqdm import tqdm 11 | 12 | 13 | 14 | # [ymin, ymax, xmin, xmax] to [x, y, w, h] 15 | def box_transform(box): 16 | x = box[2] 17 | y = box[0] 18 | w = box[3] - box[2] + 1 19 | h = box[1] - box[0] + 1 20 | return [x, y, w, h] 21 | 22 | 23 | 24 | def convert_anno(split): 25 | 26 | with open('data/vrd/new_annotations_' + split + '.json', 'r') as f: 27 | vrd_anns = json.load(f) 28 | 29 | 30 | print(len(vrd_anns)) 31 | 32 | img_dir = 'data/vrd/' + split + '_images/' 33 | new_imgs = [] 34 | new_anns = [] 35 | ann_id = 1 36 | for f, anns in tqdm(vrd_anns.items()): 37 | im_w, im_h = Image.open(img_dir + f).size 38 | image_id = int(f.split('.')[0]) 39 | new_imgs.append(dict(file_name=f, height=im_h, width=im_w, id=image_id)) 40 | # used for duplicate checking 41 | bbox_set = set() 42 | for ann in anns: 43 | # "area" in COCO is the area of segmentation mask, while here it's the area of bbox 44 | # also need to fake a 'iscrowd' which is always 0 45 | s_box = ann['subject']['bbox'] 46 | bbox = box_transform(s_box) 47 | if not tuple(bbox) in bbox_set: 48 | bbox_set.add(tuple(bbox)) 49 | area = bbox[2] * bbox[3] 50 | cat = ann['subject']['category'] 51 | new_anns.append(dict(area=area, bbox=bbox, category_id=cat, id=ann_id, image_id=image_id, iscrowd=0)) 52 | ann_id += 1 53 | 54 | o_box = ann['object']['bbox'] 55 | bbox = box_transform(o_box) 56 | if not tuple(bbox) in bbox_set: 57 | bbox_set.add(tuple(bbox)) 58 | area = bbox[2] * bbox[3] 59 | cat = ann['object']['category'] 60 | new_anns.append(dict(area=area, bbox=bbox, category_id=cat, id=ann_id, image_id=image_id, iscrowd=0)) 61 | ann_id += 1 62 | 63 | with open('data/vrd/objects.json', 'r') as f: 64 | vrd_objs = json.load(f) 65 | 66 | 67 | new_objs = [] 68 | for i, obj in enumerate(vrd_objs): 69 | new_objs.append(dict(id=i, name=obj, supercategory=obj)) 70 | 71 | 72 | new_data = dict(images=new_imgs, annotations=new_anns, categories=new_objs) 73 | 74 | with open('data/vrd/detections_' + split + '.json', 'w') as outfile: 75 | json.dump(new_data, outfile) 76 | 77 | 78 | 79 | if __name__ == '__main__': 80 | convert_anno('train') 81 | convert_anno('val') 82 | 83 | -------------------------------------------------------------------------------- /lib/modeling_rel/generate_rel_proposal_labels.py: -------------------------------------------------------------------------------- 1 | # Adapted from Detectron.pytorch/lib/modeling/generate_proposal_labels.py 2 | # for this project by Ji Zhang, 2019 3 | 4 | from torch import nn 5 | 6 | from core.config import cfg 7 | from datasets_rel import json_dataset_rel 8 | from roi_data_rel.fast_rcnn_rel import add_rel_blobs 9 | 10 | 11 | class GenerateRelProposalLabelsOp(nn.Module): 12 | def __init__(self): 13 | super().__init__() 14 | 15 | def forward(self, sbj_rois, obj_rois, det_rois, roidb, im_info): 16 | 17 | im_scales = im_info.data.numpy()[:, 2] 18 | # For historical consistency with the original Faster R-CNN 19 | # implementation we are *not* filtering crowd proposals. 
20 | # This choice should be investigated in the future (it likely does 21 | # not matter). 22 | # Note: crowd_thresh=0 will ignore _filter_crowd_proposals 23 | json_dataset_rel.add_rel_proposals(roidb, sbj_rois, obj_rois, det_rois, im_scales) 24 | output_blob_names = ['sbj_rois', 'obj_rois', 'rel_rois', 'fg_prd_labels_int32', 'all_prd_labels_int32', 'fg_size'] 25 | if cfg.MODEL.USE_SPATIAL_FEAT: 26 | output_blob_names += ['spt_feat'] 27 | if cfg.MODEL.USE_FREQ_BIAS: 28 | output_blob_names += ['all_sbj_labels_int32'] 29 | output_blob_names += ['all_obj_labels_int32'] 30 | if cfg.MODEL.USE_NODE_CONTRASTIVE_LOSS or cfg.MODEL.USE_NODE_CONTRASTIVE_SO_AWARE_LOSS or cfg.MODEL.USE_NODE_CONTRASTIVE_P_AWARE_LOSS: 31 | output_blob_names += ['binary_labels_sbj_pos_int32', 32 | 'sbj_rois_sbj_pos', 'obj_rois_sbj_pos', 'rel_rois_sbj_pos', 33 | 'spt_feat_sbj_pos', 34 | 'sbj_labels_sbj_pos_int32', 'obj_labels_sbj_pos_int32', 'prd_labels_sbj_pos_int32', 35 | 'sbj_labels_sbj_pos_fg_int32', 'obj_labels_sbj_pos_fg_int32', 36 | 'inds_unique_sbj_pos', 37 | 'inds_reverse_sbj_pos', 38 | 'binary_labels_obj_pos_int32', 39 | 'sbj_rois_obj_pos', 'obj_rois_obj_pos', 'rel_rois_obj_pos', 40 | 'spt_feat_obj_pos', 41 | 'sbj_labels_obj_pos_int32', 'obj_labels_obj_pos_int32', 'prd_labels_obj_pos_int32', 42 | 'sbj_labels_obj_pos_fg_int32', 'obj_labels_obj_pos_fg_int32', 43 | 'inds_unique_obj_pos', 44 | 'inds_reverse_obj_pos'] 45 | blobs = {k: [] for k in output_blob_names} 46 | 47 | add_rel_blobs(blobs, im_scales, roidb) 48 | 49 | return blobs 50 | -------------------------------------------------------------------------------- /lib/utils_rel/cython_bbox_rel.pyx: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang for this project in 2019 2 | # 3 | # Original license text below: 4 | ############################################################################# 5 | # Copyright (c) 2017-present, Facebook, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | ############################################################################## 19 | # 20 | # Based on: 21 | # -------------------------------------------------------- 22 | # Fast R-CNN 23 | # Copyright (c) 2015 Microsoft 24 | # Licensed under The MIT License [see LICENSE for details] 25 | # Written by Sergey Karayev 26 | # -------------------------------------------------------- 27 | 28 | cimport cython 29 | import numpy as np 30 | cimport numpy as np 31 | 32 | DTYPE = np.float32 33 | ctypedef np.float32_t DTYPE_t 34 | 35 | 36 | @cython.boundscheck(False) 37 | def bbox_pair_overlaps( 38 | np.ndarray[DTYPE_t, ndim=2] boxes1, 39 | np.ndarray[DTYPE_t, ndim=2] boxes2): 40 | """ 41 | Parameters 42 | ---------- 43 | boxes1: (N, 4) ndarray of float 44 | boxes2: (N, 4) ndarray of float 45 | Returns 46 | ------- 47 | overlaps: (N,) ndarray of overlaps between each pair of boxes1 and boxes2 48 | """ 49 | assert boxes1.shape[0] == boxes2.shape[0] 50 | cdef unsigned int N = boxes1.shape[0] 51 | cdef np.ndarray[DTYPE_t, ndim=1] overlaps = np.zeros(N, dtype=DTYPE) 52 | cdef DTYPE_t iw, ih, box_area 53 | cdef DTYPE_t ua 54 | cdef unsigned int n 55 | with nogil: 56 | for n in range(N): 57 | box_area = ( 58 | (boxes2[n, 2] - boxes2[n, 0] + 1) * 59 | (boxes2[n, 3] - boxes2[n, 1] + 1) 60 | ) 61 | iw = ( 62 | min(boxes1[n, 2], boxes2[n, 2]) - 63 | max(boxes1[n, 0], boxes2[n, 0]) + 1 64 | ) 65 | if iw > 0: 66 | ih = ( 67 | min(boxes1[n, 3], boxes2[n, 3]) - 68 | max(boxes1[n, 1], boxes2[n, 1]) + 1 69 | ) 70 | if ih > 0: 71 | ua = float( 72 | (boxes1[n, 2] - boxes1[n, 0] + 1) * 73 | (boxes1[n, 3] - boxes1[n, 1] + 1) + 74 | box_area - iw * ih 75 | ) 76 | overlaps[n] = iw * ih / ua 77 | return overlaps 78 | -------------------------------------------------------------------------------- /lib/modeling_rel/VGG16.py: -------------------------------------------------------------------------------- 1 | # Written by Ji Zhang in 2019 2 | 3 | import os 4 | import numpy as np 5 | import logging 6 | from collections import OrderedDict 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | from core.config import cfg 13 | import nn as mynn 14 | import torchvision.models as models 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | # ---------------------------------------------------------------------------- # 20 | # VGG16 architecture 21 | # ---------------------------------------------------------------------------- # 22 | 23 | vgg = models.vgg16() 24 | if cfg.VGG16.IMAGENET_PRETRAINED_WEIGHTS != '': 25 | logger.info("Loading imagenet pretrained weights from %s", cfg.VGG16.IMAGENET_PRETRAINED_WEIGHTS) 26 | state_dict = torch.load(cfg.VGG16.IMAGENET_PRETRAINED_WEIGHTS) 27 | vgg.load_state_dict({k:v for k, v in state_dict.items() if k in vgg.state_dict()}) 28 | 29 | class VGG16_conv_body(nn.Module): 30 | def __init__(self): 31 | super().__init__() 32 | self.num_layers = 16 33 | self.spatial_scale = 1. / 16. # final feature scale wrt. 
original image scale 34 | self.dim_out = 512 35 | 36 | self._init_modules() 37 | 38 | def _init_modules(self): 39 | 40 | # not using the last maxpool layer 41 | self.convs = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 42 | 43 | for layer in range(10): 44 | for p in self.convs[layer].parameters(): p.requires_grad = False 45 | 46 | def forward(self, x): 47 | 48 | return self.convs(x) 49 | 50 | 51 | class VGG16_roi_conv5_head(nn.Module): 52 | def __init__(self, dim_in, roi_xform_func, spatial_scale): 53 | super().__init__() 54 | self.roi_xform = roi_xform_func 55 | self.spatial_scale = spatial_scale 56 | 57 | self.dim_out = 4096 58 | self.dim_roi_out = dim_in # 512 59 | 60 | self._init_modules() 61 | 62 | def _init_modules(self): 63 | 64 | self.heads = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) 65 | 66 | def forward(self, x, rpn_ret, rois_name='rois', use_relu=True): 67 | x = self.roi_xform( 68 | x, rpn_ret, 69 | blob_rois=rois_name, 70 | method=cfg.FAST_RCNN.ROI_XFORM_METHOD, 71 | resolution=7, 72 | spatial_scale=self.spatial_scale, 73 | sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 74 | ) 75 | 76 | feat = x.view(x.size(0), -1) 77 | 78 | if use_relu: 79 | for layer in list(self.heads.children()): 80 | feat = layer(feat) 81 | else: 82 | # not use the last Drop-out and ReLU in fc7 (keep it the same with Rawan's paper) 83 | for layer in list(self.heads.children())[:-2]: 84 | feat = layer(feat) 85 | 86 | return feat 87 | -------------------------------------------------------------------------------- /lib/utils_rel/net_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang in 2019 2 | # 3 | # Based on Detectron.pytorch/lib/utils/net.py written by Roy Tseng 4 | 5 | import logging 6 | import os 7 | import numpy as np 8 | 9 | import torch 10 | import torch.nn.functional as F 11 | from torch.autograd import Variable 12 | 13 | from core.config import cfg 14 | from utils.net import _get_lr_change_ratio 15 | from utils.net import _CorrectMomentum 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def update_learning_rate_att(optimizer, cur_lr, new_lr): 21 | """Update learning rate""" 22 | if cur_lr != new_lr: 23 | ratio = _get_lr_change_ratio(cur_lr, new_lr) 24 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 25 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 26 | # Update learning rate, note that different parameter may have different learning rate 27 | param_keys = [] 28 | for ind, param_group in enumerate(optimizer.param_groups): 29 | if (ind == 1 or ind == 3) and cfg.SOLVER.BIAS_DOUBLE_LR: # bias params 30 | param_group['lr'] = new_lr * 2 31 | else: 32 | param_group['lr'] = new_lr 33 | if ind <= 1: # backbone params 34 | param_group['lr'] = cfg.SOLVER.BACKBONE_LR_SCALAR * param_group['lr'] # 0.1 * param_group['lr'] 35 | param_keys += param_group['params'] 36 | if cfg.SOLVER.TYPE in ['SGD'] and cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 37 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 38 | _CorrectMomentum(optimizer, param_keys, new_lr / cur_lr) 39 | 40 | 41 | def update_learning_rate_rel(optimizer, cur_lr, new_lr): 42 | """Update learning rate""" 43 | if cur_lr != new_lr: 44 | ratio = _get_lr_change_ratio(cur_lr, new_lr) 45 | if ratio > cfg.SOLVER.LOG_LR_CHANGE_THRESHOLD: 46 | logger.info('Changing learning rate %.6f -> %.6f', cur_lr, new_lr) 47 | # Update learning rate, note that different parameter may have different learning rate 48 | param_keys = [] 49 | for 
ind, param_group in enumerate(optimizer.param_groups): 50 | if (ind == 1 or ind == 3) and cfg.SOLVER.BIAS_DOUBLE_LR: # bias params 51 | param_group['lr'] = new_lr * 2 52 | else: 53 | param_group['lr'] = new_lr 54 | if ind <= 1: # backbone params 55 | param_group['lr'] = cfg.SOLVER.BACKBONE_LR_SCALAR * param_group['lr'] # 0.1 * param_group['lr'] 56 | param_keys += param_group['params'] 57 | if cfg.SOLVER.TYPE in ['SGD'] and cfg.SOLVER.SCALE_MOMENTUM and cur_lr > 1e-7 and \ 58 | ratio > cfg.SOLVER.SCALE_MOMENTUM_THRESHOLD: 59 | _CorrectMomentum(optimizer, param_keys, new_lr / cur_lr) 60 | 61 | 62 | def load_ckpt_rel(model, ckpt): 63 | """Load checkpoint""" 64 | 65 | model.load_state_dict(ckpt, strict=False) 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Unless otherwise stated, all files are released under the following license: 2 | 3 | * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * * Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * * Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * * Neither the name of NVIDIA CORPORATION nor the names of its 14 | * contributors may be used to endorse or promote products derived 15 | * from this software without specific prior written permission. 16 | * 17 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY 18 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 20 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 21 | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 22 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 23 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 24 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 25 | * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | 30 | The license for files within Detectron_pytorch git submodule can be found in 31 | Detectron.pytorch/LICENSE. This license also applies to our source files derived from 32 | those in Detectron.pytorch and written by Roy Tseng. Additional information will 33 | be included in the header of all derived source files. 
34 | 35 | Additional code is derived from the Neural-Motifs repository by Rowan Zellers: 36 | https://github.com/rowanz/neural-motifs 37 | The license for this repository is reproduced below: 38 | 39 | MIT License 40 | 41 | Copyright (c) 2018 Rowan Zellers 42 | 43 | Permission is hereby granted, free of charge, to any person obtaining a copy 44 | of this software and associated documentation files (the "Software"), to deal 45 | in the Software without restriction, including without limitation the rights 46 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 47 | copies of the Software, and to permit persons to whom the Software is 48 | furnished to do so, subject to the following conditions: 49 | 50 | The above copyright notice and this permission notice shall be included in all 51 | copies or substantial portions of the Software. 52 | 53 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 54 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 55 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 56 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 57 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 58 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 59 | SOFTWARE. 60 | -------------------------------------------------------------------------------- /lib/modeling_rel/rel_pyramid_module.py: -------------------------------------------------------------------------------- 1 | # Written by Ji Zhang in 2019 2 | 3 | import collections 4 | import numpy as np 5 | import logging 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from torch.nn import init 11 | 12 | from core.config import cfg 13 | import utils.net as net_utils 14 | import modeling.ResNet as ResNet 15 | from modeling.generate_anchors import generate_anchors 16 | from modeling.generate_proposals import GenerateProposalsOp 17 | from modeling.collect_and_distribute_fpn_rpn_proposals import CollectAndDistributeFpnRpnProposalsOp 18 | import nn as mynn 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class rel_pyramid_module(nn.Module): 24 | def __init__(self, num_backbone_stages): 25 | super().__init__() 26 | 27 | fpn_dim = cfg.FPN.DIM 28 | self.num_backbone_stages = num_backbone_stages 29 | 30 | self.prd_conv_lateral = nn.ModuleList() 31 | for i in range(self.num_backbone_stages): 32 | if cfg.FPN.USE_GN: 33 | self.prd_conv_lateral.append(nn.Sequential( 34 | nn.Conv2d(fpn_dim, fpn_dim, 1, 1, 0, bias=False), 35 | nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim, 36 | eps=cfg.GROUP_NORM.EPSILON))) 37 | else: 38 | self.prd_conv_lateral.append(nn.Conv2d(fpn_dim, fpn_dim, 1, 1, 0)) 39 | 40 | self.posthoc_modules = nn.ModuleList() 41 | for i in range(self.num_backbone_stages): 42 | if cfg.FPN.USE_GN: 43 | self.posthoc_modules.append(nn.Sequential( 44 | nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias=False), 45 | nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim, 46 | eps=cfg.GROUP_NORM.EPSILON))) 47 | else: 48 | self.posthoc_modules.append(nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1)) 49 | 50 | self._init_weights() 51 | 52 | def _init_weights(self): 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 55 | mynn.init.XavierFill(m.weight) 56 | if m.bias is not None: 57 | nn.init.constant_(m.bias, 0) 58 | elif isinstance(m, nn.BatchNorm2d): 59 | nn.init.constant_(m.weight, 1) 60 | 
nn.init.constant_(m.bias, 0) 61 | 62 | def forward(self, blob_conv): 63 | # blob_conv is in the order (P5, P4, P3, P2) 64 | rel_lateral_inner_blob = None 65 | rel_lateral_output_blobs = [] 66 | for i in range(self.num_backbone_stages): 67 | if rel_lateral_inner_blob is not None: 68 | bu = F.max_pool2d(rel_lateral_inner_blob, 2, stride=2) 69 | rel_lateral_inner_blob = \ 70 | self.prd_conv_lateral[i](blob_conv[-1 - i]) + bu 71 | else: 72 | rel_lateral_inner_blob = \ 73 | self.prd_conv_lateral[i](blob_conv[-1 - i]) 74 | rel_lateral_output_blobs.append(self.posthoc_modules[i](rel_lateral_inner_blob)) 75 | 76 | # the output is in the order of (P2, P3, P4, P5), we need to recover it back to (P5, P4, P3, P2) 77 | rel_lateral_output_blobs.reverse() 78 | return rel_lateral_output_blobs 79 | -------------------------------------------------------------------------------- /lib/roi_data_rel/minibatch_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang in 2019 2 | # 3 | # Based on Detectron.pytorch/lib/roi_data/minibatch.py written by Roy Tseng 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | from core.config import cfg 9 | import utils.blob as blob_utils 10 | import roi_data.rpn 11 | 12 | 13 | def get_minibatch_blob_names(is_training=True): 14 | """Return blob names in the order in which they are read by the data loader. 15 | """ 16 | # data blob: holds a batch of N images, each with 3 channels 17 | blob_names = ['data'] 18 | if cfg.RPN.RPN_ON: 19 | # RPN-only or end-to-end Faster R-CNN 20 | blob_names += roi_data.rpn.get_rpn_blob_names(is_training=is_training) 21 | elif cfg.RETINANET.RETINANET_ON: 22 | raise NotImplementedError 23 | else: 24 | # Fast R-CNN like models trained on precomputed proposals 25 | blob_names += roi_data.fast_rcnn.get_fast_rcnn_blob_names( 26 | is_training=is_training 27 | ) 28 | return blob_names 29 | 30 | 31 | def get_minibatch(roidb): 32 | """Given a roidb, construct a minibatch sampled from it.""" 33 | # We collect blobs from each image onto a list and then concat them into a 34 | # single tensor, hence we initialize each blob to an empty list 35 | blobs = {k: [] for k in get_minibatch_blob_names()} 36 | 37 | # Get the input image blob 38 | im_blob, im_scales = _get_image_blob(roidb) 39 | blobs['data'] = im_blob 40 | if cfg.RPN.RPN_ON: 41 | # RPN-only or end-to-end Faster/Mask R-CNN 42 | valid = roi_data.rpn.add_rpn_blobs(blobs, im_scales, roidb) 43 | elif cfg.RETINANET.RETINANET_ON: 44 | raise NotImplementedError 45 | else: 46 | # Fast R-CNN like models trained on precomputed proposals 47 | valid = roi_data.fast_rcnn.add_fast_rcnn_blobs(blobs, im_scales, roidb) 48 | # add relpn blobs 49 | add_relpn_blobs(blobs, im_scales, roidb) 50 | return blobs, valid 51 | 52 | 53 | def add_relpn_blobs(blobs, im_scales, roidb): 54 | 55 | assert 'roidb' in blobs 56 | valid_keys = ['dataset_name', 57 | 'sbj_gt_boxes', 'sbj_gt_classes', 'obj_gt_boxes', 'obj_gt_classes', 'prd_gt_classes', 58 | 'sbj_gt_overlaps', 'obj_gt_overlaps', 'prd_gt_overlaps', 'pair_to_gt_ind_map', 59 | 'width', 'height'] 60 | for i, e in enumerate(roidb): 61 | for k in valid_keys: 62 | if k in e: 63 | blobs['roidb'][i][k] = e[k] 64 | 65 | # Always return valid=True, since RPN minibatches are valid by design 66 | return True 67 | 68 | 69 | def _get_image_blob(roidb): 70 | """Builds an input blob from the images in the roidb at the specified 71 | scales. 
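Returns a pair (blob, im_scales): blob is a single float array of shape
(num_images, 3, H, W) assembled by im_list_to_blob below, and im_scales holds
the resize factor applied to each image.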
72 | """ 73 | num_images = len(roidb) 74 | # Sample random scales to use for each image in this batch 75 | scale_inds = np.random.randint( 76 | 0, high=len(cfg.TRAIN.SCALES), size=num_images) 77 | processed_ims = [] 78 | im_scales = [] 79 | for i in range(num_images): 80 | im = cv2.imread(roidb[i]['image']) 81 | assert im is not None, \ 82 | 'Failed to read image \'{}\''.format(roidb[i]['image']) 83 | # If NOT using opencv to read in images, uncomment following lines 84 | # if len(im.shape) == 2: 85 | # im = im[:, :, np.newaxis] 86 | # im = np.concatenate((im, im, im), axis=2) 87 | # # flip the channel, since the original one using cv2 88 | # # rgb -> bgr 89 | # im = im[:, :, ::-1] 90 | if roidb[i]['flipped']: 91 | im = im[:, ::-1, :] 92 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 93 | im, im_scale = blob_utils.prep_im_for_blob( 94 | im, cfg.PIXEL_MEANS, [target_size], cfg.TRAIN.MAX_SIZE) 95 | im_scales.append(im_scale[0]) 96 | processed_ims.append(im[0]) 97 | 98 | # Create a blob to hold the input images [n, c, h, w] 99 | blob = blob_utils.im_list_to_blob(processed_ims) 100 | 101 | return blob, im_scales 102 | -------------------------------------------------------------------------------- /lib/utils_rel/logging_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang for this project in 2019 2 | # Based on Detectron.pytorch/lib/utils/logging.py 3 | # Original license text below: 4 | # 5 | ############################################################################ 6 | # Copyright (c) 2017-present, Facebook, Inc. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################## 20 | 21 | """Utilities for logging.""" 22 | 23 | from __future__ import absolute_import 24 | from __future__ import division 25 | from __future__ import print_function 26 | from __future__ import unicode_literals 27 | 28 | from collections import deque 29 | from email.mime.text import MIMEText 30 | import json 31 | import logging 32 | import numpy as np 33 | import smtplib 34 | import sys 35 | 36 | from core.config import cfg 37 | 38 | # Print lower precision floating point values than default FLOAT_REPR 39 | # Note! 
Has no use for json encode with C speedups 40 | json.encoder.FLOAT_REPR = lambda o: format(o, '.6f') 41 | 42 | 43 | def log_json_stats(stats, sort_keys=True): 44 | print('json_stats: {:s}'.format(json.dumps(stats, sort_keys=sort_keys))) 45 | 46 | 47 | def log_stats(stats, misc_args): 48 | """Log training statistics to terminal""" 49 | if hasattr(misc_args, 'epoch'): 50 | lines = "[%s][%s][Epoch %d][Iter %d / %d]\n" % ( 51 | misc_args.run_name, misc_args.cfg_filename, 52 | misc_args.epoch, misc_args.step, misc_args.iters_per_epoch) 53 | else: 54 | lines = "[%s][%s][Step %d / %d]\n" % ( 55 | misc_args.run_name, misc_args.cfg_filename, stats['iter'], cfg.SOLVER.MAX_ITER) 56 | 57 | lines += "\t\tloss: %.6f, lr: %.6f backbone_lr: %.6f time: %.6f, eta: %s\n" % ( 58 | stats['loss'], stats['lr'], stats['backbone_lr'], stats['time'], stats['eta'] 59 | ) 60 | if stats['metrics']: 61 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['metrics'].items()) + "\n" 62 | if stats['head_losses']: 63 | lines += "\t\t" + ", ".join("%s: %.6f" % (k, v) for k, v in stats['head_losses'].items()) + "\n" 64 | print(lines[:-1]) # remove last new line 65 | 66 | 67 | class SmoothedValue(object): 68 | """Track a series of values and provide access to smoothed values over a 69 | window or the global series average. 70 | """ 71 | 72 | def __init__(self, window_size): 73 | self.deque = deque(maxlen=window_size) 74 | self.series = [] 75 | self.total = 0.0 76 | self.count = 0 77 | 78 | def AddValue(self, value): 79 | self.deque.append(value) 80 | self.series.append(value) 81 | self.count += 1 82 | self.total += value 83 | 84 | def GetMedianValue(self): 85 | return np.median(self.deque) 86 | 87 | def GetAverageValue(self): 88 | return np.mean(self.deque) 89 | 90 | def GetGlobalAverageValue(self): 91 | return self.total / self.count 92 | 93 | 94 | def send_email(subject, body, to): 95 | s = smtplib.SMTP('localhost') 96 | mime = MIMEText(body) 97 | mime['Subject'] = subject 98 | mime['To'] = to 99 | s.sendmail('detectron', to, mime.as_string()) 100 | 101 | 102 | def setup_logging(name): 103 | FORMAT = '%(levelname)s %(filename)s:%(lineno)4d: %(message)s' 104 | # Manually clear root loggers to prevent any module that may have called 105 | # logging.basicConfig() from blocking our logging setup 106 | logging.root.handlers = [] 107 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 108 | logger = logging.getLogger(name) 109 | return logger 110 | -------------------------------------------------------------------------------- /lib/modeling_rel/get_dataset_counts_rel.py: -------------------------------------------------------------------------------- 1 | # Some functions are adapted from Rowan Zellers: 2 | # https://github.com/rowanz/neural-motifs 3 | # Get counts of all of the examples in the dataset. 
Used for creating the baseline 4 | # dictionary model 5 | 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | from __future__ import unicode_literals 10 | 11 | import numpy as np 12 | import json 13 | 14 | import utils.boxes as box_utils 15 | import utils_rel.boxes_rel as box_utils_rel 16 | from core.config import cfg 17 | 18 | from datasets_rel.dataset_catalog_rel import ANN_FN2 19 | from datasets_rel.dataset_catalog_rel import DATASETS 20 | 21 | 22 | # This function is adapted from Rowan Zellers: 23 | # https://github.com/rowanz/neural-motifs/blob/master/lib/get_dataset_counts.py 24 | # Modified for this project 25 | def get_rel_counts(ds_name, must_overlap=True): 26 | """ 27 | Get counts of all of the relations. Used for modeling directly P(rel | o1, o2) 28 | :param train_data: 29 | :param must_overlap: 30 | :return: 31 | """ 32 | 33 | if ds_name.find('vg') >= 0: 34 | with open(DATASETS['vg_train'][ANN_FN2]) as f: 35 | train_data = json.load(f) 36 | elif ds_name.find('oi') >= 0: 37 | with open(DATASETS['oi_rel_train'][ANN_FN2]) as f: 38 | train_data = json.load(f) 39 | elif ds_name.find('vrd') >= 0: 40 | with open(DATASETS['vrd_train'][ANN_FN2]) as f: 41 | train_data = json.load(f) 42 | else: 43 | raise NotImplementedError 44 | 45 | fg_matrix = np.zeros(( 46 | cfg.MODEL.NUM_CLASSES - 1, # not include background 47 | cfg.MODEL.NUM_CLASSES - 1, # not include background 48 | cfg.MODEL.NUM_PRD_CLASSES + 1, # include background 49 | ), dtype=np.int64) 50 | 51 | bg_matrix = np.zeros(( 52 | cfg.MODEL.NUM_CLASSES - 1, # not include background 53 | cfg.MODEL.NUM_CLASSES - 1, # not include background 54 | ), dtype=np.int64) 55 | 56 | for _, im_rels in train_data.items(): 57 | # get all object boxes 58 | gt_box_to_label = {} 59 | for i, rel in enumerate(im_rels): 60 | sbj_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2(rel['subject']['bbox']) 61 | obj_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2(rel['object']['bbox']) 62 | sbj_lbl = rel['subject']['category'] # not include background 63 | obj_lbl = rel['object']['category'] # not include background 64 | prd_lbl = rel['predicate'] # not include background 65 | if tuple(sbj_box) not in gt_box_to_label: 66 | gt_box_to_label[tuple(sbj_box)] = sbj_lbl 67 | if tuple(obj_box) not in gt_box_to_label: 68 | gt_box_to_label[tuple(obj_box)] = obj_lbl 69 | 70 | fg_matrix[sbj_lbl, obj_lbl, prd_lbl + 1] += 1 71 | 72 | if cfg.MODEL.USE_OVLP_FILTER: 73 | if len(gt_box_to_label): 74 | gt_boxes = np.array(list(gt_box_to_label.keys()), dtype=np.int32) 75 | gt_classes = np.array(list(gt_box_to_label.values()), dtype=np.int32) 76 | o1o2_total = gt_classes[np.array( 77 | box_filter(gt_boxes, must_overlap=must_overlap), dtype=int)] 78 | for (o1, o2) in o1o2_total: 79 | bg_matrix[o1, o2] += 1 80 | else: 81 | # consider all pairs of boxes, overlapped or non-overlapped 82 | for b1, l1 in gt_box_to_label.items(): 83 | for b2, l2 in gt_box_to_label.items(): 84 | if b1 == b2: 85 | continue 86 | bg_matrix[l1, l2] += 1 87 | 88 | return fg_matrix, bg_matrix 89 | 90 | 91 | # This function is adapted from Rowan Zellers: 92 | # https://github.com/rowanz/neural-motifs/blob/master/lib/get_dataset_counts.py 93 | # Modified for this project 94 | def box_filter(boxes, must_overlap=False): 95 | """ Only include boxes that overlap as possible relations. 
96 | If no overlapping boxes, use all of them.""" 97 | n_cands = boxes.shape[0] 98 | 99 | overlaps = box_utils.bbox_overlaps(boxes.astype(np.float32), boxes.astype(np.float32)) > 0 100 | np.fill_diagonal(overlaps, 0) 101 | 102 | all_possib = np.ones_like(overlaps, dtype=np.bool) 103 | np.fill_diagonal(all_possib, 0) 104 | 105 | if must_overlap: 106 | possible_boxes = np.column_stack(np.where(overlaps)) 107 | 108 | if possible_boxes.size == 0: 109 | possible_boxes = np.column_stack(np.where(all_possib)) 110 | else: 111 | possible_boxes = np.column_stack(np.where(all_possib)) 112 | return possible_boxes 113 | -------------------------------------------------------------------------------- /lib/datasets_rel/dataset_catalog_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted from Detectron.pytorch/lib/datasets/dataset_catalog.py 2 | # for this project by Ji Zhang,2019 3 | #----------------------------------------------------------------------------- 4 | # Copyright (c) 2017-present, Facebook, Inc. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ############################################################################## 18 | 19 | """Collection of available datasets.""" 20 | 21 | from __future__ import absolute_import 22 | from __future__ import division 23 | from __future__ import print_function 24 | from __future__ import unicode_literals 25 | 26 | import os 27 | 28 | from core.config import cfg 29 | 30 | # Path to data dir 31 | _DATA_DIR = cfg.DATA_DIR 32 | 33 | # Required dataset entry keys 34 | IM_DIR = 'image_directory' 35 | ANN_FN = 'annotation_file' 36 | ANN_FN2 = 'annotation_file2' 37 | ANN_FN3 = 'predicate_file' 38 | 39 | # Optional dataset entry keys 40 | IM_PREFIX = 'image_prefix' 41 | DEVKIT_DIR = 'devkit_directory' 42 | RAW_DIR = 'raw_dir' 43 | 44 | # Available datasets 45 | DATASETS = { 46 | # OpenImages_v4 rel dataset for relationship task 47 | 'oi_rel_train': { 48 | IM_DIR: 49 | _DATA_DIR + '/openimages_v4/train', 50 | ANN_FN: 51 | _DATA_DIR + '/openimages_v4/rel/detections_train.json', 52 | ANN_FN2: 53 | _DATA_DIR + '/openimages_v4/rel/rel_only_annotations_train.json', 54 | ANN_FN3: 55 | _DATA_DIR + '/openimages_v4/rel/rel_9_predicates.json', 56 | }, 57 | 'oi_rel_train_mini': { 58 | IM_DIR: 59 | _DATA_DIR + '/openimages_v4/train', 60 | ANN_FN: 61 | _DATA_DIR + '/openimages_v4/rel/detections_train.json', 62 | ANN_FN2: 63 | _DATA_DIR + '/openimages_v4/rel/rel_only_annotations_train_mini.json', 64 | ANN_FN3: 65 | _DATA_DIR + '/openimages_v4/rel/rel_9_predicates.json', 66 | }, 67 | 'oi_rel_val': { 68 | IM_DIR: 69 | _DATA_DIR + '/openimages_v4/train', 70 | ANN_FN: 71 | _DATA_DIR + '/openimages_v4/rel/detections_val.json', 72 | ANN_FN2: 73 | _DATA_DIR + '/openimages_v4/rel/rel_only_annotations_val.json', 74 | ANN_FN3: 75 | _DATA_DIR + '/openimages_v4/rel/rel_9_predicates.json', 76 | }, 77 | 'oi_rel_val_mini': { 78 | IM_DIR: 79 | _DATA_DIR + '/openimages_v4/train', 80 | ANN_FN: 81 | 
_DATA_DIR + '/openimages_v4/rel/detections_val.json', 82 | ANN_FN2: 83 | _DATA_DIR + '/openimages_v4/rel/rel_only_annotations_val_mini.json', 84 | ANN_FN3: 85 | _DATA_DIR + '/openimages_v4/rel/rel_9_predicates.json', 86 | }, 87 | # for Kaggle test 88 | 'oi_kaggle_rel_test': { 89 | IM_DIR: 90 | _DATA_DIR + '/openimages_v4/rel/kaggle_test_images/challenge2018_test', 91 | ANN_FN: # pseudo annotation 92 | _DATA_DIR + '/openimages_v4/rel/kaggle_test_images/detections_test.json', 93 | ANN_FN2: 94 | _DATA_DIR + '/openimages_v4/rel/kaggle_test_images/all_rel_only_annotations_test.json', 95 | ANN_FN3: 96 | _DATA_DIR + '/openimages_v4/rel/rel_9_predicates.json', 97 | }, 98 | # VG dataset 99 | 'vg_train': { 100 | IM_DIR: 101 | _DATA_DIR + '/vg/VG_100K', 102 | ANN_FN: 103 | _DATA_DIR + '/vg/detections_train.json', 104 | ANN_FN2: 105 | _DATA_DIR + '/vg/rel_annotations_train.json', 106 | ANN_FN3: 107 | _DATA_DIR + '/vg/predicates.json', 108 | }, 109 | 'vg_val': { 110 | IM_DIR: 111 | _DATA_DIR + '/vg/VG_100K', 112 | ANN_FN: 113 | _DATA_DIR + '/vg/detections_val.json', 114 | ANN_FN2: 115 | _DATA_DIR + '/vg/rel_annotations_val.json', 116 | ANN_FN3: 117 | _DATA_DIR + '/vg/predicates.json', 118 | }, 119 | # VRD dataset 120 | 'vrd_train': { 121 | IM_DIR: 122 | _DATA_DIR + '/vrd/train_images', 123 | ANN_FN: 124 | _DATA_DIR + '/vrd/detections_train.json', 125 | ANN_FN2: 126 | _DATA_DIR + '/vrd/new_annotations_train.json', 127 | ANN_FN3: 128 | _DATA_DIR + '/vrd/predicates.json', 129 | }, 130 | 'vrd_val': { 131 | IM_DIR: 132 | _DATA_DIR + '/vrd/val_images', 133 | ANN_FN: 134 | _DATA_DIR + '/vrd/detections_val.json', 135 | ANN_FN2: 136 | _DATA_DIR + '/vrd/new_annotations_val.json', 137 | ANN_FN3: 138 | _DATA_DIR + '/vrd/predicates.json', 139 | }, 140 | } 141 | -------------------------------------------------------------------------------- /lib/utils_rel/boxes_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang in 2019 for this project 2 | # Based on Detectron.pytorch/lib/utils/boxes.py 3 | # 4 | # Original license text below: 5 | # 6 | ############################################################################# 7 | # Copyright (c) 2017-present, Facebook, Inc. 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 20 | ############################################################################## 21 | # 22 | # Based on: 23 | # -------------------------------------------------------- 24 | # Fast/er R-CNN 25 | # Licensed under The MIT License [see LICENSE for details] 26 | # Written by Ross Girshick 27 | # -------------------------------------------------------- 28 | 29 | """Box manipulation functions. The internal Detectron box format is 30 | [x1, y1, x2, y2] where (x1, y1) specify the top-left box corner and (x2, y2) 31 | specify the bottom-right box corner. Boxes from external sources, e.g., 32 | datasets, may be in other formats (such as [x, y, w, h]) and require conversion. 
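For the relationship annotations handled in this project, ground-truth boxes are stored as
[y1, y2, x1, x2] and are converted with y1y2x1x2_to_x1y1x2y2() defined at the end of this
module, e.g. y1y2x1x2_to_x1y1x2y2([y1, y2, x1, x2]) returns [x1, y1, x2, y2].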
33 | 34 | This module uses a convention that may seem strange at first: the width of a box 35 | is computed as x2 - x1 + 1 (likewise for height). The "+ 1" dates back to old 36 | object detection days when the coordinates were integer pixel indices, rather 37 | than floating point coordinates in a subpixel coordinate frame. A box with x2 = 38 | x1 and y2 = y1 was taken to include a single pixel, having a width of 1, and 39 | hence requiring the "+ 1". Now, most datasets will likely provide boxes with 40 | floating point coordinates and the width should be more reasonably computed as 41 | x2 - x1. 42 | 43 | In practice, as long as a model is trained and tested with a consistent 44 | convention either decision seems to be ok (at least in our experience on COCO). 45 | Since we have a long history of training models with the "+ 1" convention, we 46 | are reluctant to change it even if our modern tastes prefer not to use it. 47 | """ 48 | 49 | from __future__ import absolute_import 50 | from __future__ import division 51 | from __future__ import print_function 52 | from __future__ import unicode_literals 53 | 54 | import warnings 55 | import numpy as np 56 | 57 | from core.config import cfg 58 | import utils_rel.cython_bbox_rel as cython_bbox_rel 59 | from utils.boxes import bbox_transform_inv 60 | 61 | 62 | bbox_pair_overlaps = cython_bbox_rel.bbox_pair_overlaps 63 | 64 | 65 | def get_spt_features(boxes1, boxes2, width, height): 66 | boxes_u = boxes_union(boxes1, boxes2) 67 | spt_feat_1 = get_box_feature(boxes1, width, height) 68 | spt_feat_2 = get_box_feature(boxes2, width, height) 69 | spt_feat_12 = get_pair_feature(boxes1, boxes2) 70 | spt_feat_1u = get_pair_feature(boxes1, boxes_u) 71 | spt_feat_u2 = get_pair_feature(boxes_u, boxes2) 72 | return np.hstack((spt_feat_12, spt_feat_1u, spt_feat_u2, spt_feat_1, spt_feat_2)) 73 | 74 | 75 | def get_pair_feature(boxes1, boxes2): 76 | delta_1 = bbox_transform_inv(boxes1, boxes2) 77 | delta_2 = bbox_transform_inv(boxes2, boxes1) 78 | spt_feat = np.hstack((delta_1, delta_2[:, :2])) 79 | return spt_feat 80 | 81 | 82 | def get_box_feature(boxes, width, height): 83 | f1 = boxes[:, 0] / width 84 | f2 = boxes[:, 1] / height 85 | f3 = boxes[:, 2] / width 86 | f4 = boxes[:, 3] / height 87 | f5 = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1) / (width * height) 88 | return np.vstack((f1, f2, f3, f4, f5)).transpose() 89 | 90 | 91 | def boxes_union(boxes1, boxes2): 92 | assert boxes1.shape == boxes2.shape 93 | xmin = np.minimum(boxes1[:, 0], boxes2[:, 0]) 94 | ymin = np.minimum(boxes1[:, 1], boxes2[:, 1]) 95 | xmax = np.maximum(boxes1[:, 2], boxes2[:, 2]) 96 | ymax = np.maximum(boxes1[:, 3], boxes2[:, 3]) 97 | return np.vstack((xmin, ymin, xmax, ymax)).transpose() 98 | 99 | 100 | def rois_union(rois1, rois2): 101 | assert (rois1[:, 0] == rois2[:, 0]).all() 102 | xmin = np.minimum(rois1[:, 1], rois2[:, 1]) 103 | ymin = np.minimum(rois1[:, 2], rois2[:, 2]) 104 | xmax = np.maximum(rois1[:, 3], rois2[:, 3]) 105 | ymax = np.maximum(rois1[:, 4], rois2[:, 4]) 106 | return np.vstack((rois1[:, 0], xmin, ymin, xmax, ymax)).transpose() 107 | 108 | 109 | def boxes_intersect(boxes1, boxes2): 110 | assert boxes1.shape == boxes2.shape 111 | xmin = np.maximum(boxes1[:, 0], boxes2[:, 0]) 112 | ymin = np.maximum(boxes1[:, 1], boxes2[:, 1]) 113 | xmax = np.minimum(boxes1[:, 2], boxes2[:, 2]) 114 | ymax = np.minimum(boxes1[:, 3], boxes2[:, 3]) 115 | return np.vstack((xmin, ymin, xmax, ymax)).transpose() 116 | 117 | 118 | def rois_intersect(rois1, rois2): 119 | 
assert (rois1[:, 0] == rois2[:, 0]).all() 120 | xmin = np.maximum(rois1[:, 1], rois2[:, 1]) 121 | ymin = np.maximum(rois1[:, 2], rois2[:, 2]) 122 | xmax = np.minimum(rois1[:, 3], rois2[:, 3]) 123 | ymax = np.minimum(rois1[:, 4], rois2[:, 4]) 124 | return np.vstack((rois1[:, 0], xmin, ymin, xmax, ymax)).transpose() 125 | 126 | 127 | def y1y2x1x2_to_x1y1x2y2(y1y2x1x2): 128 | x1 = y1y2x1x2[2] 129 | y1 = y1y2x1x2[0] 130 | x2 = y1y2x1x2[3] 131 | y2 = y1y2x1x2[1] 132 | return [x1, y1, x2, y2] 133 | -------------------------------------------------------------------------------- /lib/utils_rel/subprocess_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang in 2019 2 | # Based on Detectron.pytorch/lib/utils/subprocess.py 3 | # Original license text below: 4 | # 5 | ############################################################################# 6 | # 7 | # Copyright (c) 2017-present, Facebook, Inc. 8 | # 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 20 | ############################################################################## 21 | 22 | """Primitives for running multiple single-GPU jobs in parallel over subranges of 23 | data. These are used for running multi-GPU inference. Subprocesses are used to 24 | avoid the GIL since inference may involve non-trivial amounts of Python code. 25 | """ 26 | 27 | from io import IOBase 28 | import logging 29 | import os 30 | import subprocess 31 | from six.moves import shlex_quote 32 | from six.moves import cPickle as pickle 33 | import yaml 34 | import numpy as np 35 | import torch 36 | 37 | from core.config import cfg 38 | 39 | logger = logging.getLogger(__name__) 40 | 41 | 42 | def process_in_parallel( 43 | tag, total_range_size, binary, output_dir, 44 | load_ckpt, load_detectron, opts=''): 45 | """Run the specified binary NUM_GPUS times in parallel, each time as a 46 | subprocess that uses one GPU. The binary must accept the command line 47 | arguments `--range {start} {end}` that specify a data processing range. 
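    For illustration only (the binary name, range and paths below are placeholders), each
    subprocess is pinned to a single GPU via CUDA_VISIBLE_DEVICES and launched with a
    command of roughly this shape:

        python3 tools/test_net_rel.py --range 0 2500 \
            --cfg <output_dir>/<tag>_range_config.yaml --set <opts> \
            --output_dir <output_dir> --load_ckpt <checkpoint.pth>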
48 | """ 49 | # Snapshot the current cfg state in order to pass to the inference 50 | # subprocesses 51 | cfg_file = os.path.join(output_dir, '{}_range_config.yaml'.format(tag)) 52 | with open(cfg_file, 'w') as f: 53 | yaml.dump(cfg, stream=f) 54 | subprocess_env = os.environ.copy() 55 | processes = [] 56 | NUM_GPUS = torch.cuda.device_count() 57 | subinds = np.array_split(range(total_range_size), NUM_GPUS) 58 | # Determine GPUs to use 59 | cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES') 60 | if cuda_visible_devices: 61 | gpu_inds = list(map(int, cuda_visible_devices.split(','))) 62 | assert -1 not in gpu_inds, \ 63 | 'Hiding GPU indices using the \'-1\' index is not supported' 64 | else: 65 | gpu_inds = range(cfg.NUM_GPUS) 66 | gpu_inds = list(gpu_inds) 67 | # Run the binary in cfg.NUM_GPUS subprocesses 68 | for i, gpu_ind in enumerate(gpu_inds): 69 | start = subinds[i][0] 70 | end = subinds[i][-1] + 1 71 | subprocess_env['CUDA_VISIBLE_DEVICES'] = str(gpu_ind) 72 | cmd = ('python3 {binary} --range {start} {end} --cfg {cfg_file} --set {opts} ' 73 | '--output_dir {output_dir}') 74 | if load_ckpt is not None: 75 | cmd += ' --load_ckpt {load_ckpt}' 76 | elif load_detectron is not None: 77 | cmd += ' --load_detectron {load_detectron}' 78 | cmd = cmd.format( 79 | binary=shlex_quote(binary), 80 | start=int(start), 81 | end=int(end), 82 | cfg_file=shlex_quote(cfg_file), 83 | output_dir=output_dir, 84 | load_ckpt=load_ckpt, 85 | load_detectron=load_detectron, 86 | opts=' '.join([shlex_quote(opt) for opt in opts]) 87 | ) 88 | logger.info('{} range command {}: {}'.format(tag, i, cmd)) 89 | if i == 0: 90 | subprocess_stdout = subprocess.PIPE 91 | else: 92 | filename = os.path.join( 93 | output_dir, '%s_range_%s_%s.stdout' % (tag, start, end) 94 | ) 95 | subprocess_stdout = open(filename, 'w') 96 | p = subprocess.Popen( 97 | cmd, 98 | shell=True, 99 | env=subprocess_env, 100 | stdout=subprocess_stdout, 101 | stderr=subprocess.STDOUT, 102 | bufsize=1 103 | ) 104 | processes.append((i, p, start, end, subprocess_stdout)) 105 | # Log output from inference processes and collate their results 106 | outputs = [] 107 | for i, p, start, end, subprocess_stdout in processes: 108 | log_subprocess_output(i, p, output_dir, tag, start, end) 109 | if isinstance(subprocess_stdout, IOBase): 110 | subprocess_stdout.close() 111 | range_file = os.path.join( 112 | output_dir, '%s_range_%s_%s.pkl' % (tag, start, end) 113 | ) 114 | range_data = pickle.load(open(range_file, 'rb')) 115 | outputs.append(range_data) 116 | return outputs 117 | 118 | 119 | def log_subprocess_output(i, p, output_dir, tag, start, end): 120 | """Capture the output of each subprocess and log it in the parent process. 121 | The first subprocess's output is logged in realtime. The output from the 122 | other subprocesses is buffered and then printed all at once (in order) when 123 | subprocesses finish. 
124 | """ 125 | outfile = os.path.join( 126 | output_dir, '%s_range_%s_%s.stdout' % (tag, start, end) 127 | ) 128 | logger.info('# ' + '-' * 76 + ' #') 129 | logger.info( 130 | 'stdout of subprocess %s with range [%s, %s]' % (i, start + 1, end) 131 | ) 132 | logger.info('# ' + '-' * 76 + ' #') 133 | if i == 0: 134 | # Stream the piped stdout from the first subprocess in realtime 135 | with open(outfile, 'w') as f: 136 | for line in iter(p.stdout.readline, b''): 137 | print(line.rstrip().decode('ascii')) 138 | f.write(str(line, encoding='ascii')) 139 | p.stdout.close() 140 | ret = p.wait() 141 | else: 142 | # For subprocesses >= 1, wait and dump their log file 143 | ret = p.wait() 144 | with open(outfile, 'r') as f: 145 | print(''.join(f.readlines())) 146 | assert ret == 0, 'Range subprocess failed (exit code: {})'.format(ret) 147 | -------------------------------------------------------------------------------- /lib/modeling_rel/relpn_heads.py: -------------------------------------------------------------------------------- 1 | # Written by Ji Zhang in 2019 2 | 3 | import numpy as np 4 | from numpy import linalg as la 5 | import json 6 | import logging 7 | 8 | from torch import nn 9 | from torch.nn import init 10 | import torch.nn.functional as F 11 | 12 | from core.config import cfg 13 | from modeling_rel.generate_rel_proposal_labels import GenerateRelProposalLabelsOp 14 | import modeling.FPN as FPN 15 | import utils_rel.boxes_rel as box_utils_rel 16 | import utils.fpn as fpn_utils 17 | 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def generic_relpn_outputs(): 23 | return single_scale_relpn_outputs() 24 | 25 | 26 | class single_scale_relpn_outputs(nn.Module): 27 | """Add RelPN outputs to a single scale model (i.e., no FPN).""" 28 | def __init__(self): 29 | super().__init__() 30 | 31 | self.RelPN_GenerateProposalLabels = GenerateRelProposalLabelsOp() 32 | ds_name = cfg.TRAIN.DATASETS[0] if len(cfg.TRAIN.DATASETS) else cfg.TEST.DATASETS[0] 33 | 34 | def get_roi_inds(self, det_labels, lbls): 35 | lbl_set = np.array(lbls) 36 | inds = np.where(np.isin(det_labels, lbl_set))[0] 37 | return inds 38 | 39 | def remove_self_pairs(self, det_size, sbj_inds, obj_inds): 40 | mask = np.ones(sbj_inds.shape[0], dtype=bool) 41 | for i in range(det_size): 42 | mask[i + det_size * i] = False 43 | keeps = np.where(mask)[0] 44 | sbj_inds = sbj_inds[keeps] 45 | obj_inds = obj_inds[keeps] 46 | return sbj_inds, obj_inds 47 | 48 | def forward(self, det_rois, det_labels, det_scores, im_info, dataset_name, roidb=None): 49 | """ 50 | det_rois: feature maps from the backbone network. 
(Variable) 51 | im_info: (CPU Variable) 52 | roidb: (list of ndarray) 53 | """ 54 | 55 | # Get pairwise proposals first 56 | if roidb is not None: 57 | # we always feed one image per batch during training 58 | assert len(roidb) == 1 59 | 60 | sbj_inds = np.repeat(np.arange(det_rois.shape[0]), det_rois.shape[0]) 61 | obj_inds = np.tile(np.arange(det_rois.shape[0]), det_rois.shape[0]) 62 | # remove self paired rois 63 | if det_rois.shape[0] > 1: # no pairs to remove when there is at most one detection 64 | sbj_inds, obj_inds = self.remove_self_pairs(det_rois.shape[0], sbj_inds, obj_inds) 65 | sbj_rois = det_rois[sbj_inds] 66 | obj_rois = det_rois[obj_inds] 67 | 68 | im_scale = im_info.data.numpy()[:, 2][0] 69 | sbj_boxes = sbj_rois[:, 1:] / im_scale 70 | obj_boxes = obj_rois[:, 1:] / im_scale 71 | # filters out those roi pairs whose boxes are not overlapping in the original scales 72 | if cfg.MODEL.USE_OVLP_FILTER: 73 | ovlp_so = box_utils_rel.bbox_pair_overlaps( 74 | sbj_boxes.astype(dtype=np.float32, copy=False), 75 | obj_boxes.astype(dtype=np.float32, copy=False)) 76 | ovlp_inds = np.where(ovlp_so > 0)[0] 77 | sbj_inds = sbj_inds[ovlp_inds] 78 | obj_inds = obj_inds[ovlp_inds] 79 | sbj_rois = sbj_rois[ovlp_inds] 80 | obj_rois = obj_rois[ovlp_inds] 81 | sbj_boxes = sbj_boxes[ovlp_inds] 82 | obj_boxes = obj_boxes[ovlp_inds] 83 | 84 | return_dict = {} 85 | if self.training: 86 | # Add binary relationships 87 | blobs_out = self.RelPN_GenerateProposalLabels(sbj_rois, obj_rois, det_rois, roidb, im_info) 88 | return_dict.update(blobs_out) 89 | else: 90 | sbj_labels = det_labels[sbj_inds] 91 | obj_labels = det_labels[obj_inds] 92 | sbj_scores = det_scores[sbj_inds] 93 | obj_scores = det_scores[obj_inds] 94 | rel_rois = box_utils_rel.rois_union(sbj_rois, obj_rois) 95 | return_dict['det_rois'] = det_rois 96 | return_dict['sbj_inds'] = sbj_inds 97 | return_dict['obj_inds'] = obj_inds 98 | return_dict['sbj_rois'] = sbj_rois 99 | return_dict['obj_rois'] = obj_rois 100 | return_dict['rel_rois'] = rel_rois 101 | return_dict['sbj_labels'] = sbj_labels 102 | return_dict['obj_labels'] = obj_labels 103 | return_dict['sbj_scores'] = sbj_scores 104 | return_dict['obj_scores'] = obj_scores 105 | return_dict['fg_size'] = np.array([sbj_rois.shape[0]], dtype=np.int32) 106 | 107 | im_scale = im_info.data.numpy()[:, 2][0] 108 | im_w = im_info.data.numpy()[:, 1][0] 109 | im_h = im_info.data.numpy()[:, 0][0] 110 | if cfg.MODEL.USE_SPATIAL_FEAT: 111 | spt_feat = box_utils_rel.get_spt_features(sbj_boxes, obj_boxes, im_w, im_h) 112 | return_dict['spt_feat'] = spt_feat 113 | if cfg.MODEL.USE_FREQ_BIAS or cfg.MODEL.RUN_BASELINE: 114 | return_dict['all_sbj_labels_int32'] = sbj_labels.astype(np.int32, copy=False) - 1 # det_labels start from 1 115 | return_dict['all_obj_labels_int32'] = obj_labels.astype(np.int32, copy=False) - 1 # det_labels start from 1 116 | if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS: 117 | lvl_min = cfg.FPN.ROI_MIN_LEVEL 118 | lvl_max = cfg.FPN.ROI_MAX_LEVEL 119 | # when use min_rel_area, the same sbj/obj area could be mapped to different feature levels 120 | # when they are associated with different relationships 121 | # Thus we cannot get det_rois features then gather sbj/obj features 122 | # The only way is gather sbj/obj per relationship, thus need to return sbj_rois/obj_rois 123 | rois_blob_names = ['det_rois', 'rel_rois'] 124 | for rois_blob_name in rois_blob_names: 125 | # Add per FPN level roi blobs named like: _fpn 126 | target_lvls = fpn_utils.map_rois_to_fpn_levels( 127 | 
return_dict[rois_blob_name][:, 1:5], lvl_min, lvl_max) 128 | fpn_utils.add_multilevel_roi_blobs( 129 | return_dict, rois_blob_name, return_dict[rois_blob_name], target_lvls, 130 | lvl_min, lvl_max) 131 | 132 | return return_dict 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /tools/test_net_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang, 2019 2 | # 3 | # Based on Detectron.pytorch/tools/test_net.py Written by Roy Tseng 4 | 5 | """Perform inference on one or more datasets.""" 6 | 7 | import argparse 8 | import cv2 9 | import os 10 | import pprint 11 | import sys 12 | import time 13 | from six.moves import cPickle as pickle 14 | 15 | import torch 16 | 17 | import _init_paths # pylint: disable=unused-import 18 | from core.config import cfg, merge_cfg_from_file, merge_cfg_from_list, assert_and_infer_cfg 19 | from core.test_engine_rel import run_inference 20 | import utils.logging 21 | 22 | from datasets_rel import task_evaluation_sg as task_evaluation_sg 23 | from datasets_rel import task_evaluation_vg_and_vrd as task_evaluation_vg_and_vrd 24 | 25 | # OpenCL may be enabled by default in OpenCV3; disable it because it's not 26 | # thread safe and causes unwanted GPU memory allocations. 27 | cv2.ocl.setUseOpenCL(False) 28 | 29 | 30 | def parse_args(): 31 | """Parse in command line arguments""" 32 | parser = argparse.ArgumentParser(description='Test a Fast R-CNN network') 33 | parser.add_argument( 34 | '--dataset', 35 | help='training dataset') 36 | parser.add_argument( 37 | '--cfg', dest='cfg_file', required=True, 38 | help='optional config file') 39 | 40 | parser.add_argument( 41 | '--load_ckpt', help='path of checkpoint to load') 42 | parser.add_argument( 43 | '--load_detectron', help='path to the detectron weight pickle file') 44 | 45 | parser.add_argument( 46 | '--output_dir', 47 | help='output directory to save the testing results. If not provided, ' 48 | 'defaults to [args.load_ckpt|args.load_detectron]/../test.') 49 | 50 | parser.add_argument( 51 | '--set', dest='set_cfgs', 52 | help='set config keys, will overwrite config in the cfg_file.' 
53 | ' See lib/core/config.py for all options', 54 | default=[], nargs='*') 55 | 56 | parser.add_argument( 57 | '--range', 58 | help='start (inclusive) and end (exclusive) indices', 59 | type=int, nargs=2) 60 | parser.add_argument( 61 | '--multi-gpu-testing', help='using multiple gpus for inference', 62 | action='store_true') 63 | parser.add_argument( 64 | '--do_val', dest='do_val', help='do evaluation', action='store_true') 65 | parser.add_argument( 66 | '--do_vis', dest='do_vis', help='visualize the last layer of conv_body', action='store_true') 67 | parser.add_argument( 68 | '--do_special', dest='do_special', help='visualize the last layer of conv_body', action='store_true') 69 | parser.add_argument( 70 | '--use_gt_boxes', dest='use_gt_boxes', help='use gt boxes for sgcls/prdcls', action='store_true') 71 | parser.add_argument( 72 | '--use_gt_labels', dest='use_gt_labels', help='use gt boxes for sgcls/prdcls', action='store_true') 73 | 74 | return parser.parse_args() 75 | 76 | 77 | if __name__ == '__main__': 78 | 79 | if not torch.cuda.is_available(): 80 | sys.exit("Need a CUDA device to run the code.") 81 | 82 | logger = utils.logging.setup_logging(__name__) 83 | args = parse_args() 84 | logger.info('Called with args:') 85 | logger.info(args) 86 | 87 | assert (torch.cuda.device_count() == 1) ^ bool(args.multi_gpu_testing) 88 | 89 | if args.cfg_file is not None: 90 | merge_cfg_from_file(args.cfg_file) 91 | if args.set_cfgs is not None: 92 | merge_cfg_from_file(args.cfg_file) 93 | 94 | if args.dataset == "oi_rel": 95 | cfg.TEST.DATASETS = ('oi_rel_val',) 96 | cfg.MODEL.NUM_CLASSES = 58 97 | cfg.MODEL.NUM_PRD_CLASSES = 9 # rel, exclude background 98 | elif args.dataset == "oi_rel_mini": 99 | cfg.TEST.DATASETS = ('oi_rel_val_mini',) 100 | cfg.MODEL.NUM_CLASSES = 58 101 | cfg.MODEL.NUM_PRD_CLASSES = 9 # rel, exclude background 102 | elif args.dataset == "oi_all_rel_train": 103 | cfg.TEST.DATASETS = ('oi_all_rel_train',) 104 | cfg.MODEL.NUM_CLASSES = 58 105 | cfg.MODEL.NUM_PRD_CLASSES = 9 # rel, exclude background 106 | elif args.dataset == "oi_all_rel": 107 | cfg.TEST.DATASETS = ('oi_all_rel_val',) 108 | cfg.MODEL.NUM_CLASSES = 58 109 | cfg.MODEL.NUM_PRD_CLASSES = 9 # rel, exclude background 110 | elif args.dataset == "oi_kaggle": 111 | cfg.TEST.DATASETS = ('oi_kaggle_rel_test',) 112 | cfg.MODEL.NUM_CLASSES = 58 113 | cfg.MODEL.NUM_PRD_CLASSES = 9 # rel, exclude background 114 | elif args.dataset == "vg_mini": 115 | cfg.TEST.DATASETS = ('vg_val_mini',) 116 | cfg.MODEL.NUM_CLASSES = 151 117 | cfg.MODEL.NUM_PRD_CLASSES = 50 # exclude background 118 | elif args.dataset == "vg": 119 | cfg.TEST.DATASETS = ('vg_val',) 120 | cfg.MODEL.NUM_CLASSES = 151 121 | cfg.MODEL.NUM_PRD_CLASSES = 50 # exclude background 122 | elif args.dataset == "vrd_train": 123 | cfg.TEST.DATASETS = ('vrd_train',) 124 | cfg.MODEL.NUM_CLASSES = 101 125 | cfg.MODEL.NUM_PRD_CLASSES = 70 # exclude background 126 | elif args.dataset == "vrd": 127 | cfg.TEST.DATASETS = ('vrd_val',) 128 | cfg.MODEL.NUM_CLASSES = 101 129 | cfg.MODEL.NUM_PRD_CLASSES = 70 # exclude background 130 | else: # For subprocess call 131 | assert cfg.TEST.DATASETS, 'cfg.TEST.DATASETS shouldn\'t be empty' 132 | 133 | assert_and_infer_cfg() 134 | 135 | if not cfg.MODEL.RUN_BASELINE: 136 | assert bool(args.load_ckpt) ^ bool(args.load_detectron), \ 137 | 'Exactly one of --load_ckpt and --load_detectron should be specified.' 
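    # Illustration of the default applied below (hypothetical checkpoint path):
    #   --load_ckpt Outputs/vrd_vgg16/ckpt/model_step12345.pth
    #   => args.output_dir == Outputs/vrd_vgg16/test
    # i.e. a 'test' directory created alongside the checkpoint's parent folder.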
138 | if args.output_dir is None: 139 | ckpt_path = args.load_ckpt if args.load_ckpt else args.load_detectron 140 | args.output_dir = os.path.join( 141 | os.path.dirname(os.path.dirname(ckpt_path)), 'test') 142 | logger.info('Automatically set output directory to %s', args.output_dir) 143 | if not os.path.exists(args.output_dir): 144 | os.makedirs(args.output_dir) 145 | 146 | logger.info('Testing with config:') 147 | logger.info(pprint.pformat(cfg)) 148 | 149 | # For test_engine.multi_gpu_test_net_on_dataset 150 | args.test_net_file, _ = os.path.splitext(__file__) 151 | # manually set args.cuda 152 | args.cuda = True 153 | 154 | if args.use_gt_boxes: 155 | if args.use_gt_labels: 156 | det_file = os.path.join(args.output_dir, 'rel_detections_gt_boxes_prdcls.pkl') 157 | else: 158 | det_file = os.path.join(args.output_dir, 'rel_detections_gt_boxes_sgcls.pkl') 159 | else: 160 | det_file = os.path.join(args.output_dir, 'rel_detections.pkl') 161 | if os.path.exists(det_file): 162 | logger.info('Loading results from {}'.format(det_file)) 163 | with open(det_file, 'rb') as f: 164 | all_results = pickle.load(f) 165 | logger.info('Starting evaluation now...') 166 | if args.dataset.find('vg') >= 0 or args.dataset.find('vrd') >= 0: 167 | task_evaluation_vg_and_vrd.eval_rel_results(all_results, args.output_dir, args.do_val) 168 | else: 169 | task_evaluation_sg.eval_rel_results(all_results, args.output_dir, args.do_val, args.do_vis, args.do_special) 170 | else: 171 | run_inference( 172 | args, 173 | ind_range=args.range, 174 | multi_gpu_testing=args.multi_gpu_testing, 175 | check_expected_results=True) 176 | -------------------------------------------------------------------------------- /lib/datasets_rel/ap_eval_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted from Detectron.pytorch/lib/datasets/voc_eval.py for 2 | # this project by Ji Zhang, 2019 3 | #----------------------------------------------------------------------------- 4 | # Copyright (c) 2017-present, Facebook, Inc. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ############################################################################## 18 | # 19 | # Based on: 20 | # -------------------------------------------------------- 21 | # Fast/er R-CNN 22 | # Licensed under The MIT License [see LICENSE for details] 23 | # Written by Bharath Hariharan 24 | # -------------------------------------------------------- 25 | 26 | """relationship AP evaluation code.""" 27 | 28 | from six.moves import cPickle as pickle 29 | import logging 30 | import numpy as np 31 | import os 32 | from tqdm import tqdm 33 | 34 | from utils.boxes import bbox_overlaps 35 | from utils_rel.boxes_rel import boxes_union 36 | 37 | logger = logging.getLogger(__name__) 38 | 39 | 40 | def prepare_mAP_dets(topk_dets, cls_num): 41 | cls_image_ids = [[] for _ in range(cls_num)] 42 | cls_dets = [{'confidence': np.empty(0), 43 | 'BB_s': np.empty((0, 4)), 44 | 'BB_o': np.empty((0, 4)), 45 | 'BB_r': np.empty((0, 4)), 46 | 'LBL_s': np.empty(0), 47 | 'LBL_o': np.empty(0)} for _ in range(cls_num)] 48 | cls_gts = [{} for _ in range(cls_num)] 49 | npos = [0 for _ in range(cls_num)] 50 | for dets in tqdm(topk_dets): 51 | image_id = dets['image'].split('/')[-1].split('.')[0] 52 | sbj_boxes = dets['det_boxes_s_top'] 53 | obj_boxes = dets['det_boxes_o_top'] 54 | rel_boxes = boxes_union(sbj_boxes, obj_boxes) 55 | sbj_labels = dets['det_labels_s_top'] 56 | obj_labels = dets['det_labels_o_top'] 57 | prd_labels = dets['det_labels_p_top'] 58 | det_scores = dets['det_scores_top'] 59 | gt_boxes_sbj = dets['gt_boxes_sbj'] 60 | gt_boxes_obj = dets['gt_boxes_obj'] 61 | gt_boxes_rel = boxes_union(gt_boxes_sbj, gt_boxes_obj) 62 | gt_labels_sbj = dets['gt_labels_sbj'] 63 | gt_labels_prd = dets['gt_labels_prd'] 64 | gt_labels_obj = dets['gt_labels_obj'] 65 | for c in range(cls_num): 66 | cls_inds = np.where(prd_labels == c)[0] 67 | # logger.info(cls_inds) 68 | if len(cls_inds): 69 | cls_sbj_boxes = sbj_boxes[cls_inds] 70 | cls_obj_boxes = obj_boxes[cls_inds] 71 | cls_rel_boxes = rel_boxes[cls_inds] 72 | cls_sbj_labels = sbj_labels[cls_inds] 73 | cls_obj_labels = obj_labels[cls_inds] 74 | cls_det_scores = det_scores[cls_inds] 75 | cls_dets[c]['confidence'] = np.concatenate((cls_dets[c]['confidence'], cls_det_scores)) 76 | cls_dets[c]['BB_s'] = np.concatenate((cls_dets[c]['BB_s'], cls_sbj_boxes), 0) 77 | cls_dets[c]['BB_o'] = np.concatenate((cls_dets[c]['BB_o'], cls_obj_boxes), 0) 78 | cls_dets[c]['BB_r'] = np.concatenate((cls_dets[c]['BB_r'], cls_rel_boxes), 0) 79 | cls_dets[c]['LBL_s'] = np.concatenate((cls_dets[c]['LBL_s'], cls_sbj_labels)) 80 | cls_dets[c]['LBL_o'] = np.concatenate((cls_dets[c]['LBL_o'], cls_obj_labels)) 81 | cls_image_ids[c] += [image_id] * len(cls_inds) 82 | cls_gt_inds = np.where(gt_labels_prd == c)[0] 83 | cls_gt_boxes_sbj = gt_boxes_sbj[cls_gt_inds] 84 | cls_gt_boxes_obj = gt_boxes_obj[cls_gt_inds] 85 | cls_gt_boxes_rel = gt_boxes_rel[cls_gt_inds] 86 | cls_gt_labels_sbj = gt_labels_sbj[cls_gt_inds] 87 | cls_gt_labels_obj = gt_labels_obj[cls_gt_inds] 88 | cls_gt_num = len(cls_gt_inds) 89 | det = [False] * cls_gt_num 90 | npos[c] = npos[c] + cls_gt_num 91 | cls_gts[c][image_id] = {'gt_boxes_sbj': cls_gt_boxes_sbj, 92 | 'gt_boxes_obj': cls_gt_boxes_obj, 93 | 'gt_boxes_rel': cls_gt_boxes_rel, 94 | 'gt_labels_sbj': cls_gt_labels_sbj, 95 | 'gt_labels_obj': cls_gt_labels_obj, 96 | 'gt_num': cls_gt_num, 97 | 'det': det} 98 | return cls_image_ids, cls_dets, cls_gts, npos 99 | 100 | 101 | def get_ap(rec, prec): 102 | """Compute AP given precision and recall. 
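    Uses the all-point interpolation: the precision curve is first made monotonically
    non-increasing (the envelope), and AP is then the sum of (delta recall) * precision
    over the points where recall changes. Illustrative values only: rec = [0.5, 1.0] and
    prec = [1.0, 0.5] give ap = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.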
103 | """ 104 | # correct AP calculation 105 | # first append sentinel values at the end 106 | mrec = np.concatenate(([0.], rec, [1.])) 107 | mpre = np.concatenate(([0.], prec, [0.])) 108 | 109 | # compute the precision envelope 110 | for i in range(mpre.size - 1, 0, -1): 111 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 112 | 113 | # to calculate area under PR curve, look for points 114 | # where X axis (recall) changes value 115 | i = np.where(mrec[1:] != mrec[:-1])[0] 116 | 117 | # and sum (\Delta recall) * prec 118 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 119 | return ap 120 | 121 | 122 | def ap_eval(image_ids, 123 | dets, 124 | gts, 125 | npos, 126 | rel_or_phr=True, 127 | ovthresh=0.5): 128 | """ 129 | Top level function that does the relationship AP evaluation. 130 | 131 | detpath: Path to detections 132 | detpath.format(classname) should produce the detection results file. 133 | classname: Category name (duh) 134 | [ovthresh]: Overlap threshold (default = 0.5) 135 | """ 136 | 137 | confidence = dets['confidence'] 138 | BB_s = dets['BB_s'] 139 | BB_o = dets['BB_o'] 140 | BB_r = dets['BB_r'] 141 | LBL_s = dets['LBL_s'] 142 | LBL_o = dets['LBL_o'] 143 | 144 | # sort by confidence 145 | sorted_ind = np.argsort(-confidence) 146 | BB_s = BB_s[sorted_ind, :] 147 | BB_o = BB_o[sorted_ind, :] 148 | BB_r = BB_r[sorted_ind, :] 149 | LBL_s = LBL_s[sorted_ind] 150 | LBL_o = LBL_o[sorted_ind] 151 | image_ids = [image_ids[x] for x in sorted_ind] 152 | 153 | # go down dets and mark TPs and FPs 154 | nd = len(image_ids) 155 | tp = np.zeros(nd) 156 | fp = np.zeros(nd) 157 | gts_visited = {k: [False] * v['gt_num'] for k, v in gts.items()} 158 | for d in range(nd): 159 | R = gts[image_ids[d]] 160 | visited = gts_visited[image_ids[d]] 161 | bb_s = BB_s[d, :].astype(float) 162 | bb_o = BB_o[d, :].astype(float) 163 | bb_r = BB_r[d, :].astype(float) 164 | lbl_s = LBL_s[d] 165 | lbl_o = LBL_o[d] 166 | ovmax = -np.inf 167 | BBGT_s = R['gt_boxes_sbj'].astype(float) 168 | BBGT_o = R['gt_boxes_obj'].astype(float) 169 | BBGT_r = R['gt_boxes_rel'].astype(float) 170 | LBLGT_s = R['gt_labels_sbj'] 171 | LBLGT_o = R['gt_labels_obj'] 172 | if BBGT_s.size > 0: 173 | valid_mask = np.logical_and(LBLGT_s == lbl_s, LBLGT_o == lbl_o) 174 | if valid_mask.any(): 175 | if rel_or_phr: # means it is evaluating relationships 176 | overlaps_s = bbox_overlaps( 177 | bb_s[None, :].astype(dtype=np.float32, copy=False), 178 | BBGT_s.astype(dtype=np.float32, copy=False))[0] 179 | overlaps_o = bbox_overlaps( 180 | bb_o[None, :].astype(dtype=np.float32, copy=False), 181 | BBGT_o.astype(dtype=np.float32, copy=False))[0] 182 | overlaps = np.minimum(overlaps_s, overlaps_o) 183 | else: 184 | overlaps = bbox_overlaps( 185 | bb_r[None, :].astype(dtype=np.float32, copy=False), 186 | BBGT_r.astype(dtype=np.float32, copy=False))[0] 187 | overlaps *= valid_mask 188 | ovmax = np.max(overlaps) 189 | jmax = np.argmax(overlaps) 190 | else: 191 | ovmax = 0. 192 | jmax = -1 193 | 194 | if ovmax > ovthresh: 195 | if not visited[jmax]: 196 | tp[d] = 1. 197 | visited[jmax] = 1 198 | else: 199 | fp[d] = 1. 200 | else: 201 | fp[d] = 1. 
202 | 203 | # compute precision recall 204 | fp = np.cumsum(fp) 205 | tp = np.cumsum(tp) 206 | rec = tp / (float(npos) + 1e-12) 207 | # ground truth 208 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 209 | ap = get_ap(rec, prec) 210 | 211 | return rec, prec, ap 212 | -------------------------------------------------------------------------------- /lib/datasets_rel/task_evaluation_vg_and_vrd.py: -------------------------------------------------------------------------------- 1 | """ 2 | Written by Ji Zhang, 2019 3 | Some functions are adapted from Rowan Zellers 4 | Original source: 5 | https://github.com/rowanz/neural-motifs/blob/master/lib/evaluation/sg_eval.py 6 | """ 7 | 8 | import os 9 | import numpy as np 10 | import logging 11 | from six.moves import cPickle as pickle 12 | import json 13 | import csv 14 | from tqdm import tqdm 15 | 16 | from core.config import cfg 17 | from functools import reduce 18 | from utils.boxes import bbox_overlaps 19 | from utils_rel.boxes_rel import boxes_union 20 | 21 | from .pytorch_misc import intersect_2d, argsort_desc 22 | 23 | np.set_printoptions(precision=3) 24 | 25 | logger = logging.getLogger(__name__) 26 | 27 | 28 | topk = 100 29 | 30 | 31 | def eval_rel_results(all_results, output_dir, do_val): 32 | 33 | if cfg.TEST.DATASETS[0].find('vg') >= 0: 34 | prd_k_set = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20) 35 | elif cfg.TEST.DATASETS[0].find('vrd') >= 0: 36 | prd_k_set = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 70) 37 | else: 38 | prd_k_set = (1, 2, 3, 4, 5, 6, 7, 8, 9) 39 | 40 | if cfg.TEST.DATASETS[0].find('vg') >= 0: 41 | eval_sets = (False,) 42 | else: 43 | eval_sets = (False, True) 44 | 45 | for phrdet in eval_sets: 46 | eval_metric = 'phrdet' if phrdet else 'reldet' 47 | print('================== {} =================='.format(eval_metric)) 48 | 49 | for prd_k in prd_k_set: 50 | print('prd_k = {}:'.format(prd_k)) 51 | 52 | recalls = {20: 0, 50: 0, 100: 0} 53 | if do_val: 54 | all_gt_cnt = 0 55 | 56 | topk_dets = [] 57 | for im_i, res in enumerate(tqdm(all_results)): 58 | 59 | # in oi_all_rel some images have no dets 60 | if res['prd_scores'] is None: 61 | det_boxes_s_top = np.zeros((0, 4), dtype=np.float32) 62 | det_boxes_o_top = np.zeros((0, 4), dtype=np.float32) 63 | det_labels_s_top = np.zeros(0, dtype=np.int32) 64 | det_labels_p_top = np.zeros(0, dtype=np.int32) 65 | det_labels_o_top = np.zeros(0, dtype=np.int32) 66 | det_scores_top = np.zeros(0, dtype=np.float32) 67 | else: 68 | det_boxes_sbj = res['sbj_boxes'] # (#num_rel, 4) 69 | det_boxes_obj = res['obj_boxes'] # (#num_rel, 4) 70 | det_labels_sbj = res['sbj_labels'] # (#num_rel,) 71 | det_labels_obj = res['obj_labels'] # (#num_rel,) 72 | det_scores_sbj = res['sbj_scores'] # (#num_rel,) 73 | det_scores_obj = res['obj_scores'] # (#num_rel,) 74 | if 'prd_scores_ttl' in res: 75 | det_scores_prd = res['prd_scores_ttl'][:, 1:] 76 | else: 77 | det_scores_prd = res['prd_scores'][:, 1:] 78 | 79 | det_labels_prd = np.argsort(-det_scores_prd, axis=1) 80 | det_scores_prd = -np.sort(-det_scores_prd, axis=1) 81 | 82 | det_scores_so = det_scores_sbj * det_scores_obj 83 | det_scores_spo = det_scores_so[:, None] * det_scores_prd[:, :prd_k] 84 | det_scores_inds = argsort_desc(det_scores_spo)[:topk] 85 | det_scores_top = det_scores_spo[det_scores_inds[:, 0], det_scores_inds[:, 1]] 86 | det_boxes_so_top = np.hstack( 87 | (det_boxes_sbj[det_scores_inds[:, 0]], det_boxes_obj[det_scores_inds[:, 0]])) 88 | det_labels_p_top = det_labels_prd[det_scores_inds[:, 0], det_scores_inds[:, 1]] 89 | 
det_labels_spo_top = np.vstack( 90 | (det_labels_sbj[det_scores_inds[:, 0]], det_labels_p_top, det_labels_obj[det_scores_inds[:, 0]])).transpose() 91 | 92 | det_boxes_s_top = det_boxes_so_top[:, :4] 93 | det_boxes_o_top = det_boxes_so_top[:, 4:] 94 | det_labels_s_top = det_labels_spo_top[:, 0] 95 | det_labels_p_top = det_labels_spo_top[:, 1] 96 | det_labels_o_top = det_labels_spo_top[:, 2] 97 | 98 | topk_dets.append(dict(image=res['image'], 99 | det_boxes_s_top=det_boxes_s_top, 100 | det_boxes_o_top=det_boxes_o_top, 101 | det_labels_s_top=det_labels_s_top, 102 | det_labels_p_top=det_labels_p_top, 103 | det_labels_o_top=det_labels_o_top, 104 | det_scores_top=det_scores_top)) 105 | 106 | if do_val: 107 | gt_boxes_sbj = res['gt_sbj_boxes'] # (#num_gt, 4) 108 | gt_boxes_obj = res['gt_obj_boxes'] # (#num_gt, 4) 109 | gt_labels_sbj = res['gt_sbj_labels'] # (#num_gt,) 110 | gt_labels_obj = res['gt_obj_labels'] # (#num_gt,) 111 | gt_labels_prd = res['gt_prd_labels'] # (#num_gt,) 112 | gt_boxes_so = np.hstack((gt_boxes_sbj, gt_boxes_obj)) 113 | gt_labels_spo = np.vstack((gt_labels_sbj, gt_labels_prd, gt_labels_obj)).transpose() 114 | # Compute recall. It's most efficient to match once and then do recall after 115 | # det_boxes_so_top is (#num_rel, 8) 116 | # det_labels_spo_top is (#num_rel, 3) 117 | if phrdet: 118 | det_boxes_r_top = boxes_union(det_boxes_s_top, det_boxes_o_top) 119 | gt_boxes_r = boxes_union(gt_boxes_sbj, gt_boxes_obj) 120 | pred_to_gt = _compute_pred_matches( 121 | gt_labels_spo, det_labels_spo_top, 122 | gt_boxes_r, det_boxes_r_top, 123 | phrdet=phrdet) 124 | else: 125 | pred_to_gt = _compute_pred_matches( 126 | gt_labels_spo, det_labels_spo_top, 127 | gt_boxes_so, det_boxes_so_top, 128 | phrdet=phrdet) 129 | all_gt_cnt += gt_labels_spo.shape[0] 130 | for k in recalls: 131 | if len(pred_to_gt): 132 | match = reduce(np.union1d, pred_to_gt[:k]) 133 | else: 134 | match = [] 135 | recalls[k] += len(match) 136 | 137 | topk_dets[-1].update(dict(gt_boxes_sbj=gt_boxes_sbj, 138 | gt_boxes_obj=gt_boxes_obj, 139 | gt_labels_sbj=gt_labels_sbj, 140 | gt_labels_obj=gt_labels_obj, 141 | gt_labels_prd=gt_labels_prd)) 142 | 143 | if do_val: 144 | for k in recalls: 145 | recalls[k] = float(recalls[k]) / (float(all_gt_cnt) + 1e-12) 146 | print_stats(recalls) 147 | 148 | 149 | def print_stats(recalls): 150 | # print('====================== ' + 'sgdet' + ' ============================') 151 | for k, v in recalls.items(): 152 | print('R@%i: %.2f' % (k, 100 * v)) 153 | 154 | 155 | # This function is adapted from Rowan Zellers' code: 156 | # https://github.com/rowanz/neural-motifs/blob/master/lib/evaluation/sg_eval.py 157 | # Modified for this project to work with PyTorch v0.4 158 | def _compute_pred_matches(gt_triplets, pred_triplets, 159 | gt_boxes, pred_boxes, iou_thresh=0.5, phrdet=False): 160 | """ 161 | Given a set of predicted triplets, return the list of matching GT's for each of the 162 | given predictions 163 | :param gt_triplets: 164 | :param pred_triplets: 165 | :param gt_boxes: 166 | :param pred_boxes: 167 | :param iou_thresh: 168 | :return: 169 | """ 170 | # This performs a matrix multiplication-esque thing between the two arrays 171 | # Instead of summing, we want the equality, so we reduce in that way 172 | # The rows correspond to GT triplets, columns to pred triplets 173 | keeps = intersect_2d(gt_triplets, pred_triplets) 174 | gt_has_match = keeps.any(1) 175 | pred_to_gt = [[] for x in range(pred_boxes.shape[0])] 176 | for gt_ind, gt_box, keep_inds in zip(np.where(gt_has_match)[0], 
177 | gt_boxes[gt_has_match], 178 | keeps[gt_has_match], 179 | ): 180 | boxes = pred_boxes[keep_inds] 181 | if phrdet: 182 | gt_box = gt_box.astype(dtype=np.float32, copy=False) 183 | boxes = boxes.astype(dtype=np.float32, copy=False) 184 | rel_iou = bbox_overlaps(gt_box[None, :], boxes)[0] 185 | 186 | inds = rel_iou >= iou_thresh 187 | else: 188 | gt_box = gt_box.astype(dtype=np.float32, copy=False) 189 | boxes = boxes.astype(dtype=np.float32, copy=False) 190 | sub_iou = bbox_overlaps(gt_box[None,:4], boxes[:, :4])[0] 191 | obj_iou = bbox_overlaps(gt_box[None,4:], boxes[:, 4:])[0] 192 | 193 | inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh) 194 | 195 | for i in np.where(keep_inds)[0][inds]: 196 | pred_to_gt[i].append(int(gt_ind)) 197 | return pred_to_gt 198 | -------------------------------------------------------------------------------- /lib/core/test_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang in 2019 2 | # From Detectron.pytorch/lib/core/test.py 3 | # Original license text below 4 | # -------------------------------------------------------- 5 | # Written by Roy Tseng 6 | # 7 | # Based on: 8 | # -------------------------------------------------------- 9 | # Copyright (c) 2017-present, Facebook, Inc. 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
22 | ############################################################################## 23 | # 24 | # Based on: 25 | # -------------------------------------------------------- 26 | # Fast R-CNN 27 | # Copyright (c) 2015 Microsoft 28 | # Licensed under The MIT License [see LICENSE for details] 29 | # Written by Ross Girshick 30 | # -------------------------------------------------------- 31 | 32 | from __future__ import absolute_import 33 | from __future__ import division 34 | from __future__ import print_function 35 | from __future__ import unicode_literals 36 | 37 | from collections import defaultdict 38 | from six.moves import cPickle as pickle 39 | import cv2 40 | import logging 41 | import numpy as np 42 | 43 | from torch.autograd import Variable 44 | import torch 45 | 46 | from core.config import cfg 47 | from utils.timer import Timer 48 | import utils.blob as blob_utils 49 | import utils.fpn as fpn_utils 50 | import utils.image as image_utils 51 | 52 | logger = logging.getLogger(__name__) 53 | 54 | 55 | def im_detect_rels(model, im, dataset_name, box_proposals, do_vis=False, timers=None, roidb=None, use_gt_labels=False): 56 | 57 | if timers is None: 58 | timers = defaultdict(Timer) 59 | 60 | timers['im_detect_rels'].tic() 61 | rel_results = im_get_det_rels(model, im, dataset_name, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, box_proposals, do_vis, roidb, use_gt_labels) 62 | timers['im_detect_rels'].toc() 63 | 64 | return rel_results 65 | 66 | 67 | def im_get_det_rels(model, im, dataset_name, target_scale, target_max_size, boxes=None, do_vis=False, roidb=None, use_gt_labels=False): 68 | """Prepare the bbox for testing""" 69 | 70 | inputs, im_scale = _get_blobs(im, boxes, target_scale, target_max_size) 71 | 72 | if cfg.DEDUP_BOXES > 0 and not cfg.MODEL.FASTER_RCNN: 73 | v = np.array([1, 1e3, 1e6, 1e9, 1e12]) 74 | hashes = np.round(inputs['rois'] * cfg.DEDUP_BOXES).dot(v) 75 | _, index, inv_index = np.unique( 76 | hashes, return_index=True, return_inverse=True 77 | ) 78 | inputs['rois'] = inputs['rois'][index, :] 79 | boxes = boxes[index, :] 80 | 81 | # Add multi-level rois for FPN 82 | if cfg.FPN.MULTILEVEL_ROIS and not cfg.MODEL.FASTER_RCNN: 83 | _add_multilevel_rois_for_test(inputs, 'rois') 84 | 85 | if cfg.PYTORCH_VERSION_LESS_THAN_040: 86 | inputs['data'] = [Variable(torch.from_numpy(inputs['data']), volatile=True)] 87 | inputs['im_info'] = [Variable(torch.from_numpy(inputs['im_info']), volatile=True)] 88 | else: 89 | inputs['data'] = [torch.from_numpy(inputs['data'])] 90 | inputs['im_info'] = [torch.from_numpy(inputs['im_info'])] 91 | if dataset_name is not None: 92 | inputs['dataset_name'] = [blob_utils.serialize(dataset_name)] 93 | 94 | inputs['do_vis'] = [do_vis] 95 | if roidb is not None: 96 | inputs['roidb'] = [roidb] 97 | if use_gt_labels: 98 | inputs['use_gt_labels'] = [use_gt_labels] 99 | 100 | return_dict = model(**inputs) 101 | 102 | return_dict2 = {} 103 | if return_dict['sbj_rois'] is not None: 104 | sbj_boxes = return_dict['sbj_rois'].data.cpu().numpy()[:, 1:5] / im_scale 105 | sbj_labels = return_dict['sbj_labels'].data.cpu().numpy() - 1 106 | sbj_scores = return_dict['sbj_scores'].data.cpu().numpy() 107 | obj_boxes = return_dict['obj_rois'].data.cpu().numpy()[:, 1:5] / im_scale 108 | obj_labels = return_dict['obj_labels'].data.cpu().numpy() - 1 109 | obj_scores = return_dict['obj_scores'].data.cpu().numpy() 110 | prd_scores = return_dict['prd_scores'].data.cpu().numpy() 111 | if cfg.MODEL.USE_FREQ_BIAS: 112 | prd_scores_bias = return_dict['prd_scores_bias'].data.cpu().numpy() 113 
| if cfg.MODEL.USE_SPATIAL_FEAT: 114 | prd_scores_spt = return_dict['prd_scores_spt'].data.cpu().numpy() 115 | if cfg.MODEL.ADD_SCORES_ALL: 116 | prd_scores_ttl = return_dict['prd_ttl_scores'].data.cpu().numpy() 117 | 118 | return_dict2 = dict(sbj_boxes=sbj_boxes, 119 | sbj_labels=sbj_labels.astype(np.int32, copy=False), 120 | sbj_scores=sbj_scores, 121 | obj_boxes=obj_boxes, 122 | obj_labels=obj_labels.astype(np.int32, copy=False), 123 | obj_scores=obj_scores, 124 | prd_scores=prd_scores) 125 | if cfg.MODEL.ADD_SCORES_ALL: 126 | return_dict2['prd_scores_ttl'] = prd_scores_ttl 127 | 128 | if cfg.MODEL.USE_FREQ_BIAS: 129 | return_dict2['prd_scores_bias'] = prd_scores_bias 130 | if cfg.MODEL.USE_SPATIAL_FEAT: 131 | return_dict2['prd_scores_spt'] = prd_scores_spt 132 | if do_vis: 133 | if isinstance(return_dict['blob_conv'], list): 134 | blob_conv = [b.data.cpu().numpy().squeeze() for b in return_dict['blob_conv']] 135 | blob_conv_prd = [b.data.cpu().numpy().squeeze() for b in return_dict['blob_conv_prd']] 136 | blob_conv = [b.mean(axis=0) for b in blob_conv] 137 | blob_conv_prd = [b.mean(axis=0) for b in blob_conv_prd] 138 | return_dict2['blob_conv'] = blob_conv 139 | return_dict2['blob_conv_prd'] = blob_conv_prd 140 | else: 141 | blob_conv = return_dict['blob_conv'].data.cpu().numpy().squeeze() 142 | blob_conv_prd = return_dict['blob_conv_prd'].data.cpu().numpy().squeeze() 143 | blob_conv = blob_conv.mean(axis=0) 144 | blob_conv_prd = blob_conv_prd.mean(axis=0) 145 | return_dict2['blob_conv'] = blob_conv 146 | return_dict2['blob_conv_prd'] = blob_conv_prd 147 | else: 148 | return_dict2 = dict(sbj_boxes=None, 149 | sbj_labels=None, 150 | sbj_scores=None, 151 | obj_boxes=None, 152 | obj_labels=None, 153 | obj_scores=None, 154 | prd_scores=None) 155 | 156 | return return_dict2 157 | 158 | 159 | def _get_rois_blob(im_rois, im_scale): 160 | """Converts RoIs into network inputs. 161 | 162 | Arguments: 163 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 164 | im_scale_factors (list): scale factors as returned by _get_image_blob 165 | 166 | Returns: 167 | blob (ndarray): R x 5 matrix of RoIs in the image pyramid with columns 168 | [level, x1, y1, x2, y2] 169 | """ 170 | rois, levels = _project_im_rois(im_rois, im_scale) 171 | rois_blob = np.hstack((levels, rois)) 172 | return rois_blob.astype(np.float32, copy=False) 173 | 174 | 175 | def _project_im_rois(im_rois, scales): 176 | """Project image RoIs into the image pyramid built by _get_image_blob. 177 | 178 | Arguments: 179 | im_rois (ndarray): R x 4 matrix of RoIs in original image coordinates 180 | scales (list): scale factors as returned by _get_image_blob 181 | 182 | Returns: 183 | rois (ndarray): R x 4 matrix of projected RoI coordinates 184 | levels (ndarray): image pyramid levels used by each projected RoI 185 | """ 186 | rois = im_rois.astype(np.float, copy=False) * scales 187 | levels = np.zeros((im_rois.shape[0], 1), dtype=np.int) 188 | return rois, levels 189 | 190 | 191 | def _add_multilevel_rois_for_test(blobs, name): 192 | """Distributes a set of RoIs across FPN pyramid levels by creating new level 193 | specific RoI blobs. 
194 | 195 | Arguments: 196 | blobs (dict): dictionary of blobs 197 | name (str): a key in 'blobs' identifying the source RoI blob 198 | 199 | Returns: 200 | [by ref] blobs (dict): new keys named by `name + 'fpn' + level` 201 | are added to dict each with a value that's an R_level x 5 ndarray of 202 | RoIs (see _get_rois_blob for format) 203 | """ 204 | lvl_min = cfg.FPN.ROI_MIN_LEVEL 205 | lvl_max = cfg.FPN.ROI_MAX_LEVEL 206 | lvls = fpn_utils.map_rois_to_fpn_levels(blobs[name][:, 1:5], lvl_min, lvl_max) 207 | fpn_utils.add_multilevel_roi_blobs( 208 | blobs, name, blobs[name], lvls, lvl_min, lvl_max 209 | ) 210 | 211 | 212 | def _get_blobs(im, rois, target_scale, target_max_size): 213 | """Convert an image and RoIs within that image into network inputs.""" 214 | blobs = {} 215 | blobs['data'], im_scale, blobs['im_info'] = \ 216 | blob_utils.get_image_blob(im, target_scale, target_max_size) 217 | if rois is not None: 218 | blobs['rois'] = _get_rois_blob(rois, im_scale) 219 | return blobs, im_scale 220 | -------------------------------------------------------------------------------- /lib/modeling_rel/fast_rcnn_heads.py: -------------------------------------------------------------------------------- 1 | # Adapted from Detectron.pytorch/lib/modeling/fast_rcnn_heads.py 2 | # for this project by Ji Zhang, 2019 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.nn.init as init 8 | from torch.autograd import Variable 9 | 10 | from core.config import cfg 11 | import nn as mynn 12 | import utils.net as net_utils 13 | 14 | 15 | class fast_rcnn_outputs(nn.Module): 16 | def __init__(self, dim_in): 17 | super().__init__() 18 | self.cls_score = nn.Linear(dim_in, cfg.MODEL.NUM_CLASSES) 19 | if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG: # bg and fg 20 | self.bbox_pred = nn.Linear(dim_in, 4 * 2) 21 | else: 22 | self.bbox_pred = nn.Linear(dim_in, 4 * cfg.MODEL.NUM_CLASSES) 23 | 24 | self._init_weights() 25 | 26 | def _init_weights(self): 27 | init.normal_(self.cls_score.weight, std=0.01) 28 | init.constant_(self.cls_score.bias, 0) 29 | init.normal_(self.bbox_pred.weight, std=0.001) 30 | init.constant_(self.bbox_pred.bias, 0) 31 | 32 | def detectron_weight_mapping(self): 33 | detectron_weight_mapping = { 34 | 'cls_score.weight': 'cls_score_w', 35 | 'cls_score.bias': 'cls_score_b', 36 | 'bbox_pred.weight': 'bbox_pred_w', 37 | 'bbox_pred.bias': 'bbox_pred_b' 38 | } 39 | orphan_in_detectron = [] 40 | return detectron_weight_mapping, orphan_in_detectron 41 | 42 | def forward(self, x): 43 | if x.dim() == 4: 44 | x = x.squeeze(3).squeeze(2) 45 | cls_score = self.cls_score(x) 46 | if not self.training: 47 | cls_score = F.softmax(cls_score, dim=1) 48 | bbox_pred = self.bbox_pred(x) 49 | 50 | return cls_score, bbox_pred 51 | 52 | 53 | def fast_rcnn_losses(cls_score, bbox_pred, label_int32, bbox_targets, 54 | bbox_inside_weights, bbox_outside_weights): 55 | device_id = cls_score.get_device() 56 | rois_label = Variable(torch.from_numpy(label_int32.astype('int64'))).cuda(device_id) 57 | loss_cls = F.cross_entropy(cls_score, rois_label) 58 | 59 | bbox_targets = Variable(torch.from_numpy(bbox_targets)).cuda(device_id) 60 | bbox_inside_weights = Variable(torch.from_numpy(bbox_inside_weights)).cuda(device_id) 61 | bbox_outside_weights = Variable(torch.from_numpy(bbox_outside_weights)).cuda(device_id) 62 | loss_bbox = net_utils.smooth_l1_loss( 63 | bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights) 64 | 65 | # class accuracy 66 | cls_preds = 
cls_score.max(dim=1)[1].type_as(rois_label) 67 | accuracy_cls = cls_preds.eq(rois_label).float().mean(dim=0) 68 | 69 | return loss_cls, loss_bbox, accuracy_cls 70 | 71 | 72 | # ---------------------------------------------------------------------------- # 73 | # Box heads 74 | # ---------------------------------------------------------------------------- # 75 | 76 | class roi_2mlp_head(nn.Module): 77 | """Add a ReLU MLP with two hidden layers.""" 78 | def __init__(self, dim_in, roi_xform_func, spatial_scale): 79 | super().__init__() 80 | self.dim_in = dim_in 81 | self.roi_xform = roi_xform_func 82 | self.spatial_scale = spatial_scale 83 | self.dim_out = hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM 84 | 85 | roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 86 | self.fc1 = nn.Linear(dim_in * roi_size**2, hidden_dim) 87 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 88 | 89 | self._init_weights() 90 | 91 | def _init_weights(self): 92 | mynn.init.XavierFill(self.fc1.weight) 93 | init.constant_(self.fc1.bias, 0) 94 | mynn.init.XavierFill(self.fc2.weight) 95 | init.constant_(self.fc2.bias, 0) 96 | 97 | def detectron_weight_mapping(self): 98 | detectron_weight_mapping = { 99 | 'fc1.weight': 'fc6_w', 100 | 'fc1.bias': 'fc6_b', 101 | 'fc2.weight': 'fc7_w', 102 | 'fc2.bias': 'fc7_b' 103 | } 104 | return detectron_weight_mapping, [] 105 | 106 | def forward(self, x, rpn_ret, rois_name='rois', use_relu=True): 107 | x = self.roi_xform( 108 | x, rpn_ret, 109 | blob_rois=rois_name, 110 | method=cfg.FAST_RCNN.ROI_XFORM_METHOD, 111 | resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION, 112 | spatial_scale=self.spatial_scale, 113 | sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 114 | ) 115 | batch_size = x.size(0) 116 | x = F.relu(self.fc1(x.view(batch_size, -1)), inplace=True) 117 | if use_relu: 118 | x = F.relu(self.fc2(x), inplace=True) 119 | else: 120 | x = self.fc2(x) 121 | 122 | return x 123 | 124 | 125 | class roi_Xconv1fc_head(nn.Module): 126 | """Add a X conv + 1fc head, as a reference if not using GroupNorm""" 127 | def __init__(self, dim_in, roi_xform_func, spatial_scale): 128 | super().__init__() 129 | self.dim_in = dim_in 130 | self.roi_xform = roi_xform_func 131 | self.spatial_scale = spatial_scale 132 | 133 | hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM 134 | module_list = [] 135 | for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): 136 | module_list.extend([ 137 | nn.Conv2d(dim_in, hidden_dim, 3, 1, 1), 138 | nn.ReLU(inplace=True) 139 | ]) 140 | dim_in = hidden_dim 141 | self.convs = nn.Sequential(*module_list) 142 | 143 | self.dim_out = fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM 144 | roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 145 | self.fc = nn.Linear(dim_in * roi_size * roi_size, fc_dim) 146 | 147 | self._init_weights() 148 | 149 | def _init_weights(self): 150 | def _init(m): 151 | if isinstance(m, nn.Conv2d): 152 | mynn.init.MSRAFill(m.weight) 153 | init.constant_(m.bias, 0) 154 | elif isinstance(m, nn.Linear): 155 | mynn.init.XavierFill(m.weight) 156 | init.constant_(m.bias, 0) 157 | self.apply(_init) 158 | 159 | def detectron_weight_mapping(self): 160 | mapping = {} 161 | for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): 162 | mapping.update({ 163 | 'convs.%d.weight' % (i*2): 'head_conv%d_w' % (i+1), 164 | 'convs.%d.bias' % (i*2): 'head_conv%d_b' % (i+1) 165 | }) 166 | mapping.update({ 167 | 'fc.weight': 'fc6_w', 168 | 'fc.bias': 'fc6_b' 169 | }) 170 | return mapping, [] 171 | 172 | def forward(self, x, rpn_ret): 173 | x = self.roi_xform( 174 | x, rpn_ret, 175 | blob_rois='rois', 176 | 
method=cfg.FAST_RCNN.ROI_XFORM_METHOD, 177 | resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION, 178 | spatial_scale=self.spatial_scale, 179 | sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 180 | ) 181 | batch_size = x.size(0) 182 | x = self.convs(x) 183 | x = F.relu(self.fc(x.view(batch_size, -1)), inplace=True) 184 | return x 185 | 186 | 187 | class roi_Xconv1fc_gn_head(nn.Module): 188 | """Add a X conv + 1fc head, with GroupNorm""" 189 | def __init__(self, dim_in, roi_xform_func, spatial_scale): 190 | super().__init__() 191 | self.dim_in = dim_in 192 | self.roi_xform = roi_xform_func 193 | self.spatial_scale = spatial_scale 194 | 195 | hidden_dim = cfg.FAST_RCNN.CONV_HEAD_DIM 196 | module_list = [] 197 | for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): 198 | module_list.extend([ 199 | nn.Conv2d(dim_in, hidden_dim, 3, 1, 1, bias=False), 200 | nn.GroupNorm(net_utils.get_group_gn(hidden_dim), hidden_dim, 201 | eps=cfg.GROUP_NORM.EPSILON), 202 | nn.ReLU(inplace=True) 203 | ]) 204 | dim_in = hidden_dim 205 | self.convs = nn.Sequential(*module_list) 206 | 207 | self.dim_out = fc_dim = cfg.FAST_RCNN.MLP_HEAD_DIM 208 | roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION 209 | self.fc = nn.Linear(dim_in * roi_size * roi_size, fc_dim) 210 | 211 | self._init_weights() 212 | 213 | def _init_weights(self): 214 | def _init(m): 215 | if isinstance(m, nn.Conv2d): 216 | mynn.init.MSRAFill(m.weight) 217 | elif isinstance(m, nn.Linear): 218 | mynn.init.XavierFill(m.weight) 219 | init.constant_(m.bias, 0) 220 | self.apply(_init) 221 | 222 | def detectron_weight_mapping(self): 223 | mapping = {} 224 | for i in range(cfg.FAST_RCNN.NUM_STACKED_CONVS): 225 | mapping.update({ 226 | 'convs.%d.weight' % (i*3): 'head_conv%d_w' % (i+1), 227 | 'convs.%d.weight' % (i*3+1): 'head_conv%d_gn_s' % (i+1), 228 | 'convs.%d.bias' % (i*3+1): 'head_conv%d_gn_b' % (i+1) 229 | }) 230 | mapping.update({ 231 | 'fc.weight': 'fc6_w', 232 | 'fc.bias': 'fc6_b' 233 | }) 234 | return mapping, [] 235 | 236 | def forward(self, x, rpn_ret): 237 | x = self.roi_xform( 238 | x, rpn_ret, 239 | blob_rois='rois', 240 | method=cfg.FAST_RCNN.ROI_XFORM_METHOD, 241 | resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION, 242 | spatial_scale=self.spatial_scale, 243 | sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO 244 | ) 245 | batch_size = x.size(0) 246 | x = self.convs(x) 247 | x = F.relu(self.fc(x.view(batch_size, -1)), inplace=True) 248 | return x 249 | -------------------------------------------------------------------------------- /lib/utils_rel/training_stats_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang in 2019 for thsi project 2 | # Based on Detectron.pytorch/lib/utils/training_stats.py 3 | # Original license text below: 4 | # 5 | ############################################################################## 6 | # Copyright (c) 2017-present, Facebook, Inc. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | ############################################################################## 20 | 21 | 22 | """Utilities for training.""" 23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | from __future__ import unicode_literals 28 | 29 | from collections import defaultdict, OrderedDict 30 | import datetime 31 | import numpy as np 32 | 33 | from core.config import cfg 34 | from utils_rel.logging_rel import log_stats 35 | from utils_rel.logging_rel import SmoothedValue 36 | from utils.timer import Timer 37 | import utils.net as nu 38 | 39 | 40 | class TrainingStats(object): 41 | """Track vital training statistics.""" 42 | 43 | def __init__(self, misc_args, log_period=20, tensorboard_logger=None): 44 | # Output logging period in SGD iterations 45 | self.misc_args = misc_args 46 | self.LOG_PERIOD = log_period 47 | self.tblogger = tensorboard_logger 48 | self.tb_ignored_keys = ['iter', 'eta'] 49 | self.iter_timer = Timer() 50 | # Window size for smoothing tracked values (with median filtering) 51 | self.WIN_SZ = 20 52 | def create_smoothed_value(): 53 | return SmoothedValue(self.WIN_SZ) 54 | self.smoothed_losses = defaultdict(create_smoothed_value) 55 | self.smoothed_metrics = defaultdict(create_smoothed_value) 56 | self.smoothed_total_loss = SmoothedValue(self.WIN_SZ) 57 | # For the support of args.iter_size 58 | self.inner_total_loss = [] 59 | self.inner_losses = defaultdict(list) 60 | if cfg.FPN.FPN_ON: 61 | self.inner_loss_rpn_cls = [] 62 | self.inner_loss_rpn_bbox = [] 63 | self.inner_metrics = defaultdict(list) 64 | 65 | def IterTic(self): 66 | self.iter_timer.tic() 67 | 68 | def IterToc(self): 69 | return self.iter_timer.toc(average=False) 70 | 71 | def ResetIterTimer(self): 72 | self.iter_timer.reset() 73 | 74 | def UpdateIterStats(self, model_out, inner_iter=None): 75 | """Update tracked iteration statistics.""" 76 | if inner_iter is not None and self.misc_args.iter_size > 1: 77 | # For the case of using args.iter_size > 1 78 | return self._UpdateIterStats_inner(model_out, inner_iter) 79 | 80 | # Following code is saved for compatability of train_net.py and iter_size==1 81 | total_loss = 0 82 | if cfg.FPN.FPN_ON: 83 | loss_rpn_cls_data = 0 84 | loss_rpn_bbox_data = 0 85 | 86 | for k, loss in model_out['losses'].items(): 87 | assert loss.shape[0] == cfg.NUM_GPUS 88 | loss = loss.mean(dim=0, keepdim=True) 89 | total_loss += loss 90 | loss_data = loss.data[0] 91 | model_out['losses'][k] = loss 92 | if cfg.FPN.FPN_ON: 93 | if k.startswith('loss_rpn_cls_'): 94 | loss_rpn_cls_data += loss_data 95 | elif k.startswith('loss_rpn_bbox_'): 96 | loss_rpn_bbox_data += loss_data 97 | self.smoothed_losses[k].AddValue(loss_data) 98 | 99 | model_out['total_loss'] = total_loss # Add the total loss for back propagation 100 | self.smoothed_total_loss.AddValue(total_loss.data[0]) 101 | if cfg.FPN.FPN_ON: 102 | self.smoothed_losses['loss_rpn_cls'].AddValue(loss_rpn_cls_data) 103 | self.smoothed_losses['loss_rpn_bbox'].AddValue(loss_rpn_bbox_data) 104 | 105 | for k, metric in model_out['metrics'].items(): 106 | metric = metric.mean(dim=0, keepdim=True) 107 | self.smoothed_metrics[k].AddValue(metric.data[0]) 108 | 109 | def _UpdateIterStats_inner(self, model_out, inner_iter): 110 | """Update tracked iteration statistics for the case of iter_size > 1""" 111 | assert inner_iter < self.misc_args.iter_size 112 | 113 | total_loss = 0 114 | if cfg.FPN.FPN_ON: 115 | loss_rpn_cls_data = 0 116 | loss_rpn_bbox_data = 0 117 | 118 | if inner_iter 
== 0: 119 | self.inner_total_loss = [] 120 | for k in model_out['losses']: 121 | self.inner_losses[k] = [] 122 | if cfg.FPN.FPN_ON: 123 | self.inner_loss_rpn_cls = [] 124 | self.inner_loss_rpn_bbox = [] 125 | for k in model_out['metrics']: 126 | self.inner_metrics[k] = [] 127 | 128 | for k, loss in model_out['losses'].items(): 129 | assert loss.shape[0] == cfg.NUM_GPUS 130 | loss = loss.mean(dim=0, keepdim=True) 131 | total_loss += loss 132 | loss_data = loss.data[0] 133 | 134 | model_out['losses'][k] = loss 135 | if cfg.FPN.FPN_ON: 136 | if k.startswith('loss_rpn_cls_'): 137 | loss_rpn_cls_data += loss_data 138 | elif k.startswith('loss_rpn_bbox_'): 139 | loss_rpn_bbox_data += loss_data 140 | 141 | self.inner_losses[k].append(loss_data) 142 | if inner_iter == (self.misc_args.iter_size - 1): 143 | loss_data = self._mean_and_reset_inner_list('inner_losses', k) 144 | self.smoothed_losses[k].AddValue(loss_data) 145 | 146 | model_out['total_loss'] = total_loss # Add the total loss for back propagation 147 | total_loss_data = total_loss.data[0] 148 | self.inner_total_loss.append(total_loss_data) 149 | if cfg.FPN.FPN_ON: 150 | self.inner_loss_rpn_cls.append(loss_rpn_cls_data) 151 | self.inner_loss_rpn_bbox.append(loss_rpn_bbox_data) 152 | if inner_iter == (self.misc_args.iter_size - 1): 153 | total_loss_data = self._mean_and_reset_inner_list('inner_total_loss') 154 | self.smoothed_total_loss.AddValue(total_loss_data) 155 | if cfg.FPN.FPN_ON: 156 | loss_rpn_cls_data = self._mean_and_reset_inner_list('inner_loss_rpn_cls') 157 | loss_rpn_bbox_data = self._mean_and_reset_inner_list('inner_loss_rpn_bbox') 158 | self.smoothed_losses['loss_rpn_cls'].AddValue(loss_rpn_cls_data) 159 | self.smoothed_losses['loss_rpn_bbox'].AddValue(loss_rpn_bbox_data) 160 | 161 | for k, metric in model_out['metrics'].items(): 162 | metric = metric.mean(dim=0, keepdim=True) 163 | metric_data = metric.data[0] 164 | self.inner_metrics[k].append(metric_data) 165 | if inner_iter == (self.misc_args.iter_size - 1): 166 | metric_data = self._mean_and_reset_inner_list('inner_metrics', k) 167 | self.smoothed_metrics[k].AddValue(metric_data) 168 | 169 | def _mean_and_reset_inner_list(self, attr_name, key=None): 170 | """Take the mean and reset list empty""" 171 | if key: 172 | mean_val = sum(getattr(self, attr_name)[key]) / self.misc_args.iter_size 173 | getattr(self, attr_name)[key] = [] 174 | else: 175 | mean_val = sum(getattr(self, attr_name)) / self.misc_args.iter_size 176 | setattr(self, attr_name, []) 177 | return mean_val 178 | 179 | def LogIterStats(self, cur_iter, lr, backbone_lr): 180 | """Log the tracked statistics.""" 181 | if (cur_iter % self.LOG_PERIOD == 0 or 182 | cur_iter == cfg.SOLVER.MAX_ITER - 1): 183 | stats = self.GetStats(cur_iter, lr, backbone_lr) 184 | log_stats(stats, self.misc_args) 185 | if self.tblogger: 186 | self.tb_log_stats(stats, cur_iter) 187 | 188 | def tb_log_stats(self, stats, cur_iter): 189 | """Log the tracked statistics to tensorboard""" 190 | for k in stats: 191 | if k not in self.tb_ignored_keys: 192 | v = stats[k] 193 | if isinstance(v, dict): 194 | self.tb_log_stats(v, cur_iter) 195 | else: 196 | self.tblogger.add_scalar(k, v, cur_iter) 197 | 198 | def GetStats(self, cur_iter, lr, backbone_lr): 199 | eta_seconds = self.iter_timer.average_time * ( 200 | cfg.SOLVER.MAX_ITER - cur_iter 201 | ) 202 | eta = str(datetime.timedelta(seconds=int(eta_seconds))) 203 | stats = OrderedDict( 204 | iter=cur_iter + 1, # 1-indexed 205 | time=self.iter_timer.average_time, 206 | eta=eta, 207 | 
loss=self.smoothed_total_loss.GetMedianValue(), 208 | lr=lr, 209 | backbone_lr=backbone_lr 210 | ) 211 | stats['metrics'] = OrderedDict() 212 | for k in sorted(self.smoothed_metrics): 213 | stats['metrics'][k] = self.smoothed_metrics[k].GetMedianValue() 214 | 215 | head_losses = [] 216 | for k, v in self.smoothed_losses.items(): 217 | head_losses.append((k, v.GetMedianValue())) 218 | stats['head_losses'] = OrderedDict(head_losses) 219 | 220 | return stats 221 | -------------------------------------------------------------------------------- /lib/datasets_rel/roidb_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted from Detectron.pytorch/lib/datasets/roidb.py 2 | # for this project by Ji Zhang, 2019 3 | # 4 | # -------------------------------------------------------- 5 | # Copyright (c) 2017-present, Facebook, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | ############################################################################## 19 | 20 | """Functions for common roidb manipulations.""" 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | from __future__ import unicode_literals 26 | 27 | import six 28 | import logging 29 | import numpy as np 30 | 31 | import utils.boxes as box_utils 32 | import utils.blob as blob_utils 33 | from core.config import cfg 34 | from .json_dataset_rel import JsonDatasetRel 35 | 36 | logger = logging.getLogger(__name__) 37 | 38 | 39 | def combined_roidb_for_training(dataset_names, proposal_files): 40 | """Load and concatenate roidbs for one or more datasets, along with optional 41 | object proposals. The roidb entries are then prepared for use in training, 42 | which involves caching certain types of metadata for each roidb entry. 
43 | """ 44 | def get_roidb(dataset_name, proposal_file): 45 | ds = JsonDatasetRel(dataset_name) 46 | roidb = ds.get_roidb( 47 | gt=True, 48 | proposal_file=proposal_file, 49 | crowd_filter_thresh=cfg.TRAIN.CROWD_FILTER_THRESH 50 | ) 51 | if cfg.TRAIN.USE_FLIPPED: 52 | logger.info('Appending horizontally-flipped training examples...') 53 | extend_with_flipped_entries(roidb, ds) 54 | logger.info('Loaded dataset: {:s}'.format(ds.name)) 55 | return roidb 56 | 57 | if isinstance(dataset_names, six.string_types): 58 | dataset_names = (dataset_names, ) 59 | if isinstance(proposal_files, six.string_types): 60 | proposal_files = (proposal_files, ) 61 | if len(proposal_files) == 0: 62 | proposal_files = (None, ) * len(dataset_names) 63 | assert len(dataset_names) == len(proposal_files) 64 | roidbs = [get_roidb(*args) for args in zip(dataset_names, proposal_files)] 65 | roidb = roidbs[0] 66 | for r in roidbs[1:]: 67 | roidb.extend(r) 68 | roidb = filter_for_training(roidb) 69 | 70 | if cfg.TRAIN.ASPECT_GROUPING or cfg.TRAIN.ASPECT_CROPPING: 71 | logger.info('Computing image aspect ratios and ordering the ratios...') 72 | ratio_list, ratio_index = rank_for_training(roidb) 73 | logger.info('done') 74 | else: 75 | ratio_list, ratio_index = None, None 76 | 77 | logger.info('Computing bounding-box regression targets...') 78 | add_bbox_regression_targets(roidb) 79 | logger.info('done') 80 | 81 | _compute_and_log_stats(roidb) 82 | 83 | return roidb, ratio_list, ratio_index 84 | 85 | 86 | def extend_with_flipped_entries(roidb, dataset): 87 | """Flip each entry in the given roidb and return a new roidb that is the 88 | concatenation of the original roidb and the flipped entries. 89 | 90 | "Flipping" an entry means that that image and associated metadata (e.g., 91 | ground truth boxes and object proposals) are horizontally flipped. 92 | """ 93 | flipped_roidb = [] 94 | for entry in roidb: 95 | width = entry['width'] 96 | boxes = entry['boxes'].copy() 97 | oldx1 = boxes[:, 0].copy() 98 | oldx2 = boxes[:, 2].copy() 99 | boxes[:, 0] = width - oldx2 - 1 100 | boxes[:, 2] = width - oldx1 - 1 101 | assert (boxes[:, 2] >= boxes[:, 0]).all() 102 | # sbj 103 | sbj_gt_boxes = entry['sbj_gt_boxes'].copy() 104 | oldx1 = sbj_gt_boxes[:, 0].copy() 105 | oldx2 = sbj_gt_boxes[:, 2].copy() 106 | sbj_gt_boxes[:, 0] = width - oldx2 - 1 107 | sbj_gt_boxes[:, 2] = width - oldx1 - 1 108 | assert (sbj_gt_boxes[:, 2] >= sbj_gt_boxes[:, 0]).all() 109 | # obj 110 | obj_gt_boxes = entry['obj_gt_boxes'].copy() 111 | oldx1 = obj_gt_boxes[:, 0].copy() 112 | oldx2 = obj_gt_boxes[:, 2].copy() 113 | obj_gt_boxes[:, 0] = width - oldx2 - 1 114 | obj_gt_boxes[:, 2] = width - oldx1 - 1 115 | assert (obj_gt_boxes[:, 2] >= obj_gt_boxes[:, 0]).all() 116 | # now flip 117 | flipped_entry = {} 118 | dont_copy = ('boxes', 'sbj_gt_boxes', 'obj_gt_boxes', 'segms', 'gt_keypoints', 'flipped') 119 | for k, v in entry.items(): 120 | if k not in dont_copy: 121 | flipped_entry[k] = v 122 | flipped_entry['boxes'] = boxes 123 | flipped_entry['sbj_gt_boxes'] = sbj_gt_boxes 124 | flipped_entry['obj_gt_boxes'] = obj_gt_boxes 125 | flipped_entry['flipped'] = True 126 | flipped_roidb.append(flipped_entry) 127 | roidb.extend(flipped_roidb) 128 | 129 | 130 | def filter_for_training(roidb): 131 | """Remove roidb entries that have no usable RoIs based on config settings. 
132 | """ 133 | def is_valid(entry): 134 | # Valid images have: 135 | # (1) At least one foreground RoI OR 136 | # (2) At least one background RoI 137 | overlaps = entry['max_overlaps'] 138 | # find boxes with sufficient overlap 139 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 140 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 141 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 142 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 143 | # image is only valid if such boxes exist 144 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 145 | if cfg.MODEL.KEYPOINTS_ON: 146 | # If we're training for keypoints, exclude images with no keypoints 147 | valid = valid and entry['has_visible_keypoints'] 148 | return valid 149 | 150 | num = len(roidb) 151 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 152 | num_after = len(filtered_roidb) 153 | logger.info('Filtered {} roidb entries: {} -> {}'. 154 | format(num - num_after, num, num_after)) 155 | return filtered_roidb 156 | 157 | 158 | def rank_for_training(roidb): 159 | """Rank the roidb entries according to image aspect ration and mark for cropping 160 | for efficient batching if image is too long. 161 | 162 | Returns: 163 | ratio_list: ndarray, list of aspect ratios from small to large 164 | ratio_index: ndarray, list of roidb entry indices correspond to the ratios 165 | """ 166 | RATIO_HI = cfg.TRAIN.ASPECT_HI # largest ratio to preserve. 167 | RATIO_LO = cfg.TRAIN.ASPECT_LO # smallest ratio to preserve. 168 | 169 | need_crop_cnt = 0 170 | 171 | ratio_list = [] 172 | for entry in roidb: 173 | width = entry['width'] 174 | height = entry['height'] 175 | ratio = width / float(height) 176 | 177 | if cfg.TRAIN.ASPECT_CROPPING: 178 | if ratio > RATIO_HI: 179 | entry['need_crop'] = True 180 | ratio = RATIO_HI 181 | need_crop_cnt += 1 182 | elif ratio < RATIO_LO: 183 | entry['need_crop'] = True 184 | ratio = RATIO_LO 185 | need_crop_cnt += 1 186 | else: 187 | entry['need_crop'] = False 188 | else: 189 | entry['need_crop'] = False 190 | 191 | ratio_list.append(ratio) 192 | 193 | if cfg.TRAIN.ASPECT_CROPPING: 194 | logging.info('Number of entries that need to be cropped: %d. 
Ratio bound: [%.2f, %.2f]', 195 | need_crop_cnt, RATIO_LO, RATIO_HI) 196 | ratio_list = np.array(ratio_list) 197 | ratio_index = np.argsort(ratio_list) 198 | return ratio_list[ratio_index], ratio_index 199 | 200 | def add_bbox_regression_targets(roidb): 201 | """Add information needed to train bounding-box regressors.""" 202 | for entry in roidb: 203 | entry['bbox_targets'] = _compute_targets(entry) 204 | 205 | 206 | def _compute_targets(entry): 207 | """Compute bounding-box regression targets for an image.""" 208 | # Indices of ground-truth ROIs 209 | rois = entry['boxes'] 210 | overlaps = entry['max_overlaps'] 211 | labels = entry['max_classes'] 212 | gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] 213 | # Targets has format (class, tx, ty, tw, th) 214 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 215 | if len(gt_inds) == 0: 216 | # Bail if the image has no ground-truth ROIs 217 | return targets 218 | 219 | # Indices of examples for which we try to make predictions 220 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 221 | 222 | # Get IoU overlap between each ex ROI and gt ROI 223 | ex_gt_overlaps = box_utils.bbox_overlaps( 224 | rois[ex_inds, :].astype(dtype=np.float32, copy=False), 225 | rois[gt_inds, :].astype(dtype=np.float32, copy=False)) 226 | 227 | # Find which gt ROI each ex ROI has max overlap with: 228 | # this will be the ex ROI's gt target 229 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 230 | gt_rois = rois[gt_inds[gt_assignment], :] 231 | ex_rois = rois[ex_inds, :] 232 | # Use class "1" for all boxes if using class_agnostic_bbox_reg 233 | targets[ex_inds, 0] = ( 234 | 1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds]) 235 | targets[ex_inds, 1:] = box_utils.bbox_transform_inv( 236 | ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS) 237 | return targets 238 | 239 | 240 | def _compute_and_log_stats(roidb): 241 | classes = roidb[0]['dataset'].classes 242 | char_len = np.max([len(c) for c in classes]) 243 | hist_bins = np.arange(len(classes) + 1) 244 | 245 | # Histogram of ground-truth objects 246 | gt_hist = np.zeros((len(classes)), dtype=np.int) 247 | for entry in roidb: 248 | gt_inds = np.where( 249 | (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] 250 | gt_classes = entry['gt_classes'][gt_inds] 251 | gt_hist += np.histogram(gt_classes, bins=hist_bins)[0] 252 | logger.debug('Ground-truth class histogram:') 253 | for i, v in enumerate(gt_hist): 254 | logger.debug( 255 | '{:d}{:s}: {:d}'.format( 256 | i, classes[i].rjust(char_len), v)) 257 | logger.debug('-' * char_len) 258 | logger.debug( 259 | '{:s}: {:d}'.format( 260 | 'total'.rjust(char_len), np.sum(gt_hist))) 261 | -------------------------------------------------------------------------------- /lib/core/test_engine_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang, 2019 2 | # from Detectron.pytorch/lib/core/test_engine.py 3 | # Original license text below 4 | # -------------------------------------------------------- 5 | # Copyright (c) 2017-present, Facebook, Inc. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | ############################################################################## 19 | 20 | """Test a Detectron network on an imdb (image database).""" 21 | 22 | from __future__ import absolute_import 23 | from __future__ import division 24 | from __future__ import print_function 25 | from __future__ import unicode_literals 26 | 27 | from collections import defaultdict 28 | import cv2 29 | import datetime 30 | import logging 31 | import numpy as np 32 | from numpy import linalg as la 33 | import os 34 | import yaml 35 | import json 36 | from six.moves import cPickle as pickle 37 | 38 | import torch 39 | import nn as mynn 40 | from torch.autograd import Variable 41 | 42 | from core.config import cfg 43 | from core.test_rel import im_detect_rels 44 | from datasets_rel import task_evaluation_sg as task_evaluation_sg 45 | from datasets_rel import task_evaluation_vg_and_vrd as task_evaluation_vg_and_vrd 46 | from datasets_rel.json_dataset_rel import JsonDatasetRel 47 | from modeling_rel import model_builder_rel 48 | from utils.detectron_weight_helper import load_detectron_weight 49 | import utils.env as envu 50 | import utils_rel.net_rel as net_utils_rel 51 | import utils_rel.subprocess_rel as subprocess_utils 52 | import utils.vis as vis_utils 53 | from utils.io import save_object 54 | from utils.timer import Timer 55 | 56 | logger = logging.getLogger(__name__) 57 | 58 | 59 | def get_eval_functions(): 60 | # Determine which parent or child function should handle inference 61 | # Generic case that handles all network types other than RPN-only nets 62 | # and RetinaNet 63 | child_func = test_net 64 | parent_func = test_net_on_dataset 65 | 66 | return parent_func, child_func 67 | 68 | 69 | def get_inference_dataset(index, is_parent=True): 70 | assert is_parent or len(cfg.TEST.DATASETS) == 1, \ 71 | 'The child inference process can only work on a single dataset' 72 | 73 | dataset_name = cfg.TEST.DATASETS[index] 74 | proposal_file = None 75 | 76 | return dataset_name, proposal_file 77 | 78 | 79 | def run_inference( 80 | args, ind_range=None, 81 | multi_gpu_testing=False, gpu_id=0, 82 | check_expected_results=False): 83 | parent_func, child_func = get_eval_functions() 84 | is_parent = ind_range is None 85 | 86 | def result_getter(): 87 | if is_parent: 88 | # Parent case: 89 | # In this case we're either running inference on the entire dataset in a 90 | # single process or (if multi_gpu_testing is True) using this process to 91 | # launch subprocesses that each run inference on a range of the dataset 92 | all_results = [] 93 | for i in range(len(cfg.TEST.DATASETS)): 94 | dataset_name, proposal_file = get_inference_dataset(i) 95 | output_dir = args.output_dir 96 | results = parent_func( 97 | args, 98 | dataset_name, 99 | proposal_file, 100 | output_dir, 101 | multi_gpu=multi_gpu_testing 102 | ) 103 | all_results.append(results) 104 | 105 | return all_results 106 | else: 107 | # Subprocess child case: 108 | # In this case test_net was called via subprocess.Popen to execute on a 109 | # range of inputs on a single dataset 110 | dataset_name, proposal_file = 
get_inference_dataset(0, is_parent=False) 111 | output_dir = args.output_dir 112 | return child_func( 113 | args, 114 | dataset_name, 115 | proposal_file, 116 | output_dir, 117 | ind_range=ind_range, 118 | gpu_id=gpu_id 119 | ) 120 | 121 | all_results = result_getter() 122 | 123 | return all_results 124 | 125 | 126 | def test_net_on_dataset( 127 | args, 128 | dataset_name, 129 | proposal_file, 130 | output_dir, 131 | multi_gpu=False, 132 | gpu_id=0): 133 | """Run inference on a dataset.""" 134 | dataset = JsonDatasetRel(dataset_name) 135 | test_timer = Timer() 136 | test_timer.tic() 137 | if multi_gpu: 138 | num_images = len(dataset.get_roidb(gt=args.do_val)) 139 | all_results = multi_gpu_test_net_on_dataset( 140 | args, dataset_name, proposal_file, num_images, output_dir 141 | ) 142 | else: 143 | all_results = test_net( 144 | args, dataset_name, proposal_file, output_dir, gpu_id=gpu_id 145 | ) 146 | test_timer.toc() 147 | logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time)) 148 | 149 | logger.info('Starting evaluation now...') 150 | if dataset_name.find('vg') >= 0 or dataset_name.find('vrd') >= 0: 151 | task_evaluation_vg_and_vrd.eval_rel_results(all_results, output_dir, args.do_val) 152 | else: 153 | task_evaluation_sg.eval_rel_results(all_results, output_dir, args.do_val, args.do_vis, args.do_special) 154 | 155 | return all_results 156 | 157 | 158 | def multi_gpu_test_net_on_dataset( 159 | args, dataset_name, proposal_file, num_images, output_dir): 160 | """Multi-gpu inference on a dataset.""" 161 | binary_dir = envu.get_runtime_dir() 162 | binary_ext = envu.get_py_bin_ext() 163 | binary = os.path.join(binary_dir, args.test_net_file + binary_ext) 164 | assert os.path.exists(binary), 'Binary \'{}\' not found'.format(binary) 165 | 166 | # Pass the target dataset and proposal file (if any) via the command line 167 | opts = ['TEST.DATASETS', '("{}",)'.format(dataset_name)] 168 | if proposal_file: 169 | opts += ['TEST.PROPOSAL_FILES', '("{}",)'.format(proposal_file)] 170 | 171 | if args.do_val: 172 | opts += ['--do_val'] 173 | if args.do_vis: 174 | opts += ['--do_vis'] 175 | if args.do_special: 176 | opts += ['--do_special'] 177 | if args.use_gt_boxes: 178 | opts += ['--use_gt_boxes'] 179 | if args.use_gt_labels: 180 | opts += ['--use_gt_labels'] 181 | 182 | # Run inference in parallel in subprocesses 183 | # Outputs will be a list of outputs from each subprocess, where the output 184 | # of each subprocess is the dictionary saved by test_net(). 
185 | outputs = subprocess_utils.process_in_parallel( 186 | 'rel_detection', num_images, binary, output_dir, 187 | args.load_ckpt, args.load_detectron, opts 188 | ) 189 | 190 | # Collate the results from each subprocess 191 | all_results = [] 192 | for det_data in outputs: 193 | all_results += det_data 194 | 195 | if args.use_gt_boxes: 196 | if args.use_gt_labels: 197 | det_file = os.path.join(args.output_dir, 'rel_detections_gt_boxes_prdcls.pkl') 198 | else: 199 | det_file = os.path.join(args.output_dir, 'rel_detections_gt_boxes_sgcls.pkl') 200 | else: 201 | det_file = os.path.join(args.output_dir, 'rel_detections.pkl') 202 | save_object(all_results, det_file) 203 | logger.info('Wrote rel_detections to: {}'.format(os.path.abspath(det_file))) 204 | 205 | return all_results 206 | 207 | 208 | def test_net( 209 | args, 210 | dataset_name, 211 | proposal_file, 212 | output_dir, 213 | ind_range=None, 214 | gpu_id=0): 215 | """Run inference on all images in a dataset or over an index range of images 216 | in a dataset using a single GPU. 217 | """ 218 | assert not cfg.MODEL.RPN_ONLY, \ 219 | 'Use rpn_generate to generate proposals from RPN-only models' 220 | 221 | roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset( 222 | dataset_name, proposal_file, ind_range, args.do_val 223 | ) 224 | model = initialize_model_from_cfg(args, gpu_id=gpu_id) 225 | 226 | num_images = len(roidb) 227 | all_results = [None for _ in range(num_images)] 228 | timers = defaultdict(Timer) 229 | for i, entry in enumerate(roidb): 230 | box_proposals = None 231 | 232 | im = cv2.imread(entry['image']) 233 | if args.use_gt_boxes: 234 | im_results = im_detect_rels(model, im, dataset_name, box_proposals, args.do_vis, timers, entry, args.use_gt_labels) 235 | else: 236 | im_results = im_detect_rels(model, im, dataset_name, box_proposals, args.do_vis, timers) 237 | 238 | im_results.update(dict(image=entry['image'])) 239 | # add gt 240 | if args.do_val: 241 | im_results.update( 242 | dict(gt_sbj_boxes=entry['sbj_gt_boxes'], 243 | gt_sbj_labels=entry['sbj_gt_classes'], 244 | gt_obj_boxes=entry['obj_gt_boxes'], 245 | gt_obj_labels=entry['obj_gt_classes'], 246 | gt_prd_labels=entry['prd_gt_classes'])) 247 | 248 | all_results[i] = im_results 249 | 250 | if i % 10 == 0: # Reduce log file size 251 | ave_total_time = np.sum([t.average_time for t in timers.values()]) 252 | eta_seconds = ave_total_time * (num_images - i - 1) 253 | eta = str(datetime.timedelta(seconds=int(eta_seconds))) 254 | det_time = (timers['im_detect_rels'].average_time) 255 | logger.info(( 256 | 'im_detect: range [{:d}, {:d}] of {:d}: ' 257 | '{:d}/{:d} {:.3f}s (eta: {})').format( 258 | start_ind + 1, end_ind, total_num_images, start_ind + i + 1, 259 | start_ind + num_images, det_time, eta)) 260 | 261 | cfg_yaml = yaml.dump(cfg) 262 | if ind_range is not None: 263 | det_name = 'rel_detection_range_%s_%s.pkl' % tuple(ind_range) 264 | else: 265 | if args.use_gt_boxes: 266 | if args.use_gt_labels: 267 | det_name = 'rel_detections_gt_boxes_prdcls.pkl' 268 | else: 269 | det_name = 'rel_detections_gt_boxes_sgcls.pkl' 270 | else: 271 | det_name = 'rel_detections.pkl' 272 | det_file = os.path.join(output_dir, det_name) 273 | save_object(all_results, det_file) 274 | logger.info('Wrote rel_detections to: {}'.format(os.path.abspath(det_file))) 275 | return all_results 276 | 277 | 278 | def initialize_model_from_cfg(args, gpu_id=0): 279 | """Initialize a model from the global cfg. Loads test-time weights and 280 | set to evaluation mode. 
281 | """ 282 | model = model_builder_rel.Generalized_RCNN() 283 | model.eval() 284 | 285 | if args.cuda: 286 | model.cuda() 287 | 288 | if args.load_ckpt: 289 | load_name = args.load_ckpt 290 | logger.info("loading checkpoint %s", load_name) 291 | checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage) 292 | net_utils_rel.load_ckpt_rel(model, checkpoint['model']) 293 | 294 | if args.load_detectron: 295 | logger.info("loading detectron weights %s", args.load_detectron) 296 | load_detectron_weight(model, args.load_detectron) 297 | 298 | model = mynn.DataParallel(model, cpu_keywords=['im_info', 'roidb'], minibatch=True) 299 | 300 | return model 301 | 302 | 303 | def get_roidb_and_dataset(dataset_name, proposal_file, ind_range, do_val=True): 304 | """Get the roidb for the dataset specified in the global cfg. Optionally 305 | restrict it to a range of indices if ind_range is a pair of integers. 306 | """ 307 | dataset = JsonDatasetRel(dataset_name) 308 | roidb = dataset.get_roidb(gt=do_val) 309 | 310 | if ind_range is not None: 311 | total_num_images = len(roidb) 312 | start, end = ind_range 313 | roidb = roidb[start:end] 314 | else: 315 | start = 0 316 | end = len(roidb) 317 | total_num_images = end 318 | 319 | return roidb, dataset, start, end, total_num_images 320 | -------------------------------------------------------------------------------- /lib/roi_data_rel/loader_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang for this project in 2019 2 | # 3 | # Based on Detectron.Pytorch/lib/roi/loader.py by Roy Tseng 4 | 5 | import math 6 | import numpy as np 7 | import numpy.random as npr 8 | 9 | import torch 10 | import torch.utils.data as data 11 | import torch.utils.data.sampler as torch_sampler 12 | from torch.utils.data.dataloader import default_collate 13 | from torch._six import int_classes as _int_classes 14 | 15 | from core.config import cfg 16 | from roi_data_rel.minibatch_rel import get_minibatch 17 | import utils.blob as blob_utils 18 | 19 | 20 | class RoiDataLoader(data.Dataset): 21 | def __init__(self, roidb, num_classes, training=True): 22 | self._roidb = roidb 23 | self._num_classes = num_classes 24 | self.training = training 25 | self.DATA_SIZE = len(self._roidb) 26 | 27 | def __getitem__(self, index_tuple): 28 | index, ratio = index_tuple 29 | single_db = [self._roidb[index]] 30 | blobs, valid = get_minibatch(single_db) 31 | #TODO: Check if minibatch is valid ? If not, abandon it. 32 | # Need to change _worker_loop in torch.utils.data.dataloader.py. 
33 | 34 | # Squeeze batch dim 35 | for key in blobs: 36 | if key != 'roidb': 37 | blobs[key] = blobs[key].squeeze(axis=0) 38 | 39 | if self._roidb[index]['need_crop']: 40 | self.crop_data(blobs, ratio) 41 | # Check bounding box 42 | entry = blobs['roidb'][0] 43 | boxes = entry['boxes'] 44 | invalid = (boxes[:, 0] == boxes[:, 2]) | (boxes[:, 1] == boxes[:, 3]) 45 | valid_inds = np.nonzero(~ invalid)[0] 46 | if len(valid_inds) < len(boxes): 47 | for key in ['boxes', 'gt_classes', 'seg_areas', 'gt_overlaps', 'is_crowd', 48 | 'box_to_gt_ind_map', 'gt_keypoints']: 49 | if key in entry: 50 | entry[key] = entry[key][valid_inds] 51 | entry['segms'] = [entry['segms'][ind] for ind in valid_inds] 52 | # for rel sanity check 53 | sbj_gt_boxes = entry['sbj_gt_boxes'] 54 | obj_gt_boxes = entry['obj_gt_boxes'] 55 | sbj_invalid = (sbj_gt_boxes[:, 0] == sbj_gt_boxes[:, 2]) | (sbj_gt_boxes[:, 1] == sbj_gt_boxes[:, 3]) 56 | obj_invalid = (obj_gt_boxes[:, 0] == obj_gt_boxes[:, 2]) | (obj_gt_boxes[:, 1] == obj_gt_boxes[:, 3]) 57 | rel_invalid = sbj_invalid | obj_invalid 58 | rel_valid_inds = np.nonzero(~ rel_invalid)[0] 59 | if len(rel_valid_inds) < len(sbj_gt_boxes): 60 | for key in ['sbj_gt_boxes', 'sbj_gt_classes', 'obj_gt_boxes', 'obj_gt_classes', 'prd_gt_classes', 61 | 'sbj_gt_overlaps', 'obj_gt_overlaps', 'prd_gt_overlaps', 'pair_to_gt_ind_map', 62 | 'width', 'height']: 63 | if key in entry: 64 | entry[key] = entry[key][rel_valid_inds] 65 | 66 | blobs['roidb'] = blob_utils.serialize(blobs['roidb']) # CHECK: maybe we can serialize in collate_fn 67 | 68 | return blobs 69 | 70 | def crop_data(self, blobs, ratio): 71 | data_height, data_width = map(int, blobs['im_info'][:2]) 72 | boxes = blobs['roidb'][0]['boxes'] 73 | if ratio < 1: # width << height, crop height 74 | size_crop = math.ceil(data_width / ratio) # size after crop 75 | min_y = math.floor(np.min(boxes[:, 1])) 76 | max_y = math.floor(np.max(boxes[:, 3])) 77 | box_region = max_y - min_y + 1 78 | if min_y == 0: 79 | y_s = 0 80 | else: 81 | if (box_region - size_crop) < 0: 82 | y_s_min = max(max_y - size_crop, 0) 83 | y_s_max = min(min_y, data_height - size_crop) 84 | y_s = y_s_min if y_s_min == y_s_max else \ 85 | npr.choice(range(y_s_min, y_s_max + 1)) 86 | else: 87 | # CHECK: rethinking the mechanism for the case box_region > size_crop 88 | # Now, the crop is biased on the lower part of box_region caused by 89 | # // 2 for y_s_add 90 | y_s_add = (box_region - size_crop) // 2 91 | y_s = min_y if y_s_add == 0 else \ 92 | npr.choice(range(min_y, min_y + y_s_add + 1)) 93 | # Crop the image 94 | blobs['data'] = blobs['data'][:, y_s:(y_s + size_crop), :,] 95 | # Update im_info 96 | blobs['im_info'][0] = size_crop 97 | # Shift and clamp boxes ground truth 98 | boxes[:, 1] -= y_s 99 | boxes[:, 3] -= y_s 100 | np.clip(boxes[:, 1], 0, size_crop - 1, out=boxes[:, 1]) 101 | np.clip(boxes[:, 3], 0, size_crop - 1, out=boxes[:, 3]) 102 | blobs['roidb'][0]['boxes'] = boxes 103 | else: # width >> height, crop width 104 | size_crop = math.ceil(data_height * ratio) 105 | min_x = math.floor(np.min(boxes[:, 0])) 106 | max_x = math.floor(np.max(boxes[:, 2])) 107 | box_region = max_x - min_x + 1 108 | if min_x == 0: 109 | x_s = 0 110 | else: 111 | if (box_region - size_crop) < 0: 112 | x_s_min = max(max_x - size_crop, 0) 113 | x_s_max = min(min_x, data_width - size_crop) 114 | x_s = x_s_min if x_s_min == x_s_max else \ 115 | npr.choice(range(x_s_min, x_s_max + 1)) 116 | else: 117 | x_s_add = (box_region - size_crop) // 2 118 | x_s = min_x if x_s_add == 0 else \ 119 | 
npr.choice(range(min_x, min_x + x_s_add + 1)) 120 | # Crop the image 121 | blobs['data'] = blobs['data'][:, :, x_s:(x_s + size_crop)] 122 | # Update im_info 123 | blobs['im_info'][1] = size_crop 124 | # Shift and clamp boxes ground truth 125 | boxes[:, 0] -= x_s 126 | boxes[:, 2] -= x_s 127 | np.clip(boxes[:, 0], 0, size_crop - 1, out=boxes[:, 0]) 128 | np.clip(boxes[:, 2], 0, size_crop - 1, out=boxes[:, 2]) 129 | blobs['roidb'][0]['boxes'] = boxes 130 | 131 | def __len__(self): 132 | return self.DATA_SIZE 133 | 134 | 135 | def cal_minibatch_ratio(ratio_list): 136 | """Given the ratio_list, we want to make the RATIO same for each minibatch on each GPU. 137 | Note: this only work for 1) cfg.TRAIN.MAX_SIZE is ignored during `prep_im_for_blob` 138 | and 2) cfg.TRAIN.SCALES containing SINGLE scale. 139 | Since all prepared images will have same min side length of cfg.TRAIN.SCALES[0], we can 140 | pad and batch images base on that. 141 | """ 142 | DATA_SIZE = len(ratio_list) 143 | ratio_list_minibatch = np.empty((DATA_SIZE,)) 144 | num_minibatch = int(np.ceil(DATA_SIZE / cfg.TRAIN.IMS_PER_BATCH)) # Include leftovers 145 | for i in range(num_minibatch): 146 | left_idx = i * cfg.TRAIN.IMS_PER_BATCH 147 | right_idx = min((i+1) * cfg.TRAIN.IMS_PER_BATCH - 1, DATA_SIZE - 1) 148 | 149 | if ratio_list[right_idx] < 1: 150 | # for ratio < 1, we preserve the leftmost in each batch. 151 | target_ratio = ratio_list[left_idx] 152 | elif ratio_list[left_idx] > 1: 153 | # for ratio > 1, we preserve the rightmost in each batch. 154 | target_ratio = ratio_list[right_idx] 155 | else: 156 | # for ratio cross 1, we make it to be 1. 157 | target_ratio = 1 158 | 159 | ratio_list_minibatch[left_idx:(right_idx+1)] = target_ratio 160 | return ratio_list_minibatch 161 | 162 | 163 | class MinibatchSampler(torch_sampler.Sampler): 164 | def __init__(self, ratio_list, ratio_index): 165 | self.ratio_list = ratio_list 166 | self.ratio_index = ratio_index 167 | self.num_data = len(ratio_list) 168 | 169 | if cfg.TRAIN.ASPECT_GROUPING: 170 | # Given the ratio_list, we want to make the ratio same 171 | # for each minibatch on each GPU. 172 | self.ratio_list_minibatch = cal_minibatch_ratio(ratio_list) 173 | 174 | def __iter__(self): 175 | if cfg.TRAIN.ASPECT_GROUPING: 176 | # indices for aspect grouping awared permutation 177 | n, rem = divmod(self.num_data, cfg.TRAIN.IMS_PER_BATCH) 178 | round_num_data = n * cfg.TRAIN.IMS_PER_BATCH 179 | indices = np.arange(round_num_data) 180 | npr.shuffle(indices.reshape(-1, cfg.TRAIN.IMS_PER_BATCH)) # inplace shuffle 181 | if rem != 0: 182 | indices = np.append(indices, np.arange(round_num_data, round_num_data + rem)) 183 | ratio_index = self.ratio_index[indices] 184 | ratio_list_minibatch = self.ratio_list_minibatch[indices] 185 | else: 186 | rand_perm = npr.permutation(self.num_data) 187 | ratio_list = self.ratio_list[rand_perm] 188 | ratio_index = self.ratio_index[rand_perm] 189 | # re-calculate minibatch ratio list 190 | ratio_list_minibatch = cal_minibatch_ratio(ratio_list) 191 | 192 | return iter(zip(ratio_index.tolist(), ratio_list_minibatch.tolist())) 193 | 194 | def __len__(self): 195 | return self.num_data 196 | 197 | 198 | class BatchSampler(torch_sampler.BatchSampler): 199 | r"""Wraps another sampler to yield a mini-batch of indices. 200 | Args: 201 | sampler (Sampler): Base sampler. 202 | batch_size (int): Size of mini-batch. 
203 | drop_last (bool): If ``True``, the sampler will drop the last batch if 204 | its size would be less than ``batch_size`` 205 | Example: 206 | >>> list(BatchSampler(range(10), batch_size=3, drop_last=False)) 207 | [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 208 | >>> list(BatchSampler(range(10), batch_size=3, drop_last=True)) 209 | [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 210 | """ 211 | 212 | def __init__(self, sampler, batch_size, drop_last): 213 | if not isinstance(sampler, torch_sampler.Sampler): 214 | raise ValueError("sampler should be an instance of " 215 | "torch.utils.data.Sampler, but got sampler={}" 216 | .format(sampler)) 217 | if not isinstance(batch_size, _int_classes) or isinstance(batch_size, bool) or \ 218 | batch_size <= 0: 219 | raise ValueError("batch_size should be a positive integer value, " 220 | "but got batch_size={}".format(batch_size)) 221 | if not isinstance(drop_last, bool): 222 | raise ValueError("drop_last should be a boolean value, but got " 223 | "drop_last={}".format(drop_last)) 224 | self.sampler = sampler 225 | self.batch_size = batch_size 226 | self.drop_last = drop_last 227 | 228 | def __iter__(self): 229 | batch = [] 230 | for idx in self.sampler: 231 | batch.append(idx) # Difference: batch.append(int(idx)) 232 | if len(batch) == self.batch_size: 233 | yield batch 234 | batch = [] 235 | if len(batch) > 0 and not self.drop_last: 236 | yield batch 237 | 238 | def __len__(self): 239 | if self.drop_last: 240 | return len(self.sampler) // self.batch_size 241 | else: 242 | return (len(self.sampler) + self.batch_size - 1) // self.batch_size 243 | 244 | 245 | 246 | def collate_minibatch(list_of_blobs): 247 | """Stack samples separately and return a list of minibatches 248 | A batch contains NUM_GPUS minibatches, and image sizes in different minibatches may be different. 249 | Hence, we need to stack samples from each minibatch separately. 250 | """ 251 | Batch = {key: [] for key in list_of_blobs[0]} 252 | # Because roidb consists of entries of variable length, it can't be batched into a tensor. 253 | # So we keep roidb in the type of "list of ndarray". 
254 | list_of_roidb = [blobs.pop('roidb') for blobs in list_of_blobs] 255 | for i in range(0, len(list_of_blobs), cfg.TRAIN.IMS_PER_BATCH): 256 | mini_list = list_of_blobs[i:(i + cfg.TRAIN.IMS_PER_BATCH)] 257 | # Pad image data 258 | mini_list = pad_image_data(mini_list) 259 | minibatch = default_collate(mini_list) 260 | minibatch['roidb'] = list_of_roidb[i:(i + cfg.TRAIN.IMS_PER_BATCH)] 261 | for key in minibatch: 262 | Batch[key].append(minibatch[key]) 263 | 264 | return Batch 265 | 266 | 267 | def pad_image_data(list_of_blobs): 268 | max_shape = blob_utils.get_max_shape([blobs['data'].shape[1:] for blobs in list_of_blobs]) 269 | output_list = [] 270 | for blobs in list_of_blobs: 271 | data_padded = np.zeros((3, max_shape[0], max_shape[1]), dtype=np.float32) 272 | _, h, w = blobs['data'].shape 273 | data_padded[:, :h, :w] = blobs['data'] 274 | blobs['data'] = data_padded 275 | output_list.append(blobs) 276 | return output_list 277 | -------------------------------------------------------------------------------- /lib/datasets_rel/task_evaluation_sg.py: -------------------------------------------------------------------------------- 1 | """ 2 | Written by Ji Zhang, 2019 3 | Some functions are adapted from Rowan Zellers 4 | Original source: 5 | https://github.com/rowanz/neural-motifs/blob/master/lib/evaluation/sg_eval.py 6 | """ 7 | import os 8 | import numpy as np 9 | import logging 10 | from six.moves import cPickle as pickle 11 | import json 12 | import csv 13 | from tqdm import tqdm 14 | 15 | from core.config import cfg 16 | from functools import reduce 17 | from utils.boxes import bbox_overlaps 18 | from datasets_rel.ap_eval_rel import ap_eval, prepare_mAP_dets 19 | 20 | from .pytorch_misc import intersect_2d, argsort_desc 21 | 22 | np.set_printoptions(precision=3) 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def eval_rel_results(all_results, output_dir, do_val=True, do_vis=False, do_special=False): 28 | 29 | topk = 100 30 | 31 | if cfg.TEST.DATASETS[0].find('vg') >= 0: 32 | eval_per_img = True 33 | # eval_per_img = False 34 | prd_k = 1 35 | else: 36 | eval_per_img = False 37 | prd_k = 2 38 | 39 | if cfg.TEST.DATASETS[0].find('oi') >= 0: 40 | eval_ap = True 41 | else: 42 | eval_ap = False 43 | 44 | if eval_per_img: 45 | recalls = {1: [], 5: [], 10: [], 20: [], 50: [], 100: []} 46 | else: 47 | recalls = {1: 0, 5: 0, 10: 0, 20: 0, 50: 0, 100: 0} 48 | if do_val: 49 | all_gt_cnt = 0 50 | 51 | if do_special: 52 | special_img_f = open("/home/jiz/projects/100_img_special_set.txt", "r") 53 | special_imgs = special_img_f.readlines() 54 | special_imgs = [img[:-1] for img in special_imgs] 55 | special_img_set = set(special_imgs) 56 | logger.info('Special images len: {}'.format(len(special_img_set))) 57 | 58 | topk_dets = [] 59 | for im_i, res in enumerate(tqdm(all_results)): 60 | 61 | if do_special: 62 | img_id = res['image'].split('/')[-1].split('.')[0] 63 | if img_id not in special_img_set: 64 | continue 65 | 66 | # in oi_all_rel some images have no dets 67 | if res['prd_scores'] is None: 68 | det_boxes_s_top = np.zeros((0, 4), dtype=np.float32) 69 | det_boxes_o_top = np.zeros((0, 4), dtype=np.float32) 70 | det_labels_s_top = np.zeros(0, dtype=np.int32) 71 | det_labels_p_top = np.zeros(0, dtype=np.int32) 72 | det_labels_o_top = np.zeros(0, dtype=np.int32) 73 | det_scores_top = np.zeros(0, dtype=np.float32) 74 | 75 | det_scores_top_vis = np.zeros(0, dtype=np.float32) 76 | if 'prd_scores_bias' in res: 77 | det_scores_top_bias = np.zeros(0, dtype=np.float32) 78 | if 
'prd_scores_spt' in res: 79 | det_scores_top_spt = np.zeros(0, dtype=np.float32) 80 | else: 81 | det_boxes_sbj = res['sbj_boxes'] # (#num_rel, 4) 82 | det_boxes_obj = res['obj_boxes'] # (#num_rel, 4) 83 | det_labels_sbj = res['sbj_labels'] # (#num_rel,) 84 | det_labels_obj = res['obj_labels'] # (#num_rel,) 85 | det_scores_sbj = res['sbj_scores'] # (#num_rel,) 86 | det_scores_obj = res['obj_scores'] # (#num_rel,) 87 | if 'prd_scores_ttl' in res: 88 | det_scores_prd = res['prd_scores_ttl'][:, 1:] 89 | else: 90 | det_scores_prd = res['prd_scores'][:, 1:] 91 | 92 | det_labels_prd = np.argsort(-det_scores_prd, axis=1) 93 | det_scores_prd = -np.sort(-det_scores_prd, axis=1) 94 | 95 | det_scores_so = det_scores_sbj * det_scores_obj 96 | det_scores_spo = det_scores_so[:, None] * det_scores_prd[:, :prd_k] 97 | 98 | det_scores_inds = argsort_desc(det_scores_spo)[:topk] 99 | det_scores_top = det_scores_spo[det_scores_inds[:, 0], det_scores_inds[:, 1]] 100 | det_boxes_so_top = np.hstack( 101 | (det_boxes_sbj[det_scores_inds[:, 0]], det_boxes_obj[det_scores_inds[:, 0]])) 102 | det_labels_p_top = det_labels_prd[det_scores_inds[:, 0], det_scores_inds[:, 1]] 103 | det_labels_spo_top = np.vstack( 104 | (det_labels_sbj[det_scores_inds[:, 0]], det_labels_p_top, det_labels_obj[det_scores_inds[:, 0]])).transpose() 105 | 106 | # filter out bad relationships 107 | cand_inds = np.where(det_scores_top > cfg.TEST.SPO_SCORE_THRESH)[0] 108 | det_boxes_so_top = det_boxes_so_top[cand_inds] 109 | det_labels_spo_top = det_labels_spo_top[cand_inds] 110 | det_scores_top = det_scores_top[cand_inds] 111 | 112 | det_scores_vis = res['prd_scores'][:, 1:] 113 | for i in range(det_labels_prd.shape[0]): 114 | det_scores_vis[i] = det_scores_vis[i][det_labels_prd[i]] 115 | det_scores_vis = det_scores_vis[:, :prd_k] 116 | det_scores_top_vis = det_scores_vis[det_scores_inds[:, 0], det_scores_inds[:, 1]] 117 | det_scores_top_vis = det_scores_top_vis[cand_inds] 118 | if 'prd_scores_bias' in res: 119 | det_scores_bias = res['prd_scores_bias'][:, 1:] 120 | for i in range(det_labels_prd.shape[0]): 121 | det_scores_bias[i] = det_scores_bias[i][det_labels_prd[i]] 122 | det_scores_bias = det_scores_bias[:, :prd_k] 123 | det_scores_top_bias = det_scores_bias[det_scores_inds[:, 0], det_scores_inds[:, 1]] 124 | det_scores_top_bias = det_scores_top_bias[cand_inds] 125 | if 'prd_scores_spt' in res: 126 | det_scores_spt = res['prd_scores_spt'][:, 1:] 127 | for i in range(det_labels_prd.shape[0]): 128 | det_scores_spt[i] = det_scores_spt[i][det_labels_prd[i]] 129 | det_scores_spt = det_scores_spt[:, :prd_k] 130 | det_scores_top_spt = det_scores_spt[det_scores_inds[:, 0], det_scores_inds[:, 1]] 131 | det_scores_top_spt = det_scores_top_spt[cand_inds] 132 | 133 | det_boxes_s_top = det_boxes_so_top[:, :4] 134 | det_boxes_o_top = det_boxes_so_top[:, 4:] 135 | det_labels_s_top = det_labels_spo_top[:, 0] 136 | det_labels_p_top = det_labels_spo_top[:, 1] 137 | det_labels_o_top = det_labels_spo_top[:, 2] 138 | 139 | topk_dets.append(dict(image=res['image'], 140 | det_boxes_s_top=det_boxes_s_top, 141 | det_boxes_o_top=det_boxes_o_top, 142 | det_labels_s_top=det_labels_s_top, 143 | det_labels_p_top=det_labels_p_top, 144 | det_labels_o_top=det_labels_o_top, 145 | det_scores_top=det_scores_top)) 146 | topk_dets[-1]['det_scores_top_vis'] = det_scores_top_vis 147 | if 'prd_scores_bias' in res: 148 | topk_dets[-1]['det_scores_top_bias'] = det_scores_top_bias 149 | if 'prd_scores_spt' in res: 150 | topk_dets[-1]['det_scores_top_spt'] = det_scores_top_spt 151 | if 
do_vis: 152 | topk_dets[-1].update(dict(blob_conv=res['blob_conv'], 153 | blob_conv_prd=res['blob_conv_prd'])) 154 | 155 | if do_val: 156 | gt_boxes_sbj = res['gt_sbj_boxes'] # (#num_gt, 4) 157 | gt_boxes_obj = res['gt_obj_boxes'] # (#num_gt, 4) 158 | gt_labels_sbj = res['gt_sbj_labels'] # (#num_gt,) 159 | gt_labels_obj = res['gt_obj_labels'] # (#num_gt,) 160 | gt_labels_prd = res['gt_prd_labels'] # (#num_gt,) 161 | gt_boxes_so = np.hstack((gt_boxes_sbj, gt_boxes_obj)) 162 | gt_labels_spo = np.vstack((gt_labels_sbj, gt_labels_prd, gt_labels_obj)).transpose() 163 | # Compute recall. It's most efficient to match once and then do recall after 164 | # det_boxes_so_top is (#num_rel, 8) 165 | # det_labels_spo_top is (#num_rel, 3) 166 | pred_to_gt = _compute_pred_matches( 167 | gt_labels_spo, det_labels_spo_top, 168 | gt_boxes_so, det_boxes_so_top) 169 | if eval_per_img: 170 | for k in recalls: 171 | if len(pred_to_gt): 172 | match = reduce(np.union1d, pred_to_gt[:k]) 173 | else: 174 | match = [] 175 | rec_i = float(len(match)) / float(gt_labels_spo.shape[0] + 1e-12) # in case there is no gt 176 | recalls[k].append(rec_i) 177 | else: 178 | all_gt_cnt += gt_labels_spo.shape[0] 179 | for k in recalls: 180 | if len(pred_to_gt): 181 | match = reduce(np.union1d, pred_to_gt[:k]) 182 | else: 183 | match = [] 184 | recalls[k] += len(match) 185 | 186 | topk_dets[-1].update(dict(gt_boxes_sbj=gt_boxes_sbj, 187 | gt_boxes_obj=gt_boxes_obj, 188 | gt_labels_sbj=gt_labels_sbj, 189 | gt_labels_obj=gt_labels_obj, 190 | gt_labels_prd=gt_labels_prd)) 191 | 192 | if do_val: 193 | if eval_per_img: 194 | for k, v in recalls.items(): 195 | recalls[k] = np.mean(v) 196 | else: 197 | for k in recalls: 198 | recalls[k] = float(recalls[k]) / (float(all_gt_cnt) + 1e-12) 199 | excel_str = print_stats(recalls) 200 | if eval_ap: 201 | # prepare dets for each class 202 | logger.info('Preparing dets for mAP...') 203 | cls_image_ids, cls_dets, cls_gts, npos = prepare_mAP_dets(topk_dets, 9) 204 | all_npos = sum(npos) 205 | with open(cfg.DATA_DIR + '/openimages_v4/rel/rel_9_predicates.json') as f: 206 | rel_prd_cats = json.load(f) 207 | 208 | rel_mAP = 0. 209 | w_rel_mAP = 0. 210 | ap_str = '' 211 | for c in range(9): 212 | rec, prec, ap = ap_eval(cls_image_ids[c], cls_dets[c], cls_gts[c], npos[c], True) 213 | weighted_ap = ap * float(npos[c]) / float(all_npos) 214 | w_rel_mAP += weighted_ap 215 | rel_mAP += ap 216 | ap_str += '{:.2f}, '.format(100 * ap) 217 | print('rel AP for class {}: {:.2f} ({:.6f})'.format(rel_prd_cats[c], 100 * ap, float(npos[c]) / float(all_npos))) 218 | rel_mAP /= 9. 219 | print('weighted rel mAP: {:.2f}'.format(100 * w_rel_mAP)) 220 | excel_str += ap_str 221 | 222 | phr_mAP = 0. 223 | w_phr_mAP = 0. 224 | ap_str = '' 225 | for c in range(9): 226 | rec, prec, ap = ap_eval(cls_image_ids[c], cls_dets[c], cls_gts[c], npos[c], False) 227 | weighted_ap = ap * float(npos[c]) / float(all_npos) 228 | w_phr_mAP += weighted_ap 229 | phr_mAP += ap 230 | ap_str += '{:.2f}, '.format(100 * ap) 231 | print('phr AP for class {}: {:.2f} ({:.6f})'.format(rel_prd_cats[c], 100 * ap, float(npos[c]) / float(all_npos))) 232 | phr_mAP /= 9. 
233 | print('weighted phr mAP: {:.2f}'.format(100 * w_phr_mAP)) 234 | excel_str += ap_str 235 | 236 | # total: 0.4 x rel_mAP + 0.2 x R@50 + 0.4 x phr_mAP 237 | final_score = 0.4 * rel_mAP + 0.2 * recalls[50] + 0.4 * phr_mAP 238 | 239 | # total: 0.4 x w_rel_mAP + 0.2 x R@50 + 0.4 x w_phr_mAP 240 | w_final_score = 0.4 * w_rel_mAP + 0.2 * recalls[50] + 0.4 * w_phr_mAP 241 | print('weighted final_score: {:.2f}'.format(100 * w_final_score)) 242 | 243 | # get excel friendly string 244 | # excel_str = '{:.2f}, {:.2f}, {:.2f}, {:.2f}, '.format(100 * recalls[50], 100 * w_rel_mAP, 100 * w_phr_mAP, 100 * w_final_score) + excel_str 245 | # print('Excel-friendly format:') 246 | # print(excel_str.strip()[:-1]) 247 | 248 | # print('Saving topk dets...') 249 | # topk_dets_f = os.path.join(output_dir, 'rel_detections_topk.pkl') 250 | # with open(topk_dets_f, 'wb') as f: 251 | # pickle.dump(topk_dets, f, pickle.HIGHEST_PROTOCOL) 252 | # logger.info('topk_dets size: {}'.format(len(topk_dets))) 253 | print('Done.') 254 | 255 | 256 | def print_stats(recalls): 257 | # print('====================== ' + 'sgdet' + ' ============================') 258 | k_str = '' 259 | for k in recalls.keys(): 260 | if k == 50: 261 | continue 262 | k_str += '{}\t'.format(k) 263 | v_str = '' 264 | for k, v in recalls.items(): 265 | print('R@%i: %.2f' % (k, 100 * v)) 266 | if k == 50: 267 | continue 268 | v_str += '{:.2f}, '.format(100 * v) 269 | return v_str 270 | 271 | 272 | # This function is adapted from Rowan Zellers' code: 273 | # https://github.com/rowanz/neural-motifs/blob/master/lib/evaluation/sg_eval.py 274 | # Modified for this project to work with PyTorch v0.4 275 | def _compute_pred_matches(gt_triplets, pred_triplets, 276 | gt_boxes, pred_boxes, iou_thresh=0.5, phrdet=False): 277 | """ 278 | Given a set of predicted triplets, return the list of matching GT's for each of the 279 | given predictions 280 | :param gt_triplets: 281 | :param pred_triplets: 282 | :param gt_boxes: 283 | :param pred_boxes: 284 | :param iou_thresh: IoU threshold used for box matching 285 | :return: 286 | """ 287 | # This performs a matrix multiplication-esque thing between the two arrays 288 | # Instead of summing, we want the equality, so we reduce in that way 289 | # The rows correspond to GT triplets, columns to pred triplets 290 | keeps = intersect_2d(gt_triplets, pred_triplets) 291 | gt_has_match = keeps.any(1) 292 | pred_to_gt = [[] for x in range(pred_boxes.shape[0])] 293 | for gt_ind, gt_box, keep_inds in zip(np.where(gt_has_match)[0], 294 | gt_boxes[gt_has_match], 295 | keeps[gt_has_match], 296 | ): 297 | boxes = pred_boxes[keep_inds] 298 | if phrdet: 299 | # Evaluate where the union box IoU >= iou_thresh 300 | gt_box_union = gt_box.reshape((2, 4)) 301 | gt_box_union = np.concatenate((gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0) 302 | 303 | box_union = boxes.reshape((-1, 2, 4)) 304 | box_union = np.concatenate((box_union.min(1)[:,:2], box_union.max(1)[:,2:]), 1) 305 | 306 | gt_box_union = gt_box_union.astype(dtype=np.float32, copy=False) 307 | box_union = box_union.astype(dtype=np.float32, copy=False) 308 | inds = bbox_overlaps(gt_box_union[None], 309 | box_union)[0] >= iou_thresh 310 | 311 | else: 312 | gt_box = gt_box.astype(dtype=np.float32, copy=False) 313 | boxes = boxes.astype(dtype=np.float32, copy=False) 314 | sub_iou = bbox_overlaps(gt_box[None,:4], boxes[:, :4])[0] 315 | obj_iou = bbox_overlaps(gt_box[None,4:], boxes[:, 4:])[0] 316 | 317 | inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh) 318 | 319 | for i in np.where(keep_inds)[0][inds]: 320 
| pred_to_gt[i].append(int(gt_ind)) 321 | return pred_to_gt 322 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graphical Contrastive Losses for Scene Graph Parsing 2 | 3 | ![alt text](https://github.com/NVIDIA/ContrastiveLosses4VRD/blob/master/Examples.PNG) 4 |

Example results from the OpenImages dataset.

5 | 6 | ![alt text](https://github.com/NVIDIA/ContrastiveLosses4VRD/blob/master/Loss_illustration.PNG) 7 | Example results of RelDN without and with our losses. "L0 only" means using only the original multi-class logistic loss (without our losses). The top row shows RelDN outputs and the bottom row visualizes the learned predicate CNN features of the two models. Red and green boxes highlight the wrong and right outputs (first row) or feature saliency (second row). 8 | 9 | This is a PyTorch implementation for [Graphical Contrastive Losses for Scene Graph Parsing, CVPR2019](https://arxiv.org/abs/1903.02728). It is an improved version of the code that won 1st place in the [Google AI Open Images Visual Relationship Detection Challenge](https://www.kaggle.com/c/google-ai-open-images-visual-relationship-track/leaderboard). 10 | 11 | ## News 12 | We have created a branch for a version supporting pytorch1.0! Just go to the [pytorch1_0](https://github.com/NVIDIA/ContrastiveLosses4VRD/tree/pytorch1_0) branch and check it out! 13 | 14 | ## Benchmarking on Visual Genome 15 | | Method | Backbone | SGDET@20 | SGDET@50 | SGDET@100 | 16 | | :--- | :----: | :----: | :----: | :----: | 17 | | Frequency \[1\] | VGG16 | 17.7 | 23.5 | 27.6 | 18 | | Frequency+Overlap \[1\] | VGG16 | 20.1 | 26.2 | 30.1 | 19 | | MotifNet \[1\] | VGG16 | 21.4 | 27.2 | 30.3 | 20 | | Graph-RCNN \[2\] | Res-101 | 19.4 | 25.0 | 28.5 | 21 | | RelDN, w/o contrastive losses | VGG16 | 20.8 | 28.1 | 32.5 | 22 | | RelDN, full | VGG16 | 21.1 | 28.3 | 32.7 | 23 | | RelDN, full | ResNext-101-FPN | 22.5 | 31.0 | 36.7 | 24 | 25 | \*"RelDN" is the relationship detection model we proposed in the paper. 26 | 27 | \*We use the frequency prior in our model by default. 28 | 29 | \*Results of "Graph-RCNN" are directly copied from [their repo](https://github.com/jwyang/graph-rcnn.pytorch). 30 | 31 | \[1\] [Zellers, Rowan, et al. "Neural motifs: Scene graph parsing with global context." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2018.](http://openaccess.thecvf.com/content_cvpr_2018/html/Zellers_Neural_Motifs_Scene_CVPR_2018_paper.html) 32 | 33 | \[2\] [Yang, Jianwei, et al. "Graph r-cnn for scene graph generation." Proceedings of the European Conference on Computer Vision (ECCV). 2018.](http://openaccess.thecvf.com/content_ECCV_2018/html/Jianwei_Yang_Graph_R-CNN_for_ECCV_2018_paper.html) 34 | 35 | ## Cloning 36 | ``` 37 | git clone https://github.com/NVIDIA/ContrastiveLosses4VRD.git --recurse-submodules 38 | 39 | ``` 40 | 41 | ## Requirements 42 | * Python 3 43 | * Python packages 44 | * pytorch 0.4.0 or 0.4.1.post2 (not guaranteed to work on newer versions) 45 | * cython 46 | * matplotlib 47 | * numpy 48 | * scipy 49 | * opencv 50 | * pyyaml 51 | * packaging 52 | * [pycocotools](https://github.com/cocodataset/cocoapi) 53 | * tensorboardX 54 | * tqdm 55 | * pillow 56 | * scikit-image 57 | * An NVIDIA GPU and CUDA 8.0 or higher. Some operations only have GPU implementations. 58 | 59 | An easy installation if you already have Anaconda Python 3 and CUDA 9.0: 60 | ``` 61 | conda install pytorch=0.4.1 62 | pip install cython 63 | pip install matplotlib numpy scipy pyyaml packaging pycocotools tensorboardX tqdm pillow scikit-image 64 | conda install opencv 65 | ``` 66 | 67 | * (Optional) A dockerfile with all necessary dependencies is included in docker/Dockerfile. 
Requires nvidia-docker. 68 | 69 | ``` 70 | # ROOT=path/to/cloned/repository 71 | cd $ROOT/docker 72 | # build the docker image and tag it 73 | docker build -t myname/mydockertag:1.0 . 74 | # launch an interactive session with this folder 75 | nvidia-docker run -v $ROOT:/workspace/visual-relationship-detection:rw -it myname/mydockertag:1.0 76 | # NOTE: you may need to mount other volumes depending on where your datasets are stored 77 | ``` 78 | 79 | ## Compilation 80 | Compile the CUDA code in the Detectron submodule and in the repo: 81 | ``` 82 | # ROOT=path/to/cloned/repository 83 | cd $ROOT/Detectron_pytorch/lib 84 | sh make.sh 85 | cd $ROOT/lib 86 | sh make.sh 87 | ``` 88 | 89 | ## Annotations 90 | 91 | Create a data folder at the top-level directory of the repository: 92 | ``` 93 | # ROOT=path/to/cloned/repository 94 | cd $ROOT 95 | mkdir data 96 | ``` 97 | If necessary, one may edit the `DATA_DIR` field in lib/core/config.py to change the expected path to the data directory. Be sure to update the paths in the VRD preprocessing scripts (mentioned below) if you do this. 98 | 99 | ### OpenImages/OpenImages_mini 100 | Download it [here](https://drive.google.com/open?id=1GeUEsiS9Z3eRYnH1GPUz99wjQwjcHl6n). Unzip it under the data folder. You should see an `openimages_v4` folder unzipped there. It contains .json annotation files for both OpenImages and OpenImages_mini; the latter is a subset of the former that we created, containing 4500 training and 1000 test images. The .json files are created based on the original .csv annotations. 101 | 102 | ### Visual Genome 103 | Download it [here](https://drive.google.com/open?id=1VDuba95vIPVhg5DiriPtwuVA6mleYGad). Unzip it under the data folder. You should see a `vg` folder unzipped there. It contains .json annotations that suit the dataloader used in this repo. 104 | 105 | ### Visual Relation Detection 106 | 107 | See [Images:VRD](#visual-relation-detection-1) 108 | 109 | ## Images 110 | 111 | ### OpenImages 112 | Create a folder `train/` for the training images: 113 | ``` 114 | # ROOT=path/to/cloned/repository 115 | cd $ROOT/data/openimages_v4 116 | mkdir train 117 | ``` 118 | Download OpenImages v4 training images from the [official page](https://storage.googleapis.com/openimages/web/download.html) (**Warning: this is a very large dataset**). **Note:** only training images are needed since our annotations will split them into a train and a validation set. Put all images in `train/`. 119 | 120 | ### Visual Genome 121 | Create a folder for all images: 122 | ``` 123 | # ROOT=path/to/cloned/repository 124 | cd $ROOT/data/vg 125 | mkdir VG_100K 126 | ``` 127 | Download Visual Genome images from the [official page](https://visualgenome.org/api/v0/api_home.html). Unzip all images (part 1 and part 2) into `VG_100K/`. There should be a total of 108,249 files. 128 | 129 | ### Visual Relation Detection 130 | Create the vrd folder under `data`: 131 | ``` 132 | # ROOT=path/to/cloned/repository 133 | mkdir -p $ROOT/data/vrd && cd $ROOT/data/vrd 134 | ``` 135 | Download the original annotation json files from [here](https://cs.stanford.edu/people/ranjaykrishna/vrd/) and unzip `json_dataset.zip` here. The images can be downloaded from [here](http://imagenet.stanford.edu/internal/jcjohns/scene_graphs/sg_dataset.zip). Unzip `sg_dataset.zip` to create an `sg_dataset` folder in `data/vrd`.
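Putting those steps together, a minimal sketch of the download/unzip sequence looks like the following (run from `$ROOT/data/vrd`; `json_dataset.zip` has to be fetched manually from the annotation page linked above):
```
# run from $ROOT/data/vrd
unzip json_dataset.zip    # original VRD relationship annotation .json files
wget http://imagenet.stanford.edu/internal/jcjohns/scene_graphs/sg_dataset.zip
unzip sg_dataset.zip      # creates data/vrd/sg_dataset/ with the images
```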
Next, run the preprocessing scripts: 136 | 137 | ``` 138 | cd $ROOT 139 | python tools/rename_vrd_with_numbers.py 140 | python tools/convert_vrd_anno_to_coco_format.py 141 | ``` 142 | `rename_vrd_with_numbers.py` converts all non-jpg images (some images are in png or gif) to jpg, and renames them in the {:012d}.jpg format (e.g., "000000000001.jpg"). It also creates new relationship annotation files, separate from the original ones, mostly to make things easier for the dataloader. The mapping from the original filenames is stored in `data/vrd/*_fname_mapping.json`, where "*" is either "train" or "val". 143 | 144 | `convert_vrd_anno_to_coco_format.py` creates object detection annotations from the new annotations generated above, which are required by the dataloader during training. 145 | 146 | ## Pre-trained Object Detection Models 147 | Download pre-trained object detection models [here](https://drive.google.com/open?id=1NrqOLbMa_RwHbG3KIXJFWLnlND2kiIpj). Unzip the file under the root directory. **Note:** We do not include code for training object detectors. Please refer to the "(Optional) Training Object Detection Models" section in [Large-Scale-VRD.pytorch](https://github.com/jz462/Large-Scale-VRD.pytorch) for this. 148 | 149 | ## Our Trained Relationship Detection Models 150 | Download our trained models [here](https://drive.google.com/open?id=15w0q3Nuye2ieu_aUNdTS_FNvoVzM4RMF). Unzip the file under the root folder and you should see a `trained_models` folder there. 151 | 152 | ## Directory Structure 153 | The final directories for data and detection models should look like: 154 | ``` 155 | |-- detection_models 156 | | |-- oi_rel 157 | | | |-- X-101-64x4d-FPN 158 | | | | |-- model_step599999.pth 159 | | |-- vg 160 | | | |-- VGG16 161 | | | | |-- model_step479999.pth 162 | | | |-- X-101-64x4d-FPN 163 | | | | |-- model_step119999.pth 164 | | |-- vrd 165 | | | |-- VGG16 166 | | | | |-- model_step4499.pth 167 | |-- data 168 | | |-- openimages_v4 169 | | | |-- train <-- (contains OpenImages_v4 training/validation images) 170 | | | |-- rel 171 | | | | |-- rel_only_annotations_train.json 172 | | | | |-- rel_only_annotations_val.json 173 | | | | |-- ... 174 | | |-- vg 175 | | | |-- VG_100K <-- (contains all Visual Genome images) 176 | | | |-- rel_annotations_train.json 177 | | | |-- rel_annotations_val.json 178 | | | |-- ... 179 | | |-- vrd 180 | | | |-- train_images <-- (contains Visual Relation Detection training images) 181 | | | |-- val_images <-- (contains Visual Relation Detection validation images) 182 | | | |-- new_annotations_train.json 183 | | | |-- new_annotations_val.json 184 | | | |-- ... 185 | |-- trained_models 186 | | |-- oi_mini_X-101-64x4d-FPN 187 | | | |-- model_step6749.pth 188 | | |-- oi_X-101-64x4d-FPN 189 | | | |-- model_step80929.pth 190 | | |-- vg_VGG16 191 | | | |-- model_step62722.pth 192 | | |-- vg_X-101-64x4d-FPN 193 | | | |-- model_step62722.pth 194 | | |-- vrd_VGG16_IN_pretrained 195 | | | |-- model_step7559.pth 196 | | |-- vrd_VGG16_COCO_pretrained 197 | | | |-- model_step7559.pth 198 | ``` 199 | 200 | ## Evaluating Pre-trained Relationship Detection Models 201 | 202 | DO NOT CHANGE anything in the provided config files (configs/xx/xxxx.yaml) even if you want to test with fewer or more than 8 GPUs. Use the environment variable `CUDA_VISIBLE_DEVICES` to control how many and which GPUs to use. Remove the 203 | `--multi-gpu-testing` flag for single-GPU inference.
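For example, to run the OpenImages_mini evaluation below on a single GPU, one would pin the process to one device and drop `--multi-gpu-testing`; this is only a sketch of the pattern, and the full commands are listed in the following sections:
```
# restrict the run to GPU 0 and omit --multi-gpu-testing
CUDA_VISIBLE_DEVICES=0 python ./tools/test_net_rel.py --dataset oi_rel_mini \
    --cfg configs/oi_rel_mini/e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_mini_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml \
    --load_ckpt trained_models/oi_mini_X-101-64x4d-FPN/model_step6749.pth \
    --output_dir Outputs/oi_mini_X-101-64x4d-FPN --do_val
```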
204 | 205 | ### OpenImages_mini 206 | To test a trained model using a ResNeXt-101-64x4d-FPN backbone, run 207 | ``` 208 | python ./tools/test_net_rel.py --dataset oi_rel_mini --cfg configs/oi_rel_mini/e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_mini_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml --load_ckpt trained_models/oi_mini_X-101-64x4d-FPN/model_step6749.pth --output_dir Outputs/oi_mini_X-101-64x4d-FPN --multi-gpu-testing --do_val 209 | ``` 210 | This should reproduce the numbers shown in the last line of Table 1 in the paper. 211 | 212 | ### OpenImages 213 | To test a trained model using a ResNeXt-101-64x4d-FPN backbone, run 214 | ``` 215 | python ./tools/test_net_rel.py --dataset oi_rel --cfg configs/oi_rel/e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml --load_ckpt trained_models/oi_X-101-64x4d-FPN/model_step80929.pth --output_dir Outputs/oi_X-101-64x4d-FPN --multi-gpu-testing --do_val 216 | ``` 217 | 218 | ### Visual Genome 219 | **NOTE:** Evaluating on the Visual Genome test set may require at least 64GB of RAM. 220 | 221 | We use three evaluation metrics for Visual Genome: 222 | 1. SGDET: predict subject, object and predicate labels as well as both boxes 223 | 1. SGCLS: predict subject, object and predicate labels given ground truth subject and object boxes 224 | 1. PRDCLS: predict predicate labels given ground truth subject and object boxes and labels 225 | 226 | To test a trained model using a VGG16 backbone with "SGDET", run 227 | ``` 228 | python ./tools/test_net_rel.py --dataset vg --cfg configs/vg/e2e_faster_rcnn_VGG16_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_no_spt.yaml --load_ckpt trained_models/vg_VGG16/model_step62722.pth --output_dir Outputs/vg_VGG16 --multi-gpu-testing --do_val 229 | ``` 230 | Use the `--use_gt_boxes` option to test it with "SGCLS"; use the `--use_gt_boxes --use_gt_labels` options to test it with "PRDCLS". The results will vary slightly from those in the last line of Table 6 in the paper. 231 | 232 | To test a trained model using a ResNeXt-101-64x4d-FPN backbone with "SGDET", run 233 | ``` 234 | python ./tools/test_net_rel.py --dataset vg --cfg configs/vg/e2e_faster_rcnn_X-101-64x4d-FPN_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml --load_ckpt trained_models/vg_X-101-64x4d-FPN/model_step62722.pth --output_dir Outputs/vg_X-101-64x4d-FPN --multi-gpu-testing --do_val 235 | ``` 236 | Use the `--use_gt_boxes` option to test it with "SGCLS"; use the `--use_gt_boxes --use_gt_labels` options to test it with "PRDCLS". The results will vary slightly from those in the last line of Table 1 in the supplementary material. 237 | 238 | ### Visual Relation Detection 239 | To test a trained model initialized by an ImageNet pre-trained VGG16 model, run 240 | ``` 241 | python ./tools/test_net_rel.py --dataset vrd --cfg configs/vrd/e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_IN_pretrained.yaml --load_ckpt trained_models/vrd_VGG16_IN_pretrained/model_step7559.pth --output_dir Outputs/vrd_VGG16_IN_pretrained --multi-gpu-testing --do_val 242 | ``` 243 | The results differ slightly from those in the second-to-last line of Table 7.
244 | 245 | To test a trained model initialized by a COCO pre-trained VGG16 model, run 246 | ``` 247 | python ./tools/test_net_rel.py --dataset vrd --cfg configs/vrd/e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_COCO_pretrained.yaml --load_ckpt trained_models/vrd_VGG16_COCO_pretrained/model_step7559.pth --output_dir Outputs/vrd_VGG16_COCO_pretrained --multi-gpu-testing --do_val 248 | ``` 249 | The results differ slightly from those in the last line of Table 7. 250 | 251 | ## Training Relationship Detection Models 252 | 253 | This section provides the command-line arguments to train our relationship detection models given the pre-trained object detection models described above. **Note:** We do not train object detectors here. We only use trained object detectors (provided in `detection_models/`) to initialize our to-be-trained relationship models. 254 | 255 | DO NOT CHANGE anything in the provided config files (configs/xx/xxxx.yaml) even if you want to train with fewer or more than 8 GPUs. Use the environment variable `CUDA_VISIBLE_DEVICES` to control how many and which GPUs to use. 256 | 257 | With the following command lines, the training results (models and logs) should be in `$ROOT/Outputs/xxx/` where `xxx` is the .yaml file name used in the command without the ".yaml" extension. If you want to test with your trained models, simply run the test commands described above and set `--load_ckpt` to the path of your trained model. 258 | 259 | ### OpenImages_mini 260 | To train our relationship network using a ResNeXt-101-64x4d-FPN backbone, run 261 | ``` 262 | python tools/train_net_step_rel.py --dataset oi_rel_mini --cfg configs/oi_rel_mini/e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_mini_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml --nw 8 --use_tfboard 263 | ``` 264 | 265 | ### OpenImages 266 | To train our relationship network using a ResNeXt-101-64x4d-FPN backbone, run 267 | ``` 268 | python tools/train_net_step_rel.py --dataset oi_rel --cfg configs/oi_rel/e2e_faster_rcnn_X-101-64x4d-FPN_12_epochs_oi_rel_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml --nw 8 --use_tfboard 269 | ``` 270 | 271 | ### Visual Genome 272 | To train our relationship network using a VGG16 backbone, run 273 | ``` 274 | python tools/train_net_step_rel.py --dataset vg --cfg configs/vg/e2e_faster_rcnn_VGG16_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_no_spt.yaml --nw 8 --use_tfboard 275 | ``` 276 | 277 | To train our relationship network using a ResNeXt-101-64x4d-FPN backbone, run 278 | ``` 279 | python tools/train_net_step_rel.py --dataset vg --cfg configs/vg/e2e_faster_rcnn_X-101-64x4d-FPN_8_epochs_vg_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5.yaml --nw 8 --use_tfboard 280 | ``` 281 | 282 | ### Visual Relation Detection 283 | To train our relationship network initialized by an ImageNet pre-trained VGG16 model, run 284 | ``` 285 | python tools/train_net_step_rel.py --dataset vrd --cfg configs/vrd/e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_IN_pretrained.yaml --nw 8 --use_tfboard 286 | ``` 287 | 288 | To train our relationship network initialized by a COCO pre-trained VGG16 model, run 289 | ``` 290 | python tools/train_net_step_rel.py --dataset vrd --cfg
configs/vrd/e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_COCO_pretrained.yaml --nw 8 --use_tfboard 291 | ``` 292 | 293 | ## Acknowledgements 294 | This repository uses code based on the [Neural-Motifs](https://github.com/rowanz/neural-motifs) source code from Rowan Zellers, as well as 295 | code from the [Detectron.pytorch](https://github.com/roytseng-tw/Detectron.pytorch) repository by Roy Tseng. See LICENSES for additional details. 296 | 297 | ## Citing 298 | If you use this code in your research, please use the following BibTeX entry. 299 | ``` 300 | @conference{zhang2019vrd, 301 | title={Graphical Contrastive Losses for Scene Graph Parsing}, 302 | author={Zhang, Ji and Shih, Kevin J. and Elgammal, Ahmed and Tao, Andrew and Catanzaro, Bryan}, 303 | booktitle={CVPR}, 304 | year={2019} 305 | } 306 | -------------------------------------------------------------------------------- /lib/roi_data_rel/fast_rcnn_rel.py: -------------------------------------------------------------------------------- 1 | # Adapted by Ji Zhang, 2019 2 | # 3 | # Based on Detectron.pytorch/lib/roi_data/fast_rcnn.py 4 | # Original license text: 5 | # -------------------------------------------------------- 6 | # Copyright (c) 2017-present, Facebook, Inc. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | ############################################################################## 20 | 21 | """Construct minibatches for Fast R-CNN training. Handles the minibatch blobs 22 | that are specific to Fast R-CNN. Other blobs that are generic to RPN, etc. 23 | are handled by their respecitive roi_data modules. 24 | """ 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from __future__ import unicode_literals 30 | 31 | import numpy as np 32 | import numpy.random as npr 33 | import logging 34 | 35 | from core.config import cfg 36 | import utils_rel.boxes_rel as box_utils_rel 37 | import utils.blob as blob_utils 38 | import utils.fpn as fpn_utils 39 | 40 | 41 | logger = logging.getLogger(__name__) 42 | 43 | 44 | def add_rel_blobs(blobs, im_scales, roidb): 45 | """Add blobs needed for training Fast R-CNN style models.""" 46 | # Sample training RoIs from each image and append them to the blob lists 47 | for im_i, entry in enumerate(roidb): 48 | frcn_blobs = _sample_pairs(entry, im_scales[im_i], im_i) 49 | for k, v in frcn_blobs.items(): 50 | blobs[k].append(v) 51 | # Concat the training blob lists into tensors 52 | for k, v in blobs.items(): 53 | if isinstance(v, list) and len(v) > 0: 54 | blobs[k] = np.concatenate(v) 55 | 56 | if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS: 57 | _add_rel_multilevel_rois(blobs) 58 | 59 | return True 60 | 61 | 62 | def _sample_pairs(roidb, im_scale, batch_idx): 63 | """Generate a random sample of RoIs comprising foreground and background 64 | examples. 
65 | """ 66 | fg_pairs_per_image = cfg.TRAIN.FG_REL_SIZE_PER_IM 67 | pairs_per_image = int(cfg.TRAIN.FG_REL_SIZE_PER_IM / cfg.TRAIN.FG_REL_FRACTION) # need much more pairs since it's quadratic 68 | max_pair_overlaps = roidb['max_pair_overlaps'] 69 | 70 | gt_pair_inds = np.where(max_pair_overlaps > 1.0 - 1e-4)[0] 71 | fg_pair_inds = np.where((max_pair_overlaps >= cfg.TRAIN.FG_THRESH) & 72 | (max_pair_overlaps <= 1.0 - 1e-4))[0] 73 | 74 | fg_pairs_per_this_image = np.minimum(fg_pairs_per_image, gt_pair_inds.size + fg_pair_inds.size) 75 | # Sample foreground regions without replacement 76 | if fg_pair_inds.size > 0: 77 | fg_pair_inds = npr.choice( 78 | fg_pair_inds, size=(fg_pairs_per_this_image - gt_pair_inds.size), replace=False) 79 | fg_pair_inds = np.append(fg_pair_inds, gt_pair_inds) 80 | 81 | # Label is the class each RoI has max overlap with 82 | fg_prd_labels = roidb['max_prd_classes'][fg_pair_inds] 83 | blob_dict = dict( 84 | fg_prd_labels_int32=fg_prd_labels.astype(np.int32, copy=False)) 85 | if cfg.MODEL.USE_BG: 86 | bg_pair_inds = np.where((max_pair_overlaps < cfg.TRAIN.BG_THRESH_HI))[0] 87 | 88 | # Compute number of background RoIs to take from this image (guarding 89 | # against there being fewer than desired) 90 | bg_pairs_per_this_image = pairs_per_image - fg_pairs_per_this_image 91 | bg_pairs_per_this_image = np.minimum(bg_pairs_per_this_image, bg_pair_inds.size) 92 | # Sample foreground regions without replacement 93 | if bg_pair_inds.size > 0: 94 | bg_pair_inds = npr.choice( 95 | bg_pair_inds, size=bg_pairs_per_this_image, replace=False) 96 | keep_pair_inds = np.append(fg_pair_inds, bg_pair_inds) 97 | all_prd_labels = np.zeros(keep_pair_inds.size, dtype=np.int32) 98 | all_prd_labels[:fg_pair_inds.size] = fg_prd_labels + 1 # class should start from 1 99 | else: 100 | keep_pair_inds = fg_pair_inds 101 | all_prd_labels = fg_prd_labels 102 | blob_dict['all_prd_labels_int32'] = all_prd_labels.astype(np.int32, copy=False) 103 | blob_dict['fg_size'] = np.array([fg_pair_inds.size], dtype=np.int32) # this is used to check if there is at least one fg to learn 104 | 105 | sampled_sbj_boxes = roidb['sbj_boxes'][keep_pair_inds] 106 | sampled_obj_boxes = roidb['obj_boxes'][keep_pair_inds] 107 | # Scale rois and format as (batch_idx, x1, y1, x2, y2) 108 | sampled_sbj_rois = sampled_sbj_boxes * im_scale 109 | sampled_obj_rois = sampled_obj_boxes * im_scale 110 | repeated_batch_idx = batch_idx * blob_utils.ones((keep_pair_inds.shape[0], 1)) 111 | sampled_sbj_rois = np.hstack((repeated_batch_idx, sampled_sbj_rois)) 112 | sampled_obj_rois = np.hstack((repeated_batch_idx, sampled_obj_rois)) 113 | blob_dict['sbj_rois'] = sampled_sbj_rois 114 | blob_dict['obj_rois'] = sampled_obj_rois 115 | sampled_rel_rois = box_utils_rel.rois_union(sampled_sbj_rois, sampled_obj_rois) 116 | blob_dict['rel_rois'] = sampled_rel_rois 117 | if cfg.MODEL.USE_SPATIAL_FEAT: 118 | sampled_spt_feat = box_utils_rel.get_spt_features( 119 | sampled_sbj_boxes, sampled_obj_boxes, roidb['width'], roidb['height']) 120 | blob_dict['spt_feat'] = sampled_spt_feat 121 | if cfg.MODEL.USE_FREQ_BIAS: 122 | sbj_labels = roidb['max_sbj_classes'][keep_pair_inds] 123 | obj_labels = roidb['max_obj_classes'][keep_pair_inds] 124 | blob_dict['all_sbj_labels_int32'] = sbj_labels.astype(np.int32, copy=False) 125 | blob_dict['all_obj_labels_int32'] = obj_labels.astype(np.int32, copy=False) 126 | if cfg.MODEL.USE_NODE_CONTRASTIVE_LOSS or cfg.MODEL.USE_NODE_CONTRASTIVE_SO_AWARE_LOSS or cfg.MODEL.USE_NODE_CONTRASTIVE_P_AWARE_LOSS: 127 | 
nodes_per_image = cfg.MODEL.NODE_SAMPLE_SIZE 128 | max_sbj_overlaps = roidb['max_sbj_overlaps'] 129 | max_obj_overlaps = roidb['max_obj_overlaps'] 130 | # sbj 131 | # Here a naturally existing assumption is, each positive sbj should have at least one positive obj 132 | sbj_pos_pair_pos_inds = np.where((max_pair_overlaps >= cfg.TRAIN.FG_THRESH))[0] 133 | sbj_pos_obj_pos_pair_neg_inds = np.where((max_sbj_overlaps >= cfg.TRAIN.FG_THRESH) & 134 | (max_obj_overlaps >= cfg.TRAIN.FG_THRESH) & 135 | (max_pair_overlaps < cfg.TRAIN.BG_THRESH_HI))[0] 136 | sbj_pos_obj_neg_pair_neg_inds = np.where((max_sbj_overlaps >= cfg.TRAIN.FG_THRESH) & 137 | (max_obj_overlaps < cfg.TRAIN.FG_THRESH) & 138 | (max_pair_overlaps < cfg.TRAIN.BG_THRESH_HI))[0] 139 | if sbj_pos_pair_pos_inds.size > 0: 140 | sbj_pos_pair_pos_inds = npr.choice( 141 | sbj_pos_pair_pos_inds, 142 | size=int(min(nodes_per_image, sbj_pos_pair_pos_inds.size)), 143 | replace=False) 144 | if sbj_pos_obj_pos_pair_neg_inds.size > 0: 145 | sbj_pos_obj_pos_pair_neg_inds = npr.choice( 146 | sbj_pos_obj_pos_pair_neg_inds, 147 | size=int(min(nodes_per_image, sbj_pos_obj_pos_pair_neg_inds.size)), 148 | replace=False) 149 | sbj_pos_pair_neg_inds = sbj_pos_obj_pos_pair_neg_inds 150 | if nodes_per_image - sbj_pos_obj_pos_pair_neg_inds.size > 0 and sbj_pos_obj_neg_pair_neg_inds.size > 0: 151 | sbj_pos_obj_neg_pair_neg_inds = npr.choice( 152 | sbj_pos_obj_neg_pair_neg_inds, 153 | size=int(min(nodes_per_image - sbj_pos_obj_pos_pair_neg_inds.size, sbj_pos_obj_neg_pair_neg_inds.size)), 154 | replace=False) 155 | sbj_pos_pair_neg_inds = np.append(sbj_pos_pair_neg_inds, sbj_pos_obj_neg_pair_neg_inds) 156 | sbj_pos_inds = np.append(sbj_pos_pair_pos_inds, sbj_pos_pair_neg_inds) 157 | binary_labels_sbj_pos = np.zeros(sbj_pos_inds.size, dtype=np.int32) 158 | binary_labels_sbj_pos[:sbj_pos_pair_pos_inds.size] = 1 159 | blob_dict['binary_labels_sbj_pos_int32'] = binary_labels_sbj_pos.astype(np.int32, copy=False) 160 | prd_pos_labels_sbj_pos = roidb['max_prd_classes'][sbj_pos_pair_pos_inds] 161 | prd_labels_sbj_pos = np.zeros(sbj_pos_inds.size, dtype=np.int32) 162 | prd_labels_sbj_pos[:sbj_pos_pair_pos_inds.size] = prd_pos_labels_sbj_pos + 1 163 | blob_dict['prd_labels_sbj_pos_int32'] = prd_labels_sbj_pos.astype(np.int32, copy=False) 164 | sbj_labels_sbj_pos = roidb['max_sbj_classes'][sbj_pos_inds] + 1 165 | # 1. set all obj labels > 0 166 | obj_labels_sbj_pos = roidb['max_obj_classes'][sbj_pos_inds] + 1 167 | # 2. 
find those negative obj 168 | max_obj_overlaps_sbj_pos = roidb['max_obj_overlaps'][sbj_pos_inds] 169 | obj_neg_inds_sbj_pos = np.where(max_obj_overlaps_sbj_pos < cfg.TRAIN.FG_THRESH)[0] 170 | obj_labels_sbj_pos[obj_neg_inds_sbj_pos] = 0 171 | blob_dict['sbj_labels_sbj_pos_int32'] = sbj_labels_sbj_pos.astype(np.int32, copy=False) 172 | blob_dict['obj_labels_sbj_pos_int32'] = obj_labels_sbj_pos.astype(np.int32, copy=False) 173 | # this is for freq bias in RelDN 174 | blob_dict['sbj_labels_sbj_pos_fg_int32'] = roidb['max_sbj_classes'][sbj_pos_inds].astype(np.int32, copy=False) 175 | blob_dict['obj_labels_sbj_pos_fg_int32'] = roidb['max_obj_classes'][sbj_pos_inds].astype(np.int32, copy=False) 176 | 177 | sampled_sbj_boxes_sbj_pos = roidb['sbj_boxes'][sbj_pos_inds] 178 | sampled_obj_boxes_sbj_pos = roidb['obj_boxes'][sbj_pos_inds] 179 | # Scale rois and format as (batch_idx, x1, y1, x2, y2) 180 | sampled_sbj_rois_sbj_pos = sampled_sbj_boxes_sbj_pos * im_scale 181 | sampled_obj_rois_sbj_pos = sampled_obj_boxes_sbj_pos * im_scale 182 | repeated_batch_idx = batch_idx * blob_utils.ones((sbj_pos_inds.shape[0], 1)) 183 | sampled_sbj_rois_sbj_pos = np.hstack((repeated_batch_idx, sampled_sbj_rois_sbj_pos)) 184 | sampled_obj_rois_sbj_pos = np.hstack((repeated_batch_idx, sampled_obj_rois_sbj_pos)) 185 | blob_dict['sbj_rois_sbj_pos'] = sampled_sbj_rois_sbj_pos 186 | blob_dict['obj_rois_sbj_pos'] = sampled_obj_rois_sbj_pos 187 | sampled_rel_rois_sbj_pos = box_utils_rel.rois_union(sampled_sbj_rois_sbj_pos, sampled_obj_rois_sbj_pos) 188 | blob_dict['rel_rois_sbj_pos'] = sampled_rel_rois_sbj_pos 189 | _, inds_unique_sbj_pos, inds_reverse_sbj_pos = np.unique( 190 | sampled_sbj_rois_sbj_pos, return_index=True, return_inverse=True, axis=0) 191 | assert inds_reverse_sbj_pos.shape[0] == sampled_sbj_rois_sbj_pos.shape[0] 192 | blob_dict['inds_unique_sbj_pos'] = inds_unique_sbj_pos 193 | blob_dict['inds_reverse_sbj_pos'] = inds_reverse_sbj_pos 194 | if cfg.MODEL.USE_SPATIAL_FEAT: 195 | sampled_spt_feat_sbj_pos = box_utils_rel.get_spt_features( 196 | sampled_sbj_boxes_sbj_pos, sampled_obj_boxes_sbj_pos, roidb['width'], roidb['height']) 197 | blob_dict['spt_feat_sbj_pos'] = sampled_spt_feat_sbj_pos 198 | # obj 199 | # Here a naturally existing assumption is, each positive obj should have at least one positive sbj 200 | obj_pos_pair_pos_inds = np.where((max_pair_overlaps >= cfg.TRAIN.FG_THRESH))[0] 201 | obj_pos_sbj_pos_pair_neg_inds = np.where((max_obj_overlaps >= cfg.TRAIN.FG_THRESH) & 202 | (max_sbj_overlaps >= cfg.TRAIN.FG_THRESH) & 203 | (max_pair_overlaps < cfg.TRAIN.BG_THRESH_HI))[0] 204 | obj_pos_sbj_neg_pair_neg_inds = np.where((max_obj_overlaps >= cfg.TRAIN.FG_THRESH) & 205 | (max_sbj_overlaps < cfg.TRAIN.FG_THRESH) & 206 | (max_pair_overlaps < cfg.TRAIN.BG_THRESH_HI))[0] 207 | if obj_pos_pair_pos_inds.size > 0: 208 | obj_pos_pair_pos_inds = npr.choice( 209 | obj_pos_pair_pos_inds, 210 | size=int(min(nodes_per_image, obj_pos_pair_pos_inds.size)), 211 | replace=False) 212 | if obj_pos_sbj_pos_pair_neg_inds.size > 0: 213 | obj_pos_sbj_pos_pair_neg_inds = npr.choice( 214 | obj_pos_sbj_pos_pair_neg_inds, 215 | size=int(min(nodes_per_image, obj_pos_sbj_pos_pair_neg_inds.size)), 216 | replace=False) 217 | obj_pos_pair_neg_inds = obj_pos_sbj_pos_pair_neg_inds 218 | if nodes_per_image - obj_pos_sbj_pos_pair_neg_inds.size > 0 and obj_pos_sbj_neg_pair_neg_inds.size: 219 | obj_pos_sbj_neg_pair_neg_inds = npr.choice( 220 | obj_pos_sbj_neg_pair_neg_inds, 221 | size=int(min(nodes_per_image - 
obj_pos_sbj_pos_pair_neg_inds.size, obj_pos_sbj_neg_pair_neg_inds.size)), 222 | replace=False) 223 | obj_pos_pair_neg_inds = np.append(obj_pos_pair_neg_inds, obj_pos_sbj_neg_pair_neg_inds) 224 | obj_pos_inds = np.append(obj_pos_pair_pos_inds, obj_pos_pair_neg_inds) 225 | binary_labels_obj_pos = np.zeros(obj_pos_inds.size, dtype=np.int32) 226 | binary_labels_obj_pos[:obj_pos_pair_pos_inds.size] = 1 227 | blob_dict['binary_labels_obj_pos_int32'] = binary_labels_obj_pos.astype(np.int32, copy=False) 228 | prd_pos_labels_obj_pos = roidb['max_prd_classes'][obj_pos_pair_pos_inds] 229 | prd_labels_obj_pos = np.zeros(obj_pos_inds.size, dtype=np.int32) 230 | prd_labels_obj_pos[:obj_pos_pair_pos_inds.size] = prd_pos_labels_obj_pos + 1 231 | blob_dict['prd_labels_obj_pos_int32'] = prd_labels_obj_pos.astype(np.int32, copy=False) 232 | obj_labels_obj_pos = roidb['max_obj_classes'][obj_pos_inds] + 1 233 | # 1. set all sbj labels > 0 234 | sbj_labels_obj_pos = roidb['max_sbj_classes'][obj_pos_inds] + 1 235 | # 2. find those negative sbj 236 | max_sbj_overlaps_obj_pos = roidb['max_sbj_overlaps'][obj_pos_inds] 237 | sbj_neg_inds_obj_pos = np.where(max_sbj_overlaps_obj_pos < cfg.TRAIN.FG_THRESH)[0] 238 | sbj_labels_obj_pos[sbj_neg_inds_obj_pos] = 0 239 | blob_dict['sbj_labels_obj_pos_int32'] = sbj_labels_obj_pos.astype(np.int32, copy=False) 240 | blob_dict['obj_labels_obj_pos_int32'] = obj_labels_obj_pos.astype(np.int32, copy=False) 241 | # this is for freq bias in RelDN 242 | blob_dict['sbj_labels_obj_pos_fg_int32'] = roidb['max_sbj_classes'][obj_pos_inds].astype(np.int32, copy=False) 243 | blob_dict['obj_labels_obj_pos_fg_int32'] = roidb['max_obj_classes'][obj_pos_inds].astype(np.int32, copy=False) 244 | 245 | sampled_sbj_boxes_obj_pos = roidb['sbj_boxes'][obj_pos_inds] 246 | sampled_obj_boxes_obj_pos = roidb['obj_boxes'][obj_pos_inds] 247 | # Scale rois and format as (batch_idx, x1, y1, x2, y2) 248 | sampled_sbj_rois_obj_pos = sampled_sbj_boxes_obj_pos * im_scale 249 | sampled_obj_rois_obj_pos = sampled_obj_boxes_obj_pos * im_scale 250 | repeated_batch_idx = batch_idx * blob_utils.ones((obj_pos_inds.shape[0], 1)) 251 | sampled_sbj_rois_obj_pos = np.hstack((repeated_batch_idx, sampled_sbj_rois_obj_pos)) 252 | sampled_obj_rois_obj_pos = np.hstack((repeated_batch_idx, sampled_obj_rois_obj_pos)) 253 | blob_dict['sbj_rois_obj_pos'] = sampled_sbj_rois_obj_pos 254 | blob_dict['obj_rois_obj_pos'] = sampled_obj_rois_obj_pos 255 | sampled_rel_rois_obj_pos = box_utils_rel.rois_union(sampled_sbj_rois_obj_pos, sampled_obj_rois_obj_pos) 256 | blob_dict['rel_rois_obj_pos'] = sampled_rel_rois_obj_pos 257 | _, inds_unique_obj_pos, inds_reverse_obj_pos = np.unique( 258 | sampled_obj_rois_obj_pos, return_index=True, return_inverse=True, axis=0) 259 | assert inds_reverse_obj_pos.shape[0] == sampled_obj_rois_obj_pos.shape[0] 260 | blob_dict['inds_unique_obj_pos'] = inds_unique_obj_pos 261 | blob_dict['inds_reverse_obj_pos'] = inds_reverse_obj_pos 262 | if cfg.MODEL.USE_SPATIAL_FEAT: 263 | sampled_spt_feat_obj_pos = box_utils_rel.get_spt_features( 264 | sampled_sbj_boxes_obj_pos, sampled_obj_boxes_obj_pos, roidb['width'], roidb['height']) 265 | blob_dict['spt_feat_obj_pos'] = sampled_spt_feat_obj_pos 266 | 267 | return blob_dict 268 | 269 | 270 | def _add_rel_multilevel_rois(blobs): 271 | """By default training RoIs are added for a single feature map level only. 272 | When using FPN, the RoIs must be distributed over different FPN levels 273 | according the level assignment heuristic (see: modeling.FPN. 
274 | map_rois_to_fpn_levels). 275 | """ 276 | lvl_min = cfg.FPN.ROI_MIN_LEVEL 277 | lvl_max = cfg.FPN.ROI_MAX_LEVEL 278 | 279 | def _distribute_rois_over_fpn_levels(rois_blob_names): 280 | """Distribute rois over the different FPN levels.""" 281 | # Get target level for each roi 282 | # Recall blob rois are in (batch_idx, x1, y1, x2, y2) format, hence take 283 | # the box coordinates from columns 1:5 284 | lowest_target_lvls = None 285 | for rois_blob_name in rois_blob_names: 286 | target_lvls = fpn_utils.map_rois_to_fpn_levels( 287 | blobs[rois_blob_name][:, 1:5], lvl_min, lvl_max) 288 | if lowest_target_lvls is None: 289 | lowest_target_lvls = target_lvls 290 | else: 291 | lowest_target_lvls = np.minimum(lowest_target_lvls, target_lvls) 292 | for rois_blob_name in rois_blob_names: 293 | # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl> 294 | fpn_utils.add_multilevel_roi_blobs( 295 | blobs, rois_blob_name, blobs[rois_blob_name], lowest_target_lvls, lvl_min, 296 | lvl_max) 297 | 298 | _distribute_rois_over_fpn_levels(['sbj_rois']) 299 | _distribute_rois_over_fpn_levels(['obj_rois']) 300 | _distribute_rois_over_fpn_levels(['rel_rois']) 301 | if cfg.MODEL.USE_NODE_CONTRASTIVE_LOSS or cfg.MODEL.USE_NODE_CONTRASTIVE_SO_AWARE_LOSS or cfg.MODEL.USE_NODE_CONTRASTIVE_P_AWARE_LOSS: 302 | _distribute_rois_over_fpn_levels(['sbj_rois_sbj_pos']) 303 | _distribute_rois_over_fpn_levels(['obj_rois_sbj_pos']) 304 | _distribute_rois_over_fpn_levels(['rel_rois_sbj_pos']) 305 | _distribute_rois_over_fpn_levels(['sbj_rois_obj_pos']) 306 | _distribute_rois_over_fpn_levels(['obj_rois_obj_pos']) 307 | _distribute_rois_over_fpn_levels(['rel_rois_obj_pos']) 308 | --------------------------------------------------------------------------------
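To tie the pieces of the README above together, here is a compact end-to-end sketch (clone, build the CUDA ops, then evaluate one of the released VRD checkpoints); every command is taken from the sections above and assumes the data and trained_models folders are laid out as shown in the Directory Structure section:
```
git clone https://github.com/NVIDIA/ContrastiveLosses4VRD.git --recurse-submodules
cd ContrastiveLosses4VRD
(cd Detectron_pytorch/lib && sh make.sh)
(cd lib && sh make.sh)
python ./tools/test_net_rel.py --dataset vrd --cfg configs/vrd/e2e_faster_rcnn_VGG16_16_epochs_vrd_v3_default_node_contrastive_loss_w_so_p_aware_margin_point2_so_weight_point5_IN_pretrained.yaml --load_ckpt trained_models/vrd_VGG16_IN_pretrained/model_step7559.pth --output_dir Outputs/vrd_VGG16_IN_pretrained --multi-gpu-testing --do_val
```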