├── .github └── FUNDING.yml ├── .gitignore ├── .vscode └── settings.json ├── LICENSE ├── Makefile ├── README.md ├── config.py ├── data ├── co_occour_count.npy └── stanford_filtered │ └── README.md ├── dataloaders ├── __init__.py ├── blob.py ├── image_transforms.py ├── mscoco.py └── visual_genome.py ├── docs ├── LICENSE.md ├── _config.yaml ├── _includes │ └── image.html ├── _layouts │ └── default.html ├── index.md ├── teaser.png └── upload.sh ├── install_package.sh ├── lib ├── __init__.py ├── draw_rectangles │ ├── draw_rectangles.c │ ├── draw_rectangles.pyx │ └── setup.py ├── evaluation │ ├── __init__.py │ ├── sg_eval.py │ ├── sg_eval_slow.py │ └── test_sg_eval.py ├── fpn │ ├── anchor_targets.py │ ├── box_intersections_cpu │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ └── setup.py │ ├── box_utils.py │ ├── generate_anchors.py │ ├── make.sh │ ├── nms │ │ ├── Makefile │ │ ├── build.py │ │ ├── functions │ │ │ └── nms.py │ │ └── src │ │ │ ├── cuda │ │ │ ├── Makefile │ │ │ ├── nms_kernel.cu │ │ │ └── nms_kernel.h │ │ │ ├── nms_cuda.c │ │ │ └── nms_cuda.h │ ├── proposal_assignments │ │ ├── proposal_assignments_det.py │ │ ├── proposal_assignments_gtbox.py │ │ ├── proposal_assignments_postnms.py │ │ ├── proposal_assignments_rel.py │ │ └── rel_assignments.py │ └── roi_align │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── _ext │ │ ├── __init__.py │ │ └── roi_align │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ ├── __init__.py │ │ └── roi_align.py │ │ ├── modules │ │ ├── __init__.py │ │ └── roi_align.py │ │ └── src │ │ ├── cuda │ │ ├── Makefile │ │ ├── roi_align_kernel.cu │ │ └── roi_align_kernel.h │ │ ├── roi_align_cuda.c │ │ └── roi_align_cuda.h ├── get_dataset_counts.py ├── get_union_boxes.py ├── lstm │ ├── __init__.py │ ├── decoder_rnn.py │ └── highway_lstm_cuda │ │ ├── __init__.py │ │ ├── _ext │ │ ├── __init__.py │ │ └── highway_lstm_layer │ │ │ └── __init__.py │ │ ├── alternating_highway_lstm.py │ │ ├── build.py │ │ ├── make.sh │ │ └── src │ │ ├── highway_lstm_cuda.c │ │ ├── highway_lstm_cuda.h │ │ ├── highway_lstm_kernel.cu │ │ └── highway_lstm_kernel.h ├── object_detector.py ├── pytorch_misc.py ├── rel_model.py ├── rel_model_stanford.py ├── resnet.py ├── sparse_targets.py ├── surgery.py ├── tree_lstm │ ├── __init__.py │ ├── decoder_tree_lstm.py │ ├── def_tree.py │ ├── draw_tree.py │ ├── gen_tree.py │ ├── graph_to_tree.py │ ├── tree_lstm.py │ └── tree_utils.py └── word_vectors.py ├── models ├── eval_rel_count.py ├── eval_rels.py ├── train_detector.py └── train_rels.py └── scripts ├── eval_models.sh ├── pretrain_detector.sh ├── refine_for_detection.sh ├── train_models_sgcls.sh ├── train_stanford.sh └── train_vctreenet.sh /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [KaihuaTang] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 12 | polar: # Replace with a single Polar username 13 | buy_me_a_coffee: tkhchipaomg 14 | thanks_dev: # Replace with a single 
thanks.dev username 15 | custom: ['https://kaihuatang.github.io/donate'] 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # Jupyter Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # SageMath parsed files 79 | *.sage.py 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | .venv 86 | venv/ 87 | ENV/ 88 | 89 | # Spyder project settings 90 | .spyderproject 91 | .spyproject 92 | 93 | # Rope project settings 94 | .ropeproject 95 | 96 | # mkdocs documentation 97 | /site 98 | 99 | # mypy 100 | .mypy_cache/ 101 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": false 3 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Rowan Zellers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export PATH := /usr/local/cuda-9.0/bin:$(PATH) 2 | 3 | all: draw_rectangles box_intersections nms roi_align lstm 4 | 5 | draw_rectangles: 6 | cd lib/draw_rectangles; python setup.py build_ext --inplace 7 | box_intersections: 8 | cd lib/fpn/box_intersections_cpu; python setup.py build_ext --inplace 9 | nms: 10 | cd lib/fpn/nms; make 11 | roi_align: 12 | cd lib/fpn/roi_align; make 13 | lstm: 14 | cd lib/lstm/highway_lstm_cuda; bash make.sh -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VCTree-Scene-Graph-Generation 2 | 3 | **If you like our work, and want to start your own scene graph generation project, you might be interested in our new SGG codebase: [Scene-Graph-Benchmark.pytorch](https://github.com/KaihuaTang/Scene-Graph-Benchmark.pytorch). It's much easier to follow, and provides state-of-the-art baseline models.** 4 | 5 | Code for the Scene Graph Generation part of the CVPR 2019 oral paper "[Learning to Compose Dynamic Tree Structures for Visual Contexts][0]". As for the VQA part of this paper, please refer to [KaihuaTang/VCTree-Visual-Question-Answering][6]. 6 | 7 | UGLY CODE WARNING! UGLY CODE WARNING! UGLY CODE WARNING! 8 | 9 | The code is directly modified from the project [rowanz/neural-motifs][1]. Most of the code for the proposed VCTree is located under lib/tree_lstm/*, and if you hit any problem that prevents you from running the project, you can check the issues under [rowanz/neural-motifs][1] first. 10 | 11 | **If my open source projects have inspired you, giving me some sponsorship would be a great help to my subsequent open source work.** 12 | [Support my subsequent open source work❤️🙏](https://kaihuatang.github.io/donate.html) 13 | 14 | # Dependencies 15 | - You may follow these commands to set up the environment on an Ubuntu system 16 | ``` 17 | Install Anaconda 18 | conda update -n base conda 19 | conda create -n motif pip python=3.6 20 | conda install pytorch=0.3 torchvision cuda90 -c pytorch 21 | bash install_package.sh 22 | ``` 23 | 24 | # Prepare Dataset and Setup 25 | 26 | 0. Please follow the [instructions][2] under ./data/stanford_filtered/ to download the dataset and put the files in the proper locations. 27 | 28 | 1. Update the config file with the dataset paths. Specifically: 29 | - Visual Genome (the VG_100K folder, image_data.json, VG-SGG.h5, and VG-SGG-dicts.json). See data/stanford_filtered/README.md for the steps I used to download these. 30 | - You'll also need to fix your PYTHONPATH: ```export PYTHONPATH=/home/YourName/ThePathOfYourProject``` 31 | 32 | 2. Compile everything. Run ```make``` in the main directory: this compiles the bilinear interpolation operation for the RoIs. 33 | 34 | 3. Pretrain VG detection. The old version involved pretraining on COCO as well, but we got rid of that for simplicity. Run ./scripts/pretrain_detector.sh. Note: you might have to modify the learning rate and batch size, particularly if you don't have 3 Titan X GPUs (which is what I used).
[You can also download the pretrained detector checkpoint here.](https://drive.google.com/open?id=11zKRr2OF5oclFL47kjFYBOxScotQzArX) Note that this detector model is the default initialization of all VCTree models, so when you download this checkpoint, you need to change the "-ckpt THE_PATH_OF_INITIAL_CHECKPOINT_MODEL" entry under ./scripts/train_vctreenet.sh 36 | 37 | 38 | # How to Train / Evaluation 39 | 0. Note that most of the parameters are in config.py. The training stages and settings are controlled through ./scripts/train_vctreenet.sh. Each command line in train_vctreenet.sh needs to manually specify the "-ckpt" model (initial parameters) and the "-save_dir" path where the model will be saved. Since we use a hybrid learning strategy, each task (predcls/sgcls/sgdet) has two options, for the supervised stage and the reinforcement finetuning stage, respectively. When iteratively switching stages, the -ckpt PATH should point to the previous -save_dir PATH. The first supervised stage is initialized with the [detector checkpoint](https://drive.google.com/open?id=11zKRr2OF5oclFL47kjFYBOxScotQzArX) mentioned above. 40 | 41 | 1. Train VG predicate classification (predcls) 42 | - stage 1 (supervised stage of hybrid learning): run ./scripts/train_vctreenet.sh 5 43 | - stage 2 (reinforcement finetuning stage of hybrid learning): run ./scripts/train_vctreenet.sh 4 44 | - (By default, it will run on GPU 2; you can modify CUDA_VISIBLE_DEVICES in train_vctreenet.sh.) 45 | - The model will be saved under "-save_dir checkpoints/THE_NAME_YOU_WILL_SAVE_THE_MODEL" 46 | 47 | 2. Train VG scene graph classification (sgcls) 48 | - stage 1 (supervised stage of hybrid learning): run ./scripts/train_vctreenet.sh 3 49 | - stage 2 (reinforcement finetuning stage of hybrid learning): run ./scripts/train_vctreenet.sh 2 50 | - (By default, it will run on GPU 2; you can modify CUDA_VISIBLE_DEVICES in train_vctreenet.sh.) 51 | - The model will be saved under "-save_dir checkpoints/THE_NAME_YOU_WILL_SAVE_THE_MODEL" 52 | 53 | 3. Train VG scene graph detection (sgdet) 54 | - stage 1 (supervised stage of hybrid learning): run ./scripts/train_vctreenet.sh 1 55 | - stage 2 (reinforcement finetuning stage of hybrid learning): run ./scripts/train_vctreenet.sh 0 56 | - (By default, it will run on GPU 2; you can modify CUDA_VISIBLE_DEVICES in train_vctreenet.sh.) 57 | - The model will be saved under "-save_dir checkpoints/THE_NAME_YOU_WILL_SAVE_THE_MODEL" 58 | 59 | 4. Evaluate predicate classification (predcls): 60 | - run ./scripts/eval_models.sh 0 61 | - OR you can simply download our predcls checkpoint: [VCTree/PredCls][3]. 62 | 63 | 5. Evaluate scene graph classification (sgcls): 64 | - run ./scripts/eval_models.sh 1 65 | - OR you can simply download our sgcls checkpoint: [VCTree/SGCls][4]. 66 | 67 | 6. Evaluate scene graph detection (sgdet): 68 | - run ./scripts/eval_models.sh 2 69 | - OR you can simply download our sgdet checkpoint: [VCTree/SGDET][5]. 70 | 71 | 72 | # Other Things You Need To Know 73 | - When you evaluate your model, three metrics are printed: (1) "R@20/50/100" is what we use to report R@20/50/100 in our paper; (2) "cls avg" is the corresponding mean recall mR@20/50/100 proposed in our paper; (3) "total R" is another way of calculating recall that is used in some previous papers/projects, which is quite tricky and unfair, because it almost always gives a higher recall. 74 | - The reinforcement part of hybrid learning is still far from satisfactory.
Hence, if you are interested in improving our work, you may start with this part. 75 | 76 | # If this paper/project inspires your work, please cite our work: 77 | ``` 78 | @inproceedings{tang2018learning, 79 | title={Learning to Compose Dynamic Tree Structures for Visual Contexts}, 80 | author={Tang, Kaihua and Zhang, Hanwang and Wu, Baoyuan and Luo, Wenhan and Liu, Wei}, 81 | booktitle= "Conference on Computer Vision and Pattern Recognition", 82 | year={2019} 83 | } 84 | ``` 85 | 86 | [0]: https://arxiv.org/abs/1812.01880 87 | [1]: https://github.com/rowanz/neural-motifs 88 | [2]: https://github.com/rowanz/neural-motifs/tree/master/data/stanford_filtered 89 | [3]: https://onedrive.live.com/embed?cid=22376FFAD72C4B64&resid=22376FFAD72C4B64%21768059&authkey=APvRgmSUEvf4h8s 90 | [4]: https://onedrive.live.com/embed?cid=22376FFAD72C4B64&resid=22376FFAD72C4B64%21768060&authkey=ADI-fKq10g-niGk 91 | [5]: https://onedrive.live.com/embed?cid=22376FFAD72C4B64&resid=22376FFAD72C4B64%21768063&authkey=ADOyKfb6MGR5seI 92 | [6]: https://github.com/KaihuaTang/VCTree-Visual-Question-Answering 93 | -------------------------------------------------------------------------------- /data/co_occour_count.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/data/co_occour_count.npy -------------------------------------------------------------------------------- /data/stanford_filtered/README.md: -------------------------------------------------------------------------------- 1 | # Filtered data 2 | Adapted from [Danfei Xu](https://github.com/danfeiX/scene-graph-TF-release/blob/master/data_tools/README.md). 3 | 4 | Follow these steps to get the dataset set up. 5 | 1. Download the VG images [part1](https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip) [part2](https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip). Extract these images to a folder and link to them in `config.py` (e.g. currently I have `VG_IMAGES=data/visual_genome/VG_100K`). 6 | 2. Download the [VG metadata](http://cvgl.stanford.edu/scene-graph/VG/image_data.json). I recommend extracting it to this directory (e.g. `data/stanford_filtered/image_data.json`), or you can edit the path in `config.py`. 7 | 3. Download the [scene graphs](http://cvgl.stanford.edu/scene-graph/dataset/VG-SGG.h5) and extract them to `data/stanford_filtered/VG-SGG.h5` 8 | 4.
Download the [scene graph dataset metadata](http://cvgl.stanford.edu/scene-graph/dataset/VG-SGG-dicts.json) and extract it to `data/stanford_filtered/VG-SGG-dicts.json` 9 | -------------------------------------------------------------------------------- /dataloaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/dataloaders/__init__.py -------------------------------------------------------------------------------- /dataloaders/blob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data blob, hopefully to make collating less painful and MGPU training possible 3 | """ 4 | from lib.fpn.anchor_targets import anchor_target_layer 5 | import numpy as np 6 | import torch 7 | from torch.autograd import Variable 8 | 9 | 10 | class Blob(object): 11 | def __init__(self, mode='det', is_train=False, num_gpus=1, primary_gpu=0, batch_size_per_gpu=3): 12 | """ 13 | Initializes an empty Blob object. 14 | :param mode: 'det' for detection and 'rel' for det+relationship 15 | :param is_train: True if it's training 16 | """ 17 | assert mode in ('det', 'rel') 18 | assert num_gpus >= 1 19 | self.mode = mode 20 | self.is_train = is_train 21 | self.num_gpus = num_gpus 22 | self.batch_size_per_gpu = batch_size_per_gpu 23 | self.primary_gpu = primary_gpu 24 | 25 | self.imgs = [] # [num_images, 3, IM_SCALE, IM_SCALE] array 26 | self.im_sizes = [] # [num_images, 4] array of (h, w, scale, num_valid_anchors) 27 | self.all_anchor_inds = [] # [all_anchors, 2] array of (img_ind, anchor_idx). Only has valid 28 | # boxes (meaning some are gonna get cut out) 29 | self.all_anchors = [] # [num_im, IM_SCALE/4, IM_SCALE/4, num_anchors, 4] shapes. Anchors outside get squashed 30 | # to 0 31 | self.gt_boxes = [] # [num_gt, 4] boxes 32 | self.gt_classes = [] # [num_gt,2] array of img_ind, class 33 | self.gt_rels = [] # [num_rels, 3]. Each row is (gtbox0, gtbox1, rel). 34 | 35 | self.gt_sents = [] 36 | self.gt_nodes = [] 37 | self.sent_lengths = [] 38 | 39 | self.train_anchor_labels = [] # [train_anchors, 5] array of (img_ind, h, w, A, labels) 40 | self.train_anchors = [] # [train_anchors, 8] shapes with anchor, target 41 | 42 | self.train_anchor_inds = None # This will be split into GPUs, just (img_ind, h, w, A). 
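        # Typical life cycle of a Blob: append() is called once per image in the batch,
        # reduce() then concatenates everything into flat tensors plus per-GPU chunk sizes,
        # and scatter() finally distributes those chunks across the GPUs.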
43 | 44 | self.batch_size = None 45 | self.gt_box_chunks = None 46 | self.anchor_chunks = None 47 | self.train_chunks = None 48 | self.proposal_chunks = None 49 | self.proposals = [] 50 | 51 | @property 52 | def is_flickr(self): 53 | return self.mode == 'flickr' 54 | 55 | @property 56 | def is_rel(self): 57 | return self.mode == 'rel' 58 | 59 | @property 60 | def volatile(self): 61 | return not self.is_train 62 | 63 | def append(self, d): 64 | """ 65 | Adds a single image to the blob 66 | :param datom: 67 | :return: 68 | """ 69 | i = len(self.imgs) 70 | self.imgs.append(d['img']) 71 | 72 | h, w, scale = d['img_size'] 73 | 74 | # all anchors 75 | self.im_sizes.append((h, w, scale)) 76 | 77 | gt_boxes_ = d['gt_boxes'].astype(np.float32) * d['scale'] 78 | self.gt_boxes.append(gt_boxes_) 79 | 80 | self.gt_classes.append(np.column_stack(( 81 | i * np.ones(d['gt_classes'].shape[0], dtype=np.int64), 82 | d['gt_classes'], 83 | ))) 84 | 85 | # Add relationship info 86 | if self.is_rel: 87 | self.gt_rels.append(np.column_stack(( 88 | i * np.ones(d['gt_relations'].shape[0], dtype=np.int64), 89 | d['gt_relations']))) 90 | 91 | # Augment with anchor targets 92 | if self.is_train: 93 | train_anchors_, train_anchor_inds_, train_anchor_targets_, train_anchor_labels_ = \ 94 | anchor_target_layer(gt_boxes_, (h, w)) 95 | 96 | self.train_anchors.append(np.hstack((train_anchors_, train_anchor_targets_))) 97 | 98 | self.train_anchor_labels.append(np.column_stack(( 99 | i * np.ones(train_anchor_inds_.shape[0], dtype=np.int64), 100 | train_anchor_inds_, 101 | train_anchor_labels_, 102 | ))) 103 | 104 | if 'proposals' in d: 105 | self.proposals.append(np.column_stack((i * np.ones(d['proposals'].shape[0], dtype=np.float32), 106 | d['scale'] * d['proposals'].astype(np.float32)))) 107 | 108 | 109 | 110 | def _chunkize(self, datom, tensor=torch.LongTensor): 111 | """ 112 | Turn data list into chunks, one per GPU 113 | :param datom: List of lists of numpy arrays that will be concatenated. 114 | :return: 115 | """ 116 | chunk_sizes = [0] * self.num_gpus 117 | for i in range(self.num_gpus): 118 | for j in range(self.batch_size_per_gpu): 119 | chunk_sizes[i] += datom[i * self.batch_size_per_gpu + j].shape[0] 120 | return Variable(tensor(np.concatenate(datom, 0)), volatile=self.volatile), chunk_sizes 121 | 122 | def reduce(self): 123 | """ Merges all the detections into flat lists + numbers of how many are in each""" 124 | if len(self.imgs) != self.batch_size_per_gpu * self.num_gpus: 125 | raise ValueError("Wrong batch size? 
imgs len {} bsize/gpu {} numgpus {}".format( 126 | len(self.imgs), self.batch_size_per_gpu, self.num_gpus 127 | )) 128 | 129 | self.imgs = Variable(torch.stack(self.imgs, 0), volatile=self.volatile) 130 | self.im_sizes = np.stack(self.im_sizes).reshape( 131 | (self.num_gpus, self.batch_size_per_gpu, 3)) 132 | 133 | if self.is_rel: 134 | self.gt_rels, self.gt_rel_chunks = self._chunkize(self.gt_rels) 135 | 136 | self.gt_boxes, self.gt_box_chunks = self._chunkize(self.gt_boxes, tensor=torch.FloatTensor) 137 | self.gt_classes, _ = self._chunkize(self.gt_classes) 138 | if self.is_train: 139 | self.train_anchor_labels, self.train_chunks = self._chunkize(self.train_anchor_labels) 140 | self.train_anchors, _ = self._chunkize(self.train_anchors, tensor=torch.FloatTensor) 141 | self.train_anchor_inds = self.train_anchor_labels[:, :-1].contiguous() 142 | 143 | if len(self.proposals) != 0: 144 | self.proposals, self.proposal_chunks = self._chunkize(self.proposals, tensor=torch.FloatTensor) 145 | 146 | 147 | 148 | def _scatter(self, x, chunk_sizes, dim=0): 149 | """ Helper function""" 150 | if self.num_gpus == 1: 151 | return x.cuda(self.primary_gpu, async=True) 152 | return torch.nn.parallel.scatter_gather.Scatter.apply( 153 | list(range(self.num_gpus)), chunk_sizes, dim, x) 154 | 155 | def scatter(self): 156 | """ Assigns everything to the GPUs""" 157 | self.imgs = self._scatter(self.imgs, [self.batch_size_per_gpu] * self.num_gpus) 158 | 159 | self.gt_classes_primary = self.gt_classes.cuda(self.primary_gpu, async=True) 160 | self.gt_boxes_primary = self.gt_boxes.cuda(self.primary_gpu, async=True) 161 | 162 | # Predcls might need these 163 | self.gt_classes = self._scatter(self.gt_classes, self.gt_box_chunks) 164 | self.gt_boxes = self._scatter(self.gt_boxes, self.gt_box_chunks) 165 | 166 | if self.is_train: 167 | 168 | self.train_anchor_inds = self._scatter(self.train_anchor_inds, 169 | self.train_chunks) 170 | self.train_anchor_labels = self.train_anchor_labels.cuda(self.primary_gpu, async=True) 171 | self.train_anchors = self.train_anchors.cuda(self.primary_gpu, async=True) 172 | 173 | if self.is_rel: 174 | self.gt_rels = self._scatter(self.gt_rels, self.gt_rel_chunks) 175 | else: 176 | if self.is_rel: 177 | self.gt_rels = self.gt_rels.cuda(self.primary_gpu, async=True) 178 | 179 | if self.proposal_chunks is not None: 180 | self.proposals = self._scatter(self.proposals, self.proposal_chunks) 181 | 182 | def __getitem__(self, index): 183 | """ 184 | Returns a tuple containing data 185 | :param index: Which GPU we're on, or 0 if no GPUs 186 | :return: If training: 187 | (image, im_size, img_start_ind, anchor_inds, anchors, gt_boxes, gt_classes, 188 | train_anchor_inds) 189 | test: 190 | (image, im_size, img_start_ind, anchor_inds, anchors) 191 | """ 192 | if index not in list(range(self.num_gpus)): 193 | raise ValueError("Out of bounds with index {} and {} gpus".format(index, self.num_gpus)) 194 | 195 | if self.is_rel: 196 | rels = self.gt_rels 197 | if index > 0 or self.num_gpus != 1: 198 | rels_i = rels[index] if self.is_rel else None 199 | elif self.is_flickr: 200 | rels = (self.gt_sents, self.gt_nodes) 201 | if index > 0 or self.num_gpus != 1: 202 | rels_i = (self.gt_sents[index], self.gt_nodes[index]) 203 | else: 204 | rels = None 205 | rels_i = None 206 | 207 | if self.proposal_chunks is None: 208 | proposals = None 209 | else: 210 | proposals = self.proposals 211 | 212 | if index == 0 and self.num_gpus == 1: 213 | image_offset = 0 214 | if self.is_train: 215 | return (self.imgs, 
self.im_sizes[0], image_offset, 216 | self.gt_boxes, self.gt_classes, rels, proposals, self.train_anchor_inds) 217 | return self.imgs, self.im_sizes[0], image_offset, self.gt_boxes, self.gt_classes, rels, proposals 218 | 219 | # Otherwise proposals is None 220 | assert proposals is None 221 | 222 | image_offset = self.batch_size_per_gpu * index 223 | # TODO: Return a namedtuple 224 | if self.is_train: 225 | return ( 226 | self.imgs[index], self.im_sizes[index], image_offset, 227 | self.gt_boxes[index], self.gt_classes[index], rels_i, None, self.train_anchor_inds[index]) 228 | return (self.imgs[index], self.im_sizes[index], image_offset, 229 | self.gt_boxes[index], self.gt_classes[index], rels_i, None) 230 | 231 | -------------------------------------------------------------------------------- /dataloaders/image_transforms.py: -------------------------------------------------------------------------------- 1 | # Some image transforms 2 | 3 | from PIL import Image, ImageOps, ImageFilter, ImageEnhance 4 | import numpy as np 5 | from random import randint 6 | # All of these need to be called on PIL imagez 7 | 8 | class SquarePad(object): 9 | def __call__(self, img): 10 | w, h = img.size 11 | img_padded = ImageOps.expand(img, border=(0, 0, max(h - w, 0), max(w - h, 0)), 12 | fill=(int(0.485 * 256), int(0.456 * 256), int(0.406 * 256))) 13 | return img_padded 14 | 15 | 16 | class Grayscale(object): 17 | """ 18 | Converts to grayscale (not always, sometimes). 19 | """ 20 | def __call__(self, img): 21 | factor = np.sqrt(np.sqrt(np.random.rand(1))) 22 | # print("gray {}".format(factor)) 23 | enhancer = ImageEnhance.Color(img) 24 | return enhancer.enhance(factor) 25 | 26 | 27 | class Brightness(object): 28 | """ 29 | Converts to grayscale (not always, sometimes). 30 | """ 31 | def __call__(self, img): 32 | factor = np.random.randn(1)/6+1 33 | factor = min(max(factor, 0.5), 1.5) 34 | # print("brightness {}".format(factor)) 35 | 36 | enhancer = ImageEnhance.Brightness(img) 37 | return enhancer.enhance(factor) 38 | 39 | 40 | class Contrast(object): 41 | """ 42 | Converts to grayscale (not always, sometimes). 43 | """ 44 | def __call__(self, img): 45 | factor = np.random.randn(1)/8+1.0 46 | factor = min(max(factor, 0.5), 1.5) 47 | # print("contrast {}".format(factor)) 48 | 49 | enhancer = ImageEnhance.Contrast(img) 50 | return enhancer.enhance(factor) 51 | 52 | 53 | class Hue(object): 54 | """ 55 | Converts to grayscale 56 | """ 57 | def __call__(self, img): 58 | # 30 seems good 59 | factor = int(np.random.randn(1)*8) 60 | factor = min(max(factor, -30), 30) 61 | factor = np.array(factor, dtype=np.uint8) 62 | 63 | hsv = np.array(img.convert('HSV')) 64 | hsv[:,:,0] += factor 65 | new_img = Image.fromarray(hsv, 'HSV').convert('RGB') 66 | 67 | return new_img 68 | 69 | 70 | class Sharpness(object): 71 | """ 72 | Converts to grayscale 73 | """ 74 | def __call__(self, img): 75 | factor = 1.0 + np.random.randn(1)/5 76 | # print("sharpness {}".format(factor)) 77 | enhancer = ImageEnhance.Sharpness(img) 78 | return enhancer.enhance(factor) 79 | 80 | 81 | def random_crop(img, boxes, box_scale, round_boxes=True, max_crop_fraction=0.1): 82 | """ 83 | Randomly crops the image 84 | :param img: PIL image 85 | :param boxes: Ground truth boxes 86 | :param box_scale: This is the scale that the boxes are at (e.g. 1024 wide). 
We'll preserve that ratio 87 | :param round_boxes: Set this to true if we're going to round the boxes to ints 88 | :return: Cropped image, new boxes 89 | """ 90 | 91 | w, h = img.size 92 | 93 | max_crop_w = int(w*max_crop_fraction) 94 | max_crop_h = int(h*max_crop_fraction) 95 | boxes_scaled = boxes * max(w,h) / box_scale 96 | max_to_crop_top = min(int(boxes_scaled[:, 1].min()), max_crop_h) 97 | max_to_crop_left = min(int(boxes_scaled[:, 0].min()), max_crop_w) 98 | max_to_crop_right = min(int(w - boxes_scaled[:, 2].max()), max_crop_w) 99 | max_to_crop_bottom = min(int(h - boxes_scaled[:, 3].max()), max_crop_h) 100 | 101 | crop_top = randint(0, max(max_to_crop_top, 0)) 102 | crop_left = randint(0, max(max_to_crop_left, 0)) 103 | crop_right = randint(0, max(max_to_crop_right, 0)) 104 | crop_bottom = randint(0, max(max_to_crop_bottom, 0)) 105 | img_cropped = img.crop((crop_left, crop_top, w - crop_right, h - crop_bottom)) 106 | 107 | new_boxes = box_scale / max(img_cropped.size) * np.column_stack( 108 | (boxes_scaled[:,0]-crop_left, boxes_scaled[:,1]-crop_top, boxes_scaled[:,2]-crop_left, boxes_scaled[:,3]-crop_top)) 109 | 110 | if round_boxes: 111 | new_boxes = np.round(new_boxes).astype(np.int32) 112 | return img_cropped, new_boxes 113 | 114 | 115 | class RandomOrder(object): 116 | """ Composes several transforms together in random order - or not at all! 117 | """ 118 | 119 | def __init__(self, transforms): 120 | self.transforms = transforms 121 | 122 | def __call__(self, img): 123 | if self.transforms is None: 124 | return img 125 | num_to_pick = np.random.choice(len(self.transforms)) 126 | if num_to_pick == 0: 127 | return img 128 | 129 | order = np.random.choice(len(self.transforms), size=num_to_pick, replace=False) 130 | for i in order: 131 | img = self.transforms[i](img) 132 | return img -------------------------------------------------------------------------------- /dataloaders/mscoco.py: -------------------------------------------------------------------------------- 1 | from config import COCO_PATH, IM_SCALE, BOX_SCALE 2 | import os 3 | from torch.utils.data import Dataset 4 | from pycocotools.coco import COCO 5 | from PIL import Image 6 | from lib.fpn.anchor_targets import anchor_target_layer 7 | from torchvision.transforms import Resize, Compose, ToTensor, Normalize 8 | from dataloaders.image_transforms import SquarePad, Grayscale, Brightness, Sharpness, Contrast, RandomOrder, Hue, random_crop 9 | import numpy as np 10 | from dataloaders.blob import Blob 11 | import torch 12 | 13 | class CocoDetection(Dataset): 14 | """ 15 | Adapted from the torchvision code 16 | """ 17 | 18 | def __init__(self, mode): 19 | """ 20 | :param mode: train2014 or val2014 21 | """ 22 | self.mode = mode 23 | self.root = os.path.join(COCO_PATH, mode) 24 | self.ann_file = os.path.join(COCO_PATH, 'annotations', 'instances_{}.json'.format(mode)) 25 | self.coco = COCO(self.ann_file) 26 | self.ids = [k for k in self.coco.imgs.keys() if len(self.coco.imgToAnns[k]) > 0] 27 | 28 | 29 | tform = [] 30 | if self.is_train: 31 | tform.append(RandomOrder([ 32 | Grayscale(), 33 | Brightness(), 34 | Contrast(), 35 | Sharpness(), 36 | Hue(), 37 | ])) 38 | 39 | tform += [ 40 | SquarePad(), 41 | Resize(IM_SCALE), 42 | ToTensor(), 43 | Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 44 | ] 45 | 46 | self.transform_pipeline = Compose(tform) 47 | self.ind_to_classes = ['__background__'] + [v['name'] for k, v in self.coco.cats.items()] 48 | # COCO inds are weird (84 inds in total but a bunch of numbers are 
skipped) 49 | self.id_to_ind = {coco_id:(ind+1) for ind, coco_id in enumerate(self.coco.cats.keys())} 50 | self.id_to_ind[0] = 0 51 | 52 | self.ind_to_id = {x:y for y,x in self.id_to_ind.items()} 53 | 54 | @property 55 | def is_train(self): 56 | return self.mode.startswith('train') 57 | 58 | def __getitem__(self, index): 59 | """ 60 | Args: 61 | index (int): Index 62 | 63 | Returns: entry dict 64 | """ 65 | img_id = self.ids[index] 66 | path = self.coco.loadImgs(img_id)[0]['file_name'] 67 | image_unpadded = Image.open(os.path.join(self.root, path)).convert('RGB') 68 | ann_ids = self.coco.getAnnIds(imgIds=img_id) 69 | anns = self.coco.loadAnns(ann_ids) 70 | gt_classes = np.array([self.id_to_ind[x['category_id']] for x in anns], dtype=np.int64) 71 | 72 | if np.any(gt_classes >= len(self.ind_to_classes)): 73 | raise ValueError("OH NO {}".format(index)) 74 | 75 | if len(anns) == 0: 76 | raise ValueError("Annotations should not be empty") 77 | # gt_boxes = np.array((0, 4), dtype=np.float32) 78 | # else: 79 | gt_boxes = np.array([x['bbox'] for x in anns], dtype=np.float32) 80 | 81 | if np.any(gt_boxes[:, [0,1]] < 0): 82 | raise ValueError("GT boxes empty columns") 83 | if np.any(gt_boxes[:, [2,3]] < 0): 84 | raise ValueError("GT boxes empty h/w") 85 | gt_boxes[:, [2, 3]] += gt_boxes[:, [0, 1]] 86 | 87 | # Rescale so that the boxes are at BOX_SCALE 88 | if self.is_train: 89 | image_unpadded, gt_boxes = random_crop(image_unpadded, 90 | gt_boxes * BOX_SCALE / max(image_unpadded.size), 91 | BOX_SCALE, 92 | round_boxes=False, 93 | ) 94 | else: 95 | # Seems a bit silly because we won't be using GT boxes then but whatever 96 | gt_boxes = gt_boxes * BOX_SCALE / max(image_unpadded.size) 97 | w, h = image_unpadded.size 98 | box_scale_factor = BOX_SCALE / max(w, h) 99 | 100 | # Optionally flip the image if we're doing training 101 | flipped = self.is_train and np.random.random() > 0.5 102 | if flipped: 103 | scaled_w = int(box_scale_factor * float(w)) 104 | image_unpadded = image_unpadded.transpose(Image.FLIP_LEFT_RIGHT) 105 | gt_boxes[:, [0, 2]] = scaled_w - gt_boxes[:, [2, 0]] 106 | 107 | img_scale_factor = IM_SCALE / max(w, h) 108 | if h > w: 109 | im_size = (IM_SCALE, int(w*img_scale_factor), img_scale_factor) 110 | elif h < w: 111 | im_size = (int(h*img_scale_factor), IM_SCALE, img_scale_factor) 112 | else: 113 | im_size = (IM_SCALE, IM_SCALE, img_scale_factor) 114 | 115 | entry = { 116 | 'img': self.transform_pipeline(image_unpadded), 117 | 'img_size': im_size, 118 | 'gt_boxes': gt_boxes, 119 | 'gt_classes': gt_classes, 120 | 'scale': IM_SCALE / BOX_SCALE, 121 | 'index': index, 122 | 'image_id': img_id, 123 | 'flipped': flipped, 124 | 'fn': path, 125 | } 126 | 127 | return entry 128 | 129 | @classmethod 130 | def splits(cls, *args, **kwargs): 131 | """ Helper method to generate splits of the dataset""" 132 | train = cls('train2014', *args, **kwargs) 133 | val = cls('val2014', *args, **kwargs) 134 | return train, val 135 | 136 | def __len__(self): 137 | return len(self.ids) 138 | 139 | 140 | def coco_collate(data, num_gpus=3, is_train=False): 141 | blob = Blob(mode='det', is_train=is_train, num_gpus=num_gpus, 142 | batch_size_per_gpu=len(data) // num_gpus) 143 | for d in data: 144 | blob.append(d) 145 | blob.reduce() 146 | return blob 147 | 148 | 149 | class CocoDataLoader(torch.utils.data.DataLoader): 150 | """ 151 | Iterates through the data, filtering out None, 152 | but also loads everything as a (cuda) variable 153 | """ 154 | # def __iter__(self): 155 | # for x in super(CocoDataLoader, 
self).__iter__(): 156 | # if isinstance(x, tuple) or isinstance(x, list): 157 | # yield tuple(y.cuda(async=True) if hasattr(y, 'cuda') else y for y in x) 158 | # else: 159 | # yield x.cuda(async=True) 160 | 161 | @classmethod 162 | def splits(cls, train_data, val_data, batch_size=3, num_workers=1, num_gpus=3, **kwargs): 163 | train_load = cls( 164 | dataset=train_data, 165 | batch_size=batch_size*num_gpus, 166 | shuffle=True, 167 | num_workers=num_workers, 168 | collate_fn=lambda x: coco_collate(x, num_gpus=num_gpus, is_train=True), 169 | drop_last=True, 170 | # pin_memory=True, 171 | **kwargs, 172 | ) 173 | val_load = cls( 174 | dataset=val_data, 175 | batch_size=batch_size*num_gpus, 176 | shuffle=False, 177 | num_workers=num_workers, 178 | collate_fn=lambda x: coco_collate(x, num_gpus=num_gpus, is_train=False), 179 | drop_last=True, 180 | # pin_memory=True, 181 | **kwargs, 182 | ) 183 | return train_load, val_load 184 | 185 | 186 | if __name__ == '__main__': 187 | train, val = CocoDetection.splits() 188 | gtbox = train[0]['gt_boxes'] 189 | img_size = train[0]['img_size'] 190 | anchor_strides, labels, bbox_targets = anchor_target_layer(gtbox, img_size) 191 | -------------------------------------------------------------------------------- /docs/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Heiswayi Nrird 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docs/_config.yaml: -------------------------------------------------------------------------------- 1 | exclude: [README.md, LICENSE.md] 2 | 3 | defaults: 4 | - values: 5 | layout: default 6 | -------------------------------------------------------------------------------- /docs/_includes/image.html: -------------------------------------------------------------------------------- 1 |
2 | {{ include.description }} 3 |
4 | 5 | -------------------------------------------------------------------------------- /docs/_layouts/default.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | {{ page.title }} 9 | 10 | 11 | 12 | 13 | 62 | 75 | 76 | 77 | 78 | 79 | {{ content }} 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | permalink: / 3 | title: Neural Motifs 4 | author: Rowan Zellers 5 | description: Scene Graph Parsing with Global Context (CVPR 2018) 6 | google_analytics_id: UA-84290243-3 7 | --- 8 | # Neural Motifs: Scene Graph Parsing with Global Context (CVPR 2018) 9 | 10 | ### by [Rowan Zellers](https://rowanzellers.com), [Mark Yatskar](https://homes.cs.washington.edu/~my89/), [Sam Thomson](https://http://samthomson.com/), [Yejin Choi](https://homes.cs.washington.edu/~yejin/) 11 | 12 | 13 | {% include image.html url="teaser.png" description="teaser" %} 14 | 15 | # Overview 16 | 17 | * In this work, we investigate the problem of producing structured graph representations of visual scenes. Similar to object detection, we must predict a box around each object. Here, we also need to predict an edge (with one of several labels, possibly `background`) between every ordered pair of boxes, producing a directed graph where the edges hopefully represent the semantics and interactions present in the scene. 18 | * We present an analysis of the [Visual Genome Scene Graphs dataset](http://visualgenome.org/). In particular: 19 | * Object labels (e.g. person, shirt) are highly predictive of edge labels (e.g. wearing), but **not vice versa**. 20 | * Over 90% of the edges in the dataset are non-semantic. 21 | * There is a significant amount of structure in the dataset, in the form of graph motifs (regularly appearing substructures). 22 | * Motivated by our analysis, we present a simple baseline that outperforms previous approaches. 23 | * We introduce Stacked Motif Networks (MotifNet), which is a novel architecture that is designed to capture higher order motifs in scene graphs. In doing so, it achieves a sizeable performance gain over prior state-of-the-art. 24 | 25 | # Read the paper! 26 | The old version of the paper is available at [arxiv link](https://arxiv.org/abs/1711.06640) - camera ready version coming soon! 27 | 28 | # Bibtex 29 | ``` 30 | @inproceedings{zellers2018scenegraphs, 31 | title={Neural Motifs: Scene Graph Parsing with Global Context}, 32 | author={Zellers, Rowan and Yatskar, Mark and Thomson, Sam and Choi, Yejin}, 33 | booktitle = "Conference on Computer Vision and Pattern Recognition", 34 | year={2018} 35 | } 36 | ``` 37 | 38 | # View some examples! 39 | 40 | Check out [this tool](https://rowanzellers.com/scenegraph2/) I made to visualize the scene graph predictions. Disclaimer: the predictions are from an earlier version of the model, but hopefully they're still helpful! 41 | 42 | # Code 43 | 44 | Visit the [`neural-motifs` GitHub repository](https://github.com/rowanz/neural-motifs) for our reference implementation and instructions for running our code. 45 | 46 | It is released under the MIT license. 
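To make the graph representation described in the overview above concrete, here is a minimal sketch of how a predicted scene graph can be stored as plain NumPy arrays (the label ids and variable names are hypothetical, not the exact format used by this codebase):

```
import numpy as np

# A toy scene graph with three detected objects and two labelled edges.
# Hypothetical label ids: 1 = person, 2 = shirt, 3 = horse;
# hypothetical predicate ids: 1 = wearing, 2 = riding.
boxes = np.array([[ 20,  30, 120, 220],   # person (x1, y1, x2, y2)
                  [ 40,  80, 110, 160],   # shirt
                  [ 10, 150, 300, 380]],  # horse
                 dtype=np.float32)
classes = np.array([1, 2, 3], dtype=np.int64)

# Each relation is a directed edge (subject index, object index, predicate id).
relations = np.array([[0, 1, 1],   # person -wearing-> shirt
                      [0, 2, 2]],  # person -riding-> horse
                     dtype=np.int64)

# Triplet view (subject label, predicate label, object label).
triplets = np.column_stack((classes[relations[:, 0]],
                            relations[:, 2],
                            classes[relations[:, 1]]))
print(triplets)  # [[1 1 2]
                 #  [1 2 3]]
```

Each row of `triplets` is one subject-predicate-object prediction; recall-style metrics for scene graph generation are computed over ranked lists of such triplets.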
47 | 48 | # Checkpoints available for download 49 | * [Pretrained Detector](https://drive.google.com/open?id=11zKRr2OF5oclFL47kjFYBOxScotQzArX) 50 | * [Motifnet-SGDet](https://drive.google.com/open?id=1thd_5uSamJQaXAPVGVOUZGAOfGCYZYmb) 51 | * [Motifnet-SGCls/PredCls](https://drive.google.com/open?id=12qziGKYjFD3LAnoy4zDT3bcg5QLC0qN6) 52 | 53 | # questions? 54 | 55 | Feel free to get in touch! My main website is at [rowanzellers.com](https://rowanzellers.com) 56 | -------------------------------------------------------------------------------- /docs/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/docs/teaser.png -------------------------------------------------------------------------------- /docs/upload.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | scp -r _site/* USERNAME@SITE:~/rowanzellers.com/neuralmotifs -------------------------------------------------------------------------------- /install_package.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | git clone https://github.com/waleedka/coco 4 | cd coco/PythonAPI/ 5 | make 6 | python setup.py build_ext install 7 | cd ../../ 8 | 9 | conda install h5py 10 | 11 | conda install matplotlib 12 | 13 | conda install pandas 14 | 15 | conda install dill 16 | 17 | conda install tqdm 18 | 19 | pip install overrides 20 | 21 | pip install scikit-image -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/__init__.py -------------------------------------------------------------------------------- /lib/draw_rectangles/draw_rectangles.pyx: -------------------------------------------------------------------------------- 1 | ###### 2 | # Draws rectangles 3 | ###### 4 | 5 | cimport cython 6 | import numpy as np 7 | cimport numpy as np 8 | 9 | DTYPE = np.float32 10 | ctypedef np.float32_t DTYPE_t 11 | 12 | def draw_union_boxes(bbox_pairs, pooling_size, padding=0): 13 | """ 14 | Draws union boxes for the image. 15 | :param box_pairs: [num_pairs, 8] 16 | :param fmap_size: Size of the original feature map 17 | :param stride: ratio between fmap size and original img (<1) 18 | :param pooling_size: resize everything to this size 19 | :return: [num_pairs, 2, pooling_size, pooling_size arr 20 | """ 21 | assert padding == 0, "Padding>0 not supported yet" 22 | return draw_union_boxes_c(bbox_pairs, pooling_size) 23 | 24 | cdef DTYPE_t minmax(DTYPE_t x): 25 | return min(max(x, 0), 1) 26 | 27 | cdef np.ndarray[DTYPE_t, ndim=4] draw_union_boxes_c( 28 | np.ndarray[DTYPE_t, ndim=2] box_pairs, unsigned int pooling_size): 29 | """ 30 | Parameters 31 | ---------- 32 | boxes: (N, 4) ndarray of float. 
everything has arbitrary ratios 33 | query_boxes: (K, 4) ndarray of float 34 | Returns 35 | ------- 36 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 37 | """ 38 | cdef unsigned int N = box_pairs.shape[0] 39 | 40 | cdef np.ndarray[DTYPE_t, ndim = 4] uboxes = np.zeros( 41 | (N, 2, pooling_size, pooling_size), dtype=DTYPE) 42 | cdef DTYPE_t x1_union, y1_union, x2_union, y2_union, w, h, x1_box, y1_box, x2_box, y2_box, y_contrib, x_contrib 43 | cdef unsigned int n, i, j, k 44 | 45 | for n in range(N): 46 | x1_union = min(box_pairs[n, 0], box_pairs[n, 4]) 47 | y1_union = min(box_pairs[n, 1], box_pairs[n, 5]) 48 | x2_union = max(box_pairs[n, 2], box_pairs[n, 6]) 49 | y2_union = max(box_pairs[n, 3], box_pairs[n, 7]) 50 | 51 | w = x2_union - x1_union 52 | h = y2_union - y1_union 53 | 54 | for i in range(2): 55 | # Now everything is in the range [0, pooling_size]. 56 | x1_box = (box_pairs[n, 0+4*i] - x1_union)*pooling_size / w 57 | y1_box = (box_pairs[n, 1+4*i] - y1_union)*pooling_size / h 58 | x2_box = (box_pairs[n, 2+4*i] - x1_union)*pooling_size / w 59 | y2_box = (box_pairs[n, 3+4*i] - y1_union)*pooling_size / h 60 | # print("{:.3f}, {:.3f}, {:.3f}, {:.3f}".format(x1_box, y1_box, x2_box, y2_box)) 61 | for j in range(pooling_size): 62 | y_contrib = minmax(j+1-y1_box)*minmax(y2_box-j) 63 | for k in range(pooling_size): 64 | x_contrib = minmax(k+1-x1_box)*minmax(x2_box-k) 65 | # print("j {} yc {} k {} xc {}".format(j, y_contrib, k, x_contrib)) 66 | uboxes[n,i,j,k] = x_contrib*y_contrib 67 | return uboxes 68 | -------------------------------------------------------------------------------- /lib/draw_rectangles/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="draw_rectangles_cython", ext_modules=cythonize('draw_rectangles.pyx'), include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /lib/evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/evaluation/__init__.py -------------------------------------------------------------------------------- /lib/evaluation/sg_eval_slow.py: -------------------------------------------------------------------------------- 1 | # JUST TO CHECK THAT IT IS EXACTLY THE SAME.................................. 
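# This module re-implements the R@K evaluation independently of sg_eval.py as a sanity
# check: predictions are formatted into (subject, predicate, object) triplets, ranked by
# the product of the subject, predicate and object scores, and a ground-truth triplet is
# counted as recalled at K if one of the top-K predictions matches its three labels and
# both of its boxes with an IoU of at least iou_thresh.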
2 | import numpy as np 3 | from config import MODES 4 | 5 | class BasicSceneGraphEvaluator: 6 | 7 | def __init__(self, mode): 8 | self.result_dict = {} 9 | self.mode = {'sgdet':'sg_det', 'sgcls':'sg_cls', 'predcls':'pred_cls'}[mode] 10 | 11 | self.result_dict = {} 12 | self.result_dict[self.mode + '_recall'] = {20:[], 50:[], 100:[]} 13 | 14 | 15 | @classmethod 16 | def all_modes(cls): 17 | evaluators = {m: cls(mode=m) for m in MODES} 18 | return evaluators 19 | def evaluate_scene_graph_entry(self, gt_entry, pred_entry, iou_thresh=0.5): 20 | 21 | roidb_entry = { 22 | 'max_overlaps': np.ones(gt_entry['gt_classes'].shape[0], dtype=np.int64), 23 | 'boxes': gt_entry['gt_boxes'], 24 | 'gt_relations': gt_entry['gt_relations'], 25 | 'gt_classes': gt_entry['gt_classes'], 26 | } 27 | sg_entry = { 28 | 'boxes': pred_entry['pred_boxes'], 29 | 'relations': pred_entry['pred_rels'], 30 | 'obj_scores': pred_entry['obj_scores'], 31 | 'rel_scores': pred_entry['rel_scores'], 32 | 'pred_classes': pred_entry['pred_classes'], 33 | } 34 | 35 | pred_triplets, triplet_boxes = \ 36 | eval_relation_recall(sg_entry, roidb_entry, 37 | self.result_dict, 38 | self.mode, 39 | iou_thresh=iou_thresh) 40 | return pred_triplets, triplet_boxes 41 | 42 | 43 | def save(self, fn): 44 | np.save(fn, self.result_dict) 45 | 46 | 47 | def print_stats(self): 48 | print('======================' + self.mode + '============================') 49 | for k, v in self.result_dict[self.mode + '_recall'].items(): 50 | print('R@%i: %f' % (k, np.mean(v))) 51 | 52 | def save(self, fn): 53 | np.save(fn, self.result_dict) 54 | 55 | def print_stats(self): 56 | print('======================' + self.mode + '============================') 57 | for k, v in self.result_dict[self.mode + '_recall'].items(): 58 | print('R@%i: %f' % (k, np.mean(v))) 59 | 60 | 61 | def eval_relation_recall(sg_entry, 62 | roidb_entry, 63 | result_dict, 64 | mode, 65 | iou_thresh): 66 | 67 | # gt 68 | gt_inds = np.where(roidb_entry['max_overlaps'] == 1)[0] 69 | gt_boxes = roidb_entry['boxes'][gt_inds].copy().astype(float) 70 | num_gt_boxes = gt_boxes.shape[0] 71 | gt_relations = roidb_entry['gt_relations'].copy() 72 | gt_classes = roidb_entry['gt_classes'].copy() 73 | 74 | num_gt_relations = gt_relations.shape[0] 75 | if num_gt_relations == 0: 76 | return (None, None) 77 | gt_class_scores = np.ones(num_gt_boxes) 78 | gt_predicate_scores = np.ones(num_gt_relations) 79 | gt_triplets, gt_triplet_boxes, _ = _triplet(gt_relations[:,2], 80 | gt_relations[:,:2], 81 | gt_classes, 82 | gt_boxes, 83 | gt_predicate_scores, 84 | gt_class_scores) 85 | 86 | # pred 87 | box_preds = sg_entry['boxes'] 88 | num_boxes = box_preds.shape[0] 89 | relations = sg_entry['relations'] 90 | classes = sg_entry['pred_classes'].copy() 91 | class_scores = sg_entry['obj_scores'].copy() 92 | 93 | num_relations = relations.shape[0] 94 | 95 | if mode =='pred_cls': 96 | # if predicate classification task 97 | # use ground truth bounding boxes 98 | assert(num_boxes == num_gt_boxes) 99 | classes = gt_classes 100 | class_scores = gt_class_scores 101 | boxes = gt_boxes 102 | elif mode =='sg_cls': 103 | assert(num_boxes == num_gt_boxes) 104 | # if scene graph classification task 105 | # use gt boxes, but predicted classes 106 | # classes = np.argmax(class_preds, 1) 107 | # class_scores = class_preds.max(axis=1) 108 | boxes = gt_boxes 109 | elif mode =='sg_det': 110 | # if scene graph detection task 111 | # use preicted boxes and predicted classes 112 | # classes = np.argmax(class_preds, 1) 113 | # class_scores = 
class_preds.max(axis=1) 114 | boxes = box_preds 115 | else: 116 | raise NotImplementedError('Incorrect Mode! %s' % mode) 117 | 118 | pred_triplets = np.column_stack(( 119 | classes[relations[:, 0]], 120 | relations[:,2], 121 | classes[relations[:, 1]], 122 | )) 123 | pred_triplet_boxes = np.column_stack(( 124 | boxes[relations[:, 0]], 125 | boxes[relations[:, 1]], 126 | )) 127 | relation_scores = np.column_stack(( 128 | class_scores[relations[:, 0]], 129 | sg_entry['rel_scores'], 130 | class_scores[relations[:, 1]], 131 | )).prod(1) 132 | 133 | sorted_inds = np.argsort(relation_scores)[::-1] 134 | # compue recall 135 | for k in result_dict[mode + '_recall']: 136 | this_k = min(k, num_relations) 137 | keep_inds = sorted_inds[:this_k] 138 | recall = _relation_recall(gt_triplets, 139 | pred_triplets[keep_inds,:], 140 | gt_triplet_boxes, 141 | pred_triplet_boxes[keep_inds,:], 142 | iou_thresh) 143 | result_dict[mode + '_recall'][k].append(recall) 144 | 145 | # for visualization 146 | return pred_triplets[sorted_inds, :], pred_triplet_boxes[sorted_inds, :] 147 | 148 | 149 | def _triplet(predicates, relations, classes, boxes, 150 | predicate_scores, class_scores): 151 | 152 | # format predictions into triplets 153 | assert(predicates.shape[0] == relations.shape[0]) 154 | num_relations = relations.shape[0] 155 | triplets = np.zeros([num_relations, 3]).astype(np.int32) 156 | triplet_boxes = np.zeros([num_relations, 8]).astype(np.int32) 157 | triplet_scores = np.zeros([num_relations]).astype(np.float32) 158 | for i in range(num_relations): 159 | triplets[i, 1] = predicates[i] 160 | sub_i, obj_i = relations[i,:2] 161 | triplets[i, 0] = classes[sub_i] 162 | triplets[i, 2] = classes[obj_i] 163 | triplet_boxes[i, :4] = boxes[sub_i, :] 164 | triplet_boxes[i, 4:] = boxes[obj_i, :] 165 | # compute triplet score 166 | score = class_scores[sub_i] 167 | score *= class_scores[obj_i] 168 | score *= predicate_scores[i] 169 | triplet_scores[i] = score 170 | return triplets, triplet_boxes, triplet_scores 171 | 172 | 173 | def _relation_recall(gt_triplets, pred_triplets, 174 | gt_boxes, pred_boxes, iou_thresh): 175 | 176 | # compute the R@K metric for a set of predicted triplets 177 | 178 | num_gt = gt_triplets.shape[0] 179 | num_correct_pred_gt = 0 180 | 181 | for gt, gt_box in zip(gt_triplets, gt_boxes): 182 | keep = np.zeros(pred_triplets.shape[0]).astype(bool) 183 | for i, pred in enumerate(pred_triplets): 184 | if gt[0] == pred[0] and gt[1] == pred[1] and gt[2] == pred[2]: 185 | keep[i] = True 186 | if not np.any(keep): 187 | continue 188 | boxes = pred_boxes[keep,:] 189 | sub_iou = iou(gt_box[:4], boxes[:,:4]) 190 | obj_iou = iou(gt_box[4:], boxes[:,4:]) 191 | inds = np.intersect1d(np.where(sub_iou >= iou_thresh)[0], 192 | np.where(obj_iou >= iou_thresh)[0]) 193 | if inds.size > 0: 194 | num_correct_pred_gt += 1 195 | return float(num_correct_pred_gt) / float(num_gt) 196 | 197 | 198 | def iou(gt_box, pred_boxes): 199 | # computer Intersection-over-Union between two sets of boxes 200 | ixmin = np.maximum(gt_box[0], pred_boxes[:,0]) 201 | iymin = np.maximum(gt_box[1], pred_boxes[:,1]) 202 | ixmax = np.minimum(gt_box[2], pred_boxes[:,2]) 203 | iymax = np.minimum(gt_box[3], pred_boxes[:,3]) 204 | iw = np.maximum(ixmax - ixmin + 1., 0.) 205 | ih = np.maximum(iymax - iymin + 1., 0.) 206 | inters = iw * ih 207 | 208 | # union 209 | uni = ((gt_box[2] - gt_box[0] + 1.) * (gt_box[3] - gt_box[1] + 1.) + 210 | (pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) * 211 | (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.) 
- inters) 212 | 213 | overlaps = inters / uni 214 | return overlaps 215 | -------------------------------------------------------------------------------- /lib/fpn/anchor_targets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generates anchor targets to train the detector. Does this during the collate step in training 3 | as it's much cheaper to do this on a separate thread. 4 | 5 | Heavily adapted from faster_rcnn/rpn_msr/anchor_target_layer.py. 6 | """ 7 | import numpy as np 8 | import numpy.random as npr 9 | 10 | from config import IM_SCALE, RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, \ 11 | RPN_BATCHSIZE, RPN_FG_FRACTION, ANCHOR_SIZE, ANCHOR_SCALES, ANCHOR_RATIOS 12 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps 13 | from lib.fpn.generate_anchors import generate_anchors 14 | 15 | 16 | def anchor_target_layer(gt_boxes, im_size, 17 | allowed_border=0): 18 | """ 19 | Assign anchors to ground-truth targets. Produces anchor classification 20 | labels and bounding-box regression targets. 21 | 22 | for each (H, W) location i 23 | generate 3 anchor boxes centered on cell i 24 | filter out-of-image anchors 25 | measure GT overlap 26 | 27 | :param gt_boxes: [x1, y1, x2, y2] boxes. These are assumed to be at the same scale as 28 | the image (IM_SCALE) 29 | :param im_size: Size of the image (h, w). This is assumed to be scaled to IM_SCALE 30 | """ 31 | if max(im_size) != IM_SCALE: 32 | raise ValueError("im size is {}".format(im_size)) 33 | h, w = im_size 34 | 35 | # Get the indices of the anchors in the feature map. 36 | # h, w, A, 4 37 | ans_np = generate_anchors(base_size=ANCHOR_SIZE, 38 | feat_stride=16, 39 | anchor_scales=ANCHOR_SCALES, 40 | anchor_ratios=ANCHOR_RATIOS, 41 | ) 42 | ans_np_flat = ans_np.reshape((-1, 4)) 43 | inds_inside = np.where( 44 | (ans_np_flat[:, 0] >= -allowed_border) & 45 | (ans_np_flat[:, 1] >= -allowed_border) & 46 | (ans_np_flat[:, 2] < w + allowed_border) & # width 47 | (ans_np_flat[:, 3] < h + allowed_border) # height 48 | )[0] 49 | good_ans_flat = ans_np_flat[inds_inside] 50 | if good_ans_flat.size == 0: 51 | raise ValueError("There were no good anchors for an image of size {} with boxes {}".format(im_size, gt_boxes)) 52 | 53 | # overlaps between the anchors and the gt boxes [num_anchors, num_gtboxes] 54 | overlaps = bbox_overlaps(good_ans_flat, gt_boxes) 55 | anchor_to_gtbox = overlaps.argmax(axis=1) 56 | max_overlaps = overlaps[np.arange(anchor_to_gtbox.shape[0]), anchor_to_gtbox] 57 | gtbox_to_anchor = overlaps.argmax(axis=0) 58 | gt_max_overlaps = overlaps[gtbox_to_anchor, np.arange(overlaps.shape[1])] 59 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 60 | 61 | # Good anchors are those that match SOMEWHERE within a decent tolerance 62 | # label: 1 is positive, 0 is negative, -1 is dont care. 
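    # Concretely: anchors whose best IoU with every GT box falls below RPN_NEGATIVE_OVERLAP
    # are labelled background (0); the best-matching anchor of each GT box, plus any anchor
    # with an IoU of at least RPN_POSITIVE_OVERLAP, are labelled foreground (1); all
    # remaining anchors keep the label -1 and are excluded from training.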
63 | # assign bg labels first so that positive labels can clobber them 64 | labels = (-1) * np.ones(overlaps.shape[0], dtype=np.int64) 65 | labels[max_overlaps < RPN_NEGATIVE_OVERLAP] = 0 66 | labels[gt_argmax_overlaps] = 1 67 | labels[max_overlaps >= RPN_POSITIVE_OVERLAP] = 1 68 | 69 | # subsample positive labels if we have too many 70 | num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE) 71 | fg_inds = np.where(labels == 1)[0] 72 | if len(fg_inds) > num_fg: 73 | labels[npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)] = -1 74 | 75 | # subsample negative labels if we have too many 76 | num_bg = RPN_BATCHSIZE - np.sum(labels == 1) 77 | bg_inds = np.where(labels == 0)[0] 78 | if len(bg_inds) > num_bg: 79 | labels[npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)] = -1 80 | # print("{} fg {} bg ratio{:.3f} inds inside {}".format(RPN_BATCHSIZE-num_bg, num_bg, (RPN_BATCHSIZE-num_bg)/RPN_BATCHSIZE, inds_inside.shape[0])) 81 | 82 | 83 | # Get the labels at the original size 84 | labels_unmap = (-1) * np.ones(ans_np_flat.shape[0], dtype=np.int64) 85 | labels_unmap[inds_inside] = labels 86 | 87 | # h, w, A 88 | labels_unmap_res = labels_unmap.reshape(ans_np.shape[:-1]) 89 | anchor_inds = np.column_stack(np.where(labels_unmap_res >= 0)) 90 | 91 | # These ought to be in the same order 92 | anchor_inds_flat = np.where(labels >= 0)[0] 93 | anchors = good_ans_flat[anchor_inds_flat] 94 | bbox_targets = gt_boxes[anchor_to_gtbox[anchor_inds_flat]] 95 | labels = labels[anchor_inds_flat] 96 | 97 | assert np.all(labels >= 0) 98 | 99 | 100 | # Anchors: [num_used, 4] 101 | # Anchor_inds: [num_used, 3] (h, w, A) 102 | # bbox_targets: [num_used, 4] 103 | # labels: [num_used] 104 | 105 | return anchors, anchor_inds, bbox_targets, labels 106 | -------------------------------------------------------------------------------- /lib/fpn/box_intersections_cpu/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(boxes, query_boxes): 16 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 17 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 18 | 19 | return bbox_overlaps_c(boxes_contig, query_contig) 20 | 21 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 22 | np.ndarray[DTYPE_t, ndim=2] boxes, 23 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 24 | """ 25 | Parameters 26 | ---------- 27 | boxes: (N, 4) ndarray of float 28 | query_boxes: (K, 4) ndarray of float 29 | Returns 30 | ------- 31 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 32 | """ 33 | cdef unsigned int N = boxes.shape[0] 34 | cdef unsigned int K = query_boxes.shape[0] 35 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 36 | cdef DTYPE_t iw, ih, box_area 37 | cdef DTYPE_t ua 38 | cdef unsigned int k, n 39 | for k in range(K): 40 | box_area = ( 41 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 42 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 43 | ) 44 | for n in range(N): 45 | iw = ( 46 | min(boxes[n, 2], query_boxes[k, 2]) - 47 | 
max(boxes[n, 0], query_boxes[k, 0]) + 1 48 | ) 49 | if iw > 0: 50 | ih = ( 51 | min(boxes[n, 3], query_boxes[k, 3]) - 52 | max(boxes[n, 1], query_boxes[k, 1]) + 1 53 | ) 54 | if ih > 0: 55 | ua = float( 56 | (boxes[n, 2] - boxes[n, 0] + 1) * 57 | (boxes[n, 3] - boxes[n, 1] + 1) + 58 | box_area - iw * ih 59 | ) 60 | overlaps[n, k] = iw * ih / ua 61 | return overlaps 62 | 63 | 64 | def bbox_intersections(boxes, query_boxes): 65 | cdef np.ndarray[DTYPE_t, ndim=2] boxes_contig = np.ascontiguousarray(boxes, dtype=DTYPE) 66 | cdef np.ndarray[DTYPE_t, ndim=2] query_contig = np.ascontiguousarray(query_boxes, dtype=DTYPE) 67 | 68 | return bbox_intersections_c(boxes_contig, query_contig) 69 | 70 | 71 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 72 | np.ndarray[DTYPE_t, ndim=2] boxes, 73 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 74 | """ 75 | For each query box compute the intersection ratio covered by boxes 76 | ---------- 77 | Parameters 78 | ---------- 79 | boxes: (N, 4) ndarray of float 80 | query_boxes: (K, 4) ndarray of float 81 | Returns 82 | ------- 83 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 84 | """ 85 | cdef unsigned int N = boxes.shape[0] 86 | cdef unsigned int K = query_boxes.shape[0] 87 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 88 | cdef DTYPE_t iw, ih, box_area 89 | cdef DTYPE_t ua 90 | cdef unsigned int k, n 91 | for k in range(K): 92 | box_area = ( 93 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 94 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 95 | ) 96 | for n in range(N): 97 | iw = ( 98 | min(boxes[n, 2], query_boxes[k, 2]) - 99 | max(boxes[n, 0], query_boxes[k, 0]) + 1 100 | ) 101 | if iw > 0: 102 | ih = ( 103 | min(boxes[n, 3], query_boxes[k, 3]) - 104 | max(boxes[n, 1], query_boxes[k, 1]) + 1 105 | ) 106 | if ih > 0: 107 | intersec[n, k] = iw * ih / box_area 108 | return intersec -------------------------------------------------------------------------------- /lib/fpn/box_intersections_cpu/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | 5 | setup(name="bbox_cython", ext_modules=cythonize('bbox.pyx'), include_dirs=[numpy.get_include()]) -------------------------------------------------------------------------------- /lib/fpn/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch.nn import functional as F 4 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps as bbox_overlaps_np 5 | from lib.fpn.box_intersections_cpu.bbox import bbox_intersections as bbox_intersections_np 6 | 7 | 8 | def bbox_loss(prior_boxes, deltas, gt_boxes, eps=1e-4, scale_before=1): 9 | """ 10 | Computes the loss for predicting the GT boxes from prior boxes 11 | :param prior_boxes: [num_boxes, 4] (x1, y1, x2, y2) 12 | :param deltas: [num_boxes, 4] (tx, ty, th, tw) 13 | :param gt_boxes: [num_boxes, 4] (x1, y1, x2, y2) 14 | :return: 15 | """ 16 | prior_centers = center_size(prior_boxes) #(cx, cy, w, h) 17 | gt_centers = center_size(gt_boxes) #(cx, cy, w, h) 18 | 19 | center_targets = (gt_centers[:, :2] - prior_centers[:, :2]) / prior_centers[:, 2:] 20 | size_targets = torch.log(gt_centers[:, 2:]) - torch.log(prior_centers[:, 2:]) 21 | all_targets = torch.cat((center_targets, size_targets), 1) 22 | 23 | loss = F.smooth_l1_loss(deltas, all_targets, size_average=False)/(eps + 
prior_centers.size(0)) 24 | 25 | return loss 26 | 27 | 28 | def bbox_preds(boxes, deltas): 29 | """ 30 | Converts "deltas" (predicted by the network) along with prior boxes 31 | into (x1, y1, x2, y2) representation. 32 | :param boxes: Prior boxes, represented as (x1, y1, x2, y2) 33 | :param deltas: Offsets (tx, ty, tw, th) 34 | :param box_strides [num_boxes,] distance apart between boxes. anchor box can't go more than 35 | \pm box_strides/2 from its current position. If None then we'll use the widths 36 | and heights 37 | :return: Transformed boxes 38 | """ 39 | 40 | if boxes.size(0) == 0: 41 | return boxes 42 | prior_centers = center_size(boxes) 43 | 44 | xys = prior_centers[:, :2] + prior_centers[:, 2:] * deltas[:, :2] 45 | 46 | whs = torch.exp(deltas[:, 2:]) * prior_centers[:, 2:] 47 | 48 | return point_form(torch.cat((xys, whs), 1)) 49 | 50 | 51 | def center_size(boxes): 52 | """ Convert prior_boxes to (cx, cy, w, h) 53 | representation for comparison to center-size form ground truth data. 54 | Args: 55 | boxes: (tensor) point_form boxes 56 | Return: 57 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 58 | """ 59 | wh = boxes[:, 2:] - boxes[:, :2] + 1.0 60 | 61 | if isinstance(boxes, np.ndarray): 62 | return np.column_stack((boxes[:, :2] + 0.5 * wh, wh)) 63 | return torch.cat((boxes[:, :2] + 0.5 * wh, wh), 1) 64 | 65 | 66 | def point_form(boxes): 67 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 68 | representation for comparison to point form ground truth data. 69 | Args: 70 | boxes: (tensor) center-size default boxes from priorbox layers. 71 | Return: 72 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 73 | """ 74 | if isinstance(boxes, np.ndarray): 75 | return np.column_stack((boxes[:, :2] - 0.5 * boxes[:, 2:], 76 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0))) 77 | return torch.cat((boxes[:, :2] - 0.5 * boxes[:, 2:], 78 | boxes[:, :2] + 0.5 * (boxes[:, 2:] - 2.0)), 1) # xmax, ymax 79 | 80 | 81 | ########################################################################### 82 | ### Torch Utils, creds to Max de Groot 83 | ########################################################################### 84 | 85 | def bbox_intersections(box_a, box_b): 86 | """ We resize both tensors to [A,B,2] without new malloc: 87 | [A,2] -> [A,1,2] -> [A,B,2] 88 | [B,2] -> [1,B,2] -> [A,B,2] 89 | Then we compute the area of intersect between box_a and box_b. 90 | Args: 91 | box_a: (tensor) bounding boxes, Shape: [A,4]. 92 | box_b: (tensor) bounding boxes, Shape: [B,4]. 93 | Return: 94 | (tensor) intersection area, Shape: [A,B]. 95 | """ 96 | if isinstance(box_a, np.ndarray): 97 | assert isinstance(box_b, np.ndarray) 98 | return bbox_intersections_np(box_a, box_b) 99 | A = box_a.size(0) 100 | B = box_b.size(0) 101 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 102 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 103 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 104 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 105 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 106 | return inter[:, :, 0] * inter[:, :, 1] 107 | 108 | 109 | def bbox_overlaps(box_a, box_b): 110 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 111 | is simply the intersection over union of two boxes. Here we operate on 112 | ground truth boxes and default boxes. 
113 | E.g.: 114 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 115 | Args: 116 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 117 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 118 | Return: 119 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 120 | """ 121 | if isinstance(box_a, np.ndarray): 122 | assert isinstance(box_b, np.ndarray) 123 | return bbox_overlaps_np(box_a, box_b) 124 | 125 | inter = bbox_intersections(box_a, box_b) 126 | area_a = ((box_a[:, 2] - box_a[:, 0] + 1.0) * 127 | (box_a[:, 3] - box_a[:, 1] + 1.0)).unsqueeze(1).expand_as(inter) # [A,B] 128 | area_b = ((box_b[:, 2] - box_b[:, 0] + 1.0) * 129 | (box_b[:, 3] - box_b[:, 1] + 1.0)).unsqueeze(0).expand_as(inter) # [A,B] 130 | union = area_a + area_b - inter 131 | return inter / union # [A,B] 132 | 133 | 134 | def nms_overlaps(boxes): 135 | """ get overlaps for each channel""" 136 | assert boxes.dim() == 3 137 | N = boxes.size(0) 138 | nc = boxes.size(1) 139 | max_xy = torch.min(boxes[:, None, :, 2:].expand(N, N, nc, 2), 140 | boxes[None, :, :, 2:].expand(N, N, nc, 2)) 141 | 142 | min_xy = torch.max(boxes[:, None, :, :2].expand(N, N, nc, 2), 143 | boxes[None, :, :, :2].expand(N, N, nc, 2)) 144 | 145 | inter = torch.clamp((max_xy - min_xy + 1.0), min=0) 146 | 147 | # n, n, 151 148 | inters = inter[:,:,:,0]*inter[:,:,:,1] 149 | boxes_flat = boxes.view(-1, 4) 150 | areas_flat = (boxes_flat[:,2]- boxes_flat[:,0]+1.0)*( 151 | boxes_flat[:,3]- boxes_flat[:,1]+1.0) 152 | areas = areas_flat.view(boxes.size(0), boxes.size(1)) 153 | union = -inters + areas[None] + areas[:, None] 154 | return inters / union 155 | 156 | -------------------------------------------------------------------------------- /lib/fpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from config import IM_SCALE 8 | 9 | import numpy as np 10 | 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | # array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | # [-167., -343., 184., 360.]]) 38 | 39 | def generate_anchors(base_size=16, feat_stride=16, anchor_scales=(8,16,32), anchor_ratios=(0.5,1,2)): 40 | """ A wrapper function to generate anchors given different scales 41 | Also return the number of anchors in variable 'length' 42 | """ 43 | anchors = generate_base_anchors(base_size=base_size, 44 | ratios=np.array(anchor_ratios), 45 | scales=np.array(anchor_scales)) 46 | A = anchors.shape[0] 47 | shift_x = np.arange(0, IM_SCALE // feat_stride) * feat_stride # Same as shift_x 48 | shift_x, shift_y = np.meshgrid(shift_x, shift_x) 49 | 50 | shifts = 
np.stack([shift_x, shift_y, shift_x, shift_y], -1) # h, w, 4 51 | all_anchors = shifts[:, :, None] + anchors[None, None] #h, w, A, 4 52 | return all_anchors 53 | 54 | # shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 55 | # K = shifts.shape[0] 56 | # # width changes faster, so here it is H, W, C 57 | # anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 58 | # anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 59 | # length = np.int32(anchors.shape[0]) 60 | 61 | 62 | def generate_base_anchors(base_size=16, ratios=[0.5, 1, 2], scales=2 ** np.arange(3, 6)): 63 | """ 64 | Generate anchor (reference) windows by enumerating aspect ratios X 65 | scales wrt a reference (0, 0, 15, 15) window. 66 | """ 67 | 68 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 69 | ratio_anchors = _ratio_enum(base_anchor, ratios) 70 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 71 | for i in range(ratio_anchors.shape[0])]) 72 | return anchors 73 | 74 | 75 | def _whctrs(anchor): 76 | """ 77 | Return width, height, x center, and y center for an anchor (window). 78 | """ 79 | 80 | w = anchor[2] - anchor[0] + 1 81 | h = anchor[3] - anchor[1] + 1 82 | x_ctr = anchor[0] + 0.5 * (w - 1) 83 | y_ctr = anchor[1] + 0.5 * (h - 1) 84 | return w, h, x_ctr, y_ctr 85 | 86 | 87 | def _mkanchors(ws, hs, x_ctr, y_ctr): 88 | """ 89 | Given a vector of widths (ws) and heights (hs) around a center 90 | (x_ctr, y_ctr), output a set of anchors (windows). 91 | """ 92 | 93 | ws = ws[:, np.newaxis] 94 | hs = hs[:, np.newaxis] 95 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 96 | y_ctr - 0.5 * (hs - 1), 97 | x_ctr + 0.5 * (ws - 1), 98 | y_ctr + 0.5 * (hs - 1))) 99 | return anchors 100 | 101 | 102 | def _ratio_enum(anchor, ratios): 103 | """ 104 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 105 | """ 106 | 107 | w, h, x_ctr, y_ctr = _whctrs(anchor) 108 | size = w * h 109 | size_ratios = size / ratios 110 | # NOTE: CHANGED TO NOT HAVE ROUNDING 111 | ws = np.sqrt(size_ratios) 112 | hs = ws * ratios 113 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 114 | return anchors 115 | 116 | 117 | def _scale_enum(anchor, scales): 118 | """ 119 | Enumerate a set of anchors for each scale wrt an anchor. 120 | """ 121 | 122 | w, h, x_ctr, y_ctr = _whctrs(anchor) 123 | ws = w * scales 124 | hs = h * scales 125 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 126 | return anchors 127 | -------------------------------------------------------------------------------- /lib/fpn/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd anchors 4 | python setup.py build_ext --inplace 5 | cd .. 6 | 7 | cd box_intersections_cpu 8 | python setup.py build_ext --inplace 9 | cd .. 10 | 11 | cd cpu_nms 12 | python build.py 13 | cd .. 14 | 15 | cd roi_align 16 | python build.py -C src/cuda clean 17 | python build.py -C src/cuda clean 18 | cd .. 
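# (each CUDA extension also ships its own Makefile: `make -C nms` and `make -C roi_align`
#  compile the kernels with /usr/local/cuda/bin/nvcc and then run python build.py,
#  assuming nvcc is installed at that path)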
19 | 20 | echo "Done compiling hopefully" 21 | -------------------------------------------------------------------------------- /lib/fpn/nms/Makefile: -------------------------------------------------------------------------------- 1 | all: src/cuda/nms.cu.o 2 | python build.py 3 | 4 | src/cuda/nms.cu.o: src/cuda/nms_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /lib/fpn/nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | sources = [] 7 | headers = [] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | 36 | -------------------------------------------------------------------------------- /lib/fpn/nms/functions/nms.py: -------------------------------------------------------------------------------- 1 | # Le code for doing NMS 2 | import torch 3 | import numpy as np 4 | from .._ext import nms 5 | 6 | 7 | def apply_nms(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, boxes_per_im=None, 8 | nms_thresh=0.7): 9 | """ 10 | Note - this function is non-differentiable so everything is assumed to be a tensor, not 11 | a variable. 
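    :param scores: [num_boxes] objectness scores (CUDA tensor)
    :param boxes: [num_boxes, 4] (x1, y1, x2, y2) boxes, grouped contiguously by image
    :param pre_nms_topn: per image, only the top-scoring boxes up to this count enter NMS
    :param post_nms_topn: per image, at most this many boxes survive NMS
    :param boxes_per_im: optional list of per-image box counts; if omitted, all boxes are
        treated as one image and only the kept indices are returned
    :return: LongTensor of kept indices into the original ordering (plus a list of
        per-image keep counts when boxes_per_im is given)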
12 | """ 13 | just_inds = boxes_per_im is None 14 | if boxes_per_im is None: 15 | boxes_per_im = [boxes.size(0)] 16 | 17 | 18 | s = 0 19 | keep = [] 20 | im_per = [] 21 | for bpi in boxes_per_im: 22 | e = s + int(bpi) 23 | keep_im = _nms_single_im(scores[s:e], boxes[s:e], pre_nms_topn, post_nms_topn, nms_thresh) 24 | keep.append(keep_im + s) 25 | im_per.append(keep_im.size(0)) 26 | 27 | s = e 28 | 29 | inds = torch.cat(keep, 0) 30 | if just_inds: 31 | return inds 32 | return inds, im_per 33 | 34 | 35 | def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7): 36 | keep = torch.IntTensor(scores.size(0)) 37 | vs, idx = torch.sort(scores, dim=0, descending=True) 38 | if idx.size(0) > pre_nms_topn: 39 | idx = idx[:pre_nms_topn] 40 | boxes_sorted = boxes[idx].contiguous() 41 | num_out = nms.nms_apply(keep, boxes_sorted, nms_thresh) 42 | num_out = min(num_out, post_nms_topn) 43 | keep = keep[:num_out].long() 44 | keep = idx[keep.cuda(scores.get_device())] 45 | return keep 46 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: nms_kernel.cu nms_kernel.h 2 | /usr/local/cuda/bin/nvcc -c -o nms.cu.o nms_kernel.cu --compiler-options -fPIC -gencode arch=compute_61,code=sm_61 3 | clean: 4 | rm nms.cu.o 5 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include 9 | #include 10 | 11 | #define CUDA_CHECK(condition) \ 12 | /* Code block avoids redefinition of cudaError_t error */ \ 13 | do { \ 14 | cudaError_t error = condition; \ 15 | if (error != cudaSuccess) { \ 16 | std::cout << cudaGetErrorString(error) << std::endl; \ 17 | } \ 18 | } while (0) 19 | 20 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 21 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 22 | 23 | __device__ inline float devIoU(float const * const a, float const * const b) { 24 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 25 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 26 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 27 | float interS = width * height; 28 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 29 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 30 | return interS / (Sa + Sb - interS); 31 | } 32 | 33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 34 | const float *dev_boxes, unsigned long long *dev_mask) { 35 | const int row_start = blockIdx.y; 36 | const int col_start = blockIdx.x; 37 | 38 | // if (row_start > col_start) return; 39 | 40 | const int row_size = 41 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 42 | const int col_size = 43 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 44 | 45 | __shared__ float block_boxes[threadsPerBlock * 5]; 46 | if (threadIdx.x < col_size) { 47 | block_boxes[threadIdx.x * 4 + 0] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 0]; 49 | block_boxes[threadIdx.x * 4 + 1] = 50 | 
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 1]; 51 | block_boxes[threadIdx.x * 4 + 2] = 52 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 2]; 53 | block_boxes[threadIdx.x * 4 + 3] = 54 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 4 + 3]; 55 | } 56 | __syncthreads(); 57 | 58 | if (threadIdx.x < row_size) { 59 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 60 | const float *cur_box = dev_boxes + cur_box_idx * 4; 61 | int i = 0; 62 | unsigned long long t = 0; 63 | int start = 0; 64 | if (row_start == col_start) { 65 | start = threadIdx.x + 1; 66 | } 67 | for (i = start; i < col_size; i++) { 68 | if (devIoU(cur_box, block_boxes + i * 4) > nms_overlap_thresh) { 69 | t |= 1ULL << i; 70 | } 71 | } 72 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 73 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 74 | } 75 | } 76 | 77 | void _set_device(int device_id) { 78 | int current_device; 79 | CUDA_CHECK(cudaGetDevice(¤t_device)); 80 | if (current_device == device_id) { 81 | return; 82 | } 83 | // The call to cudaSetDevice must come before any calls to Get, which 84 | // may perform initialization using the GPU. 85 | CUDA_CHECK(cudaSetDevice(device_id)); 86 | } 87 | 88 | extern "C" int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num, 89 | float nms_overlap_thresh, int device_id) { 90 | _set_device(device_id); 91 | 92 | unsigned long long* mask_dev = NULL; 93 | 94 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 95 | 96 | CUDA_CHECK(cudaMalloc(&mask_dev, 97 | boxes_num * col_blocks * sizeof(unsigned long long))); 98 | 99 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 100 | DIVUP(boxes_num, threadsPerBlock)); 101 | dim3 threads(threadsPerBlock); 102 | nms_kernel<<>>(boxes_num, 103 | nms_overlap_thresh, 104 | boxes_dev, 105 | mask_dev); 106 | 107 | std::vector mask_host(boxes_num * col_blocks); 108 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 109 | mask_dev, 110 | sizeof(unsigned long long) * boxes_num * col_blocks, 111 | cudaMemcpyDeviceToHost)); 112 | 113 | std::vector remv(col_blocks); 114 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 115 | 116 | int num_to_keep = 0; 117 | for (int i = 0; i < boxes_num; i++) { 118 | int nblock = i / threadsPerBlock; 119 | int inblock = i % threadsPerBlock; 120 | 121 | if (!(remv[nblock] & (1ULL << inblock))) { 122 | keep_out[num_to_keep++] = i; 123 | unsigned long long *p = &mask_host[0] + i * col_blocks; 124 | for (int j = nblock; j < col_blocks; j++) { 125 | remv[j] |= p[j]; 126 | } 127 | } 128 | } 129 | 130 | CUDA_CHECK(cudaFree(mask_dev)); 131 | return num_to_keep; 132 | } 133 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | int ApplyNMSGPU(int* keep_out, const float* boxes_dev, const int boxes_num, 2 | float nms_overlap_thresh, int device_id); 3 | 4 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/nms_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh) 8 | { 9 | int* keep_data = THIntTensor_data(keep); 10 | const float* boxes_sorted_data = THCudaTensor_data(state, boxes_sorted); 11 | 12 | const int boxes_num = 
THCudaTensor_size(state, boxes_sorted, 0); 13 | 14 | const int devId = THCudaTensor_getDevice(state, boxes_sorted); 15 | 16 | int numTotalKeep = ApplyNMSGPU(keep_data, boxes_sorted_data, boxes_num, nms_thresh, devId); 17 | return numTotalKeep; 18 | } 19 | 20 | 21 | -------------------------------------------------------------------------------- /lib/fpn/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int nms_apply(THIntTensor* keep, THCudaTensor* boxes_sorted, const float nms_thresh); -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_det.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import numpy.random as npr 4 | from config import BG_THRESH_HI, BG_THRESH_LO, FG_FRACTION, ROIS_PER_IMG 5 | from lib.fpn.box_utils import bbox_overlaps 6 | from lib.pytorch_misc import to_variable 7 | import torch 8 | 9 | ############################################################# 10 | # The following is only for object detection 11 | @to_variable 12 | def proposal_assignments_det(rpn_rois, gt_boxes, gt_classes, image_offset, fg_thresh=0.5): 13 | """ 14 | Assign object detection proposals to ground-truth targets. Produces proposal 15 | classification labels and bounding-box regression targets. 16 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 17 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1 18 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 
24 | """ 25 | fg_rois_per_image = int(np.round(ROIS_PER_IMG * FG_FRACTION)) 26 | 27 | gt_img_inds = gt_classes[:, 0] - image_offset 28 | 29 | all_boxes = torch.cat([rpn_rois[:, 1:], gt_boxes], 0) 30 | 31 | ims_per_box = torch.cat([rpn_rois[:, 0].long(), gt_img_inds], 0) 32 | 33 | im_sorted, idx = torch.sort(ims_per_box, 0) 34 | all_boxes = all_boxes[idx] 35 | 36 | # Assume that the GT boxes are already sorted in terms of image id 37 | num_images = int(im_sorted[-1]) + 1 38 | 39 | labels = [] 40 | rois = [] 41 | bbox_targets = [] 42 | for im_ind in range(num_images): 43 | g_inds = (gt_img_inds == im_ind).nonzero() 44 | 45 | if g_inds.dim() == 0: 46 | continue 47 | g_inds = g_inds.squeeze(1) 48 | g_start = g_inds[0] 49 | g_end = g_inds[-1] + 1 50 | 51 | t_inds = (im_sorted == im_ind).nonzero().squeeze(1) 52 | t_start = t_inds[0] 53 | t_end = t_inds[-1] + 1 54 | 55 | # Max overlaps: for each predicted box, get the max ROI 56 | # Get the indices into the GT boxes too (must offset by the box start) 57 | ious = bbox_overlaps(all_boxes[t_start:t_end], gt_boxes[g_start:g_end]) 58 | max_overlaps, gt_assignment = ious.max(1) 59 | max_overlaps = max_overlaps.cpu().numpy() 60 | # print("Best overlap is {}".format(max_overlaps.max())) 61 | # print("\ngt assignment is {} while g_start is {} \n ---".format(gt_assignment, g_start)) 62 | gt_assignment += g_start 63 | 64 | keep_inds_np, num_fg = _sel_inds(max_overlaps, fg_thresh, fg_rois_per_image, 65 | ROIS_PER_IMG) 66 | 67 | if keep_inds_np.size == 0: 68 | continue 69 | 70 | keep_inds = torch.LongTensor(keep_inds_np).cuda(rpn_rois.get_device()) 71 | 72 | labels_ = gt_classes[:, 1][gt_assignment[keep_inds]] 73 | bbox_target_ = gt_boxes[gt_assignment[keep_inds]] 74 | 75 | # Clamp labels_ for the background RoIs to 0 76 | if num_fg < labels_.size(0): 77 | labels_[num_fg:] = 0 78 | 79 | rois_ = torch.cat(( 80 | im_sorted[t_start:t_end, None][keep_inds].float(), 81 | all_boxes[t_start:t_end][keep_inds], 82 | ), 1) 83 | 84 | labels.append(labels_) 85 | rois.append(rois_) 86 | bbox_targets.append(bbox_target_) 87 | 88 | rois = torch.cat(rois, 0) 89 | labels = torch.cat(labels, 0) 90 | bbox_targets = torch.cat(bbox_targets, 0) 91 | return rois, labels, bbox_targets 92 | 93 | 94 | def _sel_inds(max_overlaps, fg_thresh=0.5, fg_rois_per_image=128, rois_per_image=256): 95 | # Select foreground RoIs as those with >= FG_THRESH overlap 96 | fg_inds = np.where(max_overlaps >= fg_thresh)[0] 97 | 98 | # Guard against the case when an image has fewer than fg_rois_per_image 99 | # foreground RoIs 100 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.shape[0]) 101 | # Sample foreground regions without replacement 102 | if fg_inds.size > 0: 103 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 104 | 105 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 106 | bg_inds = np.where((max_overlaps < BG_THRESH_HI) & (max_overlaps >= BG_THRESH_LO))[0] 107 | 108 | # Compute number of background RoIs to take from this image (guarding 109 | # against there being fewer than desired) 110 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 111 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 112 | # Sample background regions without replacement 113 | if bg_inds.size > 0: 114 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 115 | 116 | return np.append(fg_inds, bg_inds), fg_rois_per_this_image 117 | 118 | 
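# A minimal usage sketch for _sel_inds (illustrative values only):
#
#   import numpy as np
#   fake_overlaps = np.random.rand(1000)   # stand-in for each RoI's best IoU with a GT box
#   keep, num_fg = _sel_inds(fake_overlaps, fg_thresh=0.5,
#                            fg_rois_per_image=64, rois_per_image=256)
#   # `keep` holds at most 256 RoI indices; the first `num_fg` are foreground samples and
#   # the remainder fall inside the [BG_THRESH_LO, BG_THRESH_HI) background band.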
-------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_gtbox.py: -------------------------------------------------------------------------------- 1 | from lib.pytorch_misc import enumerate_by_image, gather_nd, random_choose 2 | from lib.fpn.box_utils import bbox_preds, center_size, bbox_overlaps 3 | import torch 4 | from lib.pytorch_misc import diagonal_inds, to_variable 5 | from config import RELS_PER_IMG, REL_FG_FRACTION 6 | 7 | 8 | @to_variable 9 | def proposal_assignments_gtbox(rois, gt_boxes, gt_classes, gt_rels, image_offset, fg_thresh=0.5): 10 | """ 11 | Assign object detection proposals to ground-truth targets. Produces proposal 12 | classification labels and bounding-box regression targets. 13 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 14 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1]. Not needed it seems 15 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 16 | Note, the img_inds here start at image_offset 17 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type]. 18 | Note, the img_inds here start at image_offset 19 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 20 | :return: 21 | rois: [num_rois, 5] 22 | labels: [num_rois] array of labels 23 | bbox_targets [num_rois, 4] array of targets for the labels. 24 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 25 | """ 26 | im_inds = rois[:,0].long() 27 | 28 | num_im = im_inds[-1] + 1 29 | 30 | # Offset the image indices in fg_rels to refer to absolute indices (not just within img i) 31 | fg_rels = gt_rels.clone() 32 | fg_rels[:,0] -= image_offset 33 | offset = {} 34 | for i, s, e in enumerate_by_image(im_inds): 35 | offset[i] = s 36 | for i, s, e in enumerate_by_image(fg_rels[:, 0]): 37 | fg_rels[s:e, 1:3] += offset[i] 38 | 39 | # Try ALL things, not just intersections. 40 | is_cand = (im_inds[:, None] == im_inds[None]) 41 | is_cand.view(-1)[diagonal_inds(is_cand)] = 0 42 | 43 | # # Compute salience 44 | # gt_inds = fg_rels[:, 1:3].contiguous().view(-1) 45 | # labels_arange = labels.data.new(labels.size(0)) 46 | # torch.arange(0, labels.size(0), out=labels_arange) 47 | # salience_labels = ((gt_inds[:, None] == labels_arange[None]).long().sum(0) > 0).long() 48 | # labels = torch.stack((labels, salience_labels), 1) 49 | 50 | # Add in some BG labels 51 | 52 | # NOW WE HAVE TO EXCLUDE THE FGs. 53 | # TODO: check if this causes an error if many duplicate GTs havent been filtered out 54 | 55 | is_cand.view(-1)[fg_rels[:,1]*im_inds.size(0) + fg_rels[:,2]] = 0 56 | is_bgcand = is_cand.nonzero() 57 | # TODO: make this sample on a per image case 58 | # If too many then sample 59 | num_fg = min(fg_rels.size(0), int(RELS_PER_IMG * REL_FG_FRACTION * num_im)) 60 | if num_fg < fg_rels.size(0): 61 | fg_rels = random_choose(fg_rels, num_fg) 62 | 63 | # If too many then sample 64 | num_bg = min(is_bgcand.size(0) if is_bgcand.dim() > 0 else 0, 65 | int(RELS_PER_IMG * num_im) - num_fg) 66 | if num_bg > 0: 67 | bg_rels = torch.cat(( 68 | im_inds[is_bgcand[:, 0]][:, None], 69 | is_bgcand, 70 | (is_bgcand[:, 0, None] < -10).long(), 71 | ), 1) 72 | 73 | if num_bg < is_bgcand.size(0): 74 | bg_rels = random_choose(bg_rels, num_bg) 75 | rel_labels = torch.cat((fg_rels, bg_rels), 0) 76 | else: 77 | rel_labels = fg_rels 78 | 79 | 80 | # last sort by rel. 
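    # (the three sort keys are packed into one scalar so a single torch.sort yields a
    #  lexicographic order: image index first, then subject box index, then object box index)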
81 | _, perm = torch.sort(rel_labels[:, 0]*(gt_boxes.size(0)**2) + 82 | rel_labels[:,1]*gt_boxes.size(0) + rel_labels[:,2]) 83 | 84 | rel_labels = rel_labels[perm].contiguous() 85 | 86 | labels = gt_classes[:,1].contiguous() 87 | return rois, labels, rel_labels 88 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/proposal_assignments_postnms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from .proposal_assignments_rel import _sel_rels 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable 11 | import torch 12 | 13 | 14 | @to_variable 15 | def proposal_assignments_postnms( 16 | rois, gt_boxes, gt_classes, gt_rels, nms_inds, image_offset, fg_thresh=0.5, 17 | max_objs=100, max_rels=100, rand_val=0.01): 18 | """ 19 | Assign object detection proposals to ground-truth targets. Produces proposal 20 | classification labels and bounding-box regression targets. 21 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 22 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 23 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 24 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 25 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 26 | :return: 27 | rois: [num_rois, 5] 28 | labels: [num_rois] array of labels 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | pred_inds_np = rois[:, 0].cpu().numpy().astype(np.int64) 32 | pred_boxes_np = rois[:, 1:].cpu().numpy() 33 | nms_inds_np = nms_inds.cpu().numpy() 34 | sup_inds_np = np.setdiff1d(np.arange(pred_boxes_np.shape[0]), nms_inds_np) 35 | 36 | # split into chosen and suppressed 37 | chosen_inds_np = pred_inds_np[nms_inds_np] 38 | chosen_boxes_np = pred_boxes_np[nms_inds_np] 39 | 40 | suppre_inds_np = pred_inds_np[sup_inds_np] 41 | suppre_boxes_np = pred_boxes_np[sup_inds_np] 42 | 43 | gt_boxes_np = gt_boxes.cpu().numpy() 44 | gt_classes_np = gt_classes.cpu().numpy() 45 | gt_rels_np = gt_rels.cpu().numpy() 46 | 47 | gt_classes_np[:, 0] -= image_offset 48 | gt_rels_np[:, 0] -= image_offset 49 | 50 | num_im = gt_classes_np[:, 0].max()+1 51 | 52 | rois = [] 53 | obj_labels = [] 54 | rel_labels = [] 55 | num_box_seen = 0 56 | 57 | for im_ind in range(num_im): 58 | chosen_ind = np.where(chosen_inds_np == im_ind)[0] 59 | suppre_ind = np.where(suppre_inds_np == im_ind)[0] 60 | 61 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 62 | gt_boxes_i = gt_boxes_np[gt_ind] 63 | gt_classes_i = gt_classes_np[gt_ind, 1] 64 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 65 | 66 | # Get IOUs between chosen and GT boxes and if needed we'll add more in 67 | 68 | chosen_boxes_i = chosen_boxes_np[chosen_ind] 69 | suppre_boxes_i = suppre_boxes_np[suppre_ind] 70 | 71 | n_chosen = chosen_boxes_i.shape[0] 72 | n_suppre = suppre_boxes_i.shape[0] 73 | n_gt_box = gt_boxes_i.shape[0] 74 | 75 | # add a teensy bit of random noise because some GT boxes might be duplicated, etc. 
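        # (rand_val defaults to 0.01, so the jitter is at most +/-0.005 IoU: enough to break
        #  exact ties between duplicated GT boxes while leaving fg_thresh decisions
        #  essentially unchanged)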
76 | pred_boxes_i = np.concatenate((chosen_boxes_i, suppre_boxes_i, gt_boxes_i), 0) 77 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) + rand_val*( 78 | np.random.rand(pred_boxes_i.shape[0], gt_boxes_i.shape[0])-0.5) 79 | 80 | # Let's say that a box can only be assigned ONCE for now because we've already done 81 | # the NMS and stuff. 82 | is_hit = ious > fg_thresh 83 | 84 | obj_assignments_i = is_hit.argmax(1) 85 | obj_assignments_i[~is_hit.any(1)] = -1 86 | 87 | vals, first_occurance_ind = np.unique(obj_assignments_i, return_index=True) 88 | obj_assignments_i[np.setdiff1d( 89 | np.arange(obj_assignments_i.shape[0]), first_occurance_ind)] = -1 90 | 91 | extra_to_add = np.where(obj_assignments_i[n_chosen:] != -1)[0] + n_chosen 92 | 93 | # Add them in somewhere at random 94 | num_inds_to_have = min(max_objs, n_chosen + extra_to_add.shape[0]) 95 | boxes_i = np.zeros((num_inds_to_have, 4), dtype=np.float32) 96 | labels_i = np.zeros(num_inds_to_have, dtype=np.int64) 97 | 98 | inds_from_nms = np.sort(np.random.choice(num_inds_to_have, size=n_chosen, replace=False)) 99 | inds_from_elsewhere = np.setdiff1d(np.arange(num_inds_to_have), inds_from_nms) 100 | 101 | boxes_i[inds_from_nms] = chosen_boxes_i 102 | labels_i[inds_from_nms] = gt_classes_i[obj_assignments_i[:n_chosen]] 103 | 104 | boxes_i[inds_from_elsewhere] = pred_boxes_i[extra_to_add] 105 | labels_i[inds_from_elsewhere] = gt_classes_i[obj_assignments_i[extra_to_add]] 106 | 107 | # Now, we do the relationships. same as for rle 108 | all_rels_i = _sel_rels(bbox_overlaps(boxes_i, gt_boxes_i), 109 | boxes_i, 110 | labels_i, 111 | gt_classes_i, 112 | gt_rels_i, 113 | fg_thresh=fg_thresh, 114 | fg_rels_per_image=100) 115 | all_rels_i[:,0:2] += num_box_seen 116 | 117 | rois.append(np.column_stack(( 118 | im_ind * np.ones(boxes_i.shape[0], dtype=np.float32), 119 | boxes_i, 120 | ))) 121 | obj_labels.append(labels_i) 122 | rel_labels.append(np.column_stack(( 123 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 124 | all_rels_i, 125 | ))) 126 | num_box_seen += boxes_i.size 127 | 128 | rois = torch.FloatTensor(np.concatenate(rois, 0)).cuda(gt_boxes.get_device(), async=True) 129 | labels = torch.LongTensor(np.concatenate(obj_labels, 0)).cuda(gt_boxes.get_device(), async=True) 130 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(gt_boxes.get_device(), 131 | async=True) 132 | 133 | return rois, labels, rel_labels 134 | -------------------------------------------------------------------------------- /lib/fpn/proposal_assignments/rel_assignments.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Goal: assign ROIs to targets 3 | # -------------------------------------------------------- 4 | 5 | 6 | import numpy as np 7 | import numpy.random as npr 8 | from config import BG_THRESH_HI, BG_THRESH_LO, REL_FG_FRACTION, RELS_PER_IMG_REFINE 9 | from lib.fpn.box_utils import bbox_overlaps 10 | from lib.pytorch_misc import to_variable, nonintersecting_2d_inds 11 | from collections import defaultdict 12 | import torch 13 | 14 | @to_variable 15 | def rel_assignments(im_inds, rpn_rois, roi_gtlabels, gt_boxes, gt_classes, gt_rels, image_offset, 16 | fg_thresh=0.5, num_sample_per_gt=4, filter_non_overlap=True): 17 | """ 18 | Assign object detection proposals to ground-truth targets. Produces proposal 19 | classification labels and bounding-box regression targets. 
20 | :param rpn_rois: [img_ind, x1, y1, x2, y2] 21 | :param gt_boxes: [num_boxes, 4] array of x0, y0, x1, y1] 22 | :param gt_classes: [num_boxes, 2] array of [img_ind, class] 23 | :param gt_rels [num_boxes, 4] array of [img_ind, box_0, box_1, rel type] 24 | :param Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 25 | :return: 26 | rois: [num_rois, 5] 27 | labels: [num_rois] array of labels 28 | bbox_targets [num_rois, 4] array of targets for the labels. 29 | rel_labels: [num_rels, 4] (img ind, box0 ind, box1ind, rel type) 30 | """ 31 | fg_rels_per_image = int(np.round(REL_FG_FRACTION * 64)) 32 | 33 | pred_inds_np = im_inds.cpu().numpy() 34 | pred_boxes_np = rpn_rois.cpu().numpy() 35 | pred_boxlabels_np = roi_gtlabels.cpu().numpy() 36 | gt_boxes_np = gt_boxes.cpu().numpy() 37 | gt_classes_np = gt_classes.cpu().numpy() 38 | gt_rels_np = gt_rels.cpu().numpy() 39 | 40 | gt_classes_np[:, 0] -= image_offset 41 | gt_rels_np[:, 0] -= image_offset 42 | 43 | num_im = gt_classes_np[:, 0].max()+1 44 | 45 | # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format( 46 | # pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np 47 | # )) 48 | 49 | rel_labels = [] 50 | fg_rel_labels = [] 51 | num_box_seen = 0 52 | for im_ind in range(num_im): 53 | pred_ind = np.where(pred_inds_np == im_ind)[0] 54 | 55 | gt_ind = np.where(gt_classes_np[:, 0] == im_ind)[0] 56 | gt_boxes_i = gt_boxes_np[gt_ind] 57 | gt_classes_i = gt_classes_np[gt_ind, 1] 58 | gt_rels_i = gt_rels_np[gt_rels_np[:, 0] == im_ind, 1:] 59 | 60 | # [num_pred, num_gt] 61 | pred_boxes_i = pred_boxes_np[pred_ind] 62 | pred_boxlabels_i = pred_boxlabels_np[pred_ind] 63 | 64 | ious = bbox_overlaps(pred_boxes_i, gt_boxes_i) 65 | is_match = (pred_boxlabels_i[:,None] == gt_classes_i[None]) & (ious >= fg_thresh) 66 | 67 | # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box 68 | pbi_iou = bbox_overlaps(pred_boxes_i, pred_boxes_i) 69 | if filter_non_overlap: 70 | rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0) 71 | rels_intersect = rel_possibilities 72 | else: 73 | rel_possibilities = np.ones((pred_boxes_i.shape[0], pred_boxes_i.shape[0]), 74 | dtype=np.int64) - np.eye(pred_boxes_i.shape[0], 75 | dtype=np.int64) 76 | rels_intersect = (pbi_iou < 1) & (pbi_iou > 0) 77 | 78 | # ONLY select relations between ground truth because otherwise we get useless data 79 | rel_possibilities[pred_boxlabels_i == 0] = 0 80 | rel_possibilities[:, pred_boxlabels_i == 0] = 0 81 | 82 | # Sample the GT relationships. 
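        # For each GT relation (subject, object, predicate), every proposal pair whose
        # labels and IoUs match the two GT boxes becomes a candidate; candidates are
        # drawn with probability proportional to the product of their subject/object
        # IoUs, keeping at most num_sample_per_gt pairs per GT relation, and every
        # matched pair is removed from the background pool.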
83 | fg_rels = [] 84 | p_size = [] 85 | for i, (from_gtind, to_gtind, rel_id) in enumerate(gt_rels_i): 86 | fg_rels_i = [] 87 | fg_scores_i = [] 88 | 89 | for from_ind in np.where(is_match[:, from_gtind])[0]: 90 | for to_ind in np.where(is_match[:, to_gtind])[0]: 91 | if from_ind != to_ind: 92 | fg_rels_i.append((from_ind, to_ind, rel_id)) 93 | fg_scores_i.append((ious[from_ind, from_gtind] * ious[to_ind, to_gtind])) 94 | rel_possibilities[from_ind, to_ind] = 0 95 | if len(fg_rels_i) == 0: 96 | continue 97 | p = np.array(fg_scores_i) 98 | p = p / p.sum() 99 | p_size.append(p.shape[0]) 100 | num_to_add = min(p.shape[0], num_sample_per_gt) 101 | for rel_to_add in npr.choice(p.shape[0], p=p, size=num_to_add, replace=False): 102 | fg_rels.append(fg_rels_i[rel_to_add]) 103 | 104 | fg_rels = np.array(fg_rels, dtype=np.int64) 105 | all_fg_rels = fg_rels 106 | if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image: 107 | fg_rels = fg_rels[npr.choice(fg_rels.shape[0], size=fg_rels_per_image, replace=False)] 108 | elif fg_rels.size == 0: 109 | fg_rels = np.zeros((0, 3), dtype=np.int64) 110 | all_fg_rels = np.zeros((1, 3), dtype=np.int64) 111 | 112 | 113 | bg_rels = np.column_stack(np.where(rel_possibilities)) 114 | bg_rels = np.column_stack((bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64))) 115 | 116 | num_bg_rel = min(64 - fg_rels.shape[0], bg_rels.shape[0]) 117 | if bg_rels.size > 0: 118 | # Sample 4x as many intersecting relationships as non-intersecting. 119 | # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]] 120 | # p = bg_rels_intersect.astype(np.float32) 121 | # p[bg_rels_intersect == 0] = 0.2 122 | # p[bg_rels_intersect == 1] = 0.8 123 | # p /= p.sum() 124 | bg_rels = bg_rels[ 125 | np.random.choice(bg_rels.shape[0], 126 | #p=p, 127 | size=num_bg_rel, replace=False)] 128 | else: 129 | bg_rels = np.zeros((0, 3), dtype=np.int64) 130 | 131 | if fg_rels.size == 0 and bg_rels.size == 0: 132 | # Just put something here 133 | bg_rels = np.array([[0, 0, 0]], dtype=np.int64) 134 | 135 | # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape)) 136 | all_rels_i = np.concatenate((fg_rels, bg_rels), 0) 137 | all_rels_i[:,0:2] += num_box_seen 138 | all_fg_rels[:,0:2] += num_box_seen 139 | 140 | all_rels_i = all_rels_i[np.lexsort((all_rels_i[:,1], all_rels_i[:,0]))] 141 | all_fg_rels = all_fg_rels[np.lexsort((all_fg_rels[:,1], all_fg_rels[:,0]))] 142 | 143 | rel_labels.append(np.column_stack(( 144 | im_ind*np.ones(all_rels_i.shape[0], dtype=np.int64), 145 | all_rels_i, 146 | ))) 147 | fg_rel_labels.append(np.column_stack(( 148 | im_ind*np.ones(all_fg_rels.shape[0], dtype=np.int64), 149 | all_fg_rels, 150 | ))) 151 | num_box_seen += pred_boxes_i.shape[0] 152 | rel_labels = torch.LongTensor(np.concatenate(rel_labels, 0)).cuda(rpn_rois.get_device(), 153 | async=True) 154 | 155 | fg_rel_labels = torch.LongTensor(np.concatenate(fg_rel_labels, 0)).cuda(rpn_rois.get_device(), 156 | async=True) 157 | return rel_labels, fg_rel_labels 158 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/Makefile: -------------------------------------------------------------------------------- 1 | all: src/cuda/roi_align.cu.o 2 | python build.py 3 | 4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/fpn/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/fpn/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/fpn/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/fpn/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # Might have to export PATH=/usr/local/cuda-8.0/bin${PATH:+:${PATH}} 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/cuda/roi_align.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/fpn/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /lib/fpn/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | """ 2 | performs ROI aligning 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Function 7 | from .._ext import roi_align 8 | 9 | class RoIAlignFunction(Function): 10 | def __init__(self, aligned_height, aligned_width, spatial_scale): 11 | self.aligned_width = int(aligned_width) 12 | self.aligned_height = int(aligned_height) 13 | self.spatial_scale = float(spatial_scale) 14 | 15 | self.feature_size = None 16 | 17 | def forward(self, features, rois): 18 | self.save_for_backward(rois) 19 | 20 | rois_normalized = rois.clone() 21 | 22 | self.feature_size = features.size() 23 | batch_size, num_channels, data_height, data_width = 
self.feature_size 24 | 25 | height = (data_height -1) / self.spatial_scale 26 | width = (data_width - 1) / self.spatial_scale 27 | 28 | rois_normalized[:,1] /= width 29 | rois_normalized[:,2] /= height 30 | rois_normalized[:,3] /= width 31 | rois_normalized[:,4] /= height 32 | 33 | 34 | num_rois = rois.size(0) 35 | 36 | output = features.new(num_rois, num_channels, self.aligned_height, 37 | self.aligned_width).zero_() 38 | 39 | if features.is_cuda: 40 | res = roi_align.roi_align_forward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, features, 43 | rois_normalized, output) 44 | assert res == 1 45 | else: 46 | raise ValueError 47 | 48 | return output 49 | 50 | def backward(self, grad_output): 51 | assert(self.feature_size is not None and grad_output.is_cuda) 52 | 53 | rois = self.saved_tensors[0] 54 | 55 | rois_normalized = rois.clone() 56 | 57 | batch_size, num_channels, data_height, data_width = self.feature_size 58 | 59 | height = (data_height -1) / self.spatial_scale 60 | width = (data_width - 1) / self.spatial_scale 61 | 62 | rois_normalized[:,1] /= width 63 | rois_normalized[:,2] /= height 64 | rois_normalized[:,3] /= width 65 | rois_normalized[:,4] /= height 66 | 67 | grad_input = rois_normalized.new(batch_size, num_channels, data_height, 68 | data_width).zero_() 69 | res = roi_align.roi_align_backward_cuda(self.aligned_height, 70 | self.aligned_width, 71 | self.spatial_scale, grad_output, 72 | rois_normalized, grad_input) 73 | assert res == 1 74 | return grad_input, None 75 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/fpn/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /lib/fpn/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = 
RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | /usr/local/cuda/bin/nvcc -c -o roi_align.cu.o roi_align_kernel.cu --compiler-options -fPIC -gencode arch=compute_61,code=sm_61 3 | clean: 4 | rm roi_align.cu.o 5 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/roi_align_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include "roi_align_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, 16 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 17 | int crop_width, int depth, float extrapolation_value, float* crops_ptr) { 18 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 19 | // (n, c, ph, pw) is an element in the aligned output 20 | int idx = out_idx; 21 | const int x = idx % crop_width; 22 | idx /= crop_width; 23 | const int y = idx % crop_height; 24 | idx /= crop_height; 25 | const int d = idx % depth; 26 | const int b = idx / depth; 27 | 28 | const int b_in = int(boxes_ptr[b*5]); 29 | const float x1 = boxes_ptr[b * 5 + 1]; 30 | const float y1 = boxes_ptr[b * 5 + 2]; 31 | const float x2 = boxes_ptr[b * 5 + 3]; 32 | const float y2 = boxes_ptr[b * 5 + 4]; 33 | if (b_in < 0 || b_in >= batch) { 34 | continue; 35 | } 36 | 37 | const float height_scale = 38 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 39 | : 0; 40 | const float width_scale = 41 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 42 | 43 | const float in_y = (crop_height > 1) 44 | ? y1 * (image_height - 1) + y * height_scale 45 | : 0.5 * (y1 + y2) * (image_height - 1); 46 | if (in_y < 0 || in_y > image_height - 1) { 47 | crops_ptr[out_idx] = extrapolation_value; 48 | continue; 49 | } 50 | 51 | const float in_x = (crop_width > 1) 52 | ? 
x1 * (image_width - 1) + x * width_scale 53 | : 0.5 * (x1 + x2) * (image_width - 1); 54 | if (in_x < 0 || in_x > image_width - 1) { 55 | crops_ptr[out_idx] = extrapolation_value; 56 | continue; 57 | } 58 | 59 | const int top_y_index = floorf(in_y); 60 | const int bottom_y_index = ceilf(in_y); 61 | const float y_lerp = in_y - top_y_index; 62 | 63 | const int left_x_index = floorf(in_x); 64 | const int right_x_index = ceilf(in_x); 65 | const float x_lerp = in_x - left_x_index; 66 | 67 | const float top_left = image_ptr[((b_in*depth + d) * image_height 68 | + top_y_index) * image_width + left_x_index]; 69 | const float top_right = image_ptr[((b_in*depth + d) * image_height 70 | + top_y_index) * image_width + right_x_index]; 71 | const float bottom_left = image_ptr[((b_in*depth + d) * image_height 72 | + bottom_y_index) * image_width + left_x_index]; 73 | const float bottom_right = image_ptr[((b_in*depth + d) * image_height 74 | + bottom_y_index) * image_width + right_x_index]; 75 | 76 | const float top = top_left + (top_right - top_left) * x_lerp; 77 | const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; 78 | crops_ptr[out_idx] = top + (bottom - top) * y_lerp; 79 | } 80 | } 81 | 82 | int ROIAlignForwardLaucher(const float* image_ptr, const float* boxes_ptr, 83 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 84 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream) { 85 | 86 | const int kThreadsPerBlock = 1024; 87 | const int output_size = num_boxes * crop_height * crop_width * depth; 88 | cudaError_t err; 89 | 90 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>> 91 | (output_size, image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, 92 | crop_height, crop_width, depth, extrapolation_value, crops_ptr); 93 | 94 | err = cudaGetLastError(); 95 | if(cudaSuccess != err) { 96 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 97 | exit( -1 ); 98 | } 99 | 100 | return 1; 101 | } 102 | 103 | __global__ void ROIAlignBackward( 104 | const int nthreads, const float* grads_ptr, const float* boxes_ptr, 105 | int num_boxes, int batch, int image_height, 106 | int image_width, int crop_height, int crop_width, int depth, 107 | float* grads_image_ptr) { 108 | CUDA_1D_KERNEL_LOOP(out_idx, nthreads) { 109 | 110 | // out_idx = d + depth * (w + crop_width * (h + crop_height * b)) 111 | int idx = out_idx; 112 | const int x = idx % crop_width; 113 | idx /= crop_width; 114 | const int y = idx % crop_height; 115 | idx /= crop_height; 116 | const int d = idx % depth; 117 | const int b = idx / depth; 118 | 119 | const int b_in = boxes_ptr[b * 5]; 120 | const float x1 = boxes_ptr[b * 5 + 1]; 121 | const float y1 = boxes_ptr[b * 5 + 2]; 122 | const float x2 = boxes_ptr[b * 5 + 3]; 123 | const float y2 = boxes_ptr[b * 5 + 4]; 124 | if (b_in < 0 || b_in >= batch) { 125 | continue; 126 | } 127 | 128 | const float height_scale = 129 | (crop_height > 1) ? (y2 - y1) * (image_height - 1) / (crop_height - 1) 130 | : 0; 131 | const float width_scale = 132 | (crop_width > 1) ? (x2 - x1) * (image_width - 1) / (crop_width - 1) : 0; 133 | 134 | const float in_y = (crop_height > 1) 135 | ? y1 * (image_height - 1) + y * height_scale 136 | : 0.5 * (y1 + y2) * (image_height - 1); 137 | if (in_y < 0 || in_y > image_height - 1) { 138 | continue; 139 | } 140 | 141 | const float in_x = (crop_width > 1) 142 | ? 
x1 * (image_width - 1) + x * width_scale 143 | : 0.5 * (x1 + x2) * (image_width - 1); 144 | if (in_x < 0 || in_x > image_width - 1) { 145 | continue; 146 | } 147 | 148 | const int top_y_index = floorf(in_y); 149 | const int bottom_y_index = ceilf(in_y); 150 | const float y_lerp = in_y - top_y_index; 151 | 152 | const int left_x_index = floorf(in_x); 153 | const int right_x_index = ceilf(in_x); 154 | const float x_lerp = in_x - left_x_index; 155 | 156 | const float dtop = (1 - y_lerp) * grads_ptr[out_idx]; 157 | atomicAdd( 158 | grads_image_ptr + ((b_in*depth + d)*image_height + top_y_index) * image_width + left_x_index, 159 | (1 - x_lerp) * dtop); 160 | atomicAdd(grads_image_ptr + 161 | ((b_in * depth + d)*image_height+top_y_index)*image_width + right_x_index, 162 | x_lerp * dtop); 163 | 164 | const float dbottom = y_lerp * grads_ptr[out_idx]; 165 | atomicAdd(grads_image_ptr + ((b_in*depth+d)*image_height+bottom_y_index)*image_width+left_x_index, 166 | (1 - x_lerp) * dbottom); 167 | atomicAdd(grads_image_ptr + ((b_in*depth+d)*image_height+bottom_y_index)*image_width+right_x_index, 168 | x_lerp * dbottom); 169 | } 170 | } 171 | 172 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes, 173 | int batch, int image_height, int image_width, int crop_height, int crop_width, int depth, 174 | float* grads_image_ptr, cudaStream_t stream) { 175 | const int kThreadsPerBlock = 1024; 176 | const int output_size = num_boxes * crop_height * crop_width * depth; 177 | cudaError_t err; 178 | 179 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>> 180 | (output_size, grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width, crop_height, 181 | crop_width, depth, grads_image_ptr); 182 | 183 | err = cudaGetLastError(); 184 | if(cudaSuccess != err) { 185 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 186 | exit( -1 ); 187 | } 188 | 189 | return 1; 190 | } 191 | 192 | 193 | #ifdef __cplusplus 194 | } 195 | #endif 196 | 197 | 198 | -------------------------------------------------------------------------------- /lib/fpn/roi_align/src/cuda/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* image_ptr, const float* boxes_ptr, int num_boxes, int batch, int image_height, int image_width, int crop_height, 9 | int crop_width, int depth, float extrapolation_value, float* crops_ptr); 10 | 11 | int ROIAlignForwardLaucher( 12 | const float* image_ptr, const float* boxes_ptr, 13 | int num_boxes, int batch, int image_height, int image_width, int crop_height, 14 | int crop_width, int depth, float extrapolation_value, float* crops_ptr, cudaStream_t stream); 15 | 16 | __global__ void ROIAlignBackward(const int nthreads, const float* grads_ptr, 17 | const float* boxes_ptr, int num_boxes, int batch, int image_height, 18 | int image_width, int crop_height, int crop_width, int depth, 19 | float* grads_image_ptr); 20 | 21 | int ROIAlignBackwardLaucher(const float* grads_ptr, const float* boxes_ptr, int num_boxes, 22 | int batch, int image_height, int image_width, int crop_height, 23 | int crop_width, int depth, float* grads_image_ptr, cudaStream_t stream); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif 30 | 31 | 
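The forward kernel above is the crop-and-resize variant of RoI Align: each output cell of a crop is bilinearly interpolated from the four feature-map pixels surrounding a sampling point computed from a box given in normalized [0, 1] coordinates (the normalization is done by RoIAlignFunction before the CUDA call). The snippet below is a minimal NumPy sketch of that forward sampling for a single box; the helper name roi_align_forward_ref and its exact signature are our own illustration and are not part of this repository.

import numpy as np

def roi_align_forward_ref(image, box, crop_height, crop_width, extrapolation_value=0.0):
    # image: [depth, H, W] feature map; box: normalized (x1, y1, x2, y2) in [0, 1].
    depth, H, W = image.shape
    x1, y1, x2, y2 = box
    # Out-of-range sampling points keep extrapolation_value, as in ROIAlignForward.
    crops = np.full((depth, crop_height, crop_width), extrapolation_value, dtype=image.dtype)
    h_scale = (y2 - y1) * (H - 1) / (crop_height - 1) if crop_height > 1 else 0.0
    w_scale = (x2 - x1) * (W - 1) / (crop_width - 1) if crop_width > 1 else 0.0
    for y in range(crop_height):
        in_y = y1 * (H - 1) + y * h_scale if crop_height > 1 else 0.5 * (y1 + y2) * (H - 1)
        if in_y < 0 or in_y > H - 1:
            continue
        for x in range(crop_width):
            in_x = x1 * (W - 1) + x * w_scale if crop_width > 1 else 0.5 * (x1 + x2) * (W - 1)
            if in_x < 0 or in_x > W - 1:
                continue
            top, left = int(np.floor(in_y)), int(np.floor(in_x))
            bottom, right = int(np.ceil(in_y)), int(np.ceil(in_x))
            y_lerp, x_lerp = in_y - top, in_x - left
            # Bilinear interpolation over the four neighbouring pixels, all channels at once.
            top_val = image[:, top, left] + (image[:, top, right] - image[:, top, left]) * x_lerp
            bot_val = image[:, bottom, left] + (image[:, bottom, right] - image[:, bottom, left]) * x_lerp
            crops[:, y, x] = top_val + (bot_val - top_val) * y_lerp
    return crops

The backward kernel scatters grad * (1 - y_lerp) * (1 - x_lerp), grad * (1 - y_lerp) * x_lerp, grad * y_lerp * (1 - x_lerp) and grad * y_lerp * x_lerp back onto those same four pixels with atomicAdd, so gradients flow only to the locations that contributed to each output cell.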
--------------------------------------------------------------------------------
/lib/fpn/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include <math.h>
3 | #include "cuda/roi_align_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
9 | {
10 | // Grab the input tensor
11 | float * image_ptr = THCudaTensor_data(state, features);
12 | float * boxes_ptr = THCudaTensor_data(state, rois);
13 |
14 | float * crops_ptr = THCudaTensor_data(state, output);
15 |
16 | // Number of ROIs
17 | int num_boxes = THCudaTensor_size(state, rois, 0);
18 | int size_rois = THCudaTensor_size(state, rois, 1);
19 | if (size_rois != 5)
20 | {
21 | return 0;
22 | }
23 |
24 | // batch size
25 | int batch = THCudaTensor_size(state, features, 0);
26 | // data height
27 | int image_height = THCudaTensor_size(state, features, 2);
28 | // data width
29 | int image_width = THCudaTensor_size(state, features, 3);
30 | // Number of channels
31 | int depth = THCudaTensor_size(state, features, 1);
32 |
33 | cudaStream_t stream = THCState_getCurrentStream(state);
34 | float extrapolation_value = 0.0;
35 |
36 | ROIAlignForwardLaucher(
37 | image_ptr, boxes_ptr, num_boxes, batch, image_height, image_width,
38 | crop_height, crop_width, depth, extrapolation_value, crops_ptr,
39 | stream);
40 |
41 | return 1;
42 | }
43 |
44 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale,
45 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
46 | {
47 | // Grab the input tensor
48 | float * grads_ptr = THCudaTensor_data(state, top_grad);
49 | float * boxes_ptr = THCudaTensor_data(state, rois);
50 |
51 | float * grads_image_ptr = THCudaTensor_data(state, bottom_grad);
52 |
53 | // Number of ROIs
54 | int num_boxes = THCudaTensor_size(state, rois, 0);
55 | int size_rois = THCudaTensor_size(state, rois, 1);
56 | if (size_rois != 5)
57 | {
58 | return 0;
59 | }
60 |
61 | // batch size
62 | int batch = THCudaTensor_size(state, bottom_grad, 0);
63 | // data height
64 | int image_height = THCudaTensor_size(state, bottom_grad, 2);
65 | // data width
66 | int image_width = THCudaTensor_size(state, bottom_grad, 3);
67 | // Number of channels
68 | int depth = THCudaTensor_size(state, bottom_grad, 1);
69 |
70 | cudaStream_t stream = THCState_getCurrentStream(state);
71 |
72 | ROIAlignBackwardLaucher(
73 | grads_ptr, boxes_ptr, num_boxes, batch, image_height, image_width,
74 | crop_height, crop_width, depth, grads_image_ptr, stream);
75 | return 1;
76 | }
77 |
--------------------------------------------------------------------------------
/lib/fpn/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int crop_height, int crop_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 |
4 | int roi_align_backward_cuda(int crop_height, int crop_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois,
6 | THCudaTensor * bottom_grad);
7 |
--------------------------------------------------------------------------------
/lib/get_dataset_counts.py:
--------------------------------------------------------------------------------
1 | """
2 | Get counts of all of the examples in the dataset.
Used for creating the baseline 3 | dictionary model 4 | """ 5 | 6 | import numpy as np 7 | from dataloaders.visual_genome import VG 8 | from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps 9 | from lib.pytorch_misc import nonintersecting_2d_inds 10 | 11 | 12 | def get_counts(train_data=VG(mode='train', filter_duplicate_rels=False, num_val_im=5000), must_overlap=True): 13 | """ 14 | Get counts of all of the relations. Used for modeling directly P(rel | o1, o2) 15 | :param train_data: 16 | :param must_overlap: 17 | :return: 18 | """ 19 | fg_matrix = np.zeros(( 20 | train_data.num_classes, 21 | train_data.num_classes, 22 | train_data.num_predicates, 23 | ), dtype=np.int64) 24 | 25 | bg_matrix = np.zeros(( 26 | train_data.num_classes, 27 | train_data.num_classes, 28 | ), dtype=np.int64) 29 | 30 | for ex_ind in range(len(train_data)): 31 | gt_classes = train_data.gt_classes[ex_ind].copy() 32 | gt_relations = train_data.relationships[ex_ind].copy() 33 | gt_boxes = train_data.gt_boxes[ex_ind].copy() 34 | 35 | # For the foreground, we'll just look at everything 36 | o1o2 = gt_classes[gt_relations[:, :2]] 37 | for (o1, o2), gtr in zip(o1o2, gt_relations[:,2]): 38 | fg_matrix[o1, o2, gtr] += 1 39 | 40 | # For the background, get all of the things that overlap. 41 | o1o2_total = gt_classes[np.array( 42 | box_filter(gt_boxes, must_overlap=must_overlap), dtype=int)] 43 | for (o1, o2) in o1o2_total: 44 | bg_matrix[o1, o2] += 1 45 | 46 | return fg_matrix, bg_matrix 47 | 48 | 49 | def box_filter(boxes, must_overlap=False): 50 | """ Only include boxes that overlap as possible relations. 51 | If no overlapping boxes, use all of them.""" 52 | n_cands = boxes.shape[0] 53 | 54 | overlaps = bbox_overlaps(boxes.astype(np.float), boxes.astype(np.float)) > 0 55 | np.fill_diagonal(overlaps, 0) 56 | 57 | all_possib = np.ones_like(overlaps, dtype=np.bool) 58 | np.fill_diagonal(all_possib, 0) 59 | 60 | if must_overlap: 61 | possible_boxes = np.column_stack(np.where(overlaps)) 62 | 63 | if possible_boxes.size == 0: 64 | possible_boxes = np.column_stack(np.where(all_possib)) 65 | else: 66 | possible_boxes = np.column_stack(np.where(all_possib)) 67 | return possible_boxes 68 | 69 | if __name__ == '__main__': 70 | fg, bg = get_counts(must_overlap=False) 71 | -------------------------------------------------------------------------------- /lib/get_union_boxes.py: -------------------------------------------------------------------------------- 1 | """ 2 | credits to https://github.com/ruotianluo/pytorch-faster-rcnn/blob/master/lib/nets/network.py#L91 3 | """ 4 | 5 | import torch 6 | from torch.autograd import Variable 7 | from torch.nn import functional as F 8 | from lib.fpn.roi_align.functions.roi_align import RoIAlignFunction 9 | from lib.draw_rectangles.draw_rectangles import draw_union_boxes 10 | import numpy as np 11 | from torch.nn.modules.module import Module 12 | from torch import nn 13 | from config import BATCHNORM_MOMENTUM 14 | 15 | class UnionBoxesAndFeats(Module): 16 | def __init__(self, pooling_size=7, stride=16, dim=256, concat=False, use_feats=True): 17 | """ 18 | :param pooling_size: Pool the union boxes to this dimension 19 | :param stride: pixel spacing in the entire image 20 | :param dim: Dimension of the feats 21 | :param concat: Whether to concat (yes) or add (False) the representations 22 | """ 23 | super(UnionBoxesAndFeats, self).__init__() 24 | 25 | self.pooling_size = pooling_size 26 | self.stride = stride 27 | 28 | self.dim = dim 29 | self.use_feats = use_feats 30 | 31 | self.conv = 
nn.Sequential( 32 | nn.Conv2d(2, dim //2, kernel_size=7, stride=2, padding=3, bias=True), 33 | nn.ReLU(inplace=True), 34 | nn.BatchNorm2d(dim//2, momentum=BATCHNORM_MOMENTUM), 35 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1), 36 | nn.Conv2d(dim // 2, dim, kernel_size=3, stride=1, padding=1, bias=True), 37 | nn.ReLU(inplace=True), 38 | nn.BatchNorm2d(dim, momentum=BATCHNORM_MOMENTUM), 39 | ) 40 | self.concat = concat 41 | 42 | def forward(self, fmap, rois, union_inds): 43 | union_pools = union_boxes(fmap, rois, union_inds, pooling_size=self.pooling_size, stride=self.stride) 44 | if not self.use_feats: 45 | return union_pools.detach() 46 | 47 | pair_rois = torch.cat((rois[:, 1:][union_inds[:, 0]], rois[:, 1:][union_inds[:, 1]]),1).data.cpu().numpy() 48 | # rects_np = get_rect_features(pair_rois, self.pooling_size*2-1) - 0.5 49 | rects_np = draw_union_boxes(pair_rois, self.pooling_size*4-1) - 0.5 50 | rects = Variable(torch.FloatTensor(rects_np).cuda(fmap.get_device()), volatile=fmap.volatile) 51 | if self.concat: 52 | return torch.cat((union_pools, self.conv(rects)), 1) 53 | return union_pools + self.conv(rects) 54 | 55 | # def get_rect_features(roi_pairs, pooling_size): 56 | # rects_np = draw_union_boxes(roi_pairs, pooling_size) 57 | # # add union + intersection 58 | # stuff_to_cat = [ 59 | # rects_np.max(1), 60 | # rects_np.min(1), 61 | # np.minimum(1-rects_np[:,0], rects_np[:,1]), 62 | # np.maximum(1-rects_np[:,0], rects_np[:,1]), 63 | # np.minimum(rects_np[:,0], 1-rects_np[:,1]), 64 | # np.maximum(rects_np[:,0], 1-rects_np[:,1]), 65 | # np.minimum(1-rects_np[:,0], 1-rects_np[:,1]), 66 | # np.maximum(1-rects_np[:,0], 1-rects_np[:,1]), 67 | # ] 68 | # rects_np = np.concatenate([rects_np] + [x[:,None] for x in stuff_to_cat], 1) 69 | # return rects_np 70 | 71 | 72 | def union_boxes(fmap, rois, union_inds, pooling_size=14, stride=16): 73 | """ 74 | :param fmap: (batch_size, d, IM_SIZE/stride, IM_SIZE/stride) 75 | :param rois: (num_rois, 5) with [im_ind, x1, y1, x2, y2] 76 | :param union_inds: (num_urois, 2) with [roi_ind1, roi_ind2] 77 | :param pooling_size: we'll resize to this 78 | :param stride: 79 | :return: 80 | """ 81 | assert union_inds.size(1) == 2 82 | im_inds = rois[:,0][union_inds[:,0]] 83 | assert (im_inds.data == rois.data[:,0][union_inds[:,1]]).sum() == union_inds.size(0) 84 | union_rois = torch.cat(( 85 | im_inds[:,None], 86 | torch.min(rois[:, 1:3][union_inds[:, 0]], rois[:, 1:3][union_inds[:, 1]]), 87 | torch.max(rois[:, 3:5][union_inds[:, 0]], rois[:, 3:5][union_inds[:, 1]]), 88 | ),1) 89 | 90 | # (num_rois, d, pooling_size, pooling_size) 91 | union_pools = RoIAlignFunction(pooling_size, pooling_size, 92 | spatial_scale=1/stride)(fmap, union_rois) 93 | return union_pools 94 | 95 | -------------------------------------------------------------------------------- /lib/lstm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/lstm/__init__.py -------------------------------------------------------------------------------- /lib/lstm/highway_lstm_cuda/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/lstm/highway_lstm_cuda/__init__.py -------------------------------------------------------------------------------- 
/lib/lstm/highway_lstm_cuda/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/lstm/highway_lstm_cuda/_ext/__init__.py -------------------------------------------------------------------------------- /lib/lstm/highway_lstm_cuda/_ext/highway_lstm_layer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._highway_lstm_layer import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /lib/lstm/highway_lstm_cuda/build.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | if not torch.cuda.is_available(): 7 | raise Exception('HighwayLSTM can only be compiled with CUDA') 8 | 9 | sources = ['src/highway_lstm_cuda.c'] 10 | headers = ['src/highway_lstm_cuda.h'] 11 | defines = [('WITH_CUDA', None)] 12 | with_cuda = True 13 | 14 | this_file = os.path.dirname(os.path.realpath(__file__)) 15 | extra_objects = ['src/highway_lstm_kernel.cu.o'] 16 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 17 | 18 | ffi = create_extension( 19 | '_ext.highway_lstm_layer', 20 | headers=headers, 21 | sources=sources, 22 | define_macros=defines, 23 | relative_to=__file__, 24 | with_cuda=with_cuda, 25 | extra_objects=extra_objects 26 | ) 27 | 28 | if __name__ == '__main__': 29 | ffi.build() 30 | -------------------------------------------------------------------------------- /lib/lstm/highway_lstm_cuda/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | # Which CUDA capabilities do we want to pre-build for? 6 | # https://developer.nvidia.com/cuda-gpus 7 | # Compute/shader model Cards 8 | # 61 P4, P40, Titan X 9 | # 60 P100 10 | # 52 M40 11 | # 37 K80 12 | # 35 K40, K20 13 | # 30 K10, Grid K520 (AWS G2) 14 | 15 | CUDA_MODELS=(52 61) 16 | 17 | # Nvidia doesn't guarantee binary compatability across GPU versions. 18 | # However, binary compatibility within one GPU generation can be guaranteed 19 | # under certain conditions because they share the basic instruction set. 20 | # This is the case between two GPU versions that do not show functional 21 | # differences at all (for instance when one version is a scaled down version 22 | # of the other), or when one version is functionally included in the other. 23 | 24 | # To fix this problem, we can create a 'fat binary' which generates multiple 25 | # translations of the CUDA source. The most appropriate version is chosen at 26 | # runtime by the CUDA driver. 
See: 27 | # http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-compilation 28 | # http://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#fatbinaries 29 | CUDA_MODEL_TARGETS="" 30 | for i in "${CUDA_MODELS[@]}" 31 | do 32 | CUDA_MODEL_TARGETS+=" -gencode arch=compute_${i},code=sm_${i}" 33 | done 34 | 35 | echo "Building kernel for following target architectures: " 36 | echo $CUDA_MODEL_TARGETS 37 | 38 | cd src 39 | echo "Compiling kernel" 40 | /usr/local/cuda/bin/nvcc -c -o highway_lstm_kernel.cu.o highway_lstm_kernel.cu --compiler-options -fPIC $CUDA_MODEL_TARGETS 41 | cd ../ 42 | python build.py 43 | -------------------------------------------------------------------------------- /lib/lstm/highway_lstm_cuda/src/highway_lstm_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "highway_lstm_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | int highway_lstm_forward_cuda(int inputSize, int hiddenSize, int miniBatch, 7 | int numLayers, int seqLength, 8 | THCudaTensor *x, 9 | THIntTensor *lengths, 10 | THCudaTensor *h_data, 11 | THCudaTensor *c_data, 12 | THCudaTensor *tmp_i, 13 | THCudaTensor *tmp_h, 14 | THCudaTensor *T, 15 | THCudaTensor *bias, 16 | THCudaTensor *dropout, 17 | THCudaTensor *gates, 18 | int isTraining) { 19 | 20 | float * x_ptr = THCudaTensor_data(state, x); 21 | int * lengths_ptr = THIntTensor_data(lengths); 22 | float * h_data_ptr = THCudaTensor_data(state, h_data); 23 | float * c_data_ptr = THCudaTensor_data(state, c_data); 24 | float * tmp_i_ptr = THCudaTensor_data(state, tmp_i); 25 | float * tmp_h_ptr = THCudaTensor_data(state, tmp_h); 26 | float * T_ptr = THCudaTensor_data(state, T); 27 | float * bias_ptr = THCudaTensor_data(state, bias); 28 | float * dropout_ptr = THCudaTensor_data(state, dropout); 29 | float * gates_ptr; 30 | if (isTraining == 1) { 31 | gates_ptr = THCudaTensor_data(state, gates); 32 | } else { 33 | gates_ptr = NULL; 34 | } 35 | 36 | cudaStream_t stream = THCState_getCurrentStream(state); 37 | cublasHandle_t handle = THCState_getCurrentBlasHandle(state); 38 | 39 | highway_lstm_forward_ongpu(inputSize, hiddenSize, miniBatch, numLayers, 40 | seqLength, x_ptr, lengths_ptr, h_data_ptr, c_data_ptr, tmp_i_ptr, 41 | tmp_h_ptr, T_ptr, bias_ptr, dropout_ptr, gates_ptr, 42 | isTraining, stream, handle); 43 | 44 | return 1; 45 | 46 | } 47 | 48 | int highway_lstm_backward_cuda(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, 49 | THCudaTensor *out_grad, 50 | THIntTensor *lengths, 51 | THCudaTensor *h_data_grad, 52 | THCudaTensor *c_data_grad, 53 | THCudaTensor *x, 54 | THCudaTensor *h_data, 55 | THCudaTensor *c_data, 56 | THCudaTensor *T, 57 | THCudaTensor *gates_out, 58 | THCudaTensor *dropout_in, 59 | THCudaTensor *h_gates_grad, 60 | THCudaTensor *i_gates_grad, 61 | THCudaTensor *h_out_grad, 62 | THCudaTensor *x_grad, 63 | THCudaTensor *T_grad, 64 | THCudaTensor *bias_grad, 65 | int isTraining, 66 | int do_weight_grad) { 67 | 68 | float * out_grad_ptr = THCudaTensor_data(state, out_grad); 69 | int * lengths_ptr = THIntTensor_data(lengths); 70 | float * h_data_grad_ptr = THCudaTensor_data(state, h_data_grad); 71 | float * c_data_grad_ptr = THCudaTensor_data(state, c_data_grad); 72 | float * x_ptr = THCudaTensor_data(state, x); 73 | float * h_data_ptr = THCudaTensor_data(state, h_data); 74 | float * c_data_ptr = THCudaTensor_data(state, c_data); 75 | float * T_ptr = THCudaTensor_data(state, T); 76 | float * gates_out_ptr = THCudaTensor_data(state, 
gates_out); 77 | float * dropout_in_ptr = THCudaTensor_data(state, dropout_in); 78 | float * h_gates_grad_ptr = THCudaTensor_data(state, h_gates_grad); 79 | float * i_gates_grad_ptr = THCudaTensor_data(state, i_gates_grad); 80 | float * h_out_grad_ptr = THCudaTensor_data(state, h_out_grad); 81 | float * x_grad_ptr = THCudaTensor_data(state, x_grad); 82 | float * T_grad_ptr = THCudaTensor_data(state, T_grad); 83 | float * bias_grad_ptr = THCudaTensor_data(state, bias_grad); 84 | 85 | cudaStream_t stream = THCState_getCurrentStream(state); 86 | cublasHandle_t handle = THCState_getCurrentBlasHandle(state); 87 | 88 | highway_lstm_backward_ongpu(inputSize, hiddenSize, miniBatch, numLayers, 89 | seqLength, out_grad_ptr, lengths_ptr, h_data_grad_ptr, c_data_grad_ptr, 90 | x_ptr, h_data_ptr, c_data_ptr, T_ptr, gates_out_ptr, dropout_in_ptr, 91 | h_gates_grad_ptr, i_gates_grad_ptr, h_out_grad_ptr, 92 | x_grad_ptr, T_grad_ptr, bias_grad_ptr, isTraining, do_weight_grad, 93 | stream, handle); 94 | 95 | return 1; 96 | 97 | } 98 | -------------------------------------------------------------------------------- /lib/lstm/highway_lstm_cuda/src/highway_lstm_cuda.h: -------------------------------------------------------------------------------- 1 | int highway_lstm_forward_cuda(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, 2 | THCudaTensor *x, THIntTensor *lengths, THCudaTensor *h_data, 3 | THCudaTensor *c_data, THCudaTensor *tmp_i, 4 | THCudaTensor *tmp_h, THCudaTensor *T, THCudaTensor *bias, 5 | THCudaTensor *dropout, THCudaTensor *gates, int isTraining); 6 | 7 | int highway_lstm_backward_cuda(int inputSize, int hiddenSize, int miniBatch, 8 | int numLayers, int seqLength, THCudaTensor *out_grad, THIntTensor *lengths, 9 | THCudaTensor *h_data_grad, THCudaTensor *c_data_grad, THCudaTensor *x, 10 | THCudaTensor *h_data, THCudaTensor *c_data, THCudaTensor *T, 11 | THCudaTensor *gates_out, THCudaTensor *dropout_in, 12 | THCudaTensor *h_gates_grad, THCudaTensor *i_gates_grad, 13 | THCudaTensor *h_out_grad, THCudaTensor *x_grad, THCudaTensor *T_grad, 14 | THCudaTensor *bias_grad, int isTraining, int do_weight_grad); 15 | -------------------------------------------------------------------------------- /lib/lstm/highway_lstm_cuda/src/highway_lstm_kernel.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | void highway_lstm_forward_ongpu(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, float *x, int *lengths, float*h_data, float *c_data, float *tmp_i, float *tmp_h, float *T, float *bias, float *dropout, float *gates, int is_training, cudaStream_t stream, cublasHandle_t handle); 8 | 9 | void highway_lstm_backward_ongpu(int inputSize, int hiddenSize, int miniBatch, int numLayers, int seqLength, float *out_grad, int *lengths, float *h_data_grad, float *c_data_grad, float *x, float *h_data, float *c_data, float *T, float *gates_out, float *dropout_in, float *h_gates_grad, float *i_gates_grad, float *h_out_grad, float *x_grad, float *T_grad, float *bias_grad, int isTraining, int do_weight_grad, cudaStream_t stream, cublasHandle_t handle); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | -------------------------------------------------------------------------------- /lib/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import math 3 | import torch.utils.model_zoo as model_zoo 4 | 
from torchvision.models.resnet import model_urls, conv3x3, BasicBlock 5 | from torchvision.models.vgg import vgg16 6 | from config import BATCHNORM_MOMENTUM 7 | 8 | class Bottleneck(nn.Module): 9 | expansion = 4 10 | 11 | def __init__(self, inplanes, planes, stride=1, downsample=None, relu_end=True): 12 | super(Bottleneck, self).__init__() 13 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 15 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(planes, momentum=BATCHNORM_MOMENTUM) 18 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 19 | self.bn3 = nn.BatchNorm2d(planes * 4, momentum=BATCHNORM_MOMENTUM) 20 | self.relu = nn.ReLU(inplace=True) 21 | self.downsample = downsample 22 | self.stride = stride 23 | self.relu_end = relu_end 24 | 25 | def forward(self, x): 26 | residual = x 27 | 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu(out) 31 | 32 | out = self.conv2(out) 33 | out = self.bn2(out) 34 | out = self.relu(out) 35 | 36 | out = self.conv3(out) 37 | out = self.bn3(out) 38 | 39 | if self.downsample is not None: 40 | residual = self.downsample(x) 41 | 42 | out += residual 43 | 44 | if self.relu_end: 45 | out = self.relu(out) 46 | return out 47 | 48 | 49 | class ResNet(nn.Module): 50 | 51 | def __init__(self, block, layers, num_classes=1000): 52 | self.inplanes = 64 53 | super(ResNet, self).__init__() 54 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 55 | bias=False) 56 | self.bn1 = nn.BatchNorm2d(64, momentum=BATCHNORM_MOMENTUM) 57 | self.relu = nn.ReLU(inplace=True) 58 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 59 | self.layer1 = self._make_layer(block, 64, layers[0]) 60 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 61 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 62 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1) # HACK 63 | self.avgpool = nn.AvgPool2d(7) 64 | self.fc = nn.Linear(512 * block.expansion, num_classes) 65 | 66 | for m in self.modules(): 67 | if isinstance(m, nn.Conv2d): 68 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 69 | m.weight.data.normal_(0, math.sqrt(2. / n)) 70 | elif isinstance(m, nn.BatchNorm2d): 71 | m.weight.data.fill_(1) 72 | m.bias.data.zero_() 73 | 74 | def _make_layer(self, block, planes, blocks, stride=1): 75 | downsample = None 76 | if stride != 1 or self.inplanes != planes * block.expansion: 77 | downsample = nn.Sequential( 78 | nn.Conv2d(self.inplanes, planes * block.expansion, 79 | kernel_size=1, stride=stride, bias=False), 80 | nn.BatchNorm2d(planes * block.expansion, momentum=BATCHNORM_MOMENTUM), 81 | ) 82 | 83 | layers = [] 84 | layers.append(block(self.inplanes, planes, stride, downsample)) 85 | self.inplanes = planes * block.expansion 86 | for i in range(1, blocks): 87 | layers.append(block(self.inplanes, planes)) 88 | 89 | return nn.Sequential(*layers) 90 | 91 | def forward(self, x): 92 | x = self.conv1(x) 93 | x = self.bn1(x) 94 | x = self.relu(x) 95 | x = self.maxpool(x) 96 | 97 | x = self.layer1(x) 98 | x = self.layer2(x) 99 | x = self.layer3(x) 100 | x = self.layer4(x) 101 | 102 | x = self.avgpool(x) 103 | x = x.view(x.size(0), -1) 104 | x = self.fc(x) 105 | 106 | return x 107 | 108 | def resnet101(pretrained=False, **kwargs): 109 | """Constructs a ResNet-101 model. 
110 | 111 | Args: 112 | pretrained (bool): If True, returns a model pre-trained on ImageNet 113 | """ 114 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 115 | if pretrained: 116 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 117 | return model 118 | 119 | def resnet_l123(): 120 | model = resnet101(pretrained=True) 121 | del model.layer4 122 | del model.avgpool 123 | del model.fc 124 | return model 125 | 126 | def resnet_l4(relu_end=True): 127 | model = resnet101(pretrained=True) 128 | l4 = model.layer4 129 | if not relu_end: 130 | l4[-1].relu_end = False 131 | l4[0].conv2.stride = (1, 1) 132 | l4[0].downsample[0].stride = (1, 1) 133 | return l4 134 | 135 | def vgg_fc(relu_end=True, linear_end=True): 136 | model = vgg16(pretrained=True) 137 | vfc = model.classifier 138 | del vfc._modules['6'] # Get rid of linear layer 139 | del vfc._modules['5'] # Get rid of linear layer 140 | if not relu_end: 141 | del vfc._modules['4'] # Get rid of linear layer 142 | if not linear_end: 143 | del vfc._modules['3'] 144 | return vfc 145 | 146 | 147 | -------------------------------------------------------------------------------- /lib/sparse_targets.py: -------------------------------------------------------------------------------- 1 | from lib.word_vectors import obj_edge_vectors 2 | import torch.nn as nn 3 | import torch 4 | from torch.autograd import Variable 5 | import numpy as np 6 | from config import DATA_PATH 7 | import os 8 | from lib.get_dataset_counts import get_counts 9 | 10 | 11 | class FrequencyBias(nn.Module): 12 | """ 13 | The goal of this is to provide a simplified way of computing 14 | P(predicate | obj1, obj2, img). 15 | """ 16 | 17 | def __init__(self, eps=1e-3): 18 | super(FrequencyBias, self).__init__() 19 | 20 | fg_matrix, bg_matrix = get_counts(must_overlap=True) 21 | bg_matrix += 1 22 | fg_matrix[:, :, 0] = bg_matrix 23 | 24 | pred_dist = np.log(fg_matrix / fg_matrix.sum(2)[:, :, None] + eps) 25 | 26 | self.num_objs = pred_dist.shape[0] 27 | pred_dist = torch.FloatTensor(pred_dist).view(-1, pred_dist.shape[2]) 28 | 29 | self.obj_baseline = nn.Embedding(pred_dist.size(0), pred_dist.size(1)) 30 | self.obj_baseline.weight.data = pred_dist 31 | 32 | def index_with_labels(self, labels): 33 | """ 34 | :param labels: [batch_size, 2] 35 | :return: 36 | """ 37 | return self.obj_baseline(labels[:, 0] * self.num_objs + labels[:, 1]) 38 | 39 | def forward(self, obj_cands0, obj_cands1): 40 | """ 41 | :param obj_cands0: [batch_size, 151] prob distibution over cands. 42 | :param obj_cands1: [batch_size, 151] prob distibution over cands. 43 | :return: [batch_size, #predicates] array, which contains potentials for 44 | each possibility 45 | """ 46 | # [batch_size, 151, 151] repr of the joint distribution 47 | joint_cands = obj_cands0[:, :, None] * obj_cands1[:, None] 48 | 49 | # [151, 151, 51] of targets per. 50 | baseline = joint_cands.view(joint_cands.size(0), -1) @ self.obj_baseline.weight 51 | 52 | return baseline 53 | -------------------------------------------------------------------------------- /lib/surgery.py: -------------------------------------------------------------------------------- 1 | # create predictions from the other stuff 2 | """ 3 | Go from proposals + scores to relationships. 
4 | 5 | pred-cls: No bbox regression, obj dist is exactly known 6 | sg-cls : No bbox regression 7 | sg-det : Bbox regression 8 | 9 | in all cases we'll return: 10 | boxes, objs, rels, pred_scores 11 | 12 | """ 13 | 14 | import numpy as np 15 | import torch 16 | from lib.pytorch_misc import unravel_index 17 | from lib.fpn.box_utils import bbox_overlaps 18 | # from ad3 import factor_graph as fg 19 | from time import time 20 | 21 | def filter_dets(boxes, obj_scores, obj_classes, rel_inds, pred_scores, gt_boxes, gt_classes, gt_rels): 22 | """ 23 | Filters detections.... 24 | :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 25 | :param obj_scores: [num_box] probabilities for the scores 26 | :param obj_classes: [num_box] class labels for the topk 27 | :param rel_inds: [num_rel, 2] TENSOR consisting of (im_ind0, im_ind1) 28 | :param pred_scores: [topk, topk, num_rel, num_predicates] 29 | :param use_nms: True if use NMS to filter dets. 30 | :return: boxes, objs, rels, pred_scores 31 | 32 | """ 33 | if boxes.dim() != 2: 34 | raise ValueError("Boxes needs to be [num_box, 4] but its {}".format(boxes.size())) 35 | 36 | num_box = boxes.size(0) 37 | assert obj_scores.size(0) == num_box 38 | 39 | assert obj_classes.size() == obj_scores.size() 40 | num_rel = rel_inds.size(0) 41 | assert rel_inds.size(1) == 2 42 | assert pred_scores.size(0) == num_rel 43 | 44 | obj_scores0 = obj_scores.data[rel_inds[:,0]] 45 | obj_scores1 = obj_scores.data[rel_inds[:,1]] 46 | 47 | pred_scores_max, pred_classes_argmax = pred_scores.data[:,1:].max(1) 48 | pred_classes_argmax = pred_classes_argmax + 1 49 | 50 | rel_scores_argmaxed = pred_scores_max * obj_scores0 * obj_scores1 51 | rel_scores_vs, rel_scores_idx = torch.sort(rel_scores_argmaxed.view(-1), dim=0, descending=True) 52 | 53 | rels = rel_inds[rel_scores_idx].cpu().numpy() 54 | pred_scores_sorted = pred_scores[rel_scores_idx].data.cpu().numpy() 55 | obj_scores_np = obj_scores.data.cpu().numpy() 56 | objs_np = obj_classes.data.cpu().numpy() 57 | boxes_out = boxes.data.cpu().numpy() 58 | 59 | return boxes_out, objs_np, obj_scores_np, rels, pred_scores_sorted, gt_boxes, gt_classes, gt_rels 60 | 61 | # def _get_similar_boxes(boxes, obj_classes_topk, nms_thresh=0.3): 62 | # """ 63 | # Assuming bg is NOT A LABEL. 64 | # :param boxes: [num_box, topk, 4] if bbox regression else [num_box, 4] 65 | # :param obj_classes: [num_box, topk] class labels 66 | # :return: num_box, topk, num_box, topk array containing similarities. 67 | # """ 68 | # topk = obj_classes_topk.size(1) 69 | # num_box = boxes.size(0) 70 | # 71 | # box_flat = boxes.view(-1, 4) if boxes.dim() == 3 else boxes[:, None].expand( 72 | # num_box, topk, 4).contiguous().view(-1, 4) 73 | # jax = bbox_overlaps(box_flat, box_flat).data > nms_thresh 74 | # # Filter out things that are not gonna compete. 
75 | # classes_eq = obj_classes_topk.data.view(-1)[:, None] == obj_classes_topk.data.view(-1)[None, :] 76 | # jax &= classes_eq 77 | # boxes_are_similar = jax.view(num_box, topk, num_box, topk) 78 | # return boxes_are_similar.cpu().numpy().astype(np.bool) 79 | -------------------------------------------------------------------------------- /lib/tree_lstm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KaihuaTang/VCTree-Scene-Graph-Generation/1d7983123c534f72786b241046f8d2a007756d31/lib/tree_lstm/__init__.py -------------------------------------------------------------------------------- /lib/tree_lstm/decoder_tree_lstm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from torch.nn.utils.rnn import PackedSequence 6 | from typing import Optional, Tuple 7 | 8 | from lib.tree_lstm import tree_utils 9 | from lib.tree_lstm.tree_lstm import BiTreeLSTM_Backward, BiTreeLSTM_Foreward 10 | from lib.fpn.box_utils import nms_overlaps 11 | from lib.word_vectors import obj_edge_vectors 12 | from lib.lstm.highway_lstm_cuda.alternating_highway_lstm import block_orthogonal 13 | import numpy as np 14 | 15 | class DecoderTreeLSTM(torch.nn.Module): 16 | def __init__(self, classes, embed_dim, inputs_dim, hidden_dim, direction='backward', dropout=0.2, pass_root = False, not_rl = True): 17 | super(DecoderTreeLSTM, self).__init__() 18 | """ 19 | Initializes the RNN 20 | :param embed_dim: Dimension of the embeddings 21 | :param encoder_hidden_dim: Hidden dim of the encoder, for attention purposes 22 | :param hidden_dim: Hidden dim of the decoder 23 | :param vocab_size: Number of words in the vocab 24 | :param bos_token: To use during decoding (non teacher forcing mode)) 25 | :param bos: beginning of sentence token 26 | :param unk: unknown token (not used) 27 | direction = foreward | backward 28 | """ 29 | self.classes = classes 30 | self.hidden_size = hidden_dim 31 | self.inputs_dim = inputs_dim 32 | self.nms_thresh = 0.5 33 | self.dropout = dropout 34 | self.pass_root = pass_root 35 | # generate embed layer 36 | embed_vecs = obj_edge_vectors(['start'] + self.classes, wv_dim=embed_dim) 37 | self.obj_embed = nn.Embedding(len(self.classes), embed_dim) 38 | self.obj_embed.weight.data = embed_vecs 39 | # generate out layer 40 | self.out = nn.Linear(self.hidden_size, len(self.classes)) 41 | self.out.weight = torch.nn.init.xavier_normal(self.out.weight, gain=1.0) 42 | self.out.bias.data.fill_(0.0) 43 | self.not_rl = not_rl 44 | 45 | if direction == 'backward': 46 | self.input_size = inputs_dim + embed_dim 47 | self.decoderLSTM = BiTreeLSTM_Backward(self.input_size, self.hidden_size, self.pass_root, is_pass_embed=True, embed_layer=self.obj_embed, embed_out_layer=self.out, not_rl=not_rl) 48 | elif direction == 'foreward': 49 | self.input_size = inputs_dim + embed_dim * 2 50 | self.decoderLSTM = BiTreeLSTM_Foreward(self.input_size, self.hidden_size, self.pass_root, is_pass_embed=True, embed_layer=self.obj_embed, embed_out_layer=self.out, not_rl=not_rl) 51 | else: 52 | print('Error Decoder LSTM Direction') 53 | 54 | 55 | def forward(self, forest, features, num_obj, labels=None, boxes_for_nms=None, batch_size=0): 56 | # generate dropout 57 | if self.dropout > 0.0: 58 | dropout_mask = get_dropout_mask(self.dropout, self.hidden_size) 59 | else: 60 | dropout_mask = None 61 | 62 | # generate 
tree lstm input/output class 63 | out_h = None 64 | out_dists = None 65 | out_commitments = None 66 | h_order = Variable(torch.LongTensor(num_obj).zero_().cuda()) 67 | order_idx = 0 68 | lstm_io = tree_utils.TreeLSTM_IO(out_h, h_order, order_idx, out_dists, out_commitments, dropout_mask) 69 | 70 | for idx in range(len(forest)): 71 | self.decoderLSTM(forest[idx], features, lstm_io) 72 | 73 | out_h = torch.index_select(lstm_io.hidden, 0, lstm_io.order.long()) 74 | out_dists = torch.index_select(lstm_io.dists, 0, lstm_io.order.long())[:-batch_size] 75 | out_commitments = torch.index_select(lstm_io.commitments, 0, lstm_io.order.long())[:-batch_size] 76 | 77 | # Do NMS here as a post-processing step 78 | if boxes_for_nms is not None and not self.training and self.not_rl: 79 | is_overlap = nms_overlaps(boxes_for_nms.data).view( 80 | boxes_for_nms.size(0), boxes_for_nms.size(0), boxes_for_nms.size(1) 81 | ).cpu().numpy() >= self.nms_thresh 82 | # is_overlap[np.arange(boxes_for_nms.size(0)), np.arange(boxes_for_nms.size(0))] = False 83 | 84 | out_dists_sampled = F.softmax(out_dists, 1).data.cpu().numpy() 85 | out_dists_sampled[:,0] = 0 86 | 87 | out_commitments = out_commitments.data.new(out_commitments.shape[0]).fill_(0) 88 | 89 | for i in range(out_commitments.size(0)): 90 | box_ind, cls_ind = np.unravel_index(out_dists_sampled.argmax(), out_dists_sampled.shape) 91 | out_commitments[int(box_ind)] = int(cls_ind) 92 | out_dists_sampled[is_overlap[box_ind,:,cls_ind], cls_ind] = 0.0 93 | out_dists_sampled[box_ind] = -1.0 # This way we won't re-sample 94 | 95 | out_commitments = Variable(out_commitments.view(-1)) 96 | else: 97 | out_commitments = out_commitments.view(-1) 98 | 99 | if self.training and self.not_rl and (labels is not None): 100 | out_commitments = labels.clone() 101 | else: 102 | out_commitments = torch.cat((out_commitments, Variable(torch.randn(batch_size).long().fill_(0).cuda()).view(-1)), 0) 103 | 104 | return out_dists, out_commitments 105 | 106 | 107 | def get_dropout_mask(dropout_probability: float, h_dim: int): 108 | """ 109 | Computes and returns an element-wise dropout mask for a given tensor, where 110 | each element in the mask is dropped out with probability dropout_probability. 111 | Note that the mask is NOT applied to the tensor - the tensor is passed to retain 112 | the correct CUDA tensor type for the mask. 113 | 114 | Parameters 115 | ---------- 116 | dropout_probability : float, required. 117 | Probability of dropping a dimension of the input. 118 | tensor_for_masking : torch.Variable, required. 119 | 120 | 121 | Returns 122 | ------- 123 | A torch.FloatTensor consisting of the binary mask scaled by 1/ (1 - dropout_probability). 124 | This scaling ensures expected values and variances of the output of applying this mask 125 | and the original tensor are the same. 126 | """ 127 | binary_mask = Variable(torch.FloatTensor(h_dim).cuda().fill_(0.0)) 128 | binary_mask.data.copy_(torch.rand(h_dim) > dropout_probability) 129 | # Scale mask by 1/keep_prob to preserve output statistics. 
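# Illustrative note (added for clarity, not in the original source): with
# dropout_probability = 0.2, each surviving entry of binary_mask is divided by
# (1 - 0.2), i.e. scaled by 1.25, so E[dropout_mask * h] equals h.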
130 | dropout_mask = binary_mask.float().div(1.0 - dropout_probability) 131 | return dropout_mask -------------------------------------------------------------------------------- /lib/tree_lstm/def_tree.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from torch.nn.utils.rnn import PackedSequence 6 | import numpy as np 7 | 8 | from config import BOX_SCALE, IM_SCALE 9 | 10 | 11 | class BasicBiTree(object): 12 | def __init__(self, idx, is_root=False): 13 | self.index = int(idx) 14 | self.is_root = is_root 15 | self.left_child = None 16 | self.right_child = None 17 | self.parent = None 18 | self.num_child = 0 19 | 20 | def add_left_child(self, child): 21 | if self.left_child is not None: 22 | print('Left child already exist') 23 | return 24 | child.parent = self 25 | self.num_child += 1 26 | self.left_child = child 27 | 28 | def add_right_child(self, child): 29 | if self.right_child is not None: 30 | print('Right child already exist') 31 | return 32 | child.parent = self 33 | self.num_child += 1 34 | self.right_child = child 35 | 36 | def get_total_child(self): 37 | sum = 0 38 | sum += self.num_child 39 | if self.left_child is not None: 40 | sum += self.left_child.get_total_child() 41 | if self.right_child is not None: 42 | sum += self.right_child.get_total_child() 43 | return sum 44 | 45 | def depth(self): 46 | if hasattr(self, '_depth'): 47 | return self._depth 48 | if self.parent is None: 49 | count = 1 50 | else: 51 | count = self.parent.depth() + 1 52 | self._depth = count 53 | return self._depth 54 | 55 | def max_depth(self): 56 | if hasattr(self, '_max_depth'): 57 | return self._max_depth 58 | count = 0 59 | if self.left_child is not None: 60 | left_depth = self.left_child.max_depth() 61 | if left_depth > count: 62 | count = left_depth 63 | if self.right_child is not None: 64 | right_depth = self.right_child.max_depth() 65 | if right_depth > count: 66 | count = right_depth 67 | count += 1 68 | self._max_depth = count 69 | return self._max_depth 70 | 71 | # by index 72 | def is_descendant(self, idx): 73 | left_flag = False 74 | right_flag = False 75 | # node is left child 76 | if self.left_child is not None: 77 | if self.left_child.index is idx: 78 | return True 79 | else: 80 | left_flag = self.left_child.is_descendant(idx) 81 | # node is right child 82 | if self.right_child is not None: 83 | if self.right_child.index is idx: 84 | return True 85 | else: 86 | right_flag = self.right_child.is_descendant(idx) 87 | # node is descendant 88 | if left_flag or right_flag: 89 | return True 90 | else: 91 | return False 92 | 93 | # whether input node is under left sub tree 94 | def is_left_descendant(self, idx): 95 | if self.left_child is not None: 96 | if self.left_child.index is idx: 97 | return True 98 | else: 99 | return self.left_child.is_descendant(idx) 100 | else: 101 | return False 102 | 103 | # whether input node is under right sub tree 104 | def is_right_descendant(self, idx): 105 | if self.right_child is not None: 106 | if self.right_child.index is idx: 107 | return True 108 | else: 109 | return self.right_child.is_descendant(idx) 110 | else: 111 | return False 112 | 113 | 114 | class ArbitraryTree(object): 115 | def __init__(self, idx, score, label=-1, box=None, im_idx=-1, is_root=False): 116 | self.index = int(idx) 117 | self.is_root = is_root 118 | self.score = float(score) 119 | self.children = [] 120 | self.label = label 121 
| self.embeded_label = None 122 | self.box = box.view(-1) if box is not None else None #[x1,y1,x2,y2] 123 | self.im_idx = int(im_idx) # which image it comes from 124 | self.parent = None 125 | self.node_order = -1 # the n_th node added to the tree 126 | 127 | def generate_bi_tree(self): 128 | # generate a BiTree node, parent/child relationship are not inherited 129 | return BiTree(self.index, self.score, self.label, self.box, self.im_idx, self.is_root) 130 | 131 | def add_child(self, child): 132 | child.parent = self 133 | self.children.append(child) 134 | 135 | def print(self): 136 | print('index: ', self.index) 137 | print('node_order: ', self.node_order) 138 | print('num of child: ', len(self.children)) 139 | for node in self.children: 140 | node.print() 141 | 142 | def find_node_by_order(self, order, result_node): 143 | if self.node_order == order: 144 | result_node = self 145 | elif len(self.children) > 0: 146 | for i in range(len(self.children)): 147 | result_node = self.children[i].find_node_by_order(order, result_node) 148 | 149 | return result_node 150 | 151 | def find_node_by_index(self, index, result_node): 152 | if self.index == index: 153 | result_node = self 154 | elif len(self.children) > 0: 155 | for i in range(len(self.children)): 156 | result_node = self.children[i].find_node_by_index(index, result_node) 157 | 158 | return result_node 159 | 160 | def search_best_insert(self, score_map, best_score, insert_node, best_depend_node, best_insert_node, ignore_root = True): 161 | if self.is_root and ignore_root: 162 | pass 163 | elif float(score_map[self.index, insert_node.index]) > float(best_score): 164 | best_score = score_map[self.index, insert_node.index] 165 | best_depend_node = self 166 | best_insert_node = insert_node 167 | 168 | # iteratively search child 169 | for i in range(self.get_child_num()): 170 | best_score, best_depend_node, best_insert_node = \ 171 | self.children[i].search_best_insert(score_map, best_score, insert_node, best_depend_node, best_insert_node) 172 | 173 | return best_score, best_depend_node, best_insert_node 174 | 175 | def get_child_num(self): 176 | return len(self.children) 177 | 178 | def get_total_child(self): 179 | sum = 0 180 | num_current_child = self.get_child_num() 181 | sum += num_current_child 182 | for i in range(num_current_child): 183 | sum += self.children[i].get_total_child() 184 | return sum 185 | 186 | # only support binary tree 187 | class BiTree(BasicBiTree): 188 | def __init__(self, idx, node_score, label, box, im_idx, is_root=False): 189 | super(BiTree, self).__init__(idx, is_root) 190 | self.state_c = None 191 | self.state_h = None 192 | self.state_c_backward = None 193 | self.state_h_backward = None 194 | # used to select node 195 | self.node_score = float(node_score) 196 | self.label = label 197 | self.embeded_label = None 198 | self.box = box.view(-1) #[x1,y1,x2,y2] 199 | self.im_idx = int(im_idx) # which image it comes from 200 | 201 | -------------------------------------------------------------------------------- /lib/tree_lstm/draw_tree.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from torch.nn.utils.rnn import PackedSequence 6 | import numpy as np 7 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance 8 | from skimage.transform import resize 9 | from config import IM_SCALE 10 | 11 | tree_max_depth = 9 12 | 13 | label_to_text = {"-1":"unknown-1", 
"0":"unknown-0", "1": "airplane", "2": "animal", "3": "arm", "4": "bag", "5": "banana", "6": "basket", "7": "beach", "8": "bear", "9": "bed", "10": "bench", "11": "bike", "12": "bird", "13": "board", "14": "boat", "15": "book", "16": "boot", "17": "bottle", "18": "bowl", "19": "box", "20": "boy", "21": "branch", "22": "building", "23": "bus", "24": "cabinet", "25": "cap", "26": "car", "27": "cat", "28": "chair", "29": "child", "30": "clock", "31": "coat", "32": "counter", "33": "cow", "34": "cup", "35": "curtain", "36": "desk", "37": "dog", "38": "door", "39": "drawer", "40": "ear", "41": "elephant", "42": "engine", "43": "eye", "44": "face", "45": "fence", "46": "finger", "47": "flag", "48": "flower", "49": "food", "50": "fork", "51": "fruit", "52": "giraffe", "53": "girl", "54": "glass", "55": "glove", "56": "guy", "57": "hair", "58": "hand", "59": "handle", "60": "hat", "61": "head", "62": "helmet", "63": "hill", "64": "horse", "65": "house", "66": "jacket", "67": "jean", "68": "kid", "69": "kite", "70": "lady", "71": "lamp", "72": "laptop", "73": "leaf", "74": "leg", "75": "letter", "76": "light", "77": "logo", "78": "man", "79": "men", "80": "motorcycle", "81": "mountain", "82": "mouth", "83": "neck", "84": "nose", "85": "number", "86": "orange", "87": "pant", "88": "paper", "89": "paw", "90": "people", "91": "person", "92": "phone", "93": "pillow", "94": "pizza", "95": "plane", "96": "plant", "97": "plate", "98": "player", "99": "pole", "100": "post", "101": "pot", "102": "racket", "103": "railing", "104": "rock", "105": "roof", "106": "room", "107": "screen", "108": "seat", "109": "sheep", "110": "shelf", "111": "shirt", "112": "shoe", "113": "short", "114": "sidewalk", "115": "sign", "116": "sink", "117": "skateboard", "118": "ski", "119": "skier", "120": "sneaker", "121": "snow", "122": "sock", "123": "stand", "124": "street", "125": "surfboard", "126": "table", "127": "tail", "128": "tie", "129": "tile", "130": "tire", "131": "toilet", "132": "towel", "133": "tower", "134": "track", "135": "train", "136": "tree", "137": "truck", "138": "trunk", "139": "umbrella", "140": "vase", "141": "vegetable", "142": "vehicle", "143": "wave", "144": "wheel", "145": "window", "146": "windshield", "147": "wing", "148": "wire", "149": "woman", "150": "zebra"} 14 | 15 | def draw_tree_region_v2(tree, image, example_id, pred_labels): 16 | """ 17 | tree: A tree structure 18 | image: origin image batch [batch_size, 3, IM_SIZE, IM_SIZE] 19 | output: a image with roi bbox, the color of box correspond to the depth of roi node 20 | """ 21 | sample_image = image[tree.im_idx].view(image.shape[1:]).clone() 22 | sample_image = (revert_normalize(sample_image) * 255).int() 23 | sample_image = torch.clamp(sample_image, 0, 255) 24 | sample_image = sample_image.permute(1,2,0).contiguous().data.cpu().numpy().astype(dtype = np.uint8) 25 | sample_image = Image.fromarray(sample_image, 'RGB').convert("RGBA") 26 | 27 | draw = ImageDraw.Draw(sample_image) 28 | draw_box(draw, tree, pred_labels) 29 | 30 | sample_image.save('./output/example/'+str(example_id)+'_box'+'.png') 31 | 32 | #print('saved img ' + str(example_id)) 33 | 34 | 35 | def draw_box(draw, tree, pred_labels): 36 | x1,y1,x2,y2 = int(tree.box[0]), int(tree.box[1]), int(tree.box[2]), int(tree.box[3]) 37 | draw.rectangle(((x1, y1), (x2, y2)), outline="red") 38 | draw.rectangle(((x1, y1), (x1+50, y1+10)), fill="red") 39 | node_label = int(pred_labels[int(tree.index)]) 40 | draw.text((x1, y1), label_to_text[str(node_label)]) 41 | 42 | if (tree.left_child is not 
None): 43 | draw_box(draw, tree.left_child, pred_labels) 44 | if (tree.right_child is not None): 45 | draw_box(draw, tree.right_child, pred_labels) 46 | 47 | 48 | 49 | def draw_tree_region(tree, image, example_id): 50 | """ 51 | tree: A tree structure 52 | image: origin image batch [batch_size, 3, IM_SIZE, IM_SIZE] 53 | output: a image display regions in a tree structure 54 | """ 55 | sample_image = image[tree.im_idx].view(image.shape[1:]).clone() 56 | sample_image = (revert_normalize(sample_image) * 255).int() 57 | sample_image = torch.clamp(sample_image, 0, 255) 58 | sample_image = sample_image.permute(1,2,0).contiguous().data.cpu().numpy().astype(dtype = np.uint8) 59 | 60 | global tree_max_depth 61 | 62 | depth = min(tree.max_depth(), tree_max_depth) 63 | tree_img = create_tree_img(depth, 64) 64 | tree_img = write_cell(sample_image, tree_img, (0,0,tree_img.shape[1], tree_img.shape[0]), tree, 64) 65 | 66 | im = Image.fromarray(sample_image, 'RGB') 67 | tree_img = Image.fromarray(tree_img, 'RGB') 68 | im.save('./output/example/'+str(example_id)+'_origin'+'.jpg') 69 | tree_img.save('./output/example/'+str(example_id)+'_tree'+'.jpg') 70 | 71 | if example_id % 200 == 0: 72 | print('saved img ' + str(example_id)) 73 | 74 | def write_cell(orig_img, tree_img, draw_box, tree, cell_size): 75 | """ 76 | orig_img: original image 77 | tree_img: draw roi tree 78 | draw_box: the whole bbox used to draw this sub-tree [x1,y1,x2,y2] 79 | tree: a sub-tree 80 | cell_size: size of each roi 81 | """ 82 | x1,y1,x2,y2 = draw_box 83 | if tree is None: 84 | return tree_img 85 | # draw 86 | roi = orig_img[int(tree.box[1]):int(tree.box[3]), int(tree.box[0]):int(tree.box[2]), :] 87 | roi = Image.fromarray(roi, 'RGB') 88 | roi = roi.resize((cell_size, cell_size)) 89 | roi = np.array(roi) 90 | draw_x1 = int(max((x1+x2)/2 - cell_size/2, 0)) 91 | draw_x2 = int(min(draw_x1 + cell_size, x2)) 92 | draw_y1 = y1 93 | draw_y2 = min(y1 + cell_size, y2) 94 | tree_img[draw_y1:draw_y2, draw_x1:draw_x2,:] = roi[:draw_y2-draw_y1,:draw_x2-draw_x1,:] 95 | # recursive draw 96 | global tree_max_depth 97 | if (tree.left_child is not None) and tree.left_child.depth() <= tree_max_depth: 98 | tree_img = write_cell(orig_img, tree_img, (x1,draw_y2,int((x1+x2)/2),y2), tree.left_child, cell_size) 99 | if (tree.right_child is not None) and tree.right_child.depth() <= tree_max_depth: 100 | tree_img = write_cell(orig_img, tree_img, (int((x1+x2)/2),draw_y2,x2,y2), tree.right_child, cell_size) 101 | 102 | return tree_img 103 | 104 | def create_tree_img(depth, cell_size): 105 | height = cell_size * (depth) 106 | width = cell_size * (2**(depth-1)) 107 | return np.zeros((height,width,3)).astype(dtype=np.uint8) 108 | 109 | def revert_normalize(image): 110 | image[0,:,:] = image[0,:,:] * 0.229 111 | image[1,:,:] = image[1,:,:] * 0.224 112 | image[2,:,:] = image[2,:,:] * 0.225 113 | 114 | image[0,:,:] = image[0,:,:] + 0.485 115 | image[1,:,:] = image[1,:,:] + 0.456 116 | image[2,:,:] = image[2,:,:] + 0.406 117 | 118 | return image 119 | 120 | def print_tree(tree): 121 | if tree is None: 122 | return 123 | if(tree.left_child is not None): 124 | print_node(tree.left_child) 125 | if(tree.right_child is not None): 126 | print_node(tree.right_child) 127 | 128 | print_tree(tree.left_child) 129 | print_tree(tree.right_child) 130 | 131 | return 132 | 133 | 134 | def print_node(tree): 135 | print(' depth: ', tree.depth(), end="") 136 | print(' label: ', tree.label, end="") 137 | print(' index: ', int(tree.index), end="") 138 | print(' score: ', tree.score(), 
end="") 139 | print(' center_x: ', tree.center_x) 140 | -------------------------------------------------------------------------------- /lib/tree_lstm/gen_tree.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from torch.nn.utils.rnn import PackedSequence 6 | from lib.word_vectors import obj_edge_vectors 7 | import numpy as np 8 | from config import IM_SCALE 9 | import random 10 | 11 | from lib.tree_lstm import tree_utils 12 | from lib.lstm.highway_lstm_cuda.alternating_highway_lstm import block_orthogonal 13 | from lib.tree_lstm.def_tree import ArbitraryTree 14 | from config import LOG_SOFTMAX 15 | 16 | class RLFeatPreprocessNet(nn.Module): 17 | def __init__(self, feature_size, embed_size, box_info_size, overlap_info_size, output_size): 18 | super(RLFeatPreprocessNet, self).__init__() 19 | self.feature_size = feature_size 20 | self.embed_size = embed_size 21 | self.box_info_size = box_info_size 22 | self.overlap_info_size = overlap_info_size 23 | self.output_size = output_size 24 | 25 | # linear layers 26 | self.resize_feat = nn.Linear(self.feature_size, int(output_size / 4)) 27 | self.resize_embed = nn.Linear(self.embed_size, int(output_size / 4)) 28 | self.resize_box = nn.Linear(self.box_info_size, int(output_size / 4)) 29 | self.resize_overlap = nn.Linear(self.overlap_info_size, int(output_size / 4)) 30 | 31 | # init 32 | self.resize_feat.weight.data.normal_(0, 0.001) 33 | self.resize_embed.weight.data.normal_(0, 0.01) 34 | self.resize_box.weight.data.normal_(0, 1) 35 | self.resize_overlap.weight.data.normal_(0, 1) 36 | self.resize_feat.bias.data.zero_() 37 | self.resize_embed.bias.data.zero_() 38 | self.resize_box.bias.data.zero_() 39 | self.resize_overlap.bias.data.zero_() 40 | 41 | def forward(self, obj_feat, obj_embed, box_info, overlap_info): 42 | resized_obj = self.resize_feat(obj_feat) 43 | resized_embed = self.resize_embed(obj_embed) 44 | resized_box = self.resize_box(box_info) 45 | resized_overlap = self.resize_overlap(overlap_info) 46 | 47 | output_feat = torch.cat((resized_obj, resized_embed, resized_box, resized_overlap), 1) 48 | return output_feat 49 | 50 | def generate_forest(im_inds, gt_forest, pair_scores, box_priors, obj_label, use_rl_tree, is_training, mode): 51 | """ 52 | generate a list of trees that covers all the objects in a batch 53 | im_inds: [obj_num] 54 | box_priors: [obj_num, (x1, y1, x2, y2)] 55 | pair_scores: [obj_num, obj_num] 56 | 57 | output: list of trees, each present a chunk of overlaping objects 58 | """ 59 | output_forest = [] # the list of trees, each one is a chunk of overlapping objects 60 | num_obj = box_priors.shape[0] 61 | 62 | # node score: accumulate parent scores 63 | node_scores = pair_scores.mean(1).view(-1) 64 | # make forest 65 | group_id = 0 66 | 67 | gen_tree_loss_per_batch = [] 68 | entropy_loss = [] 69 | 70 | while(torch.nonzero(im_inds == group_id).numel() > 0): 71 | # select the nodes from the same image 72 | rl_node_container = [] 73 | remain_index = [] 74 | picked_list = torch.nonzero(im_inds == group_id).view(-1) 75 | root_idx = picked_list[-1] 76 | 77 | rl_root = ArbitraryTree(root_idx, node_scores[int(root_idx)], -1, box_priors[int(root_idx)], im_inds[int(root_idx)], is_root=True) 78 | 79 | # put all nodes into node container 80 | for idx in picked_list[:-1]: 81 | if obj_label is not None: 82 | label = int(obj_label[idx]) 83 | else: 84 | label = -1 85 | new_node = 
ArbitraryTree(idx, node_scores[idx], label, box_priors[idx], im_inds[idx]) 86 | rl_node_container.append(new_node) 87 | remain_index.append(int(idx)) 88 | 89 | # iteratively generate tree 90 | rl_gen_tree(rl_node_container, pair_scores, node_scores, gen_tree_loss_per_batch, entropy_loss, rl_root, remain_index, (is_training and use_rl_tree), mode) 91 | 92 | output_forest.append(rl_root) 93 | group_id += 1 94 | 95 | return output_forest, gen_tree_loss_per_batch, entropy_loss 96 | 97 | 98 | def rl_gen_tree(node_container, pair_scores, node_scores, gen_tree_loss_per_batch, entropy_loss, rl_root, remain_index, rl_training, mode): 99 | """ 100 | use reinforcement learning to generate loss (without baseline tree) 101 | Calculate the log(pr) for each decision (not cross entropy) 102 | Step 1: Devide all nodes into left child container and right child container 103 | Step 2: From left child container and right child container, select their respective sub roots 104 | 105 | pair_scores: [obj_num, obj_num] 106 | node_scores: [obj_num] 107 | """ 108 | num_nodes = len(node_container) 109 | # Step 0 110 | if num_nodes == 0: 111 | return 112 | # Step 1 113 | select_node = [] 114 | select_index = [] 115 | select_node.append(rl_root) 116 | select_index.append(rl_root.index) 117 | 118 | if mode == 'predcls': 119 | first_score = node_scores[remain_index].contiguous().view(-1) 120 | _, inds = F.softmax(first_score, 0).max(0) 121 | first_node = node_container[int(inds)] 122 | rl_root.add_child(first_node) 123 | select_node.append(first_node) 124 | select_index.append(first_node.index) 125 | node_container.remove(first_node) 126 | remain_index.remove(first_node.index) 127 | 128 | not_sampled = True 129 | 130 | while len(node_container) > 0: 131 | wid = len(remain_index) 132 | select_index_var = Variable(torch.LongTensor(select_index).cuda()) 133 | remain_index_var = Variable(torch.LongTensor(remain_index).cuda()) 134 | select_score_map = torch.index_select( torch.index_select(pair_scores, 0, select_index_var), 1, remain_index_var ).contiguous().view(-1) 135 | #select_score_map = pair_scores[select_index][:,remain_index].contiguous().view(-1) 136 | if rl_training and not_sampled: 137 | dist = F.softmax(select_score_map, 0) 138 | greedy_id = dist.max(0)[1] 139 | best_id = torch.multinomial(dist, 1)[0] 140 | if int(greedy_id) != int(best_id): 141 | not_sampled = False 142 | if LOG_SOFTMAX: 143 | prob = dist[best_id] + 1e-20 144 | else: 145 | prob = select_score_map[best_id] + 1e-20 146 | gen_tree_loss_per_batch.append(prob.log()) 147 | #neg_entropy = dist * (dist + 1e-20).log() 148 | #entropy_loss.append(neg_entropy.sum()) 149 | else: 150 | best_score, best_id = select_score_map.max(0) 151 | depend_id = int(best_id) // wid 152 | insert_id = int(best_id) % wid 153 | best_depend_node = select_node[depend_id] 154 | best_insert_node = node_container[insert_id] 155 | best_depend_node.add_child(best_insert_node) 156 | 157 | select_node.append(best_insert_node) 158 | select_index.append(best_insert_node.index) 159 | node_container.remove(best_insert_node) 160 | remain_index.remove(best_insert_node.index) 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /lib/tree_lstm/graph_to_tree.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from 
torch.nn.utils.rnn import PackedSequence 6 | from lib.word_vectors import obj_edge_vectors 7 | import numpy as np 8 | from config import IM_SCALE, ROOT_PATH 9 | import random 10 | 11 | from lib.tree_lstm.def_tree import ArbitraryTree 12 | 13 | def graph_to_trees(co_occour_prob, rel_labels, obj_labels): 14 | """ 15 | Generate arbitrary trees according to the ground truth graph 16 | 17 | co_occour: [num_obj_classes, num_obj_classes] 18 | rel_labels: [image_index, i, j, rel_label], i is not j, i & j are under same image 19 | obj_labels: [image_index, obj_label] 20 | 21 | output: [forest] 22 | """ 23 | num_nodes = obj_labels.shape[0] 24 | num_edges = rel_labels.shape[0] 25 | output = [] 26 | 27 | # calculate the score of each edge 28 | edge_scores = rel_labels.float().clone() 29 | for i in range(num_edges): 30 | if int(rel_labels[i, 3]) == 0: 31 | edge_scores[i, 3] = 0 32 | else: 33 | sub_id = int(obj_labels[int(rel_labels[i, 1]), 1]) - 1 34 | obj_id = int(obj_labels[int(rel_labels[i, 2]), 1]) - 1 35 | edge_scores[i, 3] = co_occour_prob[sub_id, obj_id] 36 | 37 | # generate score map (num_obj * num_obj) 38 | score_map = np.zeros((num_nodes, num_nodes)) 39 | for i in range(num_edges): 40 | sub_id = int(edge_scores[i, 1]) 41 | obj_id = int(edge_scores[i, 2]) 42 | score_map[sub_id, obj_id] = float(score_map[sub_id, obj_id]) + float(edge_scores[i, 3]) 43 | score_map[obj_id, sub_id] = float(score_map[obj_id, sub_id]) + float(edge_scores[i, 3]) 44 | 45 | # calculate the score of each node 46 | node_scores = obj_labels.float().clone() 47 | node_scores[:,1] = 0 48 | for i in range(num_edges): 49 | node_id_1 = int(edge_scores[i, 1]) 50 | node_id_2 = int(edge_scores[i, 2]) 51 | node_scores[node_id_1, 1] = float(node_scores[node_id_1, 1]) + float(edge_scores[i, 3]) 52 | node_scores[node_id_2, 1] = float(node_scores[node_id_2, 1]) + float(edge_scores[i, 3]) 53 | 54 | # generate arbitrary tree 55 | group_id = 0 56 | im_inds = obj_labels[:, 0].contiguous() 57 | while(torch.nonzero(im_inds == group_id).numel() > 0): 58 | # generate node container 59 | node_container = [] 60 | picked_list = torch.nonzero(im_inds == group_id).view(-1) 61 | for idx in picked_list: 62 | node_container.append(ArbitraryTree(idx, node_scores[int(idx), 1])) 63 | # use virtual root (entire image), index is -1, score is almost infinity 64 | tree_root = ArbitraryTree(-1, 10e10, is_root=True) 65 | # node insert order 66 | node_order = 0 67 | # find first & best node to start 68 | best_node = find_best_node(node_container) 69 | tree_root.add_child(best_node) 70 | best_node.node_order = node_order 71 | node_order += 1 72 | node_container.remove(best_node) 73 | # generate tree 74 | while(len(node_container) > 0): 75 | best_depend_node = None 76 | best_insert_node = None 77 | best_score = -1 78 | for i in range(len(node_container)): 79 | best_score, best_depend_node, best_insert_node = \ 80 | tree_root.search_best_insert(score_map, best_score, node_container[i], best_depend_node, best_insert_node) 81 | 82 | # if not in current tree, add to root, else insert 83 | if best_score == 0: 84 | best_node = find_best_node(node_container) 85 | tree_root.add_child(best_node) 86 | best_node.node_order = node_order 87 | node_order += 1 88 | node_container.remove(best_node) 89 | else: 90 | best_depend_node.add_child(best_insert_node) 91 | best_insert_node.node_order = node_order 92 | node_order += 1 93 | node_container.remove(best_insert_node) 94 | 95 | # add tree to forest 96 | output.append(tree_root) 97 | # next image 98 | group_id += 1 99 | 100 | 
return output 101 | 102 | 103 | def arbitraryForest_to_biForest(forest): 104 | """ 105 | forest: a list of arbitrary trees 106 | output: a list of corresponding binary trees 107 | """ 108 | output = [] 109 | for i in range(len(forest)): 110 | result_tree = arTree_to_biTree(forest[i]) 111 | # make sure they are equivalent trees 112 | # assert(result_tree.get_total_child() == forest[i].get_total_child()) 113 | output.append(result_tree) 114 | 115 | return output 116 | 117 | 118 | def arTree_to_biTree(arTree): 119 | root_node = arTree.generate_bi_tree() 120 | arNode_to_biNode(arTree, root_node) 121 | 122 | return root_node 123 | 124 | def arNode_to_biNode(arNode, biNode): 125 | if arNode.get_child_num() >= 1: 126 | new_bi_node = arNode.children[0].generate_bi_tree() 127 | biNode.add_left_child(new_bi_node) 128 | arNode_to_biNode(arNode.children[0], biNode.left_child) 129 | 130 | if arNode.get_child_num() > 1: 131 | current_bi_node = biNode.left_child 132 | for i in range(arNode.get_child_num() - 1): 133 | new_bi_node = arNode.children[i+1].generate_bi_tree() 134 | current_bi_node.add_right_child(new_bi_node) 135 | current_bi_node = current_bi_node.right_child 136 | arNode_to_biNode(arNode.children[i+1], current_bi_node) 137 | 138 | def find_best_node(node_container): 139 | max_node_score = -1 140 | best_node = None 141 | for i in range(len(node_container)): 142 | if node_container[i].score > max_node_score: 143 | max_node_score = node_container[i].score 144 | best_node = node_container[i] 145 | return best_node 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /lib/tree_lstm/tree_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from torch.nn.utils.rnn import PackedSequence 6 | import numpy as np 7 | 8 | from config import BOX_SCALE, IM_SCALE 9 | from lib.fpn.box_utils import bbox_overlaps, bbox_intersections, center_size 10 | 11 | class TreeLSTM_IO(object): 12 | def __init__(self, hidden_tensor, order_tensor, order_count, dists_tensor, commitments_tensor, dropout_mask): 13 | self.hidden = hidden_tensor # Float tensor [num_obj, self.out_dim] 14 | self.order = order_tensor # Long tensor [num_obj] 15 | self.order_count = order_count # int 16 | self.dists = dists_tensor # Float tensor [num_obj, len(self.classes)] 17 | self.commitments = commitments_tensor 18 | self.dropout_mask = dropout_mask 19 | 20 | def get_overlap_info(im_inds, box_priors): 21 | """ 22 | input: 23 | im_inds: [num_obj] 24 | box_priors: [num_obj, 4] 25 | output: [num_obj, 6] 26 | number of overlapped objects (self not included) 27 | sum of all intersection areas (self not included) 28 | sum of IoU (Intersection over Union) 29 | average intersection area (self not included) 30 | average IoU (Intersection over Union) 31 | ROI area 32 | """ 33 | # compute pairwise intersection / IoU; boxes are offset by image index so boxes from different images never overlap 34 | num_obj = box_priors.shape[0] 35 | inds_offset = (im_inds * 1000).view(-1,1).expand(box_priors.shape) 36 | offset_box = box_priors + inds_offset.float() 37 | intersection = bbox_intersections(offset_box, offset_box) 38 | overlap = bbox_overlaps(offset_box, offset_box) 39 | # [obj_num, obj_num]; the diagonal (self) elements should be removed 40 | reverse_eye = Variable(1.0 - torch.eye(num_obj).float().cuda()) 41 | intersection = intersection * reverse_eye 42 | overlap = overlap * reverse_eye 43 | box_area = bbox_area(offset_box) 44 | # 
generate the input features 45 | boxes_info = Variable(torch.FloatTensor(num_obj, 6).zero_().cuda()) # per-object overlap statistics (see docstring above) 46 | 47 | for obj_idx in range(num_obj): 48 | boxes_info[obj_idx, 0] = torch.nonzero(intersection[obj_idx]).numel() 49 | boxes_info[obj_idx, 1] = intersection[obj_idx].view(-1).sum() / float(IM_SCALE * IM_SCALE) 50 | boxes_info[obj_idx, 2] = overlap[obj_idx].view(-1).sum() 51 | boxes_info[obj_idx, 3] = boxes_info[obj_idx, 1] / (boxes_info[obj_idx, 0] + 1e-9) 52 | boxes_info[obj_idx, 4] = boxes_info[obj_idx, 2] / (boxes_info[obj_idx, 0] + 1e-9) 53 | boxes_info[obj_idx, 5] = box_area[obj_idx] / float(IM_SCALE * IM_SCALE) 54 | 55 | return boxes_info, intersection 56 | 57 | def get_box_info(boxes): 58 | """ 59 | input: [batch_size, (x1,y1,x2,y2)] 60 | output: [batch_size, (x1,y1,x2,y2,cx,cy,w,h)] 61 | """ 62 | return torch.cat((boxes / float(IM_SCALE), center_size(boxes) / float(IM_SCALE)), 1) 63 | 64 | 65 | def get_box_pair_info(box1, box2): 66 | """ 67 | input: 68 | box1 [batch_size, (x1,y1,x2,y2,cx,cy,w,h)] 69 | box2 [batch_size, (x1,y1,x2,y2,cx,cy,w,h)] 70 | output: 71 | 32-dimensional vector: [box1, box2, union box, intersection box] 72 | """ 73 | # union box 74 | unionbox = box1[:,:4].clone() 75 | unionbox[:, 0] = torch.min(box1[:, 0], box2[:, 0]) 76 | unionbox[:, 1] = torch.min(box1[:, 1], box2[:, 1]) 77 | unionbox[:, 2] = torch.max(box1[:, 2], box2[:, 2]) 78 | unionbox[:, 3] = torch.max(box1[:, 3], box2[:, 3]) 79 | union_info = get_box_info(unionbox) 80 | 81 | # intersection box (zeroed out when the two boxes do not overlap) 82 | intersection_box = box1[:,:4].clone() 83 | intersection_box[:, 0] = torch.max(box1[:, 0], box2[:, 0]) 84 | intersection_box[:, 1] = torch.max(box1[:, 1], box2[:, 1]) 85 | intersection_box[:, 2] = torch.min(box1[:, 2], box2[:, 2]) 86 | intersection_box[:, 3] = torch.min(box1[:, 3], box2[:, 3]) 87 | case1 = torch.nonzero(intersection_box[:, 2].contiguous().view(-1) < intersection_box[:, 0].contiguous().view(-1)).view(-1) 88 | case2 = torch.nonzero(intersection_box[:, 3].contiguous().view(-1) < intersection_box[:, 1].contiguous().view(-1)).view(-1) 89 | intersection_info = get_box_info(intersection_box) 90 | if case1.numel() > 0: 91 | intersection_info[case1, :] = 0 92 | if case2.numel() > 0: 93 | intersection_info[case2, :] = 0 94 | 95 | return torch.cat((box1, box2, union_info, intersection_info), 1) 96 | 97 | 98 | def bbox_area(gt_boxes): 99 | """ 100 | gt_boxes: (K, 4) ndarray of float 101 | 102 | area: (K,) 103 | """ 104 | K = gt_boxes.size(0) 105 | gt_boxes_area = ((gt_boxes[:,2] - gt_boxes[:,0] + 1) * 106 | (gt_boxes[:,3] - gt_boxes[:,1] + 1)).view(K) 107 | 108 | return gt_boxes_area 109 | 110 | def bbox_center(gt_boxes): 111 | """ 112 | gt_boxes: (K, 4) ndarray of float 113 | 114 | center: (K, 2) 115 | """ 116 | center = (gt_boxes[:, 2:] + gt_boxes[:, :2]) / 2.0 117 | assert(center.shape[1] == 2) 118 | return center 119 | 120 | def print_tree(tree): 121 | if tree is None: 122 | return 123 | if(tree.left_child is not None): 124 | print_node(tree.left_child) 125 | if(tree.right_child is not None): 126 | print_node(tree.right_child) 127 | 128 | print_tree(tree.left_child) 129 | print_tree(tree.right_child) 130 | 131 | return 132 | 133 | 134 | def print_node(tree): 135 | print(' depth: ', tree.depth(), end="") 136 | print(' label: ', tree.label, end="") 137 | print(' index: ', int(tree.index), end="") 138 | print(' score: ', tree.score(), end="") 139 | print(' center_x: ', tree.center_x) 140 | -------------------------------------------------------------------------------- 
/lib/word_vectors.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adapted from PyTorch's text library. 3 | """ 4 | 5 | import array 6 | import os 7 | import zipfile 8 | 9 | import six 10 | import torch 11 | from six.moves.urllib.request import urlretrieve 12 | from tqdm import tqdm 13 | 14 | from config import DATA_PATH 15 | 16 | 17 | def obj_edge_vectors(names, wv_type='glove.6B', wv_dir=DATA_PATH, wv_dim=300): 18 | wv_dict, wv_arr, wv_size = load_word_vectors(wv_dir, wv_type, wv_dim) 19 | 20 | vectors = torch.Tensor(len(names), wv_dim) 21 | vectors.normal_(0,1) 22 | 23 | for i, token in enumerate(names): 24 | wv_index = wv_dict.get(token, None) 25 | if wv_index is not None: 26 | vectors[i] = wv_arr[wv_index] 27 | else: 28 | # Try the longest word (hopefully won't be a preposition 29 | lw_token = sorted(token.split(' '), key=lambda x: len(x), reverse=True)[0] 30 | print("{} -> {} ".format(token, lw_token)) 31 | wv_index = wv_dict.get(lw_token, None) 32 | if wv_index is not None: 33 | vectors[i] = wv_arr[wv_index] 34 | else: 35 | print("fail on {}".format(token)) 36 | 37 | return vectors 38 | 39 | URL = { 40 | 'glove.42B': 'http://nlp.stanford.edu/data/glove.42B.300d.zip', 41 | 'glove.840B': 'http://nlp.stanford.edu/data/glove.840B.300d.zip', 42 | 'glove.twitter.27B': 'http://nlp.stanford.edu/data/glove.twitter.27B.zip', 43 | 'glove.6B': 'http://nlp.stanford.edu/data/glove.6B.zip', 44 | } 45 | 46 | 47 | def load_word_vectors(root, wv_type, dim): 48 | """Load word vectors from a path, trying .pt, .txt, and .zip extensions.""" 49 | if isinstance(dim, int): 50 | dim = str(dim) + 'd' 51 | fname = os.path.join(root, wv_type + '.' + dim) 52 | if os.path.isfile(fname + '.pt'): 53 | fname_pt = fname + '.pt' 54 | print('loading word vectors from', fname_pt) 55 | return torch.load(fname_pt) 56 | if os.path.isfile(fname + '.txt'): 57 | fname_txt = fname + '.txt' 58 | cm = open(fname_txt, 'rb') 59 | cm = [line for line in cm] 60 | elif os.path.basename(wv_type) in URL: 61 | url = URL[wv_type] 62 | print('downloading word vectors from {}'.format(url)) 63 | filename = os.path.basename(fname) 64 | if not os.path.exists(root): 65 | os.makedirs(root) 66 | with tqdm(unit='B', unit_scale=True, miniters=1, desc=filename) as t: 67 | fname, _ = urlretrieve(url, fname, reporthook=reporthook(t)) 68 | with zipfile.ZipFile(fname, "r") as zf: 69 | print('extracting word vectors into {}'.format(root)) 70 | zf.extractall(root) 71 | if not os.path.isfile(fname + '.txt'): 72 | raise RuntimeError('no word vectors of requested dimension found') 73 | return load_word_vectors(root, wv_type, dim) 74 | else: 75 | raise RuntimeError('unable to load word vectors') 76 | 77 | wv_tokens, wv_arr, wv_size = [], array.array('d'), None 78 | if cm is not None: 79 | for line in tqdm(range(len(cm)), desc="loading word vectors from {}".format(fname_txt)): 80 | entries = cm[line].strip().split(b' ') 81 | word, entries = entries[0], entries[1:] 82 | if wv_size is None: 83 | wv_size = len(entries) 84 | try: 85 | if isinstance(word, six.binary_type): 86 | word = word.decode('utf-8') 87 | except: 88 | print('non-UTF8 token', repr(word), 'ignored') 89 | continue 90 | wv_arr.extend(float(x) for x in entries) 91 | wv_tokens.append(word) 92 | 93 | wv_dict = {word: i for i, word in enumerate(wv_tokens)} 94 | wv_arr = torch.Tensor(wv_arr).view(-1, wv_size) 95 | ret = (wv_dict, wv_arr, wv_size) 96 | torch.save(ret, fname + '.pt') 97 | return ret 98 | 99 | def reporthook(t): 100 | 
"""https://github.com/tqdm/tqdm""" 101 | last_b = [0] 102 | 103 | def inner(b=1, bsize=1, tsize=None): 104 | """ 105 | b: int, optionala 106 | Number of blocks just transferred [default: 1]. 107 | bsize: int, optional 108 | Size of each block (in tqdm units) [default: 1]. 109 | tsize: int, optional 110 | Total size (in tqdm units). If [default: None] remains unchanged. 111 | """ 112 | if tsize is not None: 113 | t.total = tsize 114 | t.update((b - last_b[0]) * bsize) 115 | last_b[0] = b 116 | return inner 117 | -------------------------------------------------------------------------------- /models/eval_rels.py: -------------------------------------------------------------------------------- 1 | 2 | from dataloaders.visual_genome import VGDataLoader, VG 3 | import numpy as np 4 | import torch 5 | 6 | from config import ModelConfig 7 | from lib.pytorch_misc import optimistic_restore 8 | from lib.evaluation.sg_eval import BasicSceneGraphEvaluator 9 | from tqdm import tqdm 10 | from config import BOX_SCALE, IM_SCALE 11 | import dill as pkl 12 | import os 13 | 14 | conf = ModelConfig() 15 | if conf.model == 'motifnet': 16 | from lib.rel_model import RelModel 17 | elif conf.model == 'stanford': 18 | from lib.rel_model_stanford import RelModelStanford as RelModel 19 | else: 20 | raise ValueError() 21 | 22 | train, val, test = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True, 23 | use_proposals=conf.use_proposals, 24 | filter_non_overlap=conf.mode == 'sgdet') 25 | if conf.test: 26 | val = test 27 | train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel', 28 | batch_size=conf.batch_size, 29 | num_workers=conf.num_workers, 30 | num_gpus=conf.num_gpus) 31 | 32 | detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates, 33 | num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True, 34 | use_resnet=conf.use_resnet, order=conf.order, 35 | nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim, 36 | use_proposals=conf.use_proposals, 37 | pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder, 38 | pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge, 39 | pooling_dim=conf.pooling_dim, 40 | rec_dropout=conf.rec_dropout, 41 | use_bias=conf.use_bias, 42 | use_tanh=conf.use_tanh, 43 | use_encoded_box = conf.use_encoded_box, 44 | use_rl_tree = conf.use_rl_tree, 45 | draw_tree = conf.draw_tree, 46 | limit_vision=conf.limit_vision 47 | ) 48 | 49 | 50 | detector.cuda() 51 | ckpt = torch.load(conf.ckpt) 52 | 53 | optimistic_restore(detector, ckpt['state_dict']) 54 | # if conf.mode == 'sgdet': 55 | # det_ckpt = torch.load('checkpoints/new_vgdet/vg-19.tar')['state_dict'] 56 | # detector.detector.bbox_fc.weight.data.copy_(det_ckpt['bbox_fc.weight']) 57 | # detector.detector.bbox_fc.bias.data.copy_(det_ckpt['bbox_fc.bias']) 58 | # detector.detector.score_fc.weight.data.copy_(det_ckpt['score_fc.weight']) 59 | # detector.detector.score_fc.bias.data.copy_(det_ckpt['score_fc.bias']) 60 | 61 | all_pred_entries = [] 62 | def val_batch(batch_num, b, evaluator, thrs=(20, 50, 100)): 63 | det_res = detector[b] 64 | if conf.num_gpus == 1: 65 | det_res = [det_res] 66 | 67 | for i, (boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i, gt_boxes, gt_classes, gt_rels) in enumerate(det_res): 68 | gt_entry = { 69 | 'gt_classes': val.gt_classes[batch_num + i].copy(), 70 | 'gt_relations': val.relationships[batch_num + i].copy(), 71 | 'gt_boxes': val.gt_boxes[batch_num + i].copy(), 72 | } 73 | assert np.all(objs_i[rels_i[:,0]] > 0) and 
np.all(objs_i[rels_i[:,1]] > 0) 74 | # assert np.all(rels_i[:,2] > 0) 75 | 76 | pred_entry = { 77 | 'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE, 78 | 'pred_classes': objs_i, 79 | 'pred_rel_inds': rels_i, 80 | 'obj_scores': obj_scores_i, 81 | 'rel_scores': pred_scores_i, 82 | } 83 | all_pred_entries.append(pred_entry) 84 | 85 | evaluator[conf.mode].evaluate_scene_graph_entry( 86 | gt_entry, 87 | pred_entry, 88 | ) 89 | 90 | evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=conf.multi_pred) 91 | if conf.cache is not None and os.path.exists(conf.cache): 92 | print("Found {}! Loading from it".format(conf.cache)) 93 | with open(conf.cache,'rb') as f: 94 | all_pred_entries = pkl.load(f) 95 | for i, pred_entry in enumerate(tqdm(all_pred_entries)): 96 | gt_entry = { 97 | 'gt_classes': val.gt_classes[i].copy(), 98 | 'gt_relations': val.relationships[i].copy(), 99 | 'gt_boxes': val.gt_boxes[i].copy(), 100 | } 101 | evaluator[conf.mode].evaluate_scene_graph_entry( 102 | gt_entry, 103 | pred_entry, 104 | ) 105 | evaluator[conf.mode].print_stats() 106 | else: 107 | detector.eval() 108 | for val_b, batch in enumerate(tqdm(val_loader)): 109 | val_batch(conf.num_gpus*val_b, batch, evaluator) 110 | 111 | evaluator[conf.mode].print_stats() 112 | 113 | if conf.cache is not None: 114 | with open(conf.cache,'wb') as f: 115 | pkl.dump(all_pred_entries, f) 116 | -------------------------------------------------------------------------------- /models/train_detector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Training script 4 Detection 3 | """ 4 | from dataloaders.mscoco import CocoDetection, CocoDataLoader 5 | from dataloaders.visual_genome import VGDataLoader, VG 6 | from lib.object_detector import ObjectDetector 7 | import numpy as np 8 | from torch import optim 9 | import torch 10 | import pandas as pd 11 | import time 12 | import os 13 | from config import ModelConfig, FG_FRACTION, RPN_FG_FRACTION, IM_SCALE, BOX_SCALE 14 | from torch.nn import functional as F 15 | from lib.fpn.box_utils import bbox_loss 16 | import torch.backends.cudnn as cudnn 17 | from pycocotools.cocoeval import COCOeval 18 | from lib.pytorch_misc import optimistic_restore, clip_grad_norm 19 | from torch.optim.lr_scheduler import ReduceLROnPlateau 20 | 21 | cudnn.benchmark = True 22 | conf = ModelConfig() 23 | 24 | if conf.coco: 25 | train, val = CocoDetection.splits() 26 | val.ids = val.ids[:conf.val_size] 27 | train.ids = train.ids 28 | train_loader, val_loader = CocoDataLoader.splits(train, val, batch_size=conf.batch_size, 29 | num_workers=conf.num_workers, 30 | num_gpus=conf.num_gpus) 31 | else: 32 | train, val, _ = VG.splits(num_val_im=conf.val_size, filter_non_overlap=False, 33 | filter_empty_rels=False, use_proposals=conf.use_proposals) 34 | train_loader, val_loader = VGDataLoader.splits(train, val, batch_size=conf.batch_size, 35 | num_workers=conf.num_workers, 36 | num_gpus=conf.num_gpus) 37 | 38 | detector = ObjectDetector(classes=train.ind_to_classes, num_gpus=conf.num_gpus, 39 | mode='rpntrain' if not conf.use_proposals else 'proposals', use_resnet=conf.use_resnet) 40 | detector.cuda() 41 | 42 | # Note: if you're doing the stanford setup, you'll need to change this to freeze the lower layers 43 | if conf.use_proposals: 44 | for n, param in detector.named_parameters(): 45 | if n.startswith('features'): 46 | param.requires_grad = False 47 | 48 | optimizer = optim.SGD([p for p in detector.parameters() if p.requires_grad], 49 | weight_decay=conf.l2, lr=conf.lr * 
conf.num_gpus * conf.batch_size, momentum=0.9) 50 | scheduler = ReduceLROnPlateau(optimizer, 'max', patience=3, factor=0.1, 51 | verbose=True, threshold=0.001, threshold_mode='abs', cooldown=1) 52 | 53 | start_epoch = -1 54 | if conf.ckpt is not None: 55 | ckpt = torch.load(conf.ckpt) 56 | if optimistic_restore(detector, ckpt['state_dict']): 57 | start_epoch = ckpt['epoch'] 58 | 59 | 60 | def train_epoch(epoch_num): 61 | detector.train() 62 | tr = [] 63 | start = time.time() 64 | for b, batch in enumerate(train_loader): 65 | tr.append(train_batch(batch)) 66 | 67 | if b % conf.print_interval == 0 and b >= conf.print_interval: 68 | mn = pd.concat(tr[-conf.print_interval:], axis=1).mean(1) 69 | time_per_batch = (time.time() - start) / conf.print_interval 70 | print("\ne{:2d}b{:5d}/{:5d} {:.3f}s/batch, {:.1f}m/epoch".format( 71 | epoch_num, b, len(train_loader), time_per_batch, len(train_loader) * time_per_batch / 60)) 72 | print(mn) 73 | print('-----------', flush=True) 74 | start = time.time() 75 | return pd.concat(tr, axis=1) 76 | 77 | 78 | def train_batch(b): 79 | """ 80 | :param b: contains: 81 | :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE] 82 | :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using 83 | :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated 84 | RPN feature vector that give us all_anchors, 85 | each one (img_ind, fpn_idx) 86 | :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image. 87 | 88 | :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img 89 | 90 | Training parameters: 91 | :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will 92 | be used to compute the training loss (img_ind, fpn_idx) 93 | :param gt_boxes: [num_gt, 4] GT boxes over the batch. 94 | :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class) 95 | 96 | :return: 97 | """ 98 | result = detector[b] 99 | scores = result.od_obj_dists 100 | box_deltas = result.od_box_deltas 101 | labels = result.od_obj_labels 102 | roi_boxes = result.od_box_priors 103 | bbox_targets = result.od_box_targets 104 | rpn_scores = result.rpn_scores 105 | rpn_box_deltas = result.rpn_box_deltas 106 | 107 | # detector loss 108 | valid_inds = (labels.data != 0).nonzero().squeeze(1) 109 | fg_cnt = valid_inds.size(0) 110 | bg_cnt = labels.size(0) - fg_cnt 111 | class_loss = F.cross_entropy(scores, labels) 112 | 113 | # No gather_nd in pytorch so instead convert first 2 dims of tensor to 1d 114 | box_reg_mult = 2 * (1. / FG_FRACTION) * fg_cnt / (fg_cnt + bg_cnt + 1e-4) 115 | twod_inds = valid_inds * box_deltas.size(1) + labels[valid_inds].data 116 | 117 | box_loss = bbox_loss(roi_boxes[valid_inds], box_deltas.view(-1, 4)[twod_inds], 118 | bbox_targets[valid_inds]) * box_reg_mult 119 | 120 | loss = class_loss + box_loss 121 | 122 | # RPN loss 123 | if not conf.use_proposals: 124 | train_anchor_labels = b.train_anchor_labels[:, -1] 125 | train_anchors = b.train_anchors[:, :4] 126 | train_anchor_targets = b.train_anchors[:, 4:] 127 | 128 | train_valid_inds = (train_anchor_labels.data == 1).nonzero().squeeze(1) 129 | rpn_class_loss = F.cross_entropy(rpn_scores, train_anchor_labels) 130 | 131 | # print("{} fg {} bg, ratio of {:.3f} vs {:.3f}. 
RPN {}fg {}bg ratio of {:.3f} vs {:.3f}".format( 132 | # fg_cnt, bg_cnt, fg_cnt / (fg_cnt + bg_cnt + 1e-4), FG_FRACTION, 133 | # train_valid_inds.size(0), train_anchor_labels.size(0)-train_valid_inds.size(0), 134 | # train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4), RPN_FG_FRACTION), flush=True) 135 | rpn_box_mult = 2 * (1. / RPN_FG_FRACTION) * train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4) 136 | rpn_box_loss = bbox_loss(train_anchors[train_valid_inds], 137 | rpn_box_deltas[train_valid_inds], 138 | train_anchor_targets[train_valid_inds]) * rpn_box_mult 139 | 140 | loss += rpn_class_loss + rpn_box_loss 141 | res = pd.Series([rpn_class_loss.data[0], rpn_box_loss.data[0], 142 | class_loss.data[0], box_loss.data[0], loss.data[0]], 143 | ['rpn_class_loss', 'rpn_box_loss', 'class_loss', 'box_loss', 'total']) 144 | else: 145 | res = pd.Series([class_loss.data[0], box_loss.data[0], loss.data[0]], 146 | ['class_loss', 'box_loss', 'total']) 147 | 148 | optimizer.zero_grad() 149 | loss.backward() 150 | clip_grad_norm( 151 | [(n, p) for n, p in detector.named_parameters() if p.grad is not None], 152 | max_norm=conf.clip, clip=True) 153 | optimizer.step() 154 | 155 | return res 156 | 157 | 158 | def val_epoch(): 159 | detector.eval() 160 | # all_boxes is a list of length number-of-classes. 161 | # Each list element is a list of length number-of-images. 162 | # Each of those list elements is either an empty list [] 163 | # or a numpy array of detection. 164 | vr = [] 165 | for val_b, batch in enumerate(val_loader): 166 | vr.append(val_batch(val_b, batch)) 167 | vr = np.concatenate(vr, 0) 168 | if vr.shape[0] == 0: 169 | print("No detections anywhere") 170 | return 0.0 171 | 172 | val_coco = val.coco 173 | coco_dt = val_coco.loadRes(vr) 174 | coco_eval = COCOeval(val_coco, coco_dt, 'bbox') 175 | coco_eval.params.imgIds = val.ids if conf.coco else [x for x in range(len(val))] 176 | 177 | coco_eval.evaluate() 178 | coco_eval.accumulate() 179 | coco_eval.summarize() 180 | mAp = coco_eval.stats[1] 181 | return mAp 182 | 183 | 184 | def val_batch(batch_num, b): 185 | result = detector[b] 186 | if result is None: 187 | return np.zeros((0, 7)) 188 | scores_np = result.obj_scores.data.cpu().numpy() 189 | cls_preds_np = result.obj_preds.data.cpu().numpy() 190 | boxes_np = result.boxes_assigned.data.cpu().numpy() 191 | im_inds_np = result.im_inds.data.cpu().numpy() 192 | im_scales = b.im_sizes.reshape((-1, 3))[:, 2] 193 | if conf.coco: 194 | boxes_np /= im_scales[im_inds_np][:, None] 195 | boxes_np[:, 2:4] = boxes_np[:, 2:4] - boxes_np[:, 0:2] + 1 196 | cls_preds_np[:] = [val.ind_to_id[c_ind] for c_ind in cls_preds_np] 197 | im_inds_np[:] = [val.ids[im_ind + batch_num * conf.batch_size * conf.num_gpus] 198 | for im_ind in im_inds_np] 199 | else: 200 | boxes_np *= BOX_SCALE / IM_SCALE 201 | boxes_np[:, 2:4] = boxes_np[:, 2:4] - boxes_np[:, 0:2] + 1 202 | im_inds_np += batch_num * conf.batch_size * conf.num_gpus 203 | 204 | return np.column_stack((im_inds_np, boxes_np, scores_np, cls_preds_np)) 205 | 206 | 207 | print("Training starts now!") 208 | for epoch in range(start_epoch + 1, start_epoch + 1 + conf.num_epochs): 209 | rez = train_epoch(epoch) 210 | print("overall{:2d}: ({:.3f})\n{}".format(epoch, rez.mean(1)['total'], rez.mean(1)), flush=True) 211 | mAp = val_epoch() 212 | scheduler.step(mAp) 213 | 214 | torch.save({ 215 | 'epoch': epoch, 216 | 'state_dict': detector.state_dict(), 217 | 'optimizer': optimizer.state_dict(), 218 | }, os.path.join(conf.save_dir, 
'{}-{}.tar'.format('coco' if conf.coco else 'vg', epoch))) 219 | -------------------------------------------------------------------------------- /scripts/eval_models.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is a script that will evaluate all models for SGCLS 4 | export CUDA_VISIBLE_DEVICES=1 5 | 6 | if [ $1 == "0" ]; then 7 | python models/eval_rels.py -m predcls -model motifnet -order leftright -nl_obj 1 -nl_edge 1 -b 6 -clip 5 \ 8 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/predcls.tar -nepoch 50 -use_bias -use_encoded_box 9 | 10 | elif [ $1 == "1" ]; then 11 | python models/eval_rels.py -m sgcls -model motifnet -order leftright -nl_obj 1 -nl_edge 1 -b 6 -clip 5 \ 12 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/sgcls.tar -nepoch 50 -use_bias -use_encoded_box 13 | 14 | elif [ $1 == "2" ]; then 15 | python models/eval_rels.py -m sgdet -model motifnet -order leftright -nl_obj 1 -nl_edge 1 -b 6 -clip 5 \ 16 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/sgdet.tar -nepoch 50 -use_bias -use_encoded_box 17 | fi 18 | -------------------------------------------------------------------------------- /scripts/pretrain_detector.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Train the model without COCO pretraining 3 | python models/train_detector.py -b 6 -lr 1e-3 -save_dir checkpoints/vgdet -nepoch 50 -ngpu 3 -nwork 3 -p 100 -clip 5 4 | 5 | # If you want to evaluate on the frequency baseline now, run this command (replace the checkpoint with the 6 | # best checkpoint you found). 7 | #export CUDA_VISIBLE_DEVICES=0 8 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-24.tar -nwork 1 -p 100 -test 9 | #export CUDA_VISIBLE_DEVICES=1 10 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-28.tar -nwork 1 -p 100 -test 11 | #export CUDA_VISIBLE_DEVICES=2 12 | #python models/eval_rel_count.py -ngpu 1 -b 6 -ckpt checkpoints/vgdet/vg-28.tar -nwork 1 -p 100 -test 13 | # 14 | # 15 | -------------------------------------------------------------------------------- /scripts/refine_for_detection.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Refine Motifnet for detection 4 | 5 | 6 | export CUDA_VISIBLE_DEVICES=$1 7 | 8 | if [ $1 == "0" ]; then 9 | echo "TRAINING THE BASELINE" 10 | python models/train_rels.py -m sgdet -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \ 11 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/baseline-sgcls/vgrel-11.tar -save_dir checkpoints/baseline-sgdet \ 12 | -nepoch 50 -use_bias 13 | elif [ $1 == "1" ]; then 14 | echo "TRAINING STANFORD" 15 | python models/train_rels.py -m sgdet -model stanford -b 6 -p 100 -lr 1e-4 -ngpu 1 -clip 5 \ 16 | -ckpt checkpoints/stanford-sgcls/vgrel-11.tar -save_dir checkpoints/stanford-sgdet 17 | elif [ $1 == "2" ]; then 18 | echo "Refining Motifnet for detection!" 
19 | python models/train_rels.py -m sgdet -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 20 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/motifnet-sgcls/vgrel-7.tar \ 21 | -save_dir checkpoints/motifnet-sgdet -nepoch 10 -use_bias 22 | fi -------------------------------------------------------------------------------- /scripts/train_models_sgcls.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is a script that will train all of the models for scene graph classification and then evaluate them. 4 | export CUDA_VISIBLE_DEVICES=$1 5 | 6 | if [ $1 == "0" ]; then 7 | echo "TRAINING THE BASELINE" 8 | python models/train_rels.py -m sgcls -model motifnet -nl_obj 0 -nl_edge 0 -b 6 \ 9 | -clip 5 -p 100 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/baseline2 \ 10 | -nepoch 50 -use_bias 11 | elif [ $1 == "1" ]; then 12 | echo "TRAINING MESSAGE PASSING" 13 | 14 | python models/train_rels.py -m sgcls -model stanford -b 6 -p 100 -lr 1e-3 -ngpu 1 -clip 5 \ 15 | -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/stanford2 16 | elif [ $1 == "2" ]; then 17 | echo "TRAINING MOTIFNET" 18 | 19 | python models/train_rels.py -m sgcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 \ 20 | -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar \ 21 | -save_dir checkpoints/motifnet2 -nepoch 50 -use_bias 22 | fi 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /scripts/train_stanford.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | python models/train_rels.py -m sgcls -model stanford -b 4 -p 400 -lr 1e-4 -ngpu 1 -ckpt checkpoints/vgdet/vg-24.tar -save_dir checkpoints/stanford -adam 4 | 5 | # To test you can run this command 6 | # python models/eval_rels.py -m sgcls -model stanford -ngpu 1 -ckpt checkpoints/stanford/vgrel-28.tar -test 7 | -------------------------------------------------------------------------------- /scripts/train_vctreenet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Train VCTREE using different orderings 4 | 5 | export CUDA_VISIBLE_DEVICES=1 6 | 7 | if [ $1 == "0" ]; then 8 | echo "TRAINING VCTREE V1" 9 | python models/train_rels.py -m sgdet -model motifnet -order confidence -nl_obj 1 -nl_edge 1 -b 1 -clip 5 \ 10 | -p 2000 -hidden_dim 512 -pooling_dim 4096 -lr 1e-5 -ngpu 1 -ckpt checkpoints/vctree-sgdet-19.tar\ 11 | -save_dir checkpoints/vctree-sgdet-rl -nepoch 50 -use_bias -use_encoded_box -use_rl_tree 12 | elif [ $1 == "1" ]; then 13 | echo "TRAINING VCTREE V2" 14 | python models/train_rels.py -m sgdet -model motifnet -order confidence -nl_obj 1 -nl_edge 1 -b 2 -clip 3 \ 15 | -p 1000 -hidden_dim 512 -pooling_dim 4096 -lr 1e-5 -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar\ 16 | -save_dir checkpoints/vctree-sgdet -nepoch 50 -use_bias -use_encoded_box 17 | 18 | 19 | elif [ $1 == "2" ]; then 20 | echo "TRAINING VCTREE V3" 21 | python models/train_rels.py -m sgcls -model motifnet -order confidence -nl_obj 1 -nl_edge 1 -b 1 -clip 5 \ 22 | -p 2000 -hidden_dim 512 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/vctree-sgcls-19.tar\ 23 | -save_dir checkpoints/vctree-sgcls-rl -nepoch 50 -use_bias -use_encoded_box -use_rl_tree 24 | elif [ $1 == "3" ]; then 25 | echo "TRAINING VCTREE 
V3" 26 | python models/train_rels.py -m sgcls -model motifnet -order confidence -nl_obj 1 -nl_edge 1 -b 5 -clip 5 \ 27 | -p 500 -hidden_dim 512 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar\ 28 | -save_dir checkpoints/vctree-sgcls -nepoch 50 -use_bias -use_encoded_box 29 | 30 | elif [ $1 == "4" ]; then 31 | echo "TRAINING VCTREE V3" 32 | python models/train_rels.py -m predcls -model motifnet -order confidence -nl_obj 1 -nl_edge 1 -b 1 -clip 5 \ 33 | -p 2000 -hidden_dim 512 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/vctree-predcls-18.tar\ 34 | -save_dir checkpoints/vctree-predcls-rl -nepoch 50 -use_bias -use_encoded_box -use_rl_tree 35 | elif [ $1 == "5" ]; then 36 | echo "TRAINING VCTREE V3" 37 | python models/train_rels.py -m predcls -model motifnet -order confidence -nl_obj 1 -nl_edge 1 -b 5 -clip 5 \ 38 | -p 2000 -hidden_dim 512 -pooling_dim 4096 -lr 1e-4 -ngpu 1 -ckpt checkpoints/vg-faster-rcnn.tar\ 39 | -save_dir checkpoints/vctree-predcls -nepoch 50 -use_bias -use_encoded_box 40 | 41 | fi 42 | --------------------------------------------------------------------------------