├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── bua ├── __init__.py ├── caffe │ ├── __init__.py │ ├── config.py │ ├── dataloader │ │ ├── __init__.py │ │ ├── dataset_mapper.py │ │ ├── detection_utils.py │ │ └── transform_gen.py │ ├── modeling │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── box_regression.py │ │ ├── fast_rcnn.py │ │ ├── layers │ │ │ ├── csrc │ │ │ │ ├── __init__.py │ │ │ │ ├── nms │ │ │ │ │ ├── nms.cu │ │ │ │ │ ├── nms.h │ │ │ │ │ ├── nms_cpu.cpp │ │ │ │ │ ├── vision_cpu.h │ │ │ │ │ └── vision_cuda.h │ │ │ │ └── vision.cpp │ │ │ ├── nms.py │ │ │ └── wrappers.py │ │ ├── rcnn.py │ │ ├── roi_heads.py │ │ ├── rpn.py │ │ └── rpn_outputs.py │ └── postprocessing.py ├── d2 │ ├── __init__.py │ ├── config.py │ ├── dataloader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── dataset_mapper.py │ └── modeling │ │ └── roi_heads.py └── visual_genome.py ├── configs ├── caffe │ ├── test-caffe-r101-fix36.yaml │ ├── test-caffe-r101.yaml │ └── test-caffe-r152.yaml └── d2 │ ├── base-d2.yaml │ ├── test-d2-X152.yaml │ ├── test-d2-r101.yaml │ ├── test-d2-r50.yaml │ ├── train-d2-r101.yaml │ └── train-d2-r50.yaml ├── datasets ├── demo │ ├── 000456.jpg │ ├── 000542.jpg │ ├── 001150.jpg │ ├── 001763.jpg │ ├── 004545.jpg │ ├── example_image.jpg │ ├── example_image1.png │ └── example_image2.png └── init ├── evaluation ├── __init__.py ├── attributes_vocab.txt ├── objects_vocab.txt ├── vg_eval.py └── vg_evaluation.py ├── extract_features.py ├── opts.py ├── setup.py ├── train_net.py └── utils ├── __init__.py ├── extract_d2features.py ├── extract_features_faster.py ├── extract_features_multigpu.py ├── extract_features_singlegpu.py ├── extract_utils.py ├── extractor.py ├── progress_bar.py ├── utils.py └── visualize.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | /bottom_up_attention.pytorch.egg-info/ 2 | __pycache__/ 3 | .ipynb_checkpoints/ 4 | /build/ 5 | /datasets/visual_genome/ 6 | /extract/ 7 | /output/ 8 | /output_caffe152/ 9 | *.pth 10 | *.pkl -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "detectron2"] 2 | path = detectron2 3 | url = https://github.com/facebookresearch/detectron2 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bottom-up-attention.pytorch 2 | 3 | This repository contains a **PyTorch** reimplementation of the [bottom-up-attention](https://github.com/peteanderson80/bottom-up-attention) project based on *Caffe*. 4 | 5 | We use [Detectron2](https://github.com/facebookresearch/detectron2) as the backend to provide complete functionality, including training, testing and feature extraction. Furthermore, we migrate the pre-trained Caffe-based model from the original repository, which can extract **the same visual features** as the original model (with deviation < 0.01). 6 | 7 | Some example object and attribute predictions for salient image regions are illustrated below. The script to obtain the following visualizations can be found [here](utils/visualize.ipynb). 8 | 9 | ![example-image](datasets/demo/example_image.jpg?raw=true) 10 | 11 | ## Table of Contents 12 | 13 | 0. [Prerequisites](#Prerequisites) 14 | 1. [Training](#Training) 15 | 2. [Testing](#Testing) 16 | 3. [Feature Extraction](#Feature-Extraction) 17 | 4. [Pre-trained models](#Pre-trained-models) 18 | 19 | ## Prerequisites 20 | 21 | #### Requirements 22 | 23 | - [Python](https://www.python.org/downloads/) >= 3.6 24 | - [PyTorch](http://pytorch.org/) >= 1.4 25 | - [CUDA](https://developer.nvidia.com/cuda-toolkit) >= 9.2 and [cuDNN](https://developer.nvidia.com/cudnn) 26 | - [Apex](https://github.com/NVIDIA/apex.git) 27 | - [Detectron2](https://github.com/facebookresearch/detectron2) 28 | - [Ray](https://github.com/ray-project/ray) 29 | - [OpenCV](https://opencv.org/) 30 | - [Pycocotools](https://github.com/cocodataset/cocoapi) 31 | 32 | Note that most of the requirements above are needed for Detectron2. 33 | 34 | #### Installation 35 | 36 | 1. Clone the project including the required version (v0.2.1) of Detectron2. **Note that if you use another version, some strange problems may occur**. 37 | ```bash 38 | # clone the repository including Detectron2 (@be792b9) 39 | $ git clone --recursive https://github.com/MILVLG/bottom-up-attention.pytorch 40 | ``` 41 | 42 | 2. Install Detectron2: 43 | ```bash 44 | $ cd detectron2 45 | $ pip install -e . 46 | $ cd .. 47 | ``` 48 | **We recommend using Detectron2 v0.2.1 (@be792b9) as the backend for this project, which has been cloned in step 1. We believe a newer Detectron2 version is also compatible with this project unless its interface has been changed (we have tested v0.3 with PyTorch 1.5).** 49 | 50 | 3. Compile the remaining tools using the following script: 51 | 52 | ```bash 53 | # install apex 54 | $ git clone https://github.com/NVIDIA/apex.git 55 | $ cd apex 56 | $ python setup.py install 57 | $ cd .. 58 | # install the remaining modules 59 | $ python setup.py build develop 60 | $ pip install ray 61 | ``` 62 | 63 | #### Setup 64 | 65 | If you want to train or test the model, you need to download the images and annotation files of the Visual Genome (VG) dataset. **If you only need to extract visual features using the pre-trained model, you can skip this part**. 66 | 67 | The original VG images ([part1](https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip) and [part2](https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip)) should be downloaded and unzipped into a single folder, which is then placed in the `datasets` folder.
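A minimal sketch of this download step is shown below (the URLs are the ones listed above; the target folder `datasets/visual_genome/images` follows the directory structure shown further down and may need to be adapted to your setup):

```bash
# Sketch only: download both image archives and merge them into a single folder.
mkdir -p datasets/visual_genome/images
wget https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip
wget https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip
unzip -q images.zip -d vg_part1 && unzip -q images2.zip -d vg_part2
# Each archive unzips into its own subfolder; collect all images into one place.
find vg_part1 vg_part2 -name '*.jpg' -exec mv {} datasets/visual_genome/images/ \;
```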
68 | 69 | The annotation files generated by the original repository need to be transformed into the COCO format required by Detectron2. The preprocessed annotation files can be downloaded [here](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EWpiE_5PvBdKiKfCi0pBx_EB5ONo8D8XABUz7tWcnltCrw?download=1) and unzipped to the `datasets` folder. 70 | 71 | Finally, the `datasets` folder will have the following structure: 72 | 73 | ```angular2html 74 | |-- datasets 75 | |-- visual_genome 76 | | |-- images 77 | | | | |-- 1.jpg 78 | | | | |-- 2.jpg 79 | | | | |-- ... 80 | | | | |-- ... 81 | | |-- annotations 82 | | | |-- visual_genome_train.json 83 | | | |-- visual_genome_test.json 84 | | | |-- visual_genome_val.json 85 | ``` 86 | 87 | ## Training 88 | 89 | The following script will train a bottom-up-attention model on the `train` split of VG. 90 | 91 | ```bash 92 | $ python3 train_net.py --mode d2 \ 93 | --config-file configs/d2/train-d2-r101.yaml \ 94 | --resume 95 | ``` 96 | 97 | 1. `mode = 'd2'` refers to training a model with the Detectron2 backend, which is inspired by [grid-feats-vqa](https://github.com/facebookresearch/grid-feats-vqa/). We think it is unnecessary to train a new model using the `caffe` mode. The pre-trained Caffe models are provided for testing and feature extraction. 98 | 99 | 2. `config-file` refers to all the configurations of the model. 100 | 101 | 3. `resume` is a flag for resuming training from a specific checkpoint. 102 | 103 | ## Testing 104 | 105 | Given the trained model, the following script will test its performance on the `val` split of VG: 106 | 107 | ```bash 108 | $ python3 train_net.py --mode caffe \ 109 | --config-file configs/caffe/test-caffe-r101.yaml \ 110 | --eval-only 111 | ``` 112 | 113 | 1. `mode = {'caffe', 'd2'}` refers to the mode being used. For the model converted from Caffe, you need to use the `caffe` mode. For other models trained with Detectron2, you need to use the `d2` mode. 114 | 115 | 2. `config-file` refers to all the configurations of the model, which also include the path to the model weights. 116 | 117 | 3. `eval-only` is a flag that indicates the testing phase. 118 | 119 | ## Feature Extraction 120 | 121 | Given the trained model, the following script will extract the bottom-up-attention visual features. Both single-GPU and multi-GPU extraction are supported. 122 | 123 | ```bash 124 | $ python3 extract_features.py --mode caffe \ 125 | --num-cpus 32 --gpus '0,1,2,3' \ 126 | --extract-mode roi_feats \ 127 | --min-max-boxes '10,100' \ 128 | --config-file configs/caffe/test-caffe-r101.yaml \ 129 | --image-dir --bbox-dir --out-dir \ 130 | --fastmode 131 | ``` 132 | 133 | 1. `mode = {'caffe', 'd2'}` refers to the mode being used. For the model converted from Caffe, you need to use the `caffe` mode. For other models trained with Detectron2, you need to use the `d2` mode. `'caffe'` is the default value. **Note** that the `d2` mode needs to run with [Ray](https://github.com/ray-project/ray). 134 | 135 | 2. `num-cpus` refers to the number of CPU cores to use for accelerating the CPU computation. **0** stands for using all available CPUs and **1** is the default value. 136 | 137 | 3. `gpus` refers to the IDs of the GPUs to use. **'0'** is the default value. If more than one GPU is specified, for example **'0,1,2,3'**, the script will use the [Ray](https://github.com/ray-project/ray) library for parallelization. 138 | 139 | 4.
`config-file` refers to all the configurations of the model, which also include the path to the model weights. 140 | 141 | 5. `extract-mode` refers to the mode for feature extraction, one of {`roi_feats`, `bboxes`, `bbox_feats`}. 142 | 143 | 6. `min-max-boxes` refers to the min-and-max number of features (boxes) to be extracted. **Note** that the `d2` mode only supports setting the min-and-max numbers to `'100,100'` to get exactly 100 boxes per image; other values yield roughly 50~60 boxes per image. 144 | 145 | 7. `image-dir` refers to the input image directory. 146 | 147 | 8. `bbox-dir` refers to the pre-proposed bbox directory. It is only used when `extract-mode` is set to `'bbox_feats'`. 148 | 149 | 9. `out-dir` refers to the output feature directory. 150 | 151 | 10. `fastmode` enables a faster version (about `2x` faster on a workstation with 4 Titan-V GPUs and 32 CPU cores), at the expense of a potential memory leak if the computing capabilities of the GPUs and CPUs are mismatched. More details and some matched examples can be found [here](https://github.com/MILVLG/bottom-up-attention.pytorch/pull/41). 152 | 153 | 154 | 155 | Using the same pre-trained model, we also provide an alternative *two-stage* strategy for extracting visual features. This results in (slightly) more accurate bounding boxes and visual features, at the expense of extra time overhead: 156 | 157 | ```bash 158 | # extract bboxes only: 159 | $ python3 extract_features.py --mode caffe \ 160 | --num-cpus 32 --gpus '0,1,2,3' \ 161 | --extract-mode bboxes \ 162 | --config-file configs/caffe/test-caffe-r101.yaml \ 163 | --image-dir --out-dir --resume 164 | 165 | # extract visual features with the pre-extracted bboxes: 166 | $ python3 extract_features.py --mode caffe \ 167 | --num-cpus 32 --gpus '0,1,2,3' \ 168 | --extract-mode bbox_feats \ 169 | --config-file configs/caffe/test-caffe-r101.yaml \ 170 | --image-dir --bbox-dir --out-dir --resume 171 | 172 | ``` 173 | 174 | ## Pre-trained models 175 | 176 | We provide the following pre-trained models, including models trained in both the `caffe` and `d2` modes. 177 | 178 | For the models of the `caffe` mode, `R101-k36` and `R101-k10-100` refer to the [fix36 model](https://www.dropbox.com/s/2h4hmgcvpaewizu/resnet101_faster_rcnn_final_iter_320000.caffemodel?dl=1) and [dynamic 10-100 model](https://www.dropbox.com/s/5xethd2nxa8qrnq/resnet101_faster_rcnn_final.caffemodel?dl=1) provided in the original [bottom-up-attention](https://github.com/peteanderson80/bottom-up-attention) repository. We additionally provide an `R152` model which outperforms the two counterparts above. 179 | 180 | For the models of the `d2` mode, we follow the configurations and implementations of [grid-feats-vqa](https://github.com/facebookresearch/grid-feats-vqa/) and trained three models using the training script in this repository, namely `R50`, `R101` and `X152`.
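The table below lists the available checkpoints. As a usage sketch, a downloaded checkpoint can be evaluated with the testing command from above; note that the trailing `MODEL.WEIGHTS` override assumes `train_net.py` forwards extra key-value pairs to the config, as Detectron2's `default_argument_parser` does (otherwise, set `MODEL.WEIGHTS` directly in the yaml config), and the checkpoint path is only a placeholder:

```bash
# Sketch only: evaluate a downloaded caffe-mode checkpoint on the VG val split.
$ python3 train_net.py --mode caffe \
         --config-file configs/caffe/test-caffe-r101.yaml \
         --eval-only MODEL.WEIGHTS /path/to/downloaded_r101_model.pth
```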
181 | 182 | name | mode | objects mAP@0.5 |weighted objects mAP@0.5|download 183 | :-:|:-:|:-:|:-:|:-: 184 | [R101-k36](./configs/caffe/test-caffe-r101-fix36.yaml)|caffe|9.3|14.0|[model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EUKhQ3hSRv9JrrW64qpNLSIBGoOjEGCkF8zvgBP9gKax-w?download=1) 185 | [R101-k10-100](./configs/caffe/test-caffe-r101.yaml)|caffe|10.2|15.1|[model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EaXvCC3WjtlLvvEfLr3oa8UBLA21tcLh4L8YLbYXl6jgjg?download=1) 186 | [R152](./configs/caffe/test-caffe-r152.yaml)|caffe|**11.1**|15.7|[model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ETDgy4bY0xpGgsu5tEMzgLcBQjAwpnkKkltNTtPVuMj4GQ?download=1) 187 | [R50](./configs/d2/test-d2-r50.yaml)|d2|8.2|14.9|[model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EfYoinBHrFlKmKonocse8yEBXN-hyCHNygYqjxGpIBsPvQ?download=1) 188 | [R101](./configs/d2/test-d2-r101.yaml)|d2|9.2|15.9|[model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EXXItFlOpHlNq81O1H_cPyoBXUPyXoHmIwPEudnTWKX4rQ?download=1) 189 | [X152](./configs/d2/test-d2-X152.yaml)|d2|10.7|**17.7**|[model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EdLhYc39P8tBkEDVCDOrNV4BgPhz9M4iBq8oPw1iyVSlmg?download=1) 190 | 191 | 192 | ## License 193 | 194 | This project is released under the [Apache 2.0 license](LICENSE). 195 | 196 | ## Contact 197 | 198 | This repository is currently maintained by Zhou Yu ([@yuzcccc](https://github.com/yuzcccc)), Tongan Luo ([@Zoroaster97](https://github.com/Zoroaster97)), and Jing Li ([@J1mL3e_](https://github.com/JimLee4530)). 199 | 200 | ## Citation 201 | 202 | If this repository is helpful for your research or you want to refer the provided pretrained models, you could cite the work using the following BibTeX entry: 203 | 204 | ``` 205 | @misc{yu2020buapt, 206 | author = {Yu, Zhou and Li, Jing and Luo, Tongan and Yu, Jun}, 207 | title = {A PyTorch Implementation of Bottom-Up-Attention}, 208 | howpublished = {\url{https://github.com/MILVLG/bottom-up-attention.pytorch}}, 209 | year = {2020} 210 | } 211 | 212 | ``` 213 | -------------------------------------------------------------------------------- /bua/__init__.py: -------------------------------------------------------------------------------- 1 | from .d2 import add_attribute_config 2 | from .caffe import add_bottom_up_attention_config 3 | 4 | def add_config(args, cfg): 5 | if args.mode == "caffe": 6 | add_bottom_up_attention_config(cfg, True) 7 | elif args.mode == "d2": 8 | add_attribute_config(cfg) 9 | else: 10 | raise Exception("detection model not supported: {}".format(args.model)) 11 | from . import visual_genome -------------------------------------------------------------------------------- /bua/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import add_bottom_up_attention_config 2 | from .modeling.backbone import build_bua_resnet_backbone 3 | from .modeling.rcnn import GeneralizedBUARCNN 4 | from .modeling.roi_heads import BUACaffeRes5ROIHeads 5 | from .modeling.rpn import StandardBUARPNHead, BUARPN -------------------------------------------------------------------------------- /bua/caffe/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | from detectron2.config import CfgNode as CN 5 | 6 | 7 | def add_bottom_up_attention_config(cfg, caffe=False): 8 | """ 9 | Add config for tridentnet. 10 | """ 11 | _C = cfg 12 | 13 | _C.MODEL.BUA = CN() 14 | _C.MODEL.BUA.CAFFE = caffe 15 | _C.MODEL.BUA.RESNET_VERSION = 1 16 | _C.MODEL.BUA.ATTRIBUTE_ON = False 17 | _C.MODEL.BUA.EXTRACT_FEATS = False 18 | 19 | _C.MODEL.BUA.RPN = CN() 20 | # out_channels of conv for bottom-up-attentions RPN. 21 | _C.MODEL.BUA.RPN.CONV_OUT_CHANNELS = 512 22 | 23 | _C.MODEL.BUA.EXTRACTOR = CN() 24 | 25 | # EXTRACTOR.MODE {1: extract roi features, 2: extract bbox only ,3: extract roi features by gt_bbox} 26 | _C.MODEL.BUA.EXTRACTOR.MODE = 1 27 | 28 | # config of postprocessing in extractor 29 | _C.MODEL.BUA.EXTRACTOR.MIN_BOXES = 10 30 | _C.MODEL.BUA.EXTRACTOR.MAX_BOXES = 100 31 | _C.MODEL.BUA.EXTRACTOR.CONF_THRESH = 0.2 32 | _C.MODEL.BUA.EXTRACTOR.OUTPUT_DIR = ".output/" 33 | 34 | _C.MODEL.BUA.ATTRIBUTE = CN() 35 | _C.MODEL.BUA.ATTRIBUTE.NUM_CLASSES = 401 36 | -------------------------------------------------------------------------------- /bua/caffe/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset_mapper import DatasetMapper 2 | 3 | __all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] -------------------------------------------------------------------------------- /bua/caffe/dataloader/dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | import logging 4 | import numpy as np 5 | import torch 6 | import cv2 7 | 8 | from detectron2.data import detection_utils as utils 9 | from detectron2.data import transforms as T 10 | 11 | from .transform_gen import ResizeShortestEdge 12 | from .detection_utils import annotations_to_instances 13 | 14 | """ 15 | This file contains the default mapping that's applied to "dataset dicts". 16 | """ 17 | 18 | __all__ = ["DatasetMapper"] 19 | 20 | def build_transform_gen(cfg, is_train): 21 | """ 22 | Create a list of :class:`TransformGen` from config. 23 | Now it includes resizing and flipping. 24 | 25 | Returns: 26 | list[TransformGen] 27 | """ 28 | if is_train: 29 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 30 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 31 | else: 32 | min_size = cfg.INPUT.MIN_SIZE_TEST 33 | max_size = cfg.INPUT.MAX_SIZE_TEST 34 | 35 | logger = logging.getLogger(__name__) 36 | tfm_gens = [] 37 | tfm_gens.append(ResizeShortestEdge(min_size, max_size, cfg.MODEL.PIXEL_MEAN)) 38 | if is_train: 39 | logger.info("TransformGens used in training: " + str(tfm_gens)) 40 | return tfm_gens 41 | 42 | class DatasetMapper: 43 | """ 44 | A callable which takes a dataset dict in Detectron2 Dataset format, 45 | and map it into a format used by the model. 46 | 47 | This is the default callable to be used to map your dataset dict into training data. 48 | You may need to follow it to implement your own one for customized logic. 49 | 50 | The callable currently does the following: 51 | 1. Read the image from "file_name" 52 | 2. Applies cropping/geometric transforms to the image and annotations 53 | 3. 
Prepare data and annotations to Tensor and :class:`Instances` 54 | """ 55 | 56 | def __init__(self, cfg, is_train=True): 57 | if cfg.INPUT.CROP.ENABLED and is_train: 58 | self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE) 59 | logging.getLogger(__name__).info("CropGen used in training: " + str(self.crop_gen)) 60 | else: 61 | self.crop_gen = None 62 | 63 | self.tfm_gens = build_transform_gen(cfg, is_train) 64 | 65 | # fmt: off 66 | self.img_format = cfg.INPUT.FORMAT 67 | self.mask_on = cfg.MODEL.MASK_ON 68 | self.mask_format = cfg.INPUT.MASK_FORMAT 69 | self.keypoint_on = cfg.MODEL.KEYPOINT_ON 70 | self.load_proposals = cfg.MODEL.LOAD_PROPOSALS 71 | # fmt: on 72 | if self.keypoint_on and is_train: 73 | # Flip only makes sense in training 74 | self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN) 75 | else: 76 | self.keypoint_hflip_indices = None 77 | 78 | if self.load_proposals: 79 | self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE 80 | self.proposal_topk = ( 81 | cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TRAIN 82 | if is_train 83 | else cfg.DATASETS.PRECOMPUTED_PROPOSAL_TOPK_TEST 84 | ) 85 | self.is_train = is_train 86 | 87 | def __call__(self, dataset_dict): 88 | """ 89 | Args: 90 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 91 | 92 | Returns: 93 | dict: a format that builtin models in detectron2 accept 94 | """ 95 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 96 | # USER: Write your own image loading if it's not from a file 97 | # image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 98 | image = cv2.imread(dataset_dict["file_name"]) 99 | h, w = image.shape[:2] 100 | # utils.check_image_size(dataset_dict, image) 101 | 102 | if "annotations" not in dataset_dict: 103 | image, transforms = T.apply_transform_gens( 104 | ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image 105 | ) 106 | else: 107 | # Crop around an instance if there are instances in the image. 108 | # USER: Remove if you don't use cropping 109 | if self.crop_gen: 110 | crop_tfm = utils.gen_crop_transform_with_instance( 111 | self.crop_gen.get_crop_size(image.shape[:2]), 112 | image.shape[:2], 113 | np.random.choice(dataset_dict["annotations"]), 114 | ) 115 | image = crop_tfm.apply_image(image) 116 | image, transforms = T.apply_transform_gens(self.tfm_gens, image) 117 | if self.crop_gen: 118 | transforms = crop_tfm + transforms 119 | 120 | image_shape = image.shape[:2] # h, w 121 | 122 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 123 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 124 | # Therefore it's important to use torch.Tensor. 125 | dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) 126 | dataset_dict["im_scale"] = float(image_shape[0])/ float(h) 127 | # Can use uint8 if it turns out to be slow some day 128 | 129 | # USER: Remove if you don't use pre-computed proposals. 130 | if self.load_proposals: 131 | utils.transform_proposals( 132 | dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk 133 | ) 134 | 135 | if not self.is_train: 136 | dataset_dict.pop("annotations", None) 137 | dataset_dict.pop("sem_seg_file_name", None) 138 | return dataset_dict 139 | 140 | if "annotations" in dataset_dict: 141 | # USER: Modify this if you want to keep them for some reason. 
142 | for anno in dataset_dict["annotations"]: 143 | if not self.mask_on: 144 | anno.pop("segmentation", None) 145 | if not self.keypoint_on: 146 | anno.pop("keypoints", None) 147 | 148 | # USER: Implement additional transformations if you have other types of data 149 | annos = [ 150 | utils.transform_instance_annotations( 151 | obj, transforms, image_shape 152 | ) 153 | for obj in dataset_dict.pop("annotations") 154 | if obj.get("iscrowd", 0) == 0 155 | ] 156 | instances = annotations_to_instances( 157 | annos, image_shape, mask_format=self.mask_format 158 | ) 159 | # Create a tight bounding box from masks, useful when image is cropped 160 | if self.crop_gen and instances.has("gt_masks"): 161 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 162 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 163 | 164 | return dataset_dict 165 | -------------------------------------------------------------------------------- /bua/caffe/dataloader/detection_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | """ 5 | Common data processing utilities that are used in a 6 | typical object detection data pipeline. 7 | """ 8 | import torch 9 | 10 | from detectron2.structures import ( 11 | Boxes, 12 | BoxMode, 13 | Instances, 14 | ) 15 | 16 | def transform_instance_annotations( 17 | annotation, transforms, image_size, *, keypoint_hflip_indices=None 18 | ): 19 | """ 20 | Apply transforms to box, segmentation and keypoints annotations of a single instance. 21 | 22 | It will use `transforms.apply_box` for the box, and 23 | `transforms.apply_coords` for segmentation polygons & keypoints. 24 | If you need anything more specially designed for each data structure, 25 | you'll need to implement your own version of this function or the transforms. 26 | 27 | Args: 28 | annotation (dict): dict of instance annotations for a single instance. 29 | It will be modified in-place. 30 | transforms (TransformList): 31 | image_size (tuple): the height, width of the transformed image 32 | keypoint_hflip_indices (ndarray[int]): see `create_keypoint_hflip_indices`. 33 | 34 | Returns: 35 | dict: 36 | the same input dict with fields "bbox", "segmentation", "keypoints" 37 | transformed according to `transforms`. 38 | The "bbox_mode" field will be set to XYXY_ABS. 39 | """ 40 | bbox = BoxMode.convert(annotation["bbox"], annotation["bbox_mode"], BoxMode.XYXY_ABS) 41 | # Note that bbox is 1d (per-instance bounding box) 42 | annotation["bbox"] = transforms.apply_box([bbox])[0] 43 | annotation["bbox_mode"] = BoxMode.XYXY_ABS 44 | 45 | if "attributes" in annotation: 46 | annotation["attributes"] = annotation["attributes"] 47 | 48 | return annotation 49 | 50 | def annotations_to_instances(annos, image_size, mask_format="polygon"): 51 | """ 52 | Create an :class:`Instances` object used by the models, 53 | from instance annotations in the dataset dict. 54 | 55 | Args: 56 | annos (list[dict]): a list of instance annotations in one image, each 57 | element for one instance. 58 | image_size (tuple): height, width 59 | 60 | Returns: 61 | Instances: 62 | It will contain fields "gt_boxes", "gt_classes", 63 | "gt_masks", "gt_keypoints", if they can be obtained from `annos`. 64 | This is the format that builtin models expect. 
65 | """ 66 | boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] 67 | target = Instances(image_size) 68 | boxes = target.gt_boxes = Boxes(boxes) 69 | boxes.clip(image_size) 70 | 71 | classes = [obj["category_id"] for obj in annos] 72 | classes = torch.tensor(classes, dtype=torch.int64) 73 | target.gt_classes = classes 74 | 75 | # attributes = [obj["attributes"] for obj in annos] 76 | attributes = [] 77 | for obj in annos: 78 | if "attributes" in obj.keys(): 79 | attributes.append(obj["attributes"]) 80 | else: 81 | attributes.append([-1]*16) 82 | attributes = torch.tensor(attributes, dtype=torch.int64) 83 | target.gt_attributes = attributes 84 | 85 | return target -------------------------------------------------------------------------------- /bua/caffe/dataloader/transform_gen.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import PIL.Image as Image 3 | import numpy as np 4 | from fvcore.transforms.transform import Transform 5 | from detectron2.data.transforms import TransformGen 6 | 7 | 8 | class ResizeTransform(Transform): 9 | """ 10 | Resize the image to a target size. 11 | """ 12 | 13 | def __init__(self, h, w, im_scale, pixel_mean): 14 | """ 15 | Args: 16 | h, w (int): original image size 17 | im_scale: im_scale of new_h/h or new_w/w 18 | """ 19 | # TODO decide on PIL vs opencv 20 | super().__init__() 21 | self._set_attributes(locals()) 22 | 23 | def apply_image(self, img): 24 | assert img.shape[:2] == (self.h, self.w) 25 | img_norm = img.astype(np.float32, copy=True) - np.asarray(self.pixel_mean) 26 | im = cv2.resize( 27 | img_norm, 28 | None, 29 | None, 30 | fx=self.im_scale, 31 | fy=self.im_scale, 32 | interpolation=cv2.INTER_LINEAR 33 | ) 34 | ret = np.asarray(im) 35 | return ret 36 | 37 | def apply_coords(self, coords): 38 | coords[:, 0] = coords[:, 0] * (self.im_scale) 39 | coords[:, 1] = coords[:, 1] * (self.im_scale) 40 | return coords 41 | 42 | def apply_segmentation(self, segmentation): 43 | segmentation = self.apply_image(segmentation, interp=Image.NEAREST) 44 | return segmentation 45 | 46 | 47 | class ResizeShortestEdge(TransformGen): 48 | """ 49 | Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. 50 | If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. 51 | """ 52 | 53 | def __init__( 54 | self, min_size, max_size, pixel_mean): 55 | """ 56 | Args: 57 | min_size (int): minimum allowed smallest edge length. 58 | max_size (int): maximum allowed longest edge length. 
59 | """ 60 | super().__init__() 61 | self.min_size = min_size 62 | self.max_size = max_size 63 | self.pixel_mean = pixel_mean 64 | 65 | self._init(locals()) 66 | 67 | def get_transform(self, img): 68 | h, w = img.shape[:2] 69 | 70 | im_shape = img.shape 71 | im_size_min = np.min(im_shape[0:2]) 72 | im_size_max = np.max(im_shape[0:2]) 73 | 74 | im_scale = float(self.min_size if not type(self.min_size) is tuple else self.min_size[0]) / float(im_size_min) 75 | 76 | # Prevent the biggest axis from being more than max_size 77 | if np.round(im_scale * im_size_max) > self.max_size: 78 | im_scale = float(self.max_size) / float(im_size_max) 79 | 80 | return ResizeTransform(h, w, im_scale, self.pixel_mean) 81 | -------------------------------------------------------------------------------- /bua/caffe/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_bua_resnet_backbone 2 | from .rcnn import GeneralizedBUARCNN 3 | from .roi_heads import BUACaffeRes5ROIHeads 4 | from .rpn import StandardBUARPNHead, BUARPN 5 | -------------------------------------------------------------------------------- /bua/caffe/modeling/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import fvcore.nn.weight_init as weight_init 3 | from torch import nn 4 | import torch.nn.functional as F 5 | 6 | from detectron2.layers import Conv2d, FrozenBatchNorm2d, get_norm, BatchNorm2d 7 | from detectron2.modeling import BACKBONE_REGISTRY, ResNet, make_stage 8 | from detectron2.modeling.backbone.resnet import BottleneckBlock, DeformBottleneckBlock, ResNetBlockBase 9 | 10 | from .layers.wrappers import Conv2dv2 11 | 12 | __all__ = ["BUABasicStem", "BUABasicStemv2", "build_bua_resnet_backbone"] 13 | 14 | class BUABasicStem(nn.Module): 15 | def __init__(self, in_channels=3, out_channels=64, norm="BN"): 16 | """ 17 | Args: 18 | norm (str or callable): a callable that takes the number of 19 | channels and return a `nn.Module`, or a pre-defined string 20 | (one of {"FrozenBN", "BN", "GN"}). 21 | """ 22 | super().__init__() 23 | self.conv1 = Conv2d( 24 | in_channels, 25 | out_channels, 26 | kernel_size=7, 27 | stride=2, 28 | padding=3, 29 | bias=False, 30 | norm=get_norm(norm, out_channels), 31 | ) 32 | weight_init.c2_msra_fill(self.conv1) 33 | 34 | def forward(self, x): 35 | x = self.conv1(x) 36 | x = F.relu_(x) 37 | x = F.max_pool2d(x, kernel_size=3, stride=2, padding=0, ceil_mode=True) 38 | return x 39 | 40 | @property 41 | def out_channels(self): 42 | return self.conv1.out_channels 43 | 44 | @property 45 | def stride(self): 46 | return 4 # = stride 2 conv -> stride 2 max pool 47 | 48 | class BUABasicStemv2(nn.Module): 49 | def __init__(self, in_channels=3, out_channels=64, norm="BN"): 50 | """ 51 | Args: 52 | norm (str or callable): a callable that takes the number of 53 | channels and return a `nn.Module`, or a pre-defined string 54 | (one of {"FrozenBN", "BN", "GN"}). 
55 | """ 56 | super().__init__() 57 | self.norm = BatchNorm2d(in_channels, eps=2e-5) 58 | self.conv1 = Conv2d( 59 | in_channels, 60 | out_channels, 61 | kernel_size=7, 62 | stride=2, 63 | padding=3, 64 | bias=False, 65 | norm=BatchNorm2d(out_channels, eps=2e-5), 66 | ) 67 | # weight_init.c2_msra_fill(self.norm) 68 | weight_init.c2_msra_fill(self.conv1) 69 | 70 | def forward(self, x): 71 | x = self.norm(x) 72 | x = self.conv1(x) 73 | x = F.relu_(x) 74 | x = F.max_pool2d(x, kernel_size=3, stride=2, padding=0, ceil_mode=True) 75 | return x 76 | 77 | @property 78 | def out_channels(self): 79 | return self.conv1.out_channels 80 | 81 | @property 82 | def stride(self): 83 | return 4 # = stride 2 conv -> stride 2 max pool 84 | 85 | @BACKBONE_REGISTRY.register() 86 | def build_bua_resnet_backbone(cfg, input_shape): 87 | """ 88 | Create a ResNet instance from config. 89 | 90 | Returns: 91 | ResNet: a :class:`ResNet` instance. 92 | """ 93 | # need registration of new blocks/stems? 94 | norm = cfg.MODEL.RESNETS.NORM 95 | if cfg.MODEL.BUA.RESNET_VERSION == 2: 96 | stem = BUABasicStemv2( 97 | in_channels=input_shape.channels, 98 | out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, 99 | ) 100 | else: 101 | stem = BUABasicStem( 102 | in_channels=input_shape.channels, 103 | out_channels=cfg.MODEL.RESNETS.STEM_OUT_CHANNELS, 104 | norm=norm, 105 | ) 106 | freeze_at = cfg.MODEL.BACKBONE.FREEZE_AT 107 | 108 | if freeze_at >= 1: 109 | for p in stem.parameters(): 110 | p.requires_grad = False 111 | stem = FrozenBatchNorm2d.convert_frozen_batchnorm(stem) 112 | 113 | # fmt: off 114 | out_features = cfg.MODEL.RESNETS.OUT_FEATURES 115 | depth = cfg.MODEL.RESNETS.DEPTH 116 | num_groups = cfg.MODEL.RESNETS.NUM_GROUPS 117 | width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP 118 | bottleneck_channels = num_groups * width_per_group 119 | in_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS 120 | out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 121 | stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 122 | res5_dilation = cfg.MODEL.RESNETS.RES5_DILATION 123 | deform_on_per_stage = cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE 124 | deform_modulated = cfg.MODEL.RESNETS.DEFORM_MODULATED 125 | deform_num_groups = cfg.MODEL.RESNETS.DEFORM_NUM_GROUPS 126 | # fmt: on 127 | assert res5_dilation in {1, 2}, "res5_dilation cannot be {}.".format(res5_dilation) 128 | 129 | num_blocks_per_stage = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}[depth] 130 | 131 | stages = [] 132 | 133 | # Avoid creating variables without gradients 134 | # It consumes extra memory and may cause allreduce to fail 135 | out_stage_idx = [{"res2": 2, "res3": 3, "res4": 4, "res5": 5}[f] for f in out_features] 136 | max_stage_idx = max(out_stage_idx) 137 | for idx, stage_idx in enumerate(range(2, max_stage_idx + 1)): 138 | dilation = res5_dilation if stage_idx == 5 else 1 139 | first_stride = 1 if idx == 0 or (stage_idx == 5 and dilation == 2) else 2 140 | stage_kargs = { 141 | "num_blocks": num_blocks_per_stage[idx], 142 | "first_stride": first_stride, 143 | "in_channels": in_channels, 144 | "bottleneck_channels": bottleneck_channels, 145 | "out_channels": out_channels, 146 | "num_groups": num_groups, 147 | "norm": norm, 148 | "stride_in_1x1": stride_in_1x1, 149 | "dilation": dilation, 150 | } 151 | if deform_on_per_stage[idx]: 152 | stage_kargs["block_class"] = DeformBottleneckBlock 153 | stage_kargs["deform_modulated"] = deform_modulated 154 | stage_kargs["deform_num_groups"] = deform_num_groups 155 | else: 156 | stage_kargs["block_class"] = BottleneckBlock 
if cfg.MODEL.BUA.RESNET_VERSION == 1 else BottleneckBlockv2 157 | blocks = make_stage(**stage_kargs) 158 | in_channels = out_channels 159 | out_channels *= 2 160 | bottleneck_channels *= 2 161 | 162 | if freeze_at >= stage_idx: 163 | for block in blocks: 164 | block.freeze() 165 | stages.append(blocks) 166 | return ResNet(stem, stages, out_features=out_features) 167 | 168 | class BottleneckBlockv2(ResNetBlockBase): 169 | def __init__( 170 | self, 171 | in_channels, 172 | out_channels, 173 | *, 174 | bottleneck_channels, 175 | stride=1, 176 | num_groups=1, 177 | norm="BN", 178 | stride_in_1x1=False, 179 | dilation=1, 180 | ): 181 | """ 182 | Args: 183 | norm (str or callable): a callable that takes the number of 184 | channels and return a `nn.Module`, or a pre-defined string 185 | (one of {"FrozenBN", "BN", "GN"}). 186 | stride_in_1x1 (bool): when stride==2, whether to put stride in the 187 | first 1x1 convolution or the bottleneck 3x3 convolution. 188 | """ 189 | super().__init__(in_channels, out_channels, stride) 190 | 191 | if in_channels != out_channels: 192 | self.shortcut = Conv2dv2( 193 | in_channels, 194 | out_channels, 195 | kernel_size=1, 196 | stride=stride, 197 | bias=False, 198 | norm=None, 199 | ) 200 | else: 201 | self.shortcut = None 202 | 203 | # The original MSRA ResNet models have stride in the first 1x1 conv 204 | # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have 205 | # stride in the 3x3 conv 206 | stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride) 207 | 208 | self.conv1 = Conv2dv2( 209 | in_channels, 210 | bottleneck_channels, 211 | kernel_size=1, 212 | stride=stride_1x1, 213 | bias=False, 214 | norm=None, 215 | ) 216 | 217 | self.conv2 = Conv2dv2( 218 | bottleneck_channels, 219 | bottleneck_channels, 220 | kernel_size=3, 221 | stride=stride_3x3, 222 | padding=1 * dilation, 223 | bias=False, 224 | groups=num_groups, 225 | dilation=dilation, 226 | norm=BatchNorm2d(bottleneck_channels, eps=2e-5), 227 | activation=F.relu_, 228 | ) 229 | 230 | self.conv3 = Conv2dv2( 231 | bottleneck_channels, 232 | out_channels, 233 | kernel_size=1, 234 | bias=False, 235 | norm=BatchNorm2d(bottleneck_channels, eps=2e-5), 236 | activation=F.relu_, 237 | ) 238 | 239 | for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]: 240 | if layer is not None: # shortcut can be None 241 | weight_init.c2_msra_fill(layer) 242 | 243 | self.norm = BatchNorm2d(in_channels, eps=2e-5) 244 | 245 | # Zero-initialize the last normalization in each residual branch, 246 | # so that at the beginning, the residual branch starts with zeros, 247 | # and each residual block behaves like an identity. 248 | # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour": 249 | # "For BN layers, the learnable scaling coefficient γ is initialized 250 | # to be 1, except for each residual block's last BN 251 | # where γ is initialized to be 0." 252 | 253 | # nn.init.constant_(self.conv3.norm.weight, 0) 254 | # TODO this somehow hurts performance when training GN models from scratch. 255 | # Add it as an option when we need to use this code to train a backbone. 
256 | 257 | def forward(self, x): 258 | x_2 = self.norm(x) 259 | x_2 = F.relu_(x_2) 260 | 261 | out = self.conv1(x_2) 262 | # out = F.relu_(out) 263 | 264 | out = self.conv2(out) 265 | # out = F.relu_(out) 266 | 267 | out = self.conv3(out) 268 | 269 | if self.shortcut is not None: 270 | shortcut = self.shortcut(x_2) 271 | else: 272 | shortcut = x 273 | 274 | out += shortcut 275 | # out = F.relu_(out) 276 | return out -------------------------------------------------------------------------------- /bua/caffe/modeling/box_regression.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | import torch 4 | from detectron2.structures import Boxes 5 | from typing import List, Tuple, Union 6 | 7 | # Value for clamping large dw and dh predictions. The heuristic is that we clamp 8 | # such that dw and dh are no larger than what would transform a 16px box into a 9 | # 1000px box (based on a small anchor, 16px, and a typical image size, 1000px). 10 | _DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16) 11 | 12 | 13 | __all__ = ["BUABoxes", "BUABox2BoxTransform"] 14 | 15 | class BUABoxes(Boxes): 16 | """ 17 | This structure stores a list of boxes as a Nx4 torch.Tensor. 18 | It supports some common methods about boxes 19 | (`area`, `clip`, `nonempty`, etc), 20 | and also behaves like a Tensor 21 | (support indexing, `to(device)`, `.device`, and iteration over all boxes) 22 | 23 | Attributes: 24 | tensor: float matrix of Nx4. 25 | """ 26 | 27 | BoxSizeType = Union[List[int], Tuple[int, int]] 28 | def __init__(self, tensor: torch.Tensor): 29 | super().__init__(tensor) 30 | 31 | def clip(self, box_size: BoxSizeType) -> None: 32 | """ 33 | NOTE: In order to be the same as bottom-up-attention network, we have 34 | defined the new clip function. 35 | 36 | Clip (in place) the boxes by limiting x coordinates to the range [0, width] 37 | and y coordinates to the range [0, height]. 38 | 39 | Args: 40 | box_size (height, width): The clipping box's size. 41 | """ 42 | assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!" 43 | TO_REMOVE = 1 44 | h, w = box_size 45 | self.tensor[:, 0].clamp_(min=0, max=w - TO_REMOVE) 46 | self.tensor[:, 1].clamp_(min=0, max=h - TO_REMOVE) 47 | self.tensor[:, 2].clamp_(min=0, max=w - TO_REMOVE) 48 | self.tensor[:, 3].clamp_(min=0, max=h - TO_REMOVE) 49 | 50 | def nonempty(self, threshold: int = 0) -> torch.Tensor: 51 | """ 52 | NOTE: In order to be the same as bottom-up-attention network, we have 53 | defined the new nonempty function. 54 | 55 | Find boxes that are non-empty. 56 | A box is considered empty, if either of its side is no larger than threshold. 57 | 58 | Returns: 59 | Tensor: 60 | a binary vector which represents whether each box is empty 61 | (False) or non-empty (True). 62 | """ 63 | TO_REMOVE = 1 64 | box = self.tensor 65 | widths = box[:, 2] - box[:, 0] + TO_REMOVE 66 | heights = box[:, 3] - box[:, 1] + TO_REMOVE 67 | keep = (widths > threshold) & (heights > threshold) 68 | return keep 69 | 70 | def filter_boxes(self): 71 | box = self.tensor 72 | keep = (box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0]) 73 | return keep 74 | 75 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Boxes": 76 | """ 77 | Returns: 78 | BUABoxes: Create a new :class:`BUABoxes` by indexing. 79 | 80 | The following usage are allowed: 81 | 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box. 82 | 2. 
`new_boxes = boxes[2:10]`: return a slice of boxes. 83 | 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor 84 | with `length = len(boxes)`. Nonzero elements in the vector will be selected. 85 | 86 | Note that the returned Boxes might share storage with this Boxes, 87 | subject to Pytorch's indexing semantics. 88 | """ 89 | if isinstance(item, int): 90 | return BUABoxes(self.tensor[item].view(1, -1)) 91 | b = self.tensor[item] 92 | assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) 93 | return BUABoxes(b) 94 | 95 | class BUABox2BoxTransform(object): 96 | """ 97 | The box-to-box transform defined in R-CNN. The transformation is parameterized 98 | by 4 deltas: (dx, dy, dw, dh). The transformation scales the box's width and height 99 | by exp(dw), exp(dh) and shifts a box's center by the offset (dx * width, dy * height). 100 | """ 101 | 102 | def __init__(self, weights, scale_clamp=_DEFAULT_SCALE_CLAMP): 103 | """ 104 | Args: 105 | weights (4-element tuple): Scaling factors that are applied to the 106 | (dx, dy, dw, dh) deltas. In Fast R-CNN, these were originally set 107 | such that the deltas have unit variance; now they are treated as 108 | hyperparameters of the system. 109 | scale_clamp (float): When predicting deltas, the predicted box scaling 110 | factors (dw and dh) are clamped such that they are <= scale_clamp. 111 | """ 112 | self.weights = weights 113 | self.scale_clamp = scale_clamp 114 | 115 | def get_deltas(self, src_boxes, target_boxes): 116 | """ 117 | Get box regression transformation deltas (dx, dy, dw, dh) that can be used 118 | to transform the `src_boxes` into the `target_boxes`. That is, the relation 119 | ``target_boxes == self.apply_deltas(deltas, src_boxes)`` is true (unless 120 | any delta is too large and is clamped). 121 | 122 | Args: 123 | src_boxes (Tensor): source boxes, e.g., object proposals 124 | target_boxes (Tensor): target of the transformation, e.g., ground-truth 125 | boxes. 126 | """ 127 | assert isinstance(src_boxes, torch.Tensor), type(src_boxes) 128 | assert isinstance(target_boxes, torch.Tensor), type(target_boxes) 129 | 130 | TO_REMOVE = 1 # TODO remove 131 | src_widths = src_boxes[:, 2] - src_boxes[:, 0] + TO_REMOVE 132 | src_heights = src_boxes[:, 3] - src_boxes[:, 1] + TO_REMOVE 133 | src_ctr_x = src_boxes[:, 0] + 0.5 * src_widths 134 | src_ctr_y = src_boxes[:, 1] + 0.5 * src_heights 135 | 136 | target_widths = target_boxes[:, 2] - target_boxes[:, 0] + TO_REMOVE 137 | target_heights = target_boxes[:, 3] - target_boxes[:, 1] + TO_REMOVE 138 | target_ctr_x = target_boxes[:, 0] + 0.5 * target_widths 139 | target_ctr_y = target_boxes[:, 1] + 0.5 * target_heights 140 | 141 | wx, wy, ww, wh = self.weights 142 | dx = wx * (target_ctr_x - src_ctr_x) / src_widths 143 | dy = wy * (target_ctr_y - src_ctr_y) / src_heights 144 | dw = ww * torch.log(target_widths / src_widths) 145 | dh = wh * torch.log(target_heights / src_heights) 146 | 147 | deltas = torch.stack((dx, dy, dw, dh), dim=1) 148 | assert (src_widths > 0).all().item(), "Input boxes to Box2BoxTransform are not valid!" 149 | return deltas 150 | 151 | def apply_deltas(self, deltas, boxes): 152 | """ 153 | Apply transformation `deltas` (dx, dy, dw, dh) to `boxes`. 154 | 155 | Args: 156 | deltas (Tensor): transformation deltas of shape (N, k*4), where k >= 1. 157 | deltas[i] represents k potentially different class-specific 158 | box transformations for the single box boxes[i]. 
159 | boxes (Tensor): boxes to transform, of shape (N, 4) 160 | """ 161 | assert torch.isfinite(deltas).all().item(), "Box regression deltas become infinite or NaN!" 162 | boxes = boxes.to(deltas.dtype) 163 | 164 | TO_REMOVE = 1 # TODO remove 165 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 166 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 167 | ctr_x = boxes[:, 0] + 0.5 * widths 168 | ctr_y = boxes[:, 1] + 0.5 * heights 169 | 170 | wx, wy, ww, wh = self.weights 171 | dx = deltas[:, 0::4] / wx 172 | dy = deltas[:, 1::4] / wy 173 | dw = deltas[:, 2::4] / ww 174 | dh = deltas[:, 3::4] / wh 175 | 176 | # Prevent sending too large values into torch.exp() 177 | dw = torch.clamp(dw, max=self.scale_clamp) 178 | dh = torch.clamp(dh, max=self.scale_clamp) 179 | 180 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 181 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 182 | pred_w = torch.exp(dw) * widths[:, None] 183 | pred_h = torch.exp(dh) * heights[:, None] 184 | 185 | pred_boxes = torch.zeros_like(deltas) 186 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w # x1 187 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h # y1 188 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w # x2 189 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h # y2 190 | return pred_boxes -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/csrc/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .nms import SwapAlign2Nat, swap_align2nat 3 | 4 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/csrc/nms/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector 
mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/csrc/nms/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "vision_cpu.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "vision_cuda.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/csrc/nms/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
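// Overview: a straightforward O(N^2) greedy NMS on the CPU. Boxes are visited in
// descending score order; each box that has not itself been suppressed suppresses every
// lower-scored box whose IoU with it is >= threshold, and the indices of the surviving
// boxes are returned as an int64 tensor.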
2 | #include "vision_cpu.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/csrc/nms/vision_cpu.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/csrc/nms/vision_cuda.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 6 | 7 | 8 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 9 | const int height, 10 | const int width); 11 | -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include 4 | #include "nms/nms.h" 5 | 6 | namespace bottom_up_attention { 7 | 8 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | } 11 | 12 | } // namespace bottom_up_attention 13 | -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from bua.caffe.modeling import _C 4 | 5 | from apex import amp 6 | import torch 7 | 8 | # Only valid with fp32 inputs - give AMP the hint 9 | nms = amp.float_function(_C.nms) 10 | 11 | # nms.__doc__ = """ 12 | # This function performs Non-maximum suppresion""" 13 | 14 | # NOTE: In order to be consistent with bottom-up-attention, we nms core function from maskrcnn-benchmark 15 | 16 | def batched_nms(boxes, scores, idxs, iou_threshold): 17 | """ 18 | Same as torchvision.ops.boxes.batched_nms, but safer. 19 | """ 20 | assert boxes.shape[-1] == 4 21 | boxes = boxes.cpu() 22 | scores = scores.cpu() 23 | # TODO may need better strategy. 24 | # Investigate after having a fully-cuda NMS op. 25 | if len(boxes) < 40000: 26 | return box_ops_batched_nms(boxes, scores, idxs, iou_threshold) 27 | 28 | result_mask = scores.new_zeros(scores.size(), dtype=torch.bool) 29 | for id in torch.unique(idxs).cpu().tolist(): 30 | # if id == 0: 31 | # continue 32 | mask = (idxs == id).nonzero().view(-1) 33 | keep = nms(boxes[mask], scores[mask], iou_threshold) 34 | result_mask[mask[keep]] = True 35 | keep = result_mask.nonzero().view(-1) 36 | keep = keep[scores[keep].argsort(descending=True)] 37 | return keep 38 | 39 | def box_ops_batched_nms(boxes, scores, idxs, iou_threshold): 40 | """ 41 | Performs non-maximum suppression in a batched fashion. 42 | 43 | Each index value correspond to a category, and NMS 44 | will not be applied between elements of different categories. 45 | 46 | Parameters 47 | ---------- 48 | boxes : Tensor[N, 4] 49 | boxes where NMS will be performed. They 50 | are expected to be in (x1, y1, x2, y2) format 51 | scores : Tensor[N] 52 | scores for each one of the boxes 53 | idxs : Tensor[N] 54 | indices of the categories for each one of the boxes. 55 | iou_threshold : float 56 | discards all overlapping boxes 57 | with IoU < iou_threshold 58 | 59 | Returns 60 | ------- 61 | keep : Tensor 62 | int64 tensor with the indices of 63 | the elements that have been kept by NMS, sorted 64 | in decreasing order of scores 65 | """ 66 | if boxes.numel() == 0: 67 | return torch.empty((0,), dtype=torch.int64, device=boxes.device) 68 | # strategy: in order to perform NMS independently per class. 69 | # we add an offset to all the boxes. 
The offset is dependent 70 | # only on the class idx, and is large enough so that boxes 71 | # from different classes do not overlap 72 | max_coordinate = boxes.max() 73 | offsets = idxs.to(boxes) * (max_coordinate + 1) 74 | boxes_for_nms = boxes + offsets[:, None] 75 | keep = nms(boxes_for_nms, scores, iou_threshold) 76 | return keep -------------------------------------------------------------------------------- /bua/caffe/modeling/layers/wrappers.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch.nn.modules.utils import _ntuple 4 | 5 | class Conv2dv2(torch.nn.Conv2d): 6 | """ 7 | A wrapper around :class:`torch.nn.Conv2d` to support more features. 8 | """ 9 | 10 | def __init__(self, *args, **kwargs): 11 | """ 12 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 13 | 14 | Args: 15 | norm (nn.Module, optional): a normalization layer 16 | activation (callable(Tensor) -> Tensor): a callable activation function 17 | 18 | It assumes that norm layer is used before activation. 19 | """ 20 | norm = kwargs.pop("norm", None) 21 | activation = kwargs.pop("activation", None) 22 | super().__init__(*args, **kwargs) 23 | 24 | self.norm = norm 25 | self.activation = activation 26 | 27 | def forward(self, x): 28 | if x.numel() == 0 and self.training: 29 | # https://github.com/pytorch/pytorch/issues/12013 30 | assert not isinstance( 31 | self.norm, torch.nn.SyncBatchNorm 32 | ), "SyncBatchNorm does not support empty inputs!" 33 | if self.norm is not None: 34 | x = self.norm(x) 35 | if self.activation is not None: 36 | x = self.activation(x) 37 | x = super().forward(x) 38 | return x -------------------------------------------------------------------------------- /bua/caffe/modeling/rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging, os 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | 7 | from detectron2.structures import ImageList 8 | from detectron2.utils.logger import log_first_n 9 | 10 | from detectron2.modeling.backbone import build_backbone 11 | from detectron2.modeling.postprocessing import detector_postprocess 12 | from detectron2.modeling.proposal_generator import build_proposal_generator 13 | from detectron2.modeling.roi_heads import build_roi_heads 14 | from detectron2.modeling.meta_arch import META_ARCH_REGISTRY 15 | 16 | # from models.bua_caffe.postprocessing import extractor_postprocess 17 | #from utils import save_features 18 | 19 | __all__ = ["GeneralizedBUARCNN"] 20 | 21 | 22 | @META_ARCH_REGISTRY.register() 23 | class GeneralizedBUARCNN(nn.Module): 24 | """ 25 | Generalized R-CNN. Any models that contains the following three components: 26 | 1. Per-image feature extraction (aka backbone) 27 | 2. Region proposal generation 28 | 3. 
Per-region feature extraction and prediction 29 | """ 30 | 31 | def __init__(self, cfg): 32 | super().__init__() 33 | 34 | self.device = torch.device(cfg.MODEL.DEVICE) 35 | self.bua_caffe = cfg.MODEL.BUA.CAFFE 36 | self.resnet_version = cfg.MODEL.BUA.RESNET_VERSION 37 | self.backbone = build_backbone(cfg) 38 | self.in_features = cfg.MODEL.RPN.IN_FEATURES 39 | self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) 40 | self.roi_heads = build_roi_heads(cfg, self.backbone.output_shape()) 41 | 42 | assert len(cfg.MODEL.PIXEL_MEAN) == len(cfg.MODEL.PIXEL_STD) 43 | self.extract_on = cfg.MODEL.BUA.EXTRACT_FEATS 44 | self.extractor = cfg.MODEL.BUA.EXTRACTOR 45 | self.to(self.device) 46 | 47 | def forward(self, batched_inputs): 48 | """ 49 | Args: 50 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 51 | Each item in the list contains the inputs for one image. 52 | For now, each item in the list is a dict that contains: 53 | 54 | * image: Tensor, image in (C, H, W) format. 55 | * instances (optional): groundtruth :class:`Instances` 56 | * proposals (optional): :class:`Instances`, precomputed proposals. 57 | 58 | Other information that's included in the original dicts, such as: 59 | 60 | * "height", "width" (int): the output resolution of the model, used in inference. 61 | See :meth:`postprocess` for details. 62 | 63 | Returns: 64 | list[dict]: 65 | Each dict is the output for one input image. 66 | The dict contains one key "instances" whose value is a :class:`Instances`. 67 | The :class:`Instances` object has the following keys: 68 | "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" 69 | """ 70 | if not self.training: 71 | return self.inference(batched_inputs) 72 | 73 | images = self.preprocess_image(batched_inputs) 74 | if "instances" in batched_inputs[0]: 75 | gt_instances = [x["instances"].to(self.device) for x in batched_inputs] 76 | elif "targets" in batched_inputs[0]: 77 | log_first_n( 78 | logging.WARN, "'targets' in the model inputs is now renamed to 'instances'!", n=10 79 | ) 80 | gt_instances = [x["targets"].to(self.device) for x in batched_inputs] 81 | else: 82 | gt_instances = None 83 | 84 | features = self.backbone(images.tensor) 85 | 86 | if self.resnet_version == 2: 87 | for f in features: 88 | out = self.roi_heads.res5[0].norm(features[f]) 89 | features[f] = F.relu_(out) 90 | 91 | if self.proposal_generator: 92 | proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) 93 | else: 94 | assert "proposals" in batched_inputs[0] 95 | proposals = [x["proposals"].to(self.device) for x in batched_inputs] 96 | proposal_losses = {} 97 | 98 | _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) 99 | 100 | losses = {} 101 | losses.update(detector_losses) 102 | losses.update(proposal_losses) 103 | return losses 104 | 105 | def inference(self, batched_inputs, detected_instances=None, do_postprocess=True): 106 | """ 107 | Run inference on the given inputs. 108 | 109 | Args: 110 | batched_inputs (list[dict]): same as in :meth:`forward` 111 | detected_instances (None or list[Instances]): if not None, it 112 | contains an `Instances` object per image. The `Instances` 113 | object contains "pred_boxes" and "pred_classes" which are 114 | known boxes in the image. 115 | The inference will then skip the detection of bounding boxes, 116 | and only predict other per-ROI outputs. 117 | do_postprocess (bool): whether to apply post-processing on the outputs. 
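For reference, a minimal sketch of assembling `batched_inputs` by hand (illustrative only; the provided extract_features.py and train_net.py scripts build these dicts through the dataset mappers, which also take care of resizing and any pixel normalization):

    import torch

    # `model`: a GeneralizedBUARCNN built elsewhere, e.g. via detectron2's build_model(cfg)
    model.eval()                                   # eval mode makes forward() route to inference()
    h, w = 480, 640                                # original image size (illustrative)
    im_scale = 600.0 / min(h, w)                   # the provided configs use MIN_SIZE_TEST: 600
    image = torch.rand(3, int(h * im_scale), int(w * im_scale)) * 255.0  # stand-in for a real image
    inputs = [{"image": image, "im_scale": im_scale, "height": h, "width": w}]
    with torch.no_grad():
        outputs = model(inputs)                    # with EXTRACT_FEATS off: [{"instances": Instances}]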
118 | 119 | Returns: 120 | same as in :meth:`forward`. 121 | """ 122 | assert not self.training 123 | 124 | images = self.preprocess_image(batched_inputs) 125 | features = self.backbone(images.tensor) 126 | 127 | if self.resnet_version == 2: 128 | for f in features: 129 | out = self.roi_heads.res5[0].norm(features[f]) 130 | features[f] = F.relu_(out) 131 | 132 | if detected_instances is None: 133 | if self.proposal_generator: 134 | proposals, _ = self.proposal_generator(images, features, None) 135 | else: 136 | assert "proposals" in batched_inputs[0] 137 | proposals = [x["proposals"].to(self.device) for x in batched_inputs] 138 | 139 | if self.extract_on: 140 | return self.roi_heads(images, features, proposals, None) 141 | else: 142 | results, _ = self.roi_heads(images, features, proposals, None) 143 | else: 144 | detected_instances = [x.to(self.device) for x in detected_instances] 145 | results = self.roi_heads.forward_with_given_boxes(features, detected_instances) 146 | 147 | if do_postprocess: 148 | processed_results = [] 149 | for results_per_image, input_per_image, image_size in zip( 150 | results, batched_inputs, images.image_sizes 151 | ): 152 | height = input_per_image.get("height", image_size[0]) 153 | width = input_per_image.get("width", image_size[1]) 154 | if not self.bua_caffe: 155 | results_per_image = detector_postprocess(results_per_image, height, width) 156 | processed_results.append({"instances": results_per_image}) 157 | return processed_results 158 | else: 159 | return results 160 | 161 | def preprocess_image(self, batched_inputs): 162 | """ 163 | Normalize, pad and batch the input images. 164 | """ 165 | images = [x["image"].to(self.device) for x in batched_inputs] 166 | image_scales = [x["im_scale"] for x in batched_inputs] 167 | images = ImageList.from_tensors(images, self.backbone.size_divisibility) 168 | images.image_scales = image_scales 169 | return images 170 | -------------------------------------------------------------------------------- /bua/caffe/modeling/rpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from typing import Dict, List 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from detectron2.modeling import RPN_HEAD_REGISTRY 9 | from detectron2.layers import ShapeSpec 10 | 11 | from detectron2.modeling.proposal_generator import build_rpn_head 12 | from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY 13 | from detectron2.modeling.anchor_generator import build_anchor_generator 14 | from .box_regression import BUABox2BoxTransform 15 | from detectron2.modeling.matcher import Matcher 16 | from .rpn_outputs import BUARPNOutputs, find_top_bua_rpn_proposals 17 | 18 | import copy 19 | 20 | @RPN_HEAD_REGISTRY.register() 21 | class StandardBUARPNHead(nn.Module): 22 | """ 23 | RPN classification and regression heads. Uses a 3x3 conv to produce a shared 24 | hidden state from which one 1x1 conv predicts objectness logits for each anchor 25 | and a second 1x1 conv predicts bounding-box deltas specifying how to deform 26 | each anchor into an object proposal. 
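A shape sketch for a single feature level (numbers follow the caffe R-101 configs: the input is the 1024-channel res4 map, CONV_OUT_CHANNELS is 512, and four anchor sizes with the three default aspect ratios give 12 anchors per location; the doubled objectness channels appear to follow the original caffe RPN's background/foreground pair rather than detectron2's single logit per anchor):

    #   input feature map       : (N, 1024, H, W)
    #   conv + ReLU             : (N,  512, H, W)
    #   objectness_logits (1x1) : (N, 12 * 2, H, W)   two logits (bg/fg) per anchor
    #   anchor_deltas (1x1)     : (N, 12 * 4, H, W)   one (dx, dy, dw, dh) per anchor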
27 | """ 28 | 29 | def __init__(self, cfg, input_shape: List[ShapeSpec]): 30 | super().__init__() 31 | 32 | # Standard RPN is shared across levels: 33 | out_channels = cfg.MODEL.BUA.RPN.CONV_OUT_CHANNELS 34 | 35 | in_channels = [s.channels for s in input_shape] 36 | assert len(set(in_channels)) == 1, "Each level must have the same channel!" 37 | in_channels = in_channels[0] 38 | 39 | # RPNHead should take the same input as anchor generator 40 | # NOTE: it assumes that creating an anchor generator does not have unwanted side effect. 41 | anchor_generator = build_anchor_generator(cfg, input_shape) 42 | num_cell_anchors = anchor_generator.num_cell_anchors 43 | box_dim = anchor_generator.box_dim 44 | assert ( 45 | len(set(num_cell_anchors)) == 1 46 | ), "Each level must have the same number of cell anchors" 47 | num_cell_anchors = num_cell_anchors[0] 48 | 49 | # 3x3 conv for the hidden representation 50 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1) 51 | # 1x1 conv for predicting objectness logits 52 | self.objectness_logits = nn.Conv2d(out_channels, num_cell_anchors * 2, kernel_size=1, stride=1) 53 | # 1x1 conv for predicting box2box transform deltas 54 | self.anchor_deltas = nn.Conv2d( 55 | out_channels, num_cell_anchors * box_dim, kernel_size=1, stride=1 56 | ) 57 | 58 | for l in [self.conv, self.objectness_logits, self.anchor_deltas]: 59 | nn.init.normal_(l.weight, std=0.01) 60 | nn.init.constant_(l.bias, 0) 61 | 62 | def forward(self, features): 63 | """ 64 | Args: 65 | features (list[Tensor]): list of feature maps 66 | """ 67 | pred_objectness_logits = [] 68 | pred_anchor_deltas = [] 69 | for x in features: 70 | t = F.relu(self.conv(x)) 71 | pred_objectness_logits.append(self.objectness_logits(t)) 72 | pred_anchor_deltas.append(self.anchor_deltas(t)) 73 | return pred_objectness_logits, pred_anchor_deltas 74 | 75 | @PROPOSAL_GENERATOR_REGISTRY.register() 76 | class BUARPN(nn.Module): 77 | """ 78 | Region Proposal Network, introduced by the Faster R-CNN paper. 
79 | """ 80 | 81 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 82 | super().__init__() 83 | 84 | # fmt: off 85 | self.min_box_side_len = cfg.MODEL.PROPOSAL_GENERATOR.MIN_SIZE 86 | self.in_features = cfg.MODEL.RPN.IN_FEATURES 87 | self.nms_thresh = cfg.MODEL.RPN.NMS_THRESH 88 | self.batch_size_per_image = cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE 89 | self.positive_fraction = cfg.MODEL.RPN.POSITIVE_FRACTION 90 | self.smooth_l1_beta = cfg.MODEL.RPN.SMOOTH_L1_BETA 91 | self.loss_weight = cfg.MODEL.RPN.LOSS_WEIGHT 92 | # fmt: on 93 | 94 | # Map from self.training state to train/test settings 95 | self.pre_nms_topk = { 96 | True: cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN, 97 | False: cfg.MODEL.RPN.PRE_NMS_TOPK_TEST, 98 | } 99 | self.post_nms_topk = { 100 | True: cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN, 101 | False: cfg.MODEL.RPN.POST_NMS_TOPK_TEST, 102 | } 103 | self.boundary_threshold = cfg.MODEL.RPN.BOUNDARY_THRESH 104 | 105 | self.anchor_generator = build_anchor_generator( 106 | cfg, [input_shape[f] for f in self.in_features] 107 | ) 108 | self.box2box_transform = BUABox2BoxTransform(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) 109 | self.anchor_matcher = Matcher( 110 | cfg.MODEL.RPN.IOU_THRESHOLDS, cfg.MODEL.RPN.IOU_LABELS, allow_low_quality_matches=True 111 | ) 112 | self.rpn_head = build_rpn_head(cfg, [input_shape[f] for f in self.in_features]) 113 | 114 | def forward(self, images, features, gt_instances=None): 115 | """ 116 | Args: 117 | images (ImageList): input images of length `N` 118 | features (dict[str: Tensor]): input data as a mapping from feature 119 | map name to tensor. Axis 0 represents the number of images `N` in 120 | the input data; axes 1-3 are channels, height, and width, which may 121 | vary between feature maps (e.g., if a feature pyramid is used). 122 | gt_instances (list[Instances], optional): a length `N` list of `Instances`s. 123 | Each `Instances` stores ground-truth instances for the corresponding image. 124 | 125 | Returns: 126 | proposals: list[Instances] or None 127 | loss: dict[Tensor] 128 | """ 129 | gt_boxes = [x.gt_boxes for x in gt_instances] if gt_instances is not None else None 130 | del gt_instances 131 | features = [features[f] for f in self.in_features] 132 | pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) 133 | anchors_in_image = self.anchor_generator(features) 134 | anchors = [copy.deepcopy(anchors_in_image) for _ in range(len(features[0]))] 135 | # TODO: The anchors only depend on the feature map shape; there's probably 136 | # an opportunity for some optimizations (e.g., caching anchors). 137 | outputs = BUARPNOutputs( 138 | self.box2box_transform, 139 | self.anchor_matcher, 140 | self.batch_size_per_image, 141 | self.positive_fraction, 142 | images, 143 | pred_objectness_logits, 144 | pred_anchor_deltas, 145 | anchors, 146 | self.boundary_threshold, 147 | gt_boxes, 148 | self.smooth_l1_beta, 149 | ) 150 | 151 | if self.training: 152 | losses = {k: v * self.loss_weight for k, v in outputs.losses().items()} 153 | else: 154 | losses = {} 155 | 156 | with torch.no_grad(): 157 | # Find the top proposals by applying NMS and removing boxes that 158 | # are too small. The proposals are treated as fixed for approximate 159 | # joint training with roi heads. This approach ignores the derivative 160 | # w.r.t. the proposal boxes’ coordinates that are also network 161 | # responses, so is approximate. 
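            # find_top_bua_rpn_proposals (rpn_outputs.py) broadly mirrors detectron2's
            # find_top_rpn_proposals: decode the predicted deltas into boxes, keep the
            # pre_nms_topk highest-scoring candidates per feature level, run NMS at
            # nms_thresh, drop boxes with a side shorter than min_box_side_len, and
            # return at most post_nms_topk proposals per image as Instances.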
162 | proposals = find_top_bua_rpn_proposals( 163 | outputs.predict_proposals(), 164 | outputs.predict_objectness_logits(), 165 | images, 166 | self.nms_thresh, 167 | self.pre_nms_topk[self.training], 168 | self.post_nms_topk[self.training], 169 | self.min_box_side_len, 170 | self.training, 171 | ) 172 | # For RPN-only models, the proposals are the final output and we return them in 173 | # high-to-low confidence order. 174 | # For end-to-end models, the RPN proposals are an intermediate state 175 | # and this sorting is actually not needed. But the cost is negligible. 176 | # inds = [p.objectness_logits.sort(descending=True)[1] for p in proposals] 177 | # proposals = [p[ind] for p, ind in zip(proposals, inds)] 178 | 179 | return proposals, losses -------------------------------------------------------------------------------- /bua/caffe/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from detectron2.structures import Instances 7 | from modeling.layers.nms import nms # BC-compat 8 | 9 | def extractor_postprocess(boxes, scores, features_pooled, input_per_image, extractor): 10 | """ 11 | Resize the output instances. 12 | The input images are often resized when entering an object detector. 13 | As a result, we often need the outputs of the detector in a different 14 | resolution from its inputs. 15 | 16 | This function will resize the raw outputs of an R-CNN detector 17 | to produce outputs according to the desired output resolution. 18 | 19 | Args: 20 | results (Instances): the raw outputs from the detector. 21 | `results.image_size` contains the input image resolution the detector sees. 22 | This object might be modified in-place. 23 | output_height, output_width: the desired output resolution. 24 | 25 | Returns: 26 | Instances: the resized output from the model, based on the output resolution 27 | """ 28 | MIN_BOXES = extractor.MIN_BOXES 29 | MAX_BOXES = extractor.MAX_BOXES 30 | CONF_THRESH = extractor.CONF_THRESH 31 | 32 | cur_device = scores.device 33 | 34 | dets = boxes / input_per_image["im_scale"] 35 | 36 | max_conf = torch.zeros((scores.shape[0])).to(cur_device) 37 | 38 | for cls_ind in range(1, scores.shape[1]): 39 | cls_scores = scores[:, cls_ind] 40 | keep = nms(dets, cls_scores, 0.3) 41 | max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep], 42 | cls_scores[keep], 43 | max_conf[keep]) 44 | 45 | keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten() 46 | if len(keep_boxes) < MIN_BOXES: 47 | keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES] 48 | elif len(keep_boxes) > MAX_BOXES: 49 | keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES] 50 | # keep_boxes = torch.argsort(max_conf, descending=True)[:100] 51 | # feat_list.append(feats[i][keep_boxes]) 52 | image_feat = features_pooled[keep_boxes] 53 | image_bboxes = dets[keep_boxes] 54 | 55 | return image_feat, image_bboxes -------------------------------------------------------------------------------- /bua/d2/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataloader.build_loader import ( 2 | build_detection_train_loader_with_attributes, 3 | build_detection_test_loader_with_attributes, 4 | ) 5 | from .modeling.roi_heads import AttributeRes5ROIHeads 6 | from .. 
import visual_genome 7 | from .config import add_attribute_config -------------------------------------------------------------------------------- /bua/d2/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from detectron2.config import CfgNode as CN 5 | 6 | """ 7 | config for mode detectron2 8 | """ 9 | 10 | def add_attribute_config(cfg): 11 | """ 12 | Add config for attribute prediction. 13 | """ 14 | # Whether to have attribute prediction 15 | cfg.MODEL.ATTRIBUTE_ON = False 16 | # Maximum number of attributes per foreground instance 17 | cfg.INPUT.MAX_ATTR_PER_INS = 16 18 | # ------------------------------------------------------------------------ # 19 | # Attribute Head 20 | # ----------------------------------------------------------------------- # 21 | cfg.MODEL.ROI_ATTRIBUTE_HEAD = CN() 22 | # Dimension for object class embedding, used in conjunction with 23 | # visual features to predict attributes 24 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.OBJ_EMBED_DIM = 256 25 | # Dimension of the hidden fc layer of the input visual features 26 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.FC_DIM = 512 27 | # Loss weight for attribute prediction, 0.2 is best per analysis 28 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.LOSS_WEIGHT = 0.2 29 | # Number of classes for attributes 30 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_CLASSES = 400 31 | 32 | """ 33 | Add config for box regression loss adjustment. 34 | """ 35 | # Loss weights for RPN box regression 36 | cfg.MODEL.RPN.BBOX_LOSS_WEIGHT = 1.0 37 | # Loss weights for R-CNN box regression 38 | cfg.MODEL.ROI_BOX_HEAD.BBOX_LOSS_WEIGHT = 1.0 39 | 40 | cfg.MODEL.EXTRACT_FEATS = False 41 | cfg.MODEL.EXTRACT_MODE = 1 42 | 43 | _C = cfg 44 | _C.MODEL.BUA = CN() 45 | _C.MODEL.BUA.EXTRACT_FEATS = False 46 | _C.MODEL.BUA.EXTRACTOR = CN() 47 | _C.MODEL.BUA.ATTRIBUTE_ON = False 48 | # _C.MODEL.BUA.EXTRACT_FEATS = False 49 | 50 | # EXTRACTOR.MODE {1: extract roi features, 2: extract bbox only ,3: extract roi features by gt_bbox} 51 | _C.MODEL.BUA.EXTRACTOR.MODE = 1 52 | 53 | # config of postprocessing in extractor 54 | _C.MODEL.BUA.EXTRACTOR.MIN_BOXES = 10 55 | _C.MODEL.BUA.EXTRACTOR.MAX_BOXES = 100 56 | _C.MODEL.BUA.EXTRACTOR.CONF_THRESH = 0.2 57 | _C.MODEL.BUA.EXTRACTOR.OUTPUT_DIR = ".output/" -------------------------------------------------------------------------------- /bua/d2/dataloader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import ( 2 | build_detection_train_loader_with_attributes, 3 | build_detection_test_loader_with_attributes, 4 | ) 5 | from ... import visual_genome -------------------------------------------------------------------------------- /bua/d2/dataloader/build_loader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import logging 3 | import operator 4 | import torch.utils.data 5 | 6 | from detectron2.utils.comm import get_world_size 7 | from detectron2.data import samplers 8 | from detectron2.data.build import get_detection_dataset_dicts, worker_init_reset_seed, trivial_batch_collator 9 | from detectron2.data.common import AspectRatioGroupedDataset, DatasetFromList, MapDataset 10 | 11 | from .dataset_mapper import AttributeDatasetMapper 12 | 13 | """ 14 | data_loader for mode detectron2 15 | """ 16 | 17 | def build_detection_train_loader_with_attributes(cfg, mapper=None): 18 | num_workers = get_world_size() 19 | images_per_batch = cfg.SOLVER.IMS_PER_BATCH 20 | assert ( 21 | images_per_batch % num_workers == 0 22 | ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format( 23 | images_per_batch, num_workers 24 | ) 25 | assert ( 26 | images_per_batch >= num_workers 27 | ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format( 28 | images_per_batch, num_workers 29 | ) 30 | images_per_worker = images_per_batch // num_workers 31 | # NOTE above is added 32 | 33 | dataset_dicts = get_detection_dataset_dicts( 34 | cfg.DATASETS.TRAIN, 35 | filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, 36 | min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE 37 | if cfg.MODEL.KEYPOINT_ON 38 | else 0, 39 | proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, 40 | ) 41 | dataset = DatasetFromList(dataset_dicts, copy=False) 42 | 43 | if mapper is None: 44 | mapper = AttributeDatasetMapper(cfg, True) 45 | dataset = MapDataset(dataset, mapper) 46 | 47 | sampler_name = cfg.DATALOADER.SAMPLER_TRAIN 48 | logger = logging.getLogger(__name__) 49 | logger.info("Using training sampler {}".format(sampler_name)) 50 | if sampler_name == "TrainingSampler": 51 | sampler = samplers.TrainingSampler(len(dataset)) 52 | elif sampler_name == "RepeatFactorTrainingSampler": 53 | sampler = samplers.RepeatFactorTrainingSampler( 54 | dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD 55 | ) 56 | else: 57 | raise ValueError("Unknown training sampler: {}".format(sampler_name)) 58 | 59 | if cfg.DATALOADER.ASPECT_RATIO_GROUPING: 60 | data_loader = torch.utils.data.DataLoader( 61 | dataset, 62 | sampler=sampler, 63 | num_workers=cfg.DATALOADER.NUM_WORKERS, 64 | batch_sampler=None, 65 | collate_fn=operator.itemgetter(0), 66 | worker_init_fn=worker_init_reset_seed, 67 | ) 68 | data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker) 69 | else: 70 | batch_sampler = torch.utils.data.sampler.BatchSampler( 71 | sampler, images_per_worker, drop_last=True 72 | ) 73 | data_loader = torch.utils.data.DataLoader( 74 | dataset, 75 | num_workers=cfg.DATALOADER.NUM_WORKERS, 76 | batch_sampler=batch_sampler, 77 | collate_fn=trivial_batch_collator, 78 | worker_init_fn=worker_init_reset_seed, 79 | ) 80 | 81 | return data_loader 82 | 83 | 84 | def build_detection_test_loader_with_attributes(cfg, dataset_name, mapper=None): 85 | dataset_dicts = get_detection_dataset_dicts( 86 | [dataset_name], 87 | filter_empty=False, 88 | proposal_files=[ 89 | cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)] 90 | ] 91 | if cfg.MODEL.LOAD_PROPOSALS 92 | else None, 93 | ) 94 | 95 | dataset = DatasetFromList(dataset_dicts) 96 | if mapper is None: 97 | mapper = AttributeDatasetMapper(cfg, False) 98 | dataset = MapDataset(dataset, mapper) 99 | 100 | sampler = samplers.InferenceSampler(len(dataset)) 101 | batch_sampler = 
torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False) 102 | 103 | data_loader = torch.utils.data.DataLoader( 104 | dataset, 105 | num_workers=cfg.DATALOADER.NUM_WORKERS, 106 | batch_sampler=batch_sampler, 107 | collate_fn=trivial_batch_collator, 108 | ) 109 | return data_loader -------------------------------------------------------------------------------- /bua/d2/dataloader/dataset_mapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | import logging 4 | import numpy as np 5 | import torch 6 | from fvcore.common.file_io import PathManager 7 | from PIL import Image 8 | 9 | from detectron2.data import detection_utils as utils 10 | from detectron2.data import transforms as T 11 | from detectron2.data import DatasetMapper 12 | from detectron2.structures import ( 13 | BitMasks, 14 | Boxes, 15 | BoxMode, 16 | Instances, 17 | Keypoints, 18 | PolygonMasks, 19 | polygons_to_bitmask, 20 | ) 21 | 22 | """ 23 | data mapper for mode detecrton2 24 | """ 25 | 26 | def annotations_to_instances_with_attributes(annos, 27 | image_size, 28 | mask_format="polygon", 29 | load_attributes=False, 30 | max_attr_per_ins=16): 31 | """ 32 | Extend the function annotations_to_instances() to support attributes 33 | """ 34 | boxes = [BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS) for obj in annos] 35 | target = Instances(image_size) 36 | boxes = target.gt_boxes = Boxes(boxes) 37 | boxes.clip(image_size) 38 | 39 | classes = [obj["category_id"] for obj in annos] 40 | classes = torch.tensor(classes, dtype=torch.int64) 41 | target.gt_classes = classes 42 | 43 | if len(annos) and "segmentation" in annos[0]: 44 | segms = [obj["segmentation"] for obj in annos] 45 | if mask_format == "polygon": 46 | masks = PolygonMasks(segms) 47 | else: 48 | assert mask_format == "bitmask", mask_format 49 | masks = [] 50 | for segm in segms: 51 | if isinstance(segm, list): 52 | # polygon 53 | masks.append(polygons_to_bitmask(segm, *image_size)) 54 | elif isinstance(segm, dict): 55 | # COCO RLE 56 | masks.append(mask_util.decode(segm)) 57 | elif isinstance(segm, np.ndarray): 58 | assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( 59 | segm.ndim 60 | ) 61 | # mask array 62 | masks.append(segm) 63 | else: 64 | raise ValueError( 65 | "Cannot convert segmentation of type '{}' to BitMasks!" 66 | "Supported types are: polygons as list[list[float] or ndarray]," 67 | " COCO-style RLE as a dict, or a full-image segmentation mask " 68 | "as a 2D ndarray.".format(type(segm)) 69 | ) 70 | masks = BitMasks( 71 | torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks]) 72 | ) 73 | target.gt_masks = masks 74 | 75 | if len(annos) and "keypoints" in annos[0]: 76 | kpts = [obj.get("keypoints", []) for obj in annos] 77 | target.gt_keypoints = Keypoints(kpts) 78 | 79 | if len(annos) and load_attributes: 80 | attributes = -torch.ones((len(annos), max_attr_per_ins), dtype=torch.int64) 81 | for idx, anno in enumerate(annos): 82 | if "attribute_ids" in anno: 83 | for jdx, attr_id in enumerate(anno["attribute_ids"]): 84 | attributes[idx, jdx] = attr_id 85 | target.gt_attributes = attributes 86 | 87 | return target 88 | 89 | 90 | class AttributeDatasetMapper(DatasetMapper): 91 | """ 92 | Extend DatasetMapper to support attributes. 
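A small sketch of the attribute packing done by annotations_to_instances_with_attributes() above (values are illustrative; unused attribute slots are padded with -1 up to max_attr_per_ins):

    import torch
    from detectron2.structures import BoxMode
    from bua.d2.dataloader.dataset_mapper import annotations_to_instances_with_attributes

    annos = [{
        "bbox": [10.0, 20.0, 30.0, 40.0], "bbox_mode": BoxMode.XYWH_ABS,
        "category_id": 5, "attribute_ids": [3, 17],   # two attribute ids (illustrative)
    }]
    inst = annotations_to_instances_with_attributes(
        annos, (480, 640), load_attributes=True, max_attr_per_ins=16
    )
    # inst.gt_boxes      -> one box converted to XYXY: [10, 20, 40, 60]
    # inst.gt_classes    -> tensor([5])
    # inst.gt_attributes -> shape (1, 16): [3, 17, -1, -1, ...]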
93 | """ 94 | def __init__(self, cfg, is_train=True): 95 | super().__init__(cfg, is_train) 96 | 97 | # fmt: off 98 | self.attribute_on = cfg.MODEL.BUA.ATTRIBUTE_ON 99 | self.max_attr_per_ins = cfg.INPUT.MAX_ATTR_PER_INS 100 | # fmt: on 101 | # NOTE Added to fit d202 102 | if cfg.INPUT.CROP.ENABLED and is_train: 103 | self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE) 104 | else: 105 | self.crop_gen = None 106 | 107 | self.tfm_gens = utils.build_transform_gen(cfg, is_train) 108 | self.load_proposals = cfg.MODEL.LOAD_PROPOSALS 109 | self.mask_on = cfg.MODEL.MASK_ON 110 | self.keypoint_on = cfg.MODEL.KEYPOINT_ON 111 | self.mask_format = cfg.INPUT.MASK_FORMAT 112 | # NOTE ok 113 | 114 | def __call__(self, dataset_dict): 115 | dataset_dict = copy.deepcopy(dataset_dict) 116 | # NOTE Added to fit d202 117 | image = utils.read_image(dataset_dict["file_name"], format=self.image_format) # image_format 118 | # image = utils.read_image(dataset_dict["file_name"], format=self.img_format) # image_format 119 | utils.check_image_size(dataset_dict, image) 120 | 121 | if "annotations" not in dataset_dict: 122 | image, transforms = T.apply_transform_gens( 123 | ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image 124 | ) 125 | else: 126 | if self.crop_gen: 127 | crop_tfm = utils.gen_crop_transform_with_instance( 128 | self.crop_gen.get_crop_size(image.shape[:2]), 129 | image.shape[:2], 130 | np.random.choice(dataset_dict["annotations"]), 131 | ) 132 | image = crop_tfm.apply_image(image) 133 | image, transforms = T.apply_transform_gens(self.tfm_gens, image) 134 | if self.crop_gen: 135 | transforms = crop_tfm + transforms 136 | 137 | image_shape = image.shape[:2] 138 | dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) 139 | 140 | if self.load_proposals: 141 | utils.transform_proposals( 142 | dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk 143 | ) 144 | 145 | if not self.is_train: 146 | dataset_dict.pop("annotations", None) 147 | dataset_dict.pop("sem_seg_file_name", None) 148 | return dataset_dict 149 | 150 | if "annotations" in dataset_dict: 151 | for anno in dataset_dict["annotations"]: 152 | if not self.mask_on: 153 | anno.pop("segmentation", None) 154 | if not self.keypoint_on: 155 | anno.pop("keypoints", None) 156 | if not self.attribute_on: 157 | anno.pop("attribute_ids") 158 | 159 | annos = [ 160 | utils.transform_instance_annotations( 161 | obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices 162 | ) 163 | for obj in dataset_dict.pop("annotations") 164 | if obj.get("iscrowd", 0) == 0 165 | ] 166 | instances = annotations_to_instances_with_attributes( 167 | annos, image_shape, mask_format=self.mask_format, 168 | load_attributes=self.attribute_on, max_attr_per_ins=self.max_attr_per_ins 169 | ) 170 | if self.crop_gen and instances.has("gt_masks"): 171 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 172 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 173 | 174 | if "sem_seg_file_name" in dataset_dict: 175 | with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: 176 | sem_seg_gt = Image.open(f) 177 | sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") 178 | sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) 179 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) 180 | dataset_dict["sem_seg"] = sem_seg_gt 181 | return dataset_dict 182 | -------------------------------------------------------------------------------- 
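Putting the bua/d2 pieces above together, a minimal sketch of building the attribute-aware loaders (assumes the package and detectron2 are installed, the repository root is the working directory, and the Visual Genome data registered in bua/visual_genome.py below is set up under ./datasets; the r50 config choice is illustrative):

    from detectron2.config import get_cfg
    from bua.d2.config import add_attribute_config
    from bua.d2.dataloader import (
        build_detection_train_loader_with_attributes,
        build_detection_test_loader_with_attributes,
    )

    cfg = get_cfg()
    add_attribute_config(cfg)   # adds MODEL.ATTRIBUTE_ON, ROI_ATTRIBUTE_HEAD, MODEL.BUA.*, ...
    cfg.merge_from_file("configs/d2/train-d2-r50.yaml")
    cfg.freeze()

    train_loader = build_detection_train_loader_with_attributes(cfg)   # uses AttributeDatasetMapper
    test_loader = build_detection_test_loader_with_attributes(cfg, "visual_genome_test")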
/bua/visual_genome.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | import contextlib 4 | import io 5 | import logging 6 | import os 7 | from fvcore.common.file_io import PathManager 8 | from fvcore.common.timer import Timer 9 | 10 | from detectron2.data import DatasetCatalog, MetadataCatalog 11 | from detectron2.structures import BoxMode 12 | 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | """ 17 | load json for mode detectron2 18 | """ 19 | 20 | def load_coco_with_attributes_json(json_file, 21 | image_root, 22 | dataset_name=None, 23 | extra_annotation_keys=None): 24 | """ 25 | Extend load_coco_json() with additional support for attributes 26 | """ 27 | from pycocotools.coco import COCO 28 | 29 | timer = Timer() 30 | json_file = PathManager.get_local_path(json_file) 31 | with contextlib.redirect_stdout(io.StringIO()): 32 | coco_api = COCO(json_file) 33 | if timer.seconds() > 1: 34 | logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) 35 | 36 | id_map = None 37 | if dataset_name is not None: 38 | meta = MetadataCatalog.get(dataset_name) 39 | cat_ids = sorted(coco_api.getCatIds()) 40 | cats = coco_api.loadCats(cat_ids) 41 | thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] 42 | meta.thing_classes = thing_classes 43 | if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): 44 | if "coco" not in dataset_name: 45 | logger.warning( 46 | """ 47 | Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. 48 | """ 49 | ) 50 | id_map = {v: i for i, v in enumerate(cat_ids)} 51 | meta.thing_dataset_id_to_contiguous_id = id_map 52 | 53 | img_ids = sorted(coco_api.imgs.keys()) 54 | imgs = coco_api.loadImgs(img_ids) 55 | anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] 56 | 57 | if "minival" not in json_file: 58 | ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] 59 | assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( 60 | json_file 61 | ) 62 | 63 | imgs_anns = list(zip(imgs, anns)) 64 | 65 | logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) 66 | 67 | dataset_dicts = [] 68 | 69 | ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) 70 | 71 | num_instances_without_valid_segmentation = 0 72 | 73 | for (img_dict, anno_dict_list) in imgs_anns: 74 | record = {} 75 | record["file_name"] = os.path.join(image_root, img_dict["file_name"]) 76 | record["height"] = img_dict["height"] 77 | record["width"] = img_dict["width"] 78 | image_id = record["image_id"] = img_dict["id"] 79 | 80 | objs = [] 81 | for anno in anno_dict_list: 82 | assert anno["image_id"] == image_id 83 | 84 | assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' 
85 | 86 | obj = {key: anno[key] for key in ann_keys if key in anno} 87 | 88 | segm = anno.get("segmentation", None) 89 | if segm: 90 | if not isinstance(segm, dict): 91 | segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] 92 | if len(segm) == 0: 93 | num_instances_without_valid_segmentation += 1 94 | continue 95 | obj["segmentation"] = segm 96 | 97 | keypts = anno.get("keypoints", None) 98 | if keypts: 99 | for idx, v in enumerate(keypts): 100 | if idx % 3 != 2: 101 | keypts[idx] = v + 0.5 102 | obj["keypoints"] = keypts 103 | 104 | attrs = anno.get("attribute_ids", None) 105 | if attrs: # list[int] 106 | obj["attribute_ids"] = attrs 107 | 108 | attr = anno.get("attribute", None) 109 | if attr: 110 | # NOTE import from bua 111 | # obj["attributes"] = attr # 正常读入 112 | # obj["attribute_ids"] = attr # 正常读入 113 | # print(attr) 114 | max_attributes_per_ins = 16 115 | attributes = [-1 for _ in range(max_attributes_per_ins)] 116 | for idx, a in enumerate(attr): 117 | attributes[idx] = a - 1 # bua train、val的json中attr类别是1-400 118 | obj["attribute_ids"] = attributes 119 | 120 | obj["bbox_mode"] = BoxMode.XYWH_ABS 121 | if id_map: 122 | obj["category_id"] = id_map[obj["category_id"]] 123 | objs.append(obj) 124 | record["annotations"] = objs 125 | dataset_dicts.append(record) 126 | 127 | if num_instances_without_valid_segmentation > 0: 128 | logger.warning( 129 | "Filtered out {} instances without valid segmentation. " 130 | "There might be issues in your dataset generation process.".format( 131 | num_instances_without_valid_segmentation 132 | ) 133 | ) 134 | return dataset_dicts 135 | 136 | def register_coco_instances_with_attributes(name, metadata, json_file, image_root): 137 | DatasetCatalog.register(name, lambda: load_coco_with_attributes_json(json_file, 138 | image_root, 139 | name)) 140 | MetadataCatalog.get(name).set( 141 | json_file=json_file, image_root=image_root, evaluator_type="coco", **metadata 142 | ) 143 | # ==== Predefined splits for visual genome images =========== 144 | _PREDEFINED_SPLITS_VG = { 145 | "visual_genome_train": ("visual_genome/images", 146 | "visual_genome/annotations/visual_genome_train.json"), 147 | "visual_genome_val": ("visual_genome/images", 148 | "visual_genome/annotations/visual_genome_val.json"), 149 | "visual_genome_test": ("visual_genome/images", 150 | "visual_genome/annotations/visual_genome_test.json"), 151 | } 152 | 153 | def register_all_vg(root): 154 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_VG.items(): 155 | register_coco_instances_with_attributes( 156 | key, 157 | {}, # no meta data 158 | os.path.join(root, json_file), 159 | os.path.join(root, image_root), 160 | ) 161 | 162 | # Register them all under "./datasets" 163 | _root = os.getenv("DETECTRON2_DATASETS", "datasets") 164 | register_all_vg(_root) -------------------------------------------------------------------------------- /configs/caffe/test-caffe-r101-fix36.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: "bua-caffe-frcn-r101-k36.pth" 3 | META_ARCHITECTURE: "GeneralizedBUARCNN" 4 | PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 5 | ANCHOR_GENERATOR: 6 | SIZES: [[4, 8, 16, 32]] 7 | PROPOSAL_GENERATOR: 8 | NAME: "BUARPN" 9 | MIN_SIZE: 16 10 | BUA: 11 | ATTRIBUTE_ON: True 12 | EXTRACT_FEATS: False # auto True when extract feats 13 | RPN: 14 | CONV_OUT_CHANNELS: 512 15 | ATTRIBUTE: 16 | NUM_CLASSES: 401 17 | RESNETS: 18 | DEPTH: 101 19 | OUT_FEATURES: ["res4"] 20 | NORM: "BN" 21 | RES5_DILATION: 2 22 | 
BACKBONE: 23 | NAME: "build_bua_resnet_backbone" 24 | FREEZE_AT: 3 25 | RPN: 26 | HEAD_NAME: "StandardBUARPNHead" 27 | PRE_NMS_TOPK_TRAIN: 12000 28 | POST_NMS_TOPK_TRAIN: 2000 29 | POST_NMS_TOPK_TEST: 300 30 | PRE_NMS_TOPK_TEST: 6000 31 | BATCH_SIZE_PER_IMAGE: 64 32 | ROI_HEADS: 33 | NAME: "BUACaffeRes5ROIHeads" 34 | BATCH_SIZE_PER_IMAGE: 64 35 | SCORE_THRESH_TEST: -1.0 36 | NMS_THRESH_TEST: 0.3 37 | POSITIVE_FRACTION: 0.5 38 | NUM_CLASSES: 1601 39 | ROI_BOX_HEAD: 40 | POOLER_TYPE: "ROIPool" 41 | BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0) 42 | DATASETS: 43 | TRAIN: ("visual_genome_train",) 44 | TEST: ("visual_genome_val",) 45 | TEST: 46 | DETECTIONS_PER_IMAGE: 400 47 | DATALOADER: 48 | NUM_WORKERS: 1 49 | INPUT: 50 | MIN_SIZE_TRAIN: (600, ) 51 | MAX_SIZE_TRAIN: 1000 52 | MIN_SIZE_TEST: 600 53 | MAX_SIZE_TEST: 1000 -------------------------------------------------------------------------------- /configs/caffe/test-caffe-r101.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: "bua-caffe-frcn-r101-k10-100.pth" 3 | META_ARCHITECTURE: "GeneralizedBUARCNN" 4 | PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 5 | ANCHOR_GENERATOR: 6 | SIZES: [[4, 8, 16, 32]] 7 | PROPOSAL_GENERATOR: 8 | NAME: "BUARPN" 9 | MIN_SIZE: 16 10 | BUA: 11 | ATTRIBUTE_ON: True 12 | EXTRACT_FEATS: False # auto True when extract feats 13 | RPN: 14 | CONV_OUT_CHANNELS: 512 15 | ATTRIBUTE: 16 | NUM_CLASSES: 401 17 | RESNETS: 18 | DEPTH: 101 19 | OUT_FEATURES: ["res4"] 20 | NORM: "BN" 21 | RES5_DILATION: 2 22 | BACKBONE: 23 | NAME: "build_bua_resnet_backbone" 24 | FREEZE_AT: 3 25 | RPN: 26 | HEAD_NAME: "StandardBUARPNHead" 27 | PRE_NMS_TOPK_TRAIN: 12000 28 | POST_NMS_TOPK_TRAIN: 2000 29 | POST_NMS_TOPK_TEST: 300 30 | PRE_NMS_TOPK_TEST: 6000 31 | BATCH_SIZE_PER_IMAGE: 64 32 | ROI_HEADS: 33 | NAME: "BUACaffeRes5ROIHeads" 34 | BATCH_SIZE_PER_IMAGE: 64 35 | SCORE_THRESH_TEST: -1.0 36 | NMS_THRESH_TEST: 0.3 37 | POSITIVE_FRACTION: 0.5 38 | NUM_CLASSES: 1601 39 | ROI_BOX_HEAD: 40 | POOLER_TYPE: "ROIPool" 41 | BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0) 42 | DATASETS: 43 | TRAIN: ("visual_genome_train",) 44 | TEST: ("visual_genome_val",) 45 | TEST: 46 | DETECTIONS_PER_IMAGE: 400 47 | DATALOADER: 48 | NUM_WORKERS: 1 49 | INPUT: 50 | MIN_SIZE_TRAIN: (600, ) 51 | MAX_SIZE_TRAIN: 1000 52 | MIN_SIZE_TEST: 600 53 | MAX_SIZE_TEST: 1000 54 | 55 | -------------------------------------------------------------------------------- /configs/caffe/test-caffe-r152.yaml: -------------------------------------------------------------------------------- 1 | OUTPUT_DIR: "./output_caffe152" 2 | MODEL: 3 | WEIGHTS: "bua-caffe-frcn-r152.pth" 4 | META_ARCHITECTURE: "GeneralizedBUARCNN" 5 | PIXEL_MEAN: [0, 0, 0] 6 | ANCHOR_GENERATOR: 7 | SIZES: [[4, 8, 16, 32]] 8 | PROPOSAL_GENERATOR: 9 | NAME: "BUARPN" 10 | MIN_SIZE: 16 11 | BUA: 12 | ATTRIBUTE_ON: True 13 | EXTRACT_FEATS: False # auto True when extract feats 14 | RESNET_VERSION: 2 15 | RPN: 16 | CONV_OUT_CHANNELS: 512 17 | EXTRACTOR: 18 | MIN_BOXES: 100 19 | MAX_BOXES: 100 20 | ATTRIBUTE: 21 | NUM_CLASSES: 401 22 | RESNETS: 23 | DEPTH: 152 24 | OUT_FEATURES: ["res4"] 25 | NORM: "BN" 26 | RES5_DILATION: 1 27 | STRIDE_IN_1X1: False 28 | BACKBONE: 29 | NAME: "build_bua_resnet_backbone" 30 | FREEZE_AT: 3 31 | RPN: 32 | HEAD_NAME: "StandardBUARPNHead" 33 | PRE_NMS_TOPK_TRAIN: 12000 34 | POST_NMS_TOPK_TRAIN: 2000 35 | POST_NMS_TOPK_TEST: 300 36 | PRE_NMS_TOPK_TEST: 6000 37 | BATCH_SIZE_PER_IMAGE: 64 38 | ROI_HEADS: 39 | NAME: "BUACaffeRes5ROIHeads" 40 | 
BATCH_SIZE_PER_IMAGE: 64 41 | SCORE_THRESH_TEST: -1.0 42 | NMS_THRESH_TEST: 0.3 43 | POSITIVE_FRACTION: 0.5 44 | NUM_CLASSES: 1601 45 | ROI_BOX_HEAD: 46 | POOLER_TYPE: "ROIPool" 47 | BBOX_REG_WEIGHTS: (1.0, 1.0, 1.0, 1.0) 48 | DATASETS: 49 | TRAIN: ("visual_genome_train",) 50 | TEST: ("visual_genome_val",) 51 | TEST: 52 | DETECTIONS_PER_IMAGE: 400 53 | DATALOADER: 54 | NUM_WORKERS: 1 55 | INPUT: 56 | MIN_SIZE_TRAIN: (600, ) 57 | MAX_SIZE_TRAIN: 1000 58 | MIN_SIZE_TEST: 600 59 | MAX_SIZE_TEST: 1000 60 | 61 | -------------------------------------------------------------------------------- /configs/d2/base-d2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | ATTRIBUTE_ON: True 4 | RPN: 5 | PRE_NMS_TOPK_TEST: 6000 6 | POST_NMS_TOPK_TEST: 1000 7 | SMOOTH_L1_BETA: 0.1111 8 | BOUNDARY_THRESH: 0 9 | ROI_HEADS: 10 | NAME: "AttributeRes5ROIHeads" 11 | NUM_CLASSES: 1600 12 | ROI_BOX_HEAD: 13 | POOLER_SAMPLING_RATIO: 2 14 | SMOOTH_L1_BETA: 1. 15 | DATASETS: 16 | TRAIN: ("visual_genome_train", "visual_genome_val") 17 | TEST: ("visual_genome_test",) 18 | SOLVER: 19 | IMS_PER_BATCH: 8 20 | BASE_LR: 0.01 21 | STEPS: (120000, 160000) 22 | MAX_ITER: 180000 23 | # IMS_PER_BATCH: 16 24 | # BASE_LR: 0.02 25 | # STEPS: (60000, 80000) 26 | # MAX_ITER: 90000 27 | INPUT: 28 | MIN_SIZE_TRAIN: (600,) 29 | MAX_SIZE_TRAIN: 1000 30 | MIN_SIZE_TEST: 600 31 | MAX_SIZE_TEST: 1000 32 | VERSION: 2 33 | -------------------------------------------------------------------------------- /configs/d2/test-d2-X152.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: "bua-d2-frcn-x152.pth" 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | ATTRIBUTE_ON: True 5 | BUA: 6 | ATTRIBUTE_ON: True 7 | EXTRACTOR: 8 | MODE: 1 9 | MIN_BOXES: 10 10 | MAX_BOXES: 20 11 | CONF_THRESH: 0.4 12 | RESNETS: 13 | STRIDE_IN_1X1: False # this is a C2 model 14 | NUM_GROUPS: 32 15 | WIDTH_PER_GROUP: 8 16 | DEPTH: 152 17 | RPN: 18 | PRE_NMS_TOPK_TEST: 6000 19 | POST_NMS_TOPK_TEST: 1000 20 | SMOOTH_L1_BETA: 0.1111 21 | BOUNDARY_THRESH: 0 22 | ROI_HEADS: 23 | NAME: "AttributeRes5ROIHeads" 24 | NUM_CLASSES: 1600 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | NUM_FC: 2 28 | POOLER_RESOLUTION: 7 29 | POOLER_SAMPLING_RATIO: 2 30 | SMOOTH_L1_BETA: 1. 
31 | DATASETS: 32 | TRAIN: ("visual_genome_train", "visual_genome_val") 33 | TEST: ("visual_genome_test",) 34 | SOLVER: 35 | # IMS_PER_BATCH: 16 36 | # BASE_LR: 0.02 37 | # STEPS: (60000, 80000) 38 | # MAX_ITER: 90000 39 | IMS_PER_BATCH: 8 40 | BASE_LR: 0.01 41 | STEPS: (120000, 160000) 42 | MAX_ITER: 180000 43 | INPUT: 44 | MIN_SIZE_TRAIN: (600,) 45 | MAX_SIZE_TRAIN: 1000 46 | MIN_SIZE_TEST: 600 47 | MAX_SIZE_TEST: 1000 48 | VERSION: 2 -------------------------------------------------------------------------------- /configs/d2/test-d2-r101.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "base-d2.yaml" 2 | MODEL: 3 | WEIGHTS: "bua-d2-frcn-r101.pth" 4 | BUA: 5 | ATTRIBUTE_ON: True 6 | EXTRACTOR: 7 | MODE: 1 8 | MIN_BOXES: 10 9 | MAX_BOXES: 20 10 | CONF_THRESH: 0.4 11 | RESNETS: 12 | DEPTH: 101 -------------------------------------------------------------------------------- /configs/d2/test-d2-r50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "base-d2.yaml" 2 | MODEL: 3 | WEIGHTS: "bua-d2-frcn-r50.pth" 4 | BUA: 5 | ATTRIBUTE_ON: True 6 | EXTRACTOR: 7 | MODE: 1 8 | MIN_BOXES: 10 9 | MAX_BOXES: 20 10 | CONF_THRESH: 0.4 11 | RESNETS: 12 | DEPTH: 50 -------------------------------------------------------------------------------- /configs/d2/train-d2-r101.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "base-d2.yaml" 2 | MODEL: 3 | WEIGHTS: "R-101.pkl" # the backbone weight is download from d2 at https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-101.pkl 4 | BUA: 5 | ATTRIBUTE_ON: True 6 | EXTRACTOR: 7 | MODE: 1 8 | MIN_BOXES: 10 9 | MAX_BOXES: 20 10 | CONF_THRESH: 0.4 11 | RESNETS: 12 | DEPTH: 101 -------------------------------------------------------------------------------- /configs/d2/train-d2-r50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "base-d2.yaml" 2 | MODEL: 3 | WEIGHTS: "R-50.pkl" # the backbone weight is download from d2 at https://dl.fbaipublicfiles.com/detectron2/ImageNetPretrained/MSRA/R-50.pkl 4 | BUA: 5 | ATTRIBUTE_ON: True 6 | EXTRACTOR: 7 | MODE: 1 8 | MIN_BOXES: 10 9 | MAX_BOXES: 20 10 | CONF_THRESH: 0.4 11 | RESNETS: 12 | DEPTH: 50 -------------------------------------------------------------------------------- /datasets/demo/000456.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/000456.jpg -------------------------------------------------------------------------------- /datasets/demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/000542.jpg -------------------------------------------------------------------------------- /datasets/demo/001150.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/001150.jpg -------------------------------------------------------------------------------- /datasets/demo/001763.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/001763.jpg -------------------------------------------------------------------------------- /datasets/demo/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/004545.jpg -------------------------------------------------------------------------------- /datasets/demo/example_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/example_image.jpg -------------------------------------------------------------------------------- /datasets/demo/example_image1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/example_image1.png -------------------------------------------------------------------------------- /datasets/demo/example_image2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/demo/example_image2.png -------------------------------------------------------------------------------- /datasets/init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MILVLG/bottom-up-attention.pytorch/4dbce869ad17117ca9f1df55bc5604cdbcd47f59/datasets/init -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .vg_evaluation import VGEvaluator -------------------------------------------------------------------------------- /evaluation/attributes_vocab.txt: -------------------------------------------------------------------------------- 1 | gray,grey 2 | multi colored,multi-colored,multicolored 3 | double decker,double-decker 4 | unmade 5 | red 6 | camouflage 7 | blue 8 | white 9 | green 10 | pink 11 | yellow 12 | black 13 | ivory 14 | throwing 15 | orange 16 | spiky 17 | plaid 18 | purple 19 | soccer 20 | brake 21 | blonde 22 | american 23 | flat screen 24 | brown 25 | wooden 26 | performing 27 | pulled back 28 | windshield 29 | bald 30 | chocolate 31 | khaki 32 | apple 33 | blowing 34 | parked 35 | sticking out 36 | fluorescent 37 | glazed 38 | cooking 39 | brick 40 | home 41 | palm 42 | curly 43 | cheese 44 | crashing 45 | calm 46 | christmas 47 | gravel 48 | chain link,chainlink 49 | clear 50 | cloudy 51 | curled 52 | striped 53 | flying 54 | pine 55 | arched 56 | hardwood 57 | silver 58 | framed 59 | one way,oneway 60 | tall 61 | muscular 62 | skiing 63 | tiled 64 | bare 65 | surfing 66 | stuffed 67 | wii 68 | taking off 69 | sleeping 70 | jumping 71 | metal 72 | fire 73 | neon green 74 | soap 75 | park 76 | chalk 77 | license 78 | powdered 79 | up 80 | woven 81 | baby 82 | polar 83 | floppy 84 | toasted 85 | coffee 86 | potted 87 | wet 88 | tennis 89 | dry 90 | balding 91 | carpeted 92 | deep blue 93 | cardboard 94 | pointed 95 | sandy 96 | snow-covered,snow covered 97 | sheer 98 | wood 99 | swimming 100 | traffic 101 | crouching 102 | short 103 | melted 
104 | marble 105 | rock 106 | open 107 | paper 108 | stacked 109 | stainless 110 | cluttered 111 | dirt 112 | waving 113 | ripe 114 | salt 115 | rolling 116 | long 117 | clock 118 | maroon 119 | little 120 | triangle 121 | large 122 | sand 123 | fallen 124 | foamy 125 | stack 126 | sliced 127 | blond 128 | plain 129 | straw 130 | busy 131 | checkered 132 | extended 133 | stainless steel,stainless-steel 134 | stone 135 | rocky 136 | laying down 137 | grazing 138 | porcelain 139 | snowboarding 140 | stop 141 | leather 142 | gold 143 | cargo 144 | playing tennis 145 | winter 146 | walking 147 | roman 148 | peeled 149 | plastic 150 | colorful 151 | shining 152 | burnt 153 | messy 154 | tile 155 | cloudless 156 | glass 157 | smiling 158 | fruit 159 | overcast 160 | adult 161 | water 162 | round 163 | birthday 164 | dark 165 | snowy 166 | leafless 167 | young 168 | wicker 169 | skateboarding 170 | cooked 171 | huge 172 | dress 173 | wire 174 | cracked 175 | concrete 176 | laying 177 | grassy 178 | foggy 179 | fried 180 | slice 181 | batting 182 | mountain 183 | halved 184 | ski 185 | statue 186 | still 187 | octagonal 188 | side view 189 | sitting 190 | wavy 191 | floral 192 | running 193 | moving 194 | small 195 | door 196 | wine 197 | closed 198 | cement 199 | splashing 200 | empty 201 | eating 202 | skating 203 | playing 204 | old 205 | tan 206 | leafy 207 | down 208 | electrical 209 | manicured 210 | standing 211 | blurry 212 | choppy 213 | driving 214 | watching 215 | parking 216 | pointy 217 | covering 218 | for sale 219 | reflecting 220 | railroad 221 | golden brown 222 | steep 223 | granite 224 | roll 225 | train 226 | spotted 227 | fluffy 228 | bending 229 | tarmacked 230 | furry 231 | dirty 232 | hanging 233 | above 234 | half full 235 | bright 236 | chrome 237 | toilet paper 238 | squatting 239 | chopped 240 | flowing 241 | neon 242 | skate 243 | rusty 244 | male 245 | covered 246 | outstretched 247 | lit 248 | riding 249 | shirtless 250 | reaching 251 | baseball 252 | iron 253 | night 254 | speckled 255 | bright blue 256 | horizontal 257 | denim 258 | cake 259 | hazy 260 | chipped 261 | police 262 | off 263 | dead 264 | nike 265 | steamed 266 | beige 267 | brunette 268 | short sleeved 269 | laptop 270 | decorated 271 | sharp 272 | perched 273 | clay 274 | made 275 | mesh 276 | street 277 | burgundy 278 | bent 279 | rusted 280 | paved 281 | patterned 282 | painted 283 | flat 284 | landing 285 | light blue 286 | puffy 287 | shaggy 288 | resting 289 | overgrown 290 | bending over 291 | circular 292 | curved 293 | cast 294 | rainbow colored,rainbow 295 | lime green 296 | ceramic 297 | dried 298 | styrofoam 299 | long sleeved,long sleeve 300 | wispy 301 | ocean 302 | big 303 | teal 304 | oval 305 | greenish 306 | murky 307 | tomato 308 | letter 309 | bricked 310 | in air 311 | distant 312 | full 313 | opened 314 | looking 315 | power 316 | holding 317 | browned 318 | growing 319 | backwards 320 | clean 321 | racing 322 | grilled 323 | seasoned 324 | barefoot 325 | kneeling 326 | digital 327 | herd 328 | sliding 329 | recessed 330 | lying 331 | serving 332 | polka dot 333 | cut 334 | ornate 335 | piled 336 | steel 337 | muddy 338 | hilly 339 | raised 340 | hitting 341 | evergreen 342 | sunny 343 | wrist 344 | half 345 | blank 346 | numbered 347 | electric 348 | computer 349 | rolled 350 | whole 351 | lush 352 | daytime 353 | toilet 354 | pointing 355 | asphalt 356 | public 357 | alone 358 | posing 359 | bunch 360 | square 361 | safety 362 | wearing 363 | stripes 364 | bathroom 365 | 
reflective 366 | assorted 367 | swinging 368 | airborne 369 | dark blue 370 | grass 371 | burned 372 | telephone 373 | docked 374 | pile 375 | laughing 376 | brass 377 | rubber 378 | frosted 379 | hairy 380 | overhead 381 | glowing 382 | soda 383 | number 384 | talking 385 | barren 386 | shaved 387 | shiny 388 | rough 389 | written 390 | older 391 | thin 392 | decorative 393 | wrinkled 394 | peeling 395 | golden 396 | metallic 397 | back 398 | thick 399 | black and white 400 | leaning -------------------------------------------------------------------------------- /evaluation/vg_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | import numpy as np 7 | 8 | 9 | def vg_eval(detpath, 10 | gt_roidb, 11 | image_index, 12 | classindex, 13 | ovthresh=0.5, 14 | use_07_metric=False, 15 | eval_attributes=False): 16 | """rec, prec, ap, sorted_scores, npos = voc_eval( 17 | detpath, 18 | gt_roidb, 19 | image_index, 20 | classindex, 21 | [ovthresh], 22 | [use_07_metric]) 23 | Top level function that does the Visual Genome evaluation. 24 | detpath: Path to detections 25 | gt_roidb: List of ground truth structs. 26 | image_index: List of image ids. 27 | classindex: Category index 28 | [ovthresh]: Overlap threshold (default = 0.5) 29 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 30 | (default False) 31 | """ 32 | # extract gt objects for this class 33 | class_recs = {} 34 | npos = 0 35 | for item, imagename in zip(gt_roidb, image_index): 36 | if eval_attributes: 37 | bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :] 38 | else: 39 | bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :] 40 | difficult = np.zeros((bbox.shape[0],)).astype(np.bool) 41 | det = [False] * bbox.shape[0] 42 | npos = npos + sum(~difficult) 43 | class_recs[str(imagename)] = {'bbox': bbox, 44 | 'difficult': difficult, 45 | 'det': det} 46 | if npos == 0: 47 | # No ground truth examples 48 | return 0, 0, 0, 0, npos 49 | 50 | # read dets 51 | with open(detpath, 'r') as f: 52 | lines = f.readlines() 53 | if len(lines) == 0: 54 | # No detection examples 55 | return 0, 0, 0, 0, npos 56 | 57 | splitlines = [x.strip().split(' ') for x in lines] 58 | image_ids = [x[0] for x in splitlines] 59 | confidence = np.array([float(x[1]) for x in splitlines]) 60 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 61 | 62 | # sort by confidence 63 | sorted_ind = np.argsort(-confidence) 64 | sorted_scores = -np.sort(-confidence) 65 | BB = BB[sorted_ind, :] 66 | image_ids = [image_ids[x] for x in sorted_ind] 67 | 68 | # go down dets and mark TPs and FPs 69 | nd = len(image_ids) 70 | tp = np.zeros(nd) 71 | fp = np.zeros(nd) 72 | for d in range(nd): 73 | if image_ids[d] not in class_recs: 74 | print(image_ids[d], detpath) 75 | continue 76 | R = class_recs[image_ids[d]] 77 | bb = BB[d, :].astype(float) 78 | ovmax = -np.inf 79 | BBGT = R['bbox'].astype(float) 80 | 81 | if BBGT.size > 0: 82 | # compute overlaps 83 | # intersection 84 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 85 | iymin = np.maximum(BBGT[:, 1], bb[1]) 86 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 87 | iymax = np.minimum(BBGT[:, 3], bb[3]) 88 | iw = np.maximum(ixmax - ixmin + 1., 0.) 
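            # NOTE: boxes are treated as inclusive pixel coordinates (VOC-style),
            # hence the +1. when converting corners to widths/heights here and in
            # the union term below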
89 | ih = np.maximum(iymax - iymin + 1., 0.) 90 | inters = iw * ih 91 | 92 | # union 93 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 94 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 95 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 96 | 97 | overlaps = inters / uni 98 | ovmax = np.max(overlaps) 99 | jmax = np.argmax(overlaps) 100 | 101 | if ovmax > ovthresh: 102 | if not R['difficult'][jmax]: 103 | if not R['det'][jmax]: 104 | tp[d] = 1. 105 | R['det'][jmax] = 1 106 | else: 107 | fp[d] = 1. 108 | else: 109 | fp[d] = 1. 110 | 111 | # compute precision recall 112 | fp = np.cumsum(fp) 113 | tp = np.cumsum(tp) 114 | rec = tp / float(npos) 115 | # avoid divide by zero in case the first detection matches a difficult 116 | # ground truth 117 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 118 | ap = voc_ap(rec, prec, use_07_metric) 119 | 120 | return rec, prec, ap, sorted_scores, npos 121 | 122 | def voc_ap(rec, prec, use_07_metric=False): 123 | """ ap = voc_ap(rec, prec, [use_07_metric]) 124 | Compute VOC AP given precision and recall. 125 | If use_07_metric is true, uses the 126 | VOC 07 11 point method (default:False). 127 | """ 128 | if use_07_metric: 129 | # 11 point metric 130 | ap = 0. 131 | for t in np.arange(0., 1.1, 0.1): 132 | if np.sum(rec >= t) == 0: 133 | p = 0 134 | else: 135 | p = np.max(prec[rec >= t]) 136 | ap = ap + p / 11. 137 | else: 138 | # correct AP calculation 139 | # first append sentinel values at the end 140 | mrec = np.concatenate(([0.], rec, [1.])) 141 | mpre = np.concatenate(([0.], prec, [0.])) 142 | 143 | # compute the precision envelope 144 | for i in range(mpre.size - 1, 0, -1): 145 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 146 | 147 | # to calculate area under PR curve, look for points 148 | # where X axis (recall) changes value 149 | i = np.where(mrec[1:] != mrec[:-1])[0] 150 | 151 | # and sum (\Delta recall) * prec 152 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 153 | return ap -------------------------------------------------------------------------------- /evaluation/vg_evaluation.py: -------------------------------------------------------------------------------- 1 | import os, io 2 | import numpy as np 3 | 4 | import copy 5 | import torch 6 | import logging 7 | import pickle as cPickle 8 | import itertools 9 | import contextlib 10 | from pycocotools.coco import COCO 11 | from collections import OrderedDict 12 | from fvcore.common.file_io import PathManager 13 | 14 | import detectron2.utils.comm as comm 15 | from detectron2.data import MetadataCatalog 16 | from detectron2.evaluation.evaluator import DatasetEvaluator 17 | from detectron2.data.datasets.coco import convert_to_coco_json 18 | from detectron2.evaluation.coco_evaluation import instances_to_coco_json 19 | 20 | from .vg_eval import vg_eval 21 | 22 | class VGEvaluator(DatasetEvaluator): 23 | """ 24 | Evaluate object proposal, instance detection 25 | outputs using VG's metrics and APIs. 26 | """ 27 | def __init__(self, dataset_name, cfg, distributed, output_dir=None): 28 | """ 29 | Args: 30 | dataset_name (str): name of the dataset to be evaluated. 31 | It must have either the following corresponding metadata: 32 | 33 | "json_file": the path to the COCO format annotation 34 | 35 | Or it must be in detectron2's standard dataset format 36 | so it can be converted to COCO format automatically. 37 | cfg (CfgNode): config instance 38 | distributed (True): if True, will collect results from all ranks for evaluation. 39 | Otherwise, will evaluate the results in the current process. 
40 | output_dir (str): optional, an output directory to dump all 41 | results predicted on the dataset. The dump contains two files: 42 | 43 | 1. "instance_predictions.pth" a file in torch serialization 44 | format that contains all the raw original predictions. 45 | 2. "coco_instances_results.json" a json file in COCO's result 46 | format. 47 | """ 48 | self._tasks = self._tasks_from_config(cfg) 49 | self._distributed = distributed 50 | self._logger = logging.getLogger(__name__) 51 | self._cpu_device = torch.device("cpu") 52 | self._output_dir = output_dir 53 | 54 | self._metadata = MetadataCatalog.get(dataset_name) 55 | if not hasattr(self._metadata, "json_file"): 56 | self._logger.warning(f"json_file was not found in MetaDataCatalog for '{dataset_name}'") 57 | 58 | cache_path = os.path.join(output_dir, f"{dataset_name}_vg_format.json") 59 | self._metadata.json_file = cache_path 60 | convert_to_coco_json(dataset_name, cache_path) 61 | 62 | json_file = PathManager.get_local_path(self._metadata.json_file) 63 | with contextlib.redirect_stdout(io.StringIO()): 64 | self._coco_api = COCO(json_file) 65 | 66 | self._classes = ['__background__'] 67 | self._class_to_ind = {} 68 | self._class_to_ind[self._classes[0]] = 0 69 | with open(os.path.join('evaluation/objects_vocab.txt')) as f: 70 | count = 1 71 | for object in f.readlines(): 72 | names = [n.lower().strip() for n in object.split(',')] 73 | self._classes.append(names[0]) 74 | for n in names: 75 | self._class_to_ind[n] = count 76 | count += 1 77 | 78 | # Load attributes 79 | self._attributes = ['__no_attribute__'] 80 | self._attribute_to_ind = {} 81 | self._attribute_to_ind[self._attributes[0]] = 0 82 | with open(os.path.join('evaluation/attributes_vocab.txt')) as f: 83 | count = 1 84 | for att in f.readlines(): 85 | names = [n.lower().strip() for n in att.split(',')] 86 | self._attributes.append(names[0]) 87 | for n in names: 88 | self._attribute_to_ind[n] = count 89 | count += 1 90 | 91 | self.roidb, self.image_index = self.gt_roidb(self._coco_api) 92 | 93 | def _tasks_from_config(self, cfg): 94 | """ 95 | Returns: 96 | tuple[str]: tasks that can be evaluated under the given configuration. 97 | """ 98 | tasks = ("bbox",) 99 | if cfg.MODEL.MASK_ON: 100 | tasks = tasks + ("segm",) 101 | if cfg.MODEL.KEYPOINT_ON: 102 | tasks = tasks + ("keypoints",) 103 | return tasks 104 | 105 | def gt_roidb(self, dataset): 106 | roidb = [] 107 | image_index = dataset.imgToAnns.keys() 108 | for img_index in dataset.imgToAnns: 109 | tmp_dict = {} 110 | num_objs = len(dataset.imgToAnns[img_index]) 111 | bboxes = np.zeros((num_objs, 4), dtype=np.uint16) 112 | gt_attributes = np.zeros((num_objs, 16), dtype=np.int32) 113 | gt_classes = np.zeros((num_objs), dtype=np.int32) 114 | for ind, item in enumerate(dataset.imgToAnns[img_index]): 115 | bboxes[ind, :] = item['bbox'] 116 | gt_classes[ind] = item['category_id'] + 1 # NOTE 117 | for j, attr in enumerate(item['attribute_ids']): 118 | gt_attributes[ind, j] = attr 119 | bboxes[:, 2] = bboxes[:, 2] + bboxes[:, 0] 120 | bboxes[:, 3] = bboxes[:, 3] + bboxes[:, 1] 121 | tmp_dict['boxes'] = bboxes 122 | tmp_dict['gt_attributes'] = gt_attributes 123 | tmp_dict['gt_classes'] = gt_classes 124 | roidb.append(tmp_dict) 125 | return roidb, image_index 126 | 127 | def reset(self): 128 | self._predictions = [] 129 | 130 | def process(self, inputs, outputs): 131 | """ 132 | Args: 133 | inputs: the inputs to a COCO model (e.g., GeneralizedRCNN). 134 | It is a list of dict. 
Each dict corresponds to an image and 135 | contains keys like "height", "width", "file_name", "image_id". 136 | outputs: the outputs of a COCO model. It is a list of dicts with key 137 | "instances" that contains :class:`Instances`. 138 | """ 139 | for input, output in zip(inputs, outputs): 140 | prediction = {"image_id": input["image_id"]} 141 | 142 | # TODO this is ugly 143 | if "instances" in output: 144 | instances = output["instances"].to(self._cpu_device) 145 | prediction["boxes"] = instances.pred_boxes.tensor.numpy() 146 | prediction["labels"] = instances.pred_classes.numpy() 147 | prediction["scores"] = instances.scores.numpy() 148 | self._predictions.append(prediction) 149 | 150 | def evaluate(self): 151 | if self._distributed: 152 | comm.synchronize() 153 | self._predictions = comm.gather(self._predictions, dst=0) 154 | self._predictions = list(itertools.chain(*self._predictions)) 155 | 156 | if not comm.is_main_process(): 157 | return {} 158 | 159 | # self._predictions = torch.load(os.path.join(self._output_dir, "instances_predictions.pth")) 160 | 161 | if len(self._predictions) == 0: 162 | self._logger.warning("[VGEvaluator] Did not receive valid predictions.") 163 | return {} 164 | 165 | if self._output_dir: 166 | PathManager.mkdirs(self._output_dir) 167 | file_path = os.path.join(self._output_dir, "instances_predictions.pth") 168 | with PathManager.open(file_path, "wb") as f: 169 | torch.save(self._predictions, f) 170 | 171 | self._results = OrderedDict() 172 | self._eval_vg() 173 | # Copy so the caller can do whatever with results 174 | return copy.deepcopy(self._results) 175 | 176 | def _eval_vg(self): 177 | self.write_voc_results_file(self._predictions, output_dir=self._output_dir) 178 | self.do_python_eval(self._output_dir) 179 | 180 | def write_voc_results_file(self, predictions, output_dir): 181 | 182 | # preds = [] 183 | # for item in predictions: 184 | # pred = {} 185 | # pred['image_id'] = item['image_id'] 186 | # scores = item["scores"] 187 | # labels = item["labels"] 188 | # bbox = item["boxes"] 189 | # for ind, instance in enumerate(item['instances']): 190 | # scores[ind] = instance['score'] 191 | # labels[ind] = instance['category_id'] 192 | # bbox[ind, :] = instance['bbox'][:] 193 | # pred['scores'] = scores 194 | # pred['lables'] = labels 195 | # pred['bbox'] = bbox 196 | # preds.append(pred) 197 | 198 | for cls_ind, cls in enumerate(self._classes): 199 | if cls == '__background__': 200 | continue 201 | print('Writing "{}" vg result file'.format(cls)) 202 | filename = self.get_vg_results_file_template(output_dir).format(cls) 203 | with open(filename, 'wt') as f: 204 | for pred_ind, item in enumerate(predictions): 205 | scores = item["scores"] 206 | labels = item["labels"]+1 207 | bbox = item["boxes"] 208 | if cls_ind not in labels: 209 | continue 210 | dets = bbox[labels==cls_ind] 211 | scores = scores[labels==cls_ind] 212 | for k in range(dets.shape[0]): 213 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
214 | format(str(item["image_id"]), scores[k], 215 | dets[k, 0] + 1, dets[k, 1] + 1, 216 | dets[k, 2] + 1, dets[k, 3] + 1)) 217 | 218 | def get_vg_results_file_template(self, output_dir, pickle=True, eval_attributes = False): 219 | filename = 'detections_vg'+'_{:s}.txt' 220 | path = os.path.join(output_dir, filename) 221 | return path 222 | 223 | def do_python_eval(self, output_dir, pickle=True, eval_attributes = False): 224 | # We re-use parts of the pascal voc python code for visual genome 225 | aps = [] 226 | nposs = [] 227 | thresh = [] 228 | # The PASCAL VOC metric changed in 2010 229 | use_07_metric = False 230 | print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No')) 231 | if not os.path.isdir(output_dir): 232 | os.mkdir(output_dir) 233 | # Load ground truth 234 | if eval_attributes: 235 | classes = self._attributes 236 | else: 237 | classes = self._classes 238 | for i, cls in enumerate(classes): 239 | if cls == '__background__' or cls == '__no_attribute__': 240 | continue 241 | filename = self.get_vg_results_file_template(output_dir).format(cls) 242 | rec, prec, ap, scores, npos = vg_eval( 243 | filename, self.roidb, self.image_index, i, ovthresh=0.5, 244 | use_07_metric=use_07_metric, eval_attributes=eval_attributes) 245 | 246 | # Determine per class detection thresholds that maximise f score 247 | if npos > 1 and not (type(prec) == int and type(rec) == int and prec+rec ==0): 248 | f = np.nan_to_num((prec * rec) / (prec + rec)) 249 | thresh += [scores[np.argmax(f)]] 250 | else: 251 | thresh += [0] 252 | aps += [ap] 253 | nposs += [float(npos)] 254 | print('AP for {} = {:.4f} (npos={:,})'.format(cls, ap, npos)) 255 | if pickle: 256 | with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f: 257 | cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap, 258 | 'scores': scores, 'npos': npos}, f) 259 | 260 | # Set thresh to mean for classes with poor results 261 | thresh = np.array(thresh) 262 | avg_thresh = np.mean(thresh[thresh != 0]) 263 | thresh[thresh == 0] = avg_thresh 264 | if eval_attributes: 265 | filename = 'attribute_thresholds_vg.txt' 266 | else: 267 | filename = 'object_thresholds_vg.txt' 268 | path = os.path.join(output_dir, filename) 269 | with open(path, 'wt') as f: 270 | for i, cls in enumerate(classes[1:]): 271 | f.write('{:s} {:.3f}\n'.format(cls, thresh[i])) 272 | 273 | weights = np.array(nposs) 274 | weights /= weights.sum() 275 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 276 | print('Weighted Mean AP = {:.4f}'.format(np.average(aps, weights=weights))) 277 | print('Mean Detection Threshold = {:.3f}'.format(avg_thresh)) 278 | # print('~~~~~~~~') 279 | # print('Results:') 280 | # for ap, npos in zip(aps, nposs): 281 | # print('{:.3f}\t{:.3f}'.format(ap, npos)) 282 | # print('{:.3f}'.format(np.mean(aps))) 283 | # print('~~~~~~~~') 284 | # print('') 285 | # print('--------------------------------------------------------------') 286 | print('Results computed with the **unofficial** PASCAL VOC Python eval code.') 287 | print('--------------------------------------------------------------') 288 | -------------------------------------------------------------------------------- /extract_features.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | # pylint: disable=no-member 3 | """ 4 | TridentNet Training Script. 5 | 6 | This script is a simplified version of the training script in detectron2/tools. 
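Example usage (an illustrative sketch, not taken from the project docs; the paths
and flags below are assumptions based on the argument parser defined in main()):

    # extract RoI features for the demo images with the Caffe R-101 model
    python3 extract_features.py --mode caffe \
        --config-file configs/caffe/test-caffe-r101.yaml \
        --image-dir datasets/demo --out-dir features --gpus 0

One <image name>.npz file is written per image into --out-dir; a quick way to
inspect a result (the file name below is hypothetical):

    import numpy as np
    with np.load('features/000456.npz', allow_pickle=True) as feat:
        print(feat.files)  # list the arrays stored for this image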
7 | """ 8 | import argparse 9 | from ast import arg 10 | import os 11 | import sys 12 | import torch 13 | # import tqdm 14 | import cv2 15 | import numpy as np 16 | 17 | from utils.extract_d2features import extract_feat_d2_start 18 | sys.path.append('detectron2') 19 | 20 | import detectron2.utils.comm as comm 21 | from detectron2.checkpoint import DetectionCheckpointer 22 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 23 | from detectron2.config import get_cfg 24 | from detectron2.engine import DefaultTrainer, default_setup, launch 25 | from detectron2.evaluation import COCOEvaluator, verify_results 26 | from detectron2.structures import Instances 27 | 28 | from utils.utils import mkdir, save_features 29 | from utils.extract_utils import get_image_blob, save_bbox, save_roi_features_by_bbox, save_roi_features 30 | from utils.progress_bar import ProgressBar 31 | from bua import add_config 32 | from bua.caffe.modeling.box_regression import BUABoxes 33 | from torch.nn import functional as F 34 | from detectron2.modeling import postprocessing 35 | from utils.extract_features_singlegpu import extract_feat_singlegpu_start 36 | from utils.extract_features_multigpu import extract_feat_multigpu_start 37 | from utils.extract_features_faster import extract_feat_faster_start 38 | 39 | def switch_extract_mode(mode): 40 | if mode == 'roi_feats': 41 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 1] 42 | elif mode == 'bboxes': 43 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 2] 44 | elif mode == 'bbox_feats': 45 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 3, 'MODEL.PROPOSAL_GENERATOR.NAME', 'PrecomputedProposals'] 46 | else: 47 | print('Wrong extract mode! ') 48 | exit() 49 | return switch_cmd 50 | # ROI_HEADS: # Add to get 100 box or Delete it to get ~50 boxes 51 | # SCORE_THRESH_TEST: 0.0 52 | # NMS_THRESH_TEST: 0.3 53 | def set_min_max_boxes(min_max_boxes, mode): 54 | if min_max_boxes == 'min_max_default': 55 | return [] 56 | try: 57 | min_boxes = int(min_max_boxes.split(',')[0]) 58 | max_boxes = int(min_max_boxes.split(',')[1]) 59 | if mode == "caffe": 60 | pass 61 | elif mode == "d2": 62 | if min_boxes == 100 & max_boxes == 100: 63 | cmd = ['MODEL.BUA.EXTRACTOR.MIN_BOXES', min_boxes, 64 | 'MODEL.BUA.EXTRACTOR.MAX_BOXES', max_boxes, 65 | 'MODEL.ROI_HEADS.SCORE_THRESH_TEST', 0.0, 66 | 'MODEL.ROI_HEADS.NMS_THRESH_TEST', 0.3 ] 67 | return cmd 68 | else: 69 | raise Exception("detection mode not supported: {}".format(mode)) 70 | except: 71 | print('Illegal min-max boxes setting, using config default. ') 72 | return [] 73 | cmd = ['MODEL.BUA.EXTRACTOR.MIN_BOXES', min_boxes, 74 | 'MODEL.BUA.EXTRACTOR.MAX_BOXES', max_boxes] 75 | return cmd 76 | 77 | def setup(args): 78 | """ 79 | Create configs and perform basic setups. 
80 | """ 81 | cfg = get_cfg() 82 | add_config(args, cfg) 83 | cfg.merge_from_file(args.config_file) 84 | cfg.merge_from_list(args.opts) 85 | cfg.merge_from_list(['MODEL.BUA.EXTRACT_FEATS',True]) 86 | cfg.merge_from_list(switch_extract_mode(args.extract_mode)) 87 | cfg.merge_from_list(set_min_max_boxes(args.min_max_boxes, args.mode)) 88 | cfg.freeze() 89 | default_setup(cfg, args) 90 | return cfg 91 | 92 | def main(): 93 | parser = argparse.ArgumentParser(description="PyTorch Object Detection2 Inference") 94 | parser.add_argument( 95 | "--config-file", 96 | default="configs/caffe/test-caffe-r101.yaml", 97 | metavar="FILE", 98 | help="path to config file", 99 | ) 100 | 101 | parser.add_argument('--num-cpus', default=1, type=int, 102 | help='number of cpus to use for ray, 0 means no limit') 103 | 104 | parser.add_argument('--gpus', dest='gpu_id', help='GPU id(s) to use', 105 | default='0', type=str) 106 | 107 | parser.add_argument("--mode", default="caffe", type=str, help="'caffe' and 'd2' indicates \ 108 | 'use caffe model' and 'use detectron2 model'respectively") 109 | 110 | parser.add_argument('--extract-mode', default='roi_feats', type=str, 111 | help="'roi_feats', 'bboxes' and 'bbox_feats' indicates \ 112 | 'extract roi features directly', 'extract bboxes only' and \ 113 | 'extract roi features with pre-computed bboxes' respectively") 114 | 115 | parser.add_argument('--min-max-boxes', default='min_max_default', type=str, 116 | help='the number of min-max boxes of extractor') 117 | 118 | parser.add_argument('--out-dir', dest='output_dir', 119 | help='output directory for features', 120 | default="features") 121 | parser.add_argument('--image-dir', dest='image_dir', 122 | help='directory with images', 123 | default="image") 124 | parser.add_argument('--bbox-dir', dest='bbox_dir', 125 | help='directory with bbox', 126 | default="bbox") 127 | parser.add_argument("--fastmode", action="store_true", help="whether to use multi cpus to extract faster.",) 128 | 129 | parser.add_argument( 130 | "--resume", 131 | action="store_true", 132 | help="whether to attempt to resume from the checkpoint directory", 133 | ) 134 | parser.add_argument( 135 | "opts", 136 | help="Modify config options using the command-line", 137 | default=None, 138 | nargs=argparse.REMAINDER, 139 | ) 140 | 141 | args = parser.parse_args() 142 | 143 | cfg = setup(args) 144 | num_gpus = len(args.gpu_id.split(',')) 145 | print(args.mode) 146 | if args.mode == "caffe": 147 | if args.fastmode: # faster.py 148 | print("faster") 149 | extract_feat_faster_start(args,cfg) 150 | else: # multi or single 151 | if num_gpus == 1: # without ray 152 | print("single") 153 | extract_feat_singlegpu_start(args,cfg) 154 | else: # use ray to accelerate 155 | print("multi") 156 | extract_feat_multigpu_start(args,cfg) 157 | elif args.mode == "d2": 158 | print("d2 mode use ray") 159 | extract_feat_d2_start(args,cfg) 160 | else: 161 | raise Exception("detection model not supported: {}".format(args.model)) 162 | 163 | if __name__ == "__main__": 164 | main() 165 | -------------------------------------------------------------------------------- /opts.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | def parse_opt(): 5 | """ 6 | Create a parser with some common arguments used by detectron2 users. 
7 | 8 | Returns: 9 | argparse.ArgumentParser: 10 | """ 11 | parser = argparse.ArgumentParser(description="BottomUpAttention Training") 12 | parser.add_argument("--mode", default="caffe", type=str, help="'caffe' and 'd2' indicates \ 13 | 'use caffe model' and 'use detectron2 model'respectively") 14 | parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") 15 | parser.add_argument( 16 | "--resume", 17 | action="store_true", 18 | help="whether to attempt to resume from the checkpoint directory", 19 | ) 20 | parser.add_argument("--eval-only", action="store_true", help="perform evaluation only") 21 | parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*") 22 | parser.add_argument("--num-machines", type=int, default=1) 23 | parser.add_argument( 24 | "--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)" 25 | ) 26 | 27 | # PyTorch still may leave orphan processes in multi-gpu training. 28 | # Therefore we use a deterministic way to obtain port, 29 | # so that users are aware of orphan processes by seeing the port occupied. 30 | port = 2 ** 15 + 2 ** 14 + hash(os.getuid()) % 2 ** 14 31 | parser.add_argument("--dist-url", default="tcp://127.0.0.1:{}".format(port)) 32 | parser.add_argument( 33 | "opts", 34 | help="Modify config options using the command-line", 35 | default=None, 36 | nargs=argparse.REMAINDER, 37 | ) 38 | return parser 39 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "bua","caffe", "modeling","layers", "csrc") 17 | 18 | main_source = os.path.join(extensions_dir, "vision.cpp") 19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 21 | os.path.join(extensions_dir, "*.cu") 22 | ) 23 | 24 | sources = [main_source] + sources 25 | 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | 43 | sources = [os.path.join(extensions_dir, s) for s in sources] 44 | 45 | include_dirs = [extensions_dir] 46 | 47 | ext_modules = [ 48 | extension( 49 | "bua.caffe.modeling._C", 50 | sources, 51 | include_dirs=include_dirs, 52 | define_macros=define_macros, 53 | extra_compile_args=extra_compile_args, 54 | ) 55 | ] 56 | 57 | return ext_modules 58 | 59 | 60 | setup( 61 | name="bottom-up-attention.pytorch", 62 | packages=find_packages(exclude=("configs", "tests")), 63 | python_requires=">=3.6", 64 
|     ext_modules=get_extensions(),
65 |     cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
66 | )
67 | 
-------------------------------------------------------------------------------- /train_net.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | TridentNet Training Script.
4 | 
5 | This script is a simplified version of the training script in detectron2/tools.
6 | """
7 | 
8 | import os
9 | import sys
10 | import time
11 | sys.path.append('detectron2')
12 | 
13 | import detectron2.utils.comm as comm
14 | from detectron2.checkpoint import DetectionCheckpointer
15 | from detectron2.data import build_detection_test_loader, build_detection_train_loader
16 | from detectron2.config import get_cfg
17 | from detectron2.engine import DefaultTrainer, default_setup, launch
18 | from detectron2.evaluation import COCOEvaluator, verify_results
19 | 
20 | from bua import add_config
21 | from bua.d2 import build_detection_test_loader_with_attributes, build_detection_train_loader_with_attributes
22 | from bua.caffe.dataloader import DatasetMapper
23 | from opts import parse_opt
24 | from evaluation import VGEvaluator
25 | 
26 | 
27 | class Trainer(DefaultTrainer):
28 |     def __init__(self, cfg):
29 |         super().__init__(cfg)
30 |         self.rpn_box_lw = cfg.MODEL.RPN.BBOX_LOSS_WEIGHT
31 |         self.rcnn_box_lw = cfg.MODEL.ROI_BOX_HEAD.BBOX_LOSS_WEIGHT
32 | 
33 |     @classmethod
34 |     def build_evaluator(cls, cfg, dataset_name, output_folder=None):
35 |         if output_folder is None:
36 |             output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
37 |         return VGEvaluator(dataset_name, cfg, True, output_folder)
38 | 
39 |     @classmethod
40 |     def build_test_loader(cls, cfg, dataset_name):
41 |         if cfg.MODE == "caffe":
42 |             return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))
43 |         elif cfg.MODE == "d2":
44 |             return build_detection_test_loader_with_attributes(cfg, dataset_name)
45 |         else:
46 |             raise Exception("detectron mode not supported: {}".format(cfg.MODE))
47 | 
48 | 
49 | 
50 |     @classmethod
51 |     def build_train_loader(cls, cfg):
52 |         if cfg.MODE == "caffe":
53 |             return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
54 |         elif cfg.MODE == "d2":
55 |             return build_detection_train_loader_with_attributes(cfg)
56 |         else:
57 |             raise Exception("detectron mode not supported: {}".format(cfg.MODE))
58 | 
59 |     def run_step(self):
60 |         """
61 |         !!Hack!! for the run_step method in SimpleTrainer to adjust the loss
62 |         """
63 |         assert self.model.training, "[Trainer] model was changed to eval mode!"
64 |         start = time.perf_counter()
65 |         data = next(self._data_loader_iter)
66 |         data_time = time.perf_counter() - start
67 |         loss_dict = self.model(data)
68 |         # RPN box loss:
69 |         loss_dict["loss_rpn_loc"] *= self.rpn_box_lw
70 |         # R-CNN box loss:
71 |         loss_dict["loss_box_reg"] *= self.rcnn_box_lw
72 |         losses = sum(loss_dict.values())
73 |         self._detect_anomaly(losses, loss_dict)
74 | 
75 |         metrics_dict = loss_dict
76 |         metrics_dict["data_time"] = data_time
77 |         self._write_metrics(metrics_dict)
78 |         self.optimizer.zero_grad()
79 |         losses.backward()
80 |         self.optimizer.step()
81 | 
82 | def setup(args):
83 |     """
84 |     Create configs and perform basic setups.
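    Illustrative commands (assumed from the options defined in opts.py and the
    configs shipped under configs/, not copied from the project README):

        # train a detectron2-style model from an ImageNet-pretrained backbone
        python3 train_net.py --mode d2 --config-file configs/d2/train-d2-r50.yaml --num-gpus 4

        # evaluation only, loading the weights referenced by MODEL.WEIGHTS in the config
        python3 train_net.py --mode d2 --config-file configs/d2/test-d2-r50.yaml --num-gpus 4 --eval-only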
85 | """ 86 | cfg = get_cfg() 87 | add_config(args, cfg) 88 | cfg.merge_from_file(args.config_file) 89 | cfg.merge_from_list(args.opts) 90 | cfg.MODE = args.mode 91 | cfg.freeze() 92 | default_setup(cfg, args) 93 | return cfg 94 | 95 | 96 | def main(args): 97 | cfg = setup(args) 98 | 99 | if args.eval_only: 100 | model = Trainer.build_model(cfg) 101 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 102 | cfg.MODEL.WEIGHTS, resume=args.resume 103 | ) 104 | res = Trainer.test(cfg, model) 105 | if comm.is_main_process(): 106 | verify_results(cfg, res) 107 | return res 108 | 109 | trainer = Trainer(cfg) 110 | trainer.resume_or_load(resume=args.resume) 111 | return trainer.train() 112 | 113 | 114 | if __name__ == "__main__": 115 | args = parse_opt().parse_args() 116 | print("Command Line Args:", args) 117 | launch( 118 | main, 119 | args.num_gpus, 120 | num_machines=args.num_machines, 121 | machine_rank=args.machine_rank, 122 | dist_url=args.dist_url, 123 | args=(args,), 124 | ) 125 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import save_features 2 | from .extract_features_faster import extract_feat_faster_start 3 | from .extract_features_multigpu import extract_feat_multigpu_start 4 | from .extract_features_singlegpu import extract_feat_singlegpu_start 5 | from .extract_d2features import extract_feat_d2_start -------------------------------------------------------------------------------- /utils/extract_features_faster.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | # pylint: disable=no-member 3 | """ 4 | TridentNet Training Script. 5 | 6 | This script is a simplified version of the training script in detectron2/tools. 7 | """ 8 | import argparse 9 | import os 10 | import sys 11 | import torch 12 | # import tqdm 13 | import cv2 14 | import numpy as np 15 | sys.path.append('detectron2') 16 | 17 | import detectron2.utils.comm as comm 18 | from detectron2.checkpoint import DetectionCheckpointer 19 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 20 | from detectron2.config import get_cfg 21 | from detectron2.engine import DefaultTrainer, default_setup, launch 22 | from detectron2.evaluation import COCOEvaluator, verify_results 23 | from detectron2.structures import Instances 24 | 25 | from utils.utils import mkdir, save_features 26 | from utils.extract_utils import get_image_blob, save_bbox, save_roi_features_by_bbox, save_roi_features 27 | from utils.progress_bar import ProgressBar 28 | from bua import add_config 29 | from bua.caffe.modeling.box_regression import BUABoxes 30 | 31 | import ray 32 | from ray.actor import ActorHandle 33 | 34 | """ 35 | add ray to generate_npz 36 | """ 37 | def switch_extract_mode(mode): 38 | if mode == 'roi_feats': 39 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 1] 40 | elif mode == 'bboxes': 41 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 2] 42 | elif mode == 'bbox_feats': 43 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 3, 'MODEL.PROPOSAL_GENERATOR.NAME', 'PrecomputedProposals'] 44 | else: 45 | print('Wrong extract mode! 
') 46 | exit() 47 | return switch_cmd 48 | 49 | def set_min_max_boxes(min_max_boxes): 50 | if min_max_boxes == 'min_max_default': 51 | return [] 52 | try: 53 | min_boxes = int(min_max_boxes.split(',')[0]) 54 | max_boxes = int(min_max_boxes.split(',')[1]) 55 | except: 56 | print('Illegal min-max boxes setting, using config default. ') 57 | return [] 58 | cmd = ['MODEL.BUA.EXTRACTOR.MIN_BOXES', min_boxes, 59 | 'MODEL.BUA.EXTRACTOR.MAX_BOXES', max_boxes] 60 | return cmd 61 | 62 | def setup(args): 63 | """ 64 | Create configs and perform basic setups. 65 | """ 66 | cfg = get_cfg() 67 | add_config(args, cfg) 68 | cfg.merge_from_file(args.config_file) 69 | cfg.merge_from_list(args.opts) 70 | cfg.merge_from_list(['MODEL.BUA.EXTRACT_FEATS',True]) 71 | cfg.merge_from_list(switch_extract_mode(args.extract_mode)) 72 | cfg.merge_from_list(set_min_max_boxes(args.min_max_boxes)) 73 | cfg.freeze() 74 | default_setup(cfg, args) 75 | return cfg 76 | 77 | @ray.remote 78 | def generate_npz(extract_mode, pba: ActorHandle, *args): 79 | if extract_mode == 1: 80 | save_roi_features(*args) 81 | elif extract_mode == 2: 82 | save_bbox(*args) 83 | elif extract_mode == 3: 84 | save_roi_features_by_bbox(*args) 85 | else: 86 | print('Invalid Extract Mode! ') 87 | pba.update.remote(1) 88 | 89 | @ray.remote(num_gpus=1) 90 | def extract_feat_faster(split_idx, img_list, cfg, args, actor: ActorHandle): 91 | num_images = len(img_list) 92 | print('Number of images on split{}: {}.'.format(split_idx, num_images)) 93 | 94 | model = DefaultTrainer.build_model(cfg) 95 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 96 | cfg.MODEL.WEIGHTS, resume=args.resume 97 | ) 98 | model.eval() 99 | 100 | generate_npz_list = [] 101 | for im_file in (img_list): 102 | if os.path.exists(os.path.join(args.output_dir, im_file.split('.')[0]+'.npz')): 103 | actor.update.remote(1) 104 | continue 105 | im = cv2.imread(os.path.join(args.image_dir, im_file)) 106 | if im is None: 107 | print(os.path.join(args.image_dir, im_file), "is illegal!") 108 | actor.update.remote(1) 109 | continue 110 | dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN) 111 | # extract roi features 112 | if cfg.MODEL.BUA.EXTRACTOR.MODE == 1: 113 | attr_scores = None 114 | with torch.set_grad_enabled(False): 115 | if cfg.MODEL.BUA.ATTRIBUTE_ON: 116 | boxes, scores, features_pooled, attr_scores = model([dataset_dict]) 117 | else: 118 | boxes, scores, features_pooled = model([dataset_dict]) 119 | boxes = [box.tensor.cpu() for box in boxes] 120 | scores = [score.cpu() for score in scores] 121 | features_pooled = [feat.cpu() for feat in features_pooled] 122 | if not attr_scores is None: 123 | attr_scores = [attr_score.cpu() for attr_score in attr_scores] 124 | generate_npz_list.append(generate_npz.remote(1, actor, 125 | args, cfg, im_file, im, dataset_dict, 126 | boxes, scores, features_pooled, attr_scores)) 127 | # extract bbox only 128 | elif cfg.MODEL.BUA.EXTRACTOR.MODE == 2: 129 | with torch.set_grad_enabled(False): 130 | boxes, scores = model([dataset_dict]) 131 | boxes = [box.cpu() for box in boxes] 132 | scores = [score.cpu() for score in scores] 133 | generate_npz_list.append(generate_npz.remote(2, actor, 134 | args, cfg, im_file, im, dataset_dict, 135 | boxes, scores)) 136 | # extract roi features by bbox 137 | elif cfg.MODEL.BUA.EXTRACTOR.MODE == 3: 138 | if not os.path.exists(os.path.join(args.bbox_dir, im_file.split('.')[0]+'.npz')): 139 | actor.update.remote(1) 140 | continue 141 | bbox = torch.from_numpy(np.load(os.path.join(args.bbox_dir, 
im_file.split('.')[0]+'.npz'))['bbox']) * dataset_dict['im_scale'] 142 | proposals = Instances(dataset_dict['image'].shape[-2:]) 143 | proposals.proposal_boxes = BUABoxes(bbox) 144 | dataset_dict['proposals'] = proposals 145 | 146 | attr_scores = None 147 | with torch.set_grad_enabled(False): 148 | if cfg.MODEL.BUA.ATTRIBUTE_ON: 149 | boxes, scores, features_pooled, attr_scores = model([dataset_dict]) 150 | else: 151 | boxes, scores, features_pooled = model([dataset_dict]) 152 | boxes = [box.tensor.cpu() for box in boxes] 153 | scores = [score.cpu() for score in scores] 154 | features_pooled = [feat.cpu() for feat in features_pooled] 155 | if not attr_scores is None: 156 | attr_scores = [attr_score.data.cpu() for attr_score in attr_scores] 157 | generate_npz_list.append(generate_npz.remote(3, actor, 158 | args, cfg, im_file, im, dataset_dict, 159 | boxes, scores, features_pooled, attr_scores)) 160 | 161 | ray.get(generate_npz_list) 162 | 163 | 164 | def main(): 165 | parser = argparse.ArgumentParser(description="PyTorch Object Detection2 Inference") 166 | parser.add_argument( 167 | "--config-file", 168 | default="configs/caffe/test-caffe-r101.yaml", 169 | metavar="FILE", 170 | help="path to config file", 171 | ) 172 | 173 | parser.add_argument('--num-cpus', default=1, type=int, 174 | help='number of cpus to use for ray, 0 means no limit') 175 | 176 | parser.add_argument('--gpus', dest='gpu_id', help='GPU id(s) to use', 177 | default='0', type=str) 178 | 179 | parser.add_argument("--mode", default="caffe", type=str, help="'caffe' and 'd2' indicates \ 180 | 'use caffe model' and 'use detectron2 model'respectively") 181 | 182 | parser.add_argument('--extract-mode', default='roi_feats', type=str, 183 | help="'roi_feats', 'bboxes' and 'bbox_feats' indicates \ 184 | 'extract roi features directly', 'extract bboxes only' and \ 185 | 'extract roi features with pre-computed bboxes' respectively") 186 | 187 | parser.add_argument('--min-max-boxes', default='min_max_default', type=str, 188 | help='the number of min-max boxes of extractor') 189 | 190 | parser.add_argument('--out-dir', dest='output_dir', 191 | help='output directory for features', 192 | default="features") 193 | parser.add_argument('--image-dir', dest='image_dir', 194 | help='directory with images', 195 | default="image") 196 | parser.add_argument('--bbox-dir', dest='bbox_dir', 197 | help='directory with bbox', 198 | default="bbox") 199 | parser.add_argument( 200 | "--resume", 201 | action="store_true", 202 | help="whether to attempt to resume from the checkpoint directory", 203 | ) 204 | parser.add_argument( 205 | "opts", 206 | help="Modify config options using the command-line", 207 | default=None, 208 | nargs=argparse.REMAINDER, 209 | ) 210 | 211 | args = parser.parse_args() 212 | 213 | cfg = setup(args) 214 | extract_feat_faster_start(args,cfg) 215 | 216 | def extract_feat_faster_start(args,cfg): 217 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id 218 | num_gpus = len(args.gpu_id.split(',')) 219 | 220 | MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES 221 | MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES 222 | CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH 223 | 224 | # Extract features. 
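    # Every file in args.image_dir is queued below; images whose <name>.npz already
    # exists in args.output_dir are skipped inside extract_feat_faster(). The list is
    # split round-robin across GPUs (imglist[i::num_gpus]), each split runs as one
    # ray task, and the ProgressBar actor counts images as they finish.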
225 | imglist = os.listdir(args.image_dir) 226 | num_images = len(imglist) 227 | print('Number of images: {}.'.format(num_images)) 228 | 229 | if args.num_cpus != 0: 230 | ray.init(num_cpus=args.num_cpus) 231 | else: 232 | ray.init() 233 | img_lists = [imglist[i::num_gpus] for i in range(num_gpus)] 234 | 235 | pb = ProgressBar(len(imglist)) 236 | actor = pb.actor 237 | 238 | print('Number of GPUs: {}.'.format(num_gpus)) 239 | extract_feat_list = [] 240 | for i in range(num_gpus): 241 | extract_feat_list.append(extract_feat_faster.remote(i, img_lists[i], cfg, args, actor)) 242 | 243 | pb.print_until_done() 244 | ray.get(extract_feat_list) 245 | ray.get(actor.get_counter.remote()) 246 | 247 | if __name__ == "__main__": 248 | main() -------------------------------------------------------------------------------- /utils/extract_features_multigpu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | # pylint: disable=no-member 3 | """ 4 | TridentNet Training Script. 5 | 6 | This script is a simplified version of the training script in detectron2/tools. 7 | """ 8 | import argparse 9 | import os 10 | import sys 11 | import torch 12 | # import tqdm 13 | import cv2 14 | import numpy as np 15 | sys.path.append('detectron2') 16 | 17 | import detectron2.utils.comm as comm 18 | from detectron2.checkpoint import DetectionCheckpointer 19 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 20 | from detectron2.config import get_cfg 21 | from detectron2.engine import DefaultTrainer, default_setup, launch 22 | from detectron2.evaluation import COCOEvaluator, verify_results 23 | from detectron2.structures import Instances 24 | 25 | from utils.utils import mkdir, save_features 26 | from utils.extract_utils import get_image_blob, save_bbox, save_roi_features_by_bbox, save_roi_features 27 | from utils.progress_bar import ProgressBar 28 | from bua import add_config 29 | from bua.caffe.modeling.box_regression import BUABoxes 30 | from torch.nn import functional as F 31 | from detectron2.modeling import postprocessing 32 | 33 | import ray 34 | from ray.actor import ActorHandle 35 | """ 36 | use ray to accelerate multi gpu 37 | """ 38 | def switch_extract_mode(mode): 39 | if mode == 'roi_feats': 40 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 1] 41 | elif mode == 'bboxes': 42 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 2] 43 | elif mode == 'bbox_feats': 44 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 3, 'MODEL.PROPOSAL_GENERATOR.NAME', 'PrecomputedProposals'] 45 | else: 46 | print('Wrong extract mode! ') 47 | exit() 48 | return switch_cmd 49 | 50 | def set_min_max_boxes(min_max_boxes): 51 | if min_max_boxes == 'min_max_default': 52 | return [] 53 | try: 54 | min_boxes = int(min_max_boxes.split(',')[0]) 55 | max_boxes = int(min_max_boxes.split(',')[1]) 56 | except: 57 | print('Illegal min-max boxes setting, using config default. ') 58 | return [] 59 | cmd = ['MODEL.BUA.EXTRACTOR.MIN_BOXES', min_boxes, 60 | 'MODEL.BUA.EXTRACTOR.MAX_BOXES', max_boxes] 61 | return cmd 62 | 63 | def setup(args): 64 | """ 65 | Create configs and perform basic setups. 
66 | """ 67 | cfg = get_cfg() 68 | add_config(args, cfg) 69 | cfg.merge_from_file(args.config_file) 70 | cfg.merge_from_list(args.opts) 71 | cfg.merge_from_list(['MODEL.BUA.EXTRACT_FEATS',True]) 72 | cfg.merge_from_list(switch_extract_mode(args.extract_mode)) 73 | cfg.merge_from_list(set_min_max_boxes(args.min_max_boxes)) 74 | cfg.freeze() 75 | default_setup(cfg, args) 76 | return cfg 77 | 78 | def generate_npz(extract_mode, *args): 79 | if extract_mode == 1: 80 | save_roi_features(*args) 81 | elif extract_mode == 2: 82 | save_bbox(*args) 83 | elif extract_mode == 3: 84 | save_roi_features_by_bbox(*args) 85 | else: 86 | print('Invalid Extract Mode! ') 87 | 88 | @ray.remote(num_gpus=1) 89 | def extract_feat_multigpu(split_idx, img_list, cfg, args, actor: ActorHandle): # NOTE ray 90 | num_images = len(img_list) 91 | print('Number of images on split{}: {}.'.format(split_idx, num_images)) 92 | 93 | model = DefaultTrainer.build_model(cfg) 94 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 95 | cfg.MODEL.WEIGHTS, resume=args.resume 96 | ) 97 | model.eval() 98 | 99 | for im_file in (img_list): 100 | if os.path.exists(os.path.join(args.output_dir, im_file.split('.')[0]+'.npz')): 101 | actor.update.remote(1) # NOTE ray 102 | continue 103 | im = cv2.imread(os.path.join(args.image_dir, im_file)) 104 | if im is None: 105 | print(os.path.join(args.image_dir, im_file), "is illegal!") 106 | actor.update.remote(1) # NOTE ray 107 | continue 108 | dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN) 109 | # extract roi features 110 | if cfg.MODEL.BUA.EXTRACTOR.MODE == 1: 111 | attr_scores = None 112 | with torch.set_grad_enabled(False): 113 | if cfg.MODEL.BUA.ATTRIBUTE_ON: 114 | boxes, scores, features_pooled, attr_scores = model([dataset_dict]) 115 | else: 116 | boxes, scores, features_pooled = model([dataset_dict]) 117 | boxes = [box.tensor.cpu() for box in boxes] 118 | scores = [score.cpu() for score in scores] 119 | features_pooled = [feat.cpu() for feat in features_pooled] 120 | if not attr_scores is None: 121 | attr_scores = [attr_score.cpu() for attr_score in attr_scores] 122 | generate_npz(1, 123 | args, cfg, im_file, im, dataset_dict, 124 | boxes, scores, features_pooled, attr_scores) 125 | # extract bbox only 126 | elif cfg.MODEL.BUA.EXTRACTOR.MODE == 2: 127 | with torch.set_grad_enabled(False): 128 | boxes, scores = model([dataset_dict]) 129 | boxes = [box.cpu() for box in boxes] 130 | scores = [score.cpu() for score in scores] 131 | generate_npz(2, 132 | args, cfg, im_file, im, dataset_dict, 133 | boxes, scores) 134 | # extract roi features by bbox 135 | elif cfg.MODEL.BUA.EXTRACTOR.MODE == 3: 136 | if not os.path.exists(os.path.join(args.bbox_dir, im_file.split('.')[0]+'.npz')): 137 | actor.update.remote(1) # NOTE ray 138 | continue 139 | bbox = torch.from_numpy(np.load(os.path.join(args.bbox_dir, im_file.split('.')[0]+'.npz'))['bbox']) * dataset_dict['im_scale'] 140 | proposals = Instances(dataset_dict['image'].shape[-2:]) 141 | proposals.proposal_boxes = BUABoxes(bbox) 142 | dataset_dict['proposals'] = proposals 143 | 144 | attr_scores = None 145 | with torch.set_grad_enabled(False): 146 | if cfg.MODEL.BUA.ATTRIBUTE_ON: 147 | boxes, scores, features_pooled, attr_scores = model([dataset_dict]) 148 | else: 149 | boxes, scores, features_pooled = model([dataset_dict]) 150 | boxes = [box.tensor.cpu() for box in boxes] 151 | scores = [score.cpu() for score in scores] 152 | features_pooled = [feat.cpu() for feat in features_pooled] 153 | if not attr_scores is None: 154 | 
attr_scores = [attr_score.data.cpu() for attr_score in attr_scores] 155 | generate_npz(3, 156 | args, cfg, im_file, im, dataset_dict, 157 | boxes, scores, features_pooled, attr_scores) 158 | 159 | actor.update.remote(1) # NOTE ray 160 | 161 | 162 | def main(): 163 | parser = argparse.ArgumentParser(description="PyTorch Object Detection2 Inference") 164 | parser.add_argument( 165 | "--config-file", 166 | default="configs/caffe/test-caffe-r101.yaml", 167 | metavar="FILE", 168 | help="path to config file", 169 | ) 170 | 171 | parser.add_argument('--num-cpus', default=1, type=int, 172 | help='number of cpus to use for ray, 0 means no limit') 173 | 174 | parser.add_argument('--gpus', dest='gpu_id', help='GPU id(s) to use', 175 | default='0', type=str) 176 | 177 | parser.add_argument("--mode", default="caffe", type=str, help="'caffe' and 'd2' indicates \ 178 | 'use caffe model' and 'use detectron2 model'respectively") 179 | 180 | parser.add_argument('--extract-mode', default='roi_feats', type=str, 181 | help="'roi_feats', 'bboxes' and 'bbox_feats' indicates \ 182 | 'extract roi features directly', 'extract bboxes only' and \ 183 | 'extract roi features with pre-computed bboxes' respectively") 184 | 185 | parser.add_argument('--min-max-boxes', default='min_max_default', type=str, 186 | help='the number of min-max boxes of extractor') 187 | 188 | parser.add_argument('--out-dir', dest='output_dir', 189 | help='output directory for features', 190 | default="features") 191 | parser.add_argument('--image-dir', dest='image_dir', 192 | help='directory with images', 193 | default="image") 194 | parser.add_argument('--bbox-dir', dest='bbox_dir', 195 | help='directory with bbox', 196 | default="bbox") 197 | parser.add_argument( 198 | "--resume", 199 | action="store_true", 200 | help="whether to attempt to resume from the checkpoint directory", 201 | ) 202 | parser.add_argument( 203 | "opts", 204 | help="Modify config options using the command-line", 205 | default=None, 206 | nargs=argparse.REMAINDER, 207 | ) 208 | 209 | args = parser.parse_args() 210 | 211 | cfg = setup(args) 212 | extract_feat_multigpu_start(args,cfg) 213 | 214 | def extract_feat_multigpu_start(args,cfg): 215 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id 216 | num_gpus = len(args.gpu_id.split(',')) 217 | 218 | MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES 219 | MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES 220 | CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH 221 | 222 | # Extract features. 
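    # The images found in --image-dir are split round-robin across the visible
    # GPUs (imglist[i::num_gpus]); one Ray worker per GPU runs
    # extract_feat_multigpu, and the shared ProgressBarActor reports aggregate
    # progress on the driver until every image has been handled.
    #
    # Illustrative invocation (a sketch only; it assumes the repository root is
    # on PYTHONPATH and that the config file points at valid weights):
    #   python utils/extract_features_multigpu.py \
    #       --config-file configs/caffe/test-caffe-r101.yaml \
    #       --extract-mode roi_feats --min-max-boxes 10,100 \
    #       --image-dir datasets/demo --out-dir features --gpus 0,1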
223 | imglist = os.listdir(args.image_dir) 224 | num_images = len(imglist) 225 | print('Number of images: {}.'.format(num_images)) 226 | 227 | # ray 228 | if args.num_cpus != 0: 229 | ray.init(num_cpus=args.num_cpus) 230 | else: 231 | ray.init() 232 | img_lists = [imglist[i::num_gpus] for i in range(num_gpus)] 233 | 234 | # ray 235 | pb = ProgressBar(len(imglist)) 236 | actor = pb.actor 237 | 238 | print('Number of GPUs: {}.'.format(num_gpus)) 239 | # for i in range(num_gpus): 240 | # extract_feat(i, img_lists[i], cfg, args) 241 | 242 | extract_feat_list = [] 243 | for i in range(num_gpus): 244 | extract_feat_list.append(extract_feat_multigpu.remote(i, img_lists[i], cfg, args, actor)) 245 | 246 | pb.print_until_done() 247 | ray.get(extract_feat_list) 248 | ray.get(actor.get_counter.remote()) 249 | 250 | 251 | if __name__ == "__main__": 252 | main() 253 | -------------------------------------------------------------------------------- /utils/extract_features_singlegpu.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | # pylint: disable=no-member 3 | """ 4 | TridentNet Training Script. 5 | 6 | This script is a simplified version of the training script in detectron2/tools. 7 | """ 8 | import argparse 9 | import os 10 | import sys 11 | import torch 12 | # import tqdm 13 | import cv2 14 | import numpy as np 15 | sys.path.append('detectron2') 16 | 17 | import detectron2.utils.comm as comm 18 | from detectron2.checkpoint import DetectionCheckpointer 19 | from detectron2.data import build_detection_test_loader, build_detection_train_loader 20 | from detectron2.config import get_cfg 21 | from detectron2.engine import DefaultTrainer, default_setup, launch 22 | from detectron2.evaluation import COCOEvaluator, verify_results 23 | from detectron2.structures import Instances 24 | 25 | from utils.utils import mkdir, save_features 26 | from utils.extract_utils import get_image_blob, save_bbox, save_roi_features_by_bbox, save_roi_features 27 | from utils.progress_bar import ProgressBar 28 | from bua import add_config 29 | from bua.caffe.modeling.box_regression import BUABoxes 30 | from torch.nn import functional as F 31 | from detectron2.modeling import postprocessing 32 | 33 | def switch_extract_mode(mode): 34 | if mode == 'roi_feats': 35 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 1] 36 | elif mode == 'bboxes': 37 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 2] 38 | elif mode == 'bbox_feats': 39 | switch_cmd = ['MODEL.BUA.EXTRACTOR.MODE', 3, 'MODEL.PROPOSAL_GENERATOR.NAME', 'PrecomputedProposals'] 40 | else: 41 | print('Wrong extract mode! ') 42 | exit() 43 | return switch_cmd 44 | 45 | def set_min_max_boxes(min_max_boxes): 46 | if min_max_boxes == 'min_max_default': 47 | return [] 48 | try: 49 | min_boxes = int(min_max_boxes.split(',')[0]) 50 | max_boxes = int(min_max_boxes.split(',')[1]) 51 | except: 52 | print('Illegal min-max boxes setting, using config default. ') 53 | return [] 54 | cmd = ['MODEL.BUA.EXTRACTOR.MIN_BOXES', min_boxes, 55 | 'MODEL.BUA.EXTRACTOR.MAX_BOXES', max_boxes] 56 | return cmd 57 | 58 | def setup(args): 59 | """ 60 | Create configs and perform basic setups. 
61 | """ 62 | cfg = get_cfg() 63 | add_config(args, cfg) 64 | cfg.merge_from_file(args.config_file) 65 | cfg.merge_from_list(args.opts) 66 | cfg.merge_from_list(['MODEL.BUA.EXTRACT_FEATS',True]) 67 | cfg.merge_from_list(switch_extract_mode(args.extract_mode)) 68 | cfg.merge_from_list(set_min_max_boxes(args.min_max_boxes)) 69 | cfg.freeze() 70 | default_setup(cfg, args) 71 | return cfg 72 | 73 | def generate_npz(extract_mode, *args): 74 | if extract_mode == 1: 75 | save_roi_features(*args) 76 | elif extract_mode == 2: 77 | save_bbox(*args) 78 | elif extract_mode == 3: 79 | save_roi_features_by_bbox(*args) 80 | else: 81 | print('Invalid Extract Mode! ') 82 | 83 | def extract_feat_singlegpu(split_idx, img_list, cfg, args): 84 | num_images = len(img_list) 85 | print('Number of images on split{}: {}.'.format(split_idx, num_images)) 86 | 87 | model = DefaultTrainer.build_model(cfg) 88 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 89 | cfg.MODEL.WEIGHTS, resume=args.resume 90 | ) 91 | model.eval() 92 | 93 | for im_file in (img_list): 94 | if os.path.exists(os.path.join(args.output_dir, im_file.split('.')[0]+'.npz')): 95 | continue 96 | im = cv2.imread(os.path.join(args.image_dir, im_file)) 97 | if im is None: 98 | print(os.path.join(args.image_dir, im_file), "is illegal!") 99 | continue 100 | dataset_dict = get_image_blob(im, cfg.MODEL.PIXEL_MEAN) 101 | # extract roi features 102 | if cfg.MODEL.BUA.EXTRACTOR.MODE == 1: 103 | attr_scores = None 104 | with torch.set_grad_enabled(False): 105 | if cfg.MODEL.BUA.ATTRIBUTE_ON: 106 | boxes, scores, features_pooled, attr_scores = model([dataset_dict]) # caffe mode 107 | else: 108 | boxes, scores, features_pooled = model([dataset_dict]) 109 | boxes = [box.tensor.cpu() for box in boxes] 110 | scores = [score.cpu() for score in scores] 111 | features_pooled = [feat.cpu() for feat in features_pooled] 112 | if not attr_scores is None: 113 | attr_scores = [attr_score.cpu() for attr_score in attr_scores] 114 | generate_npz(1, 115 | args, cfg, im_file, im, dataset_dict, 116 | boxes, scores, features_pooled, attr_scores) 117 | # extract bbox only 118 | elif cfg.MODEL.BUA.EXTRACTOR.MODE == 2: 119 | with torch.set_grad_enabled(False): 120 | boxes, scores = model([dataset_dict]) 121 | boxes = [box.cpu() for box in boxes] 122 | scores = [score.cpu() for score in scores] 123 | generate_npz(2, 124 | args, cfg, im_file, im, dataset_dict, 125 | boxes, scores) 126 | # extract roi features by bbox 127 | elif cfg.MODEL.BUA.EXTRACTOR.MODE == 3: 128 | if not os.path.exists(os.path.join(args.bbox_dir, im_file.split('.')[0]+'.npz')): 129 | continue 130 | bbox = torch.from_numpy(np.load(os.path.join(args.bbox_dir, im_file.split('.')[0]+'.npz'))['bbox']) * dataset_dict['im_scale'] 131 | proposals = Instances(dataset_dict['image'].shape[-2:]) 132 | proposals.proposal_boxes = BUABoxes(bbox) 133 | dataset_dict['proposals'] = proposals 134 | 135 | attr_scores = None 136 | with torch.set_grad_enabled(False): 137 | if cfg.MODEL.BUA.ATTRIBUTE_ON: 138 | boxes, scores, features_pooled, attr_scores = model([dataset_dict]) 139 | else: 140 | boxes, scores, features_pooled = model([dataset_dict]) 141 | boxes = [box.tensor.cpu() for box in boxes] 142 | scores = [score.cpu() for score in scores] 143 | features_pooled = [feat.cpu() for feat in features_pooled] 144 | if not attr_scores is None: 145 | attr_scores = [attr_score.data.cpu() for attr_score in attr_scores] 146 | generate_npz(3, 147 | args, cfg, im_file, im, dataset_dict, 148 | boxes, scores, features_pooled, 
attr_scores) 149 | 150 | 151 | def main(): 152 | parser = argparse.ArgumentParser(description="PyTorch Object Detection2 Inference") 153 | parser.add_argument( 154 | "--config-file", 155 | default="configs/caffe/test-caffe-r101.yaml", 156 | metavar="FILE", 157 | help="path to config file", 158 | ) 159 | 160 | parser.add_argument('--num-cpus', default=1, type=int, 161 | help='number of cpus to use for ray, 0 means no limit') 162 | 163 | parser.add_argument('--gpus', dest='gpu_id', help='GPU id(s) to use', 164 | default='0', type=str) 165 | 166 | parser.add_argument("--mode", default="caffe", type=str, help="'caffe' and 'd2' indicates \ 167 | 'use caffe model' and 'use detectron2 model'respectively") 168 | 169 | parser.add_argument('--extract-mode', default='roi_feats', type=str, 170 | help="'roi_feats', 'bboxes' and 'bbox_feats' indicates \ 171 | 'extract roi features directly', 'extract bboxes only' and \ 172 | 'extract roi features with pre-computed bboxes' respectively") 173 | 174 | parser.add_argument('--min-max-boxes', default='min_max_default', type=str, 175 | help='the number of min-max boxes of extractor') 176 | 177 | parser.add_argument('--out-dir', dest='output_dir', 178 | help='output directory for features', 179 | default="features") 180 | parser.add_argument('--image-dir', dest='image_dir', 181 | help='directory with images', 182 | default="image") 183 | parser.add_argument('--bbox-dir', dest='bbox_dir', 184 | help='directory with bbox', 185 | default="bbox") 186 | parser.add_argument( 187 | "--resume", 188 | action="store_true", 189 | help="whether to attempt to resume from the checkpoint directory", 190 | ) 191 | parser.add_argument( 192 | "opts", 193 | help="Modify config options using the command-line", 194 | default=None, 195 | nargs=argparse.REMAINDER, 196 | ) 197 | 198 | args = parser.parse_args() 199 | 200 | cfg = setup(args) 201 | extract_feat_singlegpu_start(args,cfg) 202 | 203 | def extract_feat_singlegpu_start(args,cfg): 204 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id 205 | num_gpus = len(args.gpu_id.split(',')) 206 | 207 | MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES 208 | MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES 209 | CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH 210 | 211 | # Extract features. 212 | imglist = os.listdir(args.image_dir) 213 | num_images = len(imglist) 214 | print('Number of images: {}.'.format(num_images)) 215 | 216 | img_lists = [imglist[i::num_gpus] for i in range(num_gpus)] 217 | 218 | print('Number of GPUs: {}.'.format(num_gpus)) 219 | for i in range(num_gpus): 220 | extract_feat_singlegpu(i, img_lists[i], cfg, args) 221 | 222 | 223 | if __name__ == "__main__": 224 | main() 225 | -------------------------------------------------------------------------------- /utils/extract_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | import os 5 | 6 | from bua.caffe.modeling.layers.nms import nms 7 | from bua.caffe.modeling.box_regression import BUABoxes 8 | 9 | PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 10 | TEST_SCALES = (600,) 11 | TEST_MAX_SIZE = 1000 12 | 13 | def im_list_to_blob(ims): 14 | """Convert a list of images into a network input. 15 | 16 | Assumes images are already prepared (means subtracted, BGR order, ...). 
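    Returns a float32 blob of shape (num_images, max_height, max_width, 3);
    images smaller than the maximum spatial size in the list are zero-padded
    on the bottom and right.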
17 | """ 18 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 19 | num_images = len(ims) 20 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 21 | dtype=np.float32) 22 | for i in range(num_images): 23 | im = ims[i] 24 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 25 | 26 | return blob 27 | 28 | def get_image_blob(im, pixel_means): 29 | """Converts an image into a network input. 30 | Arguments: 31 | im (ndarray): a color image 32 | Returns: 33 | blob (ndarray): a data blob holding an image pyramid 34 | im_scale_factors (list): list of image scales (relative to im) used 35 | in the image pyramid 36 | """ 37 | pixel_means = np.array([[pixel_means]]) 38 | dataset_dict = {} 39 | im_orig = im.astype(np.float32, copy=True) 40 | im_orig -= pixel_means 41 | 42 | im_shape = im_orig.shape 43 | im_size_min = np.min(im_shape[0:2]) 44 | im_size_max = np.max(im_shape[0:2]) 45 | 46 | for target_size in TEST_SCALES: 47 | im_scale = float(target_size) / float(im_size_min) 48 | # Prevent the biggest axis from being more than MAX_SIZE 49 | if np.round(im_scale * im_size_max) > TEST_MAX_SIZE: 50 | im_scale = float(TEST_MAX_SIZE) / float(im_size_max) 51 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 52 | interpolation=cv2.INTER_LINEAR) 53 | 54 | dataset_dict["image"] = torch.from_numpy(im).permute(2, 0, 1) 55 | dataset_dict["im_scale"] = im_scale 56 | 57 | return dataset_dict 58 | 59 | 60 | def save_roi_features(args, cfg, im_file, im, dataset_dict, boxes, scores, features_pooled, attr_scores=None): 61 | MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES 62 | MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES 63 | CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH 64 | 65 | dets = boxes[0] / dataset_dict['im_scale'] 66 | scores = scores[0] 67 | feats = features_pooled[0] 68 | 69 | max_conf = torch.zeros((scores.shape[0])).to(scores.device) 70 | for cls_ind in range(1, scores.shape[1]): 71 | cls_scores = scores[:, cls_ind] 72 | keep = nms(dets, cls_scores, 0.3) 73 | max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep], 74 | cls_scores[keep], 75 | max_conf[keep]) 76 | 77 | keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten() 78 | if len(keep_boxes) < MIN_BOXES: 79 | keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES] 80 | elif len(keep_boxes) > MAX_BOXES: 81 | keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES] 82 | image_feat = feats[keep_boxes] 83 | image_bboxes = dets[keep_boxes] 84 | image_objects_conf = np.max(scores[keep_boxes].numpy()[:,1:], axis=1) 85 | image_objects = np.argmax(scores[keep_boxes].numpy()[:,1:], axis=1) 86 | if not attr_scores is None: 87 | attr_scores = attr_scores[0] 88 | image_attrs_conf = np.max(attr_scores[keep_boxes].numpy()[:,1:], axis=1) 89 | image_attrs = np.argmax(attr_scores[keep_boxes].numpy()[:,1:], axis=1) 90 | info = { 91 | 'image_id': im_file.split('.')[0], 92 | 'image_h': np.size(im, 0), 93 | 'image_w': np.size(im, 1), 94 | 'num_boxes': len(keep_boxes), 95 | 'objects_id': image_objects, 96 | 'objects_conf': image_objects_conf, 97 | 'attrs_id': image_attrs, 98 | 'attrs_conf': image_attrs_conf, 99 | } 100 | else: 101 | info = { 102 | 'image_id': im_file.split('.')[0], 103 | 'image_h': np.size(im, 0), 104 | 'image_w': np.size(im, 1), 105 | 'num_boxes': len(keep_boxes), 106 | 'objects_id': image_objects, 107 | 'objects_conf': image_objects_conf 108 | } 109 | 110 | output_file = os.path.join(args.output_dir, im_file.split('.')[0]) 111 | np.savez_compressed(output_file, x=image_feat, 
bbox=image_bboxes, num_bbox=len(keep_boxes), image_h=np.size(im, 0), image_w=np.size(im, 1), info=info) 112 | 113 | def save_bbox(args, cfg, im_file, im, dataset_dict, boxes, scores): 114 | MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES 115 | MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES 116 | CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH 117 | 118 | scores = scores[0] 119 | boxes = boxes[0] 120 | num_classes = scores.shape[1] 121 | boxes = BUABoxes(boxes.reshape(-1, 4)) 122 | boxes.clip((dataset_dict['image'].shape[1]/dataset_dict['im_scale'], dataset_dict['image'].shape[2]/dataset_dict['im_scale'])) 123 | boxes = boxes.tensor.view(-1, num_classes*4) # R x C x 4 124 | 125 | cls_boxes = torch.zeros((boxes.shape[0], 4)) 126 | for idx in range(boxes.shape[0]): 127 | cls_idx = torch.argmax(scores[idx, 1:]) + 1 128 | cls_boxes[idx, :] = boxes[idx, cls_idx * 4:(cls_idx + 1) * 4] 129 | 130 | max_conf = torch.zeros((scores.shape[0])).to(scores.device) 131 | for cls_ind in range(1, num_classes): 132 | cls_scores = scores[:, cls_ind] 133 | keep = nms(cls_boxes, cls_scores, 0.3) 134 | max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep], 135 | cls_scores[keep], 136 | max_conf[keep]) 137 | 138 | keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES] 139 | image_bboxes = cls_boxes[keep_boxes] 140 | 141 | output_file = os.path.join(args.output_dir, im_file.split('.')[0]) 142 | np.savez_compressed(output_file, bbox=image_bboxes, num_bbox=len(keep_boxes), image_h=np.size(im, 0), image_w=np.size(im, 1)) 143 | 144 | def save_roi_features_by_bbox(args, cfg, im_file, im, dataset_dict, boxes, scores, features_pooled, attr_scores=None): 145 | MIN_BOXES = cfg.MODEL.BUA.EXTRACTOR.MIN_BOXES 146 | MAX_BOXES = cfg.MODEL.BUA.EXTRACTOR.MAX_BOXES 147 | CONF_THRESH = cfg.MODEL.BUA.EXTRACTOR.CONF_THRESH 148 | dets = boxes[0] / dataset_dict['im_scale'] 149 | scores = scores[0] 150 | feats = features_pooled[0] 151 | keep_boxes = [i for i in range(scores.shape[0])] 152 | 153 | image_feat = feats[keep_boxes] 154 | image_bboxes = dets[keep_boxes] 155 | image_objects_conf = np.max(scores[keep_boxes].numpy()[:,1:], axis=1) 156 | image_objects = np.argmax(scores[keep_boxes].numpy()[:,1:], axis=1) 157 | if not attr_scores is None: 158 | attr_scores = attr_scores[0] 159 | image_attrs_conf = np.max(attr_scores[keep_boxes].numpy()[:,1:], axis=1) 160 | image_attrs = np.argmax(attr_scores[keep_boxes].numpy()[:,1:], axis=1) 161 | info = { 162 | 'image_id': im_file.split('.')[0], 163 | 'image_h': np.size(im, 0), 164 | 'image_w': np.size(im, 1), 165 | 'num_boxes': len(keep_boxes), 166 | 'objects_id': image_objects, 167 | 'objects_conf': image_objects_conf, 168 | 'attrs_id': image_attrs, 169 | 'attrs_conf': image_attrs_conf, 170 | } 171 | else: 172 | info = { 173 | 'image_id': im_file.split('.')[0], 174 | 'image_h': np.size(im, 0), 175 | 'image_w': np.size(im, 1), 176 | 'num_boxes': len(keep_boxes), 177 | 'objects_id': image_objects, 178 | 'objects_conf': image_objects_conf 179 | } 180 | 181 | output_file = os.path.join(args.output_dir, im_file.split('.')[0]) 182 | np.savez_compressed(output_file, x=image_feat, bbox=image_bboxes, num_bbox=len(keep_boxes), image_h=np.size(im, 0), image_w=np.size(im, 1), info=info) 183 | -------------------------------------------------------------------------------- /utils/extractor.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import logging 3 | import time 4 | import torch 5 | 6 | from contextlib import contextmanager 
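# This module provides a timed inference loop (inference_on_dataset) and an
# inference_context manager that puts a model into eval mode for the duration
# of a `with` block and restores its previous training mode afterwards.
#
# Illustrative use (a sketch only; DefaultTrainer and build_detection_test_loader
# come from detectron2 and are not imported by this module, and the dataset
# name is a placeholder):
#
#   model = DefaultTrainer.build_model(cfg)
#   data_loader = build_detection_test_loader(cfg, "my_test_dataset")
#   inference_on_dataset(model, data_loader)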
7 | 8 | def inference_on_dataset(model, data_loader): 9 | """ 10 | Run model on the data_loader and extract the features with extractor. 11 | The model will be used in eval mode. 12 | 13 | Args: 14 | model (nn.Module): a module which accepts an object from 15 | `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. 16 | 17 | If you wish to extract a model in `training` mode instead, you can 18 | wrap the given model and override its behavior of `.eval()` and `.train()`. 19 | data_loader: an iterable object with a length. 20 | The elements it generates will be the inputs to the model. 21 | evaluator (DatasetEvaluator): the evaluator to run. Use 22 | :class:`DatasetEvaluators([])` if you only want to benchmark, but 23 | don't want to do any evaluation. 24 | 25 | Returns: 26 | The return value of `evaluator.evaluate()` 27 | """ 28 | num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1 29 | logger = logging.getLogger(__name__) 30 | logger.info("Start inference on {} images".format(len(data_loader))) 31 | 32 | total = len(data_loader) # inference data loader must have a fixed length 33 | 34 | logging_interval = 50 35 | num_warmup = min(5, logging_interval - 1, total - 1) 36 | start_time = time.time() 37 | total_compute_time = 0 38 | with inference_context(model), torch.no_grad(): 39 | for idx, inputs in enumerate(data_loader): 40 | if idx == num_warmup: 41 | start_time = time.time() 42 | total_compute_time = 0 43 | 44 | start_compute_time = time.time() 45 | outputs = model(inputs) 46 | torch.cuda.synchronize() 47 | total_compute_time += time.time() - start_compute_time 48 | if (idx + 1) % logging_interval == 0: 49 | duration = time.time() - start_time 50 | seconds_per_img = duration / (idx + 1 - num_warmup) 51 | eta = datetime.timedelta( 52 | seconds=int(seconds_per_img * (total - num_warmup) - duration) 53 | ) 54 | logger.info( 55 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 56 | idx + 1, total, seconds_per_img, str(eta) 57 | ) 58 | ) 59 | 60 | # Measure the time only for this worker (before the synchronization barrier) 61 | total_time = int(time.time() - start_time) 62 | total_time_str = str(datetime.timedelta(seconds=total_time)) 63 | # NOTE this format is parsed by grep 64 | logger.info( 65 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( 66 | total_time_str, total_time / (total - num_warmup), num_devices 67 | ) 68 | ) 69 | total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) 70 | logger.info( 71 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( 72 | total_compute_time_str, total_compute_time / (total - num_warmup), num_devices 73 | ) 74 | ) 75 | 76 | @contextmanager 77 | def inference_context(model): 78 | """ 79 | A context where the model is temporarily changed to eval mode, 80 | and restored to previous mode afterwards. 81 | 82 | Args: 83 | model: a torch Module 84 | """ 85 | training_mode = model.training 86 | model.eval() 87 | yield 88 | model.train(training_mode) -------------------------------------------------------------------------------- /utils/progress_bar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Progress Bar for Ray Actors (tqdm) 3 | ================================== 4 | 5 | Tracking progress of distributed tasks can be tricky. 
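A plain ``ray.get`` on a list of object refs blocks until every task has
finished, so nothing is reported while work is still in flight.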
6 | 7 | This script will demonstrate how to implement a simple 8 | progress bar for a Ray actor to track progress across various 9 | different distributed components. 10 | 11 | Original source: `Link `_ 12 | 13 | Setup: Dependencies 14 | ------------------- 15 | 16 | First, import some dependencies. 17 | """ 18 | 19 | # Inspiration: https://github.com/honnibal/spacy-ray/pull/ 20 | # 1/files#diff-7ede881ddc3e8456b320afb958362b2aR12-R45 21 | from asyncio import Event 22 | from typing import Tuple 23 | from time import sleep 24 | 25 | import ray 26 | # For typing purposes 27 | from ray.actor import ActorHandle 28 | from tqdm import tqdm 29 | 30 | import os 31 | 32 | ############################################################ 33 | # This is the Ray "actor" that can be called from anywhere to update 34 | # our progress. You'll be using the `update` method. Don't 35 | # instantiate this class yourself. Instead, 36 | # it's something that you'll get from a `ProgressBar`. 37 | 38 | 39 | @ray.remote 40 | class ProgressBarActor: 41 | counter: int 42 | delta: int 43 | event: Event 44 | 45 | def __init__(self) -> None: 46 | self.counter = 0 47 | self.delta = 0 48 | self.event = Event() 49 | 50 | def update(self, num_items_completed: int) -> None: 51 | """Updates the ProgressBar with the incremental 52 | number of items that were just completed. 53 | """ 54 | self.counter += num_items_completed 55 | self.delta += num_items_completed 56 | self.event.set() 57 | 58 | async def wait_for_update(self) -> Tuple[int, int]: 59 | """Blocking call. 60 | 61 | Waits until somebody calls `update`, then returns a tuple of 62 | the number of updates since the last call to 63 | `wait_for_update`, and the total number of completed items. 64 | """ 65 | await self.event.wait() 66 | self.event.clear() 67 | saved_delta = self.delta 68 | self.delta = 0 69 | return saved_delta, self.counter 70 | 71 | def get_counter(self) -> int: 72 | """ 73 | Returns the total number of complete items. 74 | """ 75 | return self.counter 76 | 77 | 78 | ###################################################################### 79 | # This is where the progress bar starts. You create one of these 80 | # on the head node, passing in the expected total number of items, 81 | # and an optional string description. 82 | # Pass along the `actor` reference to any remote task, 83 | # and if they complete ten 84 | # tasks, they'll call `actor.update.remote(10)`. 85 | 86 | # Back on the local node, once you launch your remote Ray tasks, call 87 | # `print_until_done`, which will feed everything back into a `tqdm` counter. 88 | 89 | 90 | class ProgressBar: 91 | progress_actor: ActorHandle 92 | total: int 93 | description: str 94 | pbar: tqdm 95 | 96 | def __init__(self, total: int, description: str = ""): 97 | # Ray actors don't seem to play nice with mypy, generating 98 | # a spurious warning for the following line, 99 | # which we need to suppress. The code is fine. 100 | self.progress_actor = ProgressBarActor.remote() # type: ignore 101 | self.total = total 102 | self.description = description 103 | 104 | @property 105 | def actor(self) -> ActorHandle: 106 | """Returns a reference to the remote `ProgressBarActor`. 107 | 108 | When you complete tasks, call `update` on the actor. 109 | """ 110 | return self.progress_actor 111 | 112 | def print_until_done(self) -> None: 113 | """Blocking call. 114 | 115 | Do this after starting a series of remote Ray tasks, to which you've 116 | passed the actor handle. Each of them calls `update` on the actor. 
117 | When the progress meter reaches 100%, this method returns. 118 | """ 119 | pbar = tqdm(desc=self.description, total=self.total) 120 | while True: 121 | delta, counter = ray.get(self.actor.wait_for_update.remote()) 122 | pbar.update(delta) 123 | if counter >= self.total: 124 | pbar.close() 125 | return 126 | 127 | 128 | ################################################################# 129 | # This is an example of a task that increments the progress bar. 130 | # Note that this is a Ray Task, but it could very well 131 | # be any generic Ray Actor. 132 | # 133 | @ray.remote(num_gpus=1) 134 | def sleep_then_increment(i: int, pba: ActorHandle) -> int: 135 | print('ray.get_gpu_ids():', ray.get_gpu_ids()) 136 | print('CUDA_VISIBLE_DEVICES:', os.environ['CUDA_VISIBLE_DEVICES']) 137 | sleep(i / 2.0) 138 | pba.update.remote(1) 139 | return i 140 | 141 | 142 | ################################################################# 143 | # Now you can run it and see what happens! 144 | # 145 | 146 | 147 | def run(): 148 | ray.init() 149 | num_ticks = 6 150 | pb = ProgressBar(num_ticks) 151 | actor = pb.actor 152 | # You can replace this with any arbitrary Ray task/actor. 153 | tasks_pre_launch = [ 154 | sleep_then_increment.remote(i, actor) for i in range(0, num_ticks) 155 | ] 156 | 157 | pb.print_until_done() 158 | tasks = ray.get(tasks_pre_launch) 159 | 160 | tasks == list(range(num_ticks)) 161 | num_ticks == ray.get(actor.get_counter.remote()) 162 | 163 | 164 | # run() 165 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import numpy as np 4 | import torch 5 | 6 | from detectron2.structures import Instances 7 | from bua.caffe.modeling.layers.nms import nms 8 | 9 | def save_features(output_file, features, boxes=None): 10 | if boxes is None: 11 | res = features 12 | np.save(output_file, res) 13 | else: 14 | np.savez(output_file, x=features, bbox=boxes) 15 | 16 | def mkdir(path): 17 | try: 18 | os.makedirs(path) 19 | except OSError as e: 20 | if e.errno != errno.EEXIST: 21 | raise 22 | 23 | def extractor_postprocess(boxes, scores, features_pooled, input_per_image, extractor): 24 | """ 25 | Resize the output instances. 26 | The input images are often resized when entering an object detector. 27 | As a result, we often need the outputs of the detector in a different 28 | resolution from its inputs. 29 | 30 | This function will resize the raw outputs of an R-CNN detector 31 | to produce outputs according to the desired output resolution. 32 | 33 | Args: 34 | results (Instances): the raw outputs from the detector. 35 | `results.image_size` contains the input image resolution the detector sees. 36 | This object might be modified in-place. 37 | output_height, output_width: the desired output resolution. 
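        Note: the parameters listed above do not match this function's
        signature. It actually receives the raw model outputs `boxes`, `scores`
        and `features_pooled`, together with `input_per_image` (a dict that must
        provide "im_scale") and `extractor` (the config node supplying
        MIN_BOXES, MAX_BOXES and CONF_THRESH), and it returns the tuple
        `(image_feat, image_bboxes)` rather than an Instances object.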
38 | 39 | Returns: 40 | Instances: the resized output from the model, based on the output resolution 41 | """ 42 | MIN_BOXES = extractor.MIN_BOXES 43 | MAX_BOXES = extractor.MAX_BOXES 44 | CONF_THRESH = extractor.CONF_THRESH 45 | 46 | cur_device = scores.device 47 | 48 | dets = boxes / input_per_image["im_scale"] 49 | 50 | max_conf = torch.zeros((scores.shape[0])).to(cur_device) 51 | 52 | for cls_ind in range(1, scores.shape[1]): 53 | cls_scores = scores[:, cls_ind] 54 | keep = nms(dets, cls_scores, 0.3) 55 | max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep], 56 | cls_scores[keep], 57 | max_conf[keep]) 58 | 59 | keep_boxes = torch.nonzero(max_conf >= CONF_THRESH).flatten() 60 | if len(keep_boxes) < MIN_BOXES: 61 | keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES] 62 | elif len(keep_boxes) > MAX_BOXES: 63 | keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES] 64 | image_feat = features_pooled[keep_boxes] 65 | image_bboxes = dets[keep_boxes] 66 | 67 | return image_feat, image_bboxes --------------------------------------------------------------------------------