├── .gitignore
├── .idea
├── .gitignore
├── VisualGroundingR50.iml
├── deployment.xml
├── dictionaries
│ └── liuyf.xml
├── emacs.xml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
├── sshConfigs.xml
├── vcs.xml
└── webServers.xml
├── INSTALL.md
├── README.md
├── caffe_extract
└── extract.py
├── configs
├── caffe2
│ ├── e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml
│ ├── e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml
│ ├── e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml
│ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
│ ├── e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml
│ ├── e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml
│ ├── e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml
│ ├── e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml
│ ├── e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml
│ └── e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml
├── cityscapes
│ ├── e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml
│ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
├── e2e_flickr_bottom_up_faster_rcnn_R_101_FPN_1x.yaml
├── e2e_flickr_det_faster_rcnn_R_101_FPN_1x.yaml
├── e2e_flickr_faster_rcnn_R_101_FPN_1x.yaml
├── gn_baselines
│ ├── README.md
│ ├── e2e_faster_rcnn_R_50_FPN_1x_gn.yaml
│ ├── e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
│ ├── e2e_mask_rcnn_R_50_FPN_1x_gn.yaml
│ ├── e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml
│ ├── scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml
│ ├── scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
│ ├── scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml
│ └── scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml
├── pascal_voc
│ ├── e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml
│ ├── e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml
│ └── e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml
├── quick_schedules
│ ├── e2e_faster_rcnn_R_50_C4_quick.yaml
│ ├── e2e_faster_rcnn_R_50_FPN_quick.yaml
│ ├── e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml
│ ├── e2e_keypoint_rcnn_R_50_FPN_quick.yaml
│ ├── e2e_mask_rcnn_R_50_C4_quick.yaml
│ ├── e2e_mask_rcnn_R_50_FPN_quick.yaml
│ ├── e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml
│ ├── rpn_R_50_C4_quick.yaml
│ └── rpn_R_50_FPN_quick.yaml
├── retinanet
│ ├── retinanet_R-101-FPN_1x.yaml
│ ├── retinanet_R-101-FPN_P5_1x.yaml
│ ├── retinanet_R-50-FPN_1x.yaml
│ ├── retinanet_R-50-FPN_1x_quick.yaml
│ ├── retinanet_R-50-FPN_P5_1x.yaml
│ └── retinanet_X_101_32x8d_FPN_1x.yaml
└── yaml_hist_MaskRCNN
│ ├── e2e_VRD_faster_rcnn_R_101_FPN_1x.yaml
│ ├── e2e_VRD_faster_rcnn_R_50_C4_1x.yaml
│ ├── e2e_VRD_faster_rcnn_VGG16.yaml
│ ├── e2e_faster_rcnn_R_101_FPN_1x.yaml
│ ├── e2e_faster_rcnn_R_50_C4_1x.yaml
│ ├── e2e_faster_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml
│ ├── e2e_faster_rcnn_fbnet.yaml
│ ├── e2e_faster_rcnn_fbnet_600.yaml
│ ├── e2e_faster_rcnn_fbnet_chamv1a_600.yaml
│ ├── e2e_keypoint_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_mask_rcnn_R_101_FPN_1x.yaml
│ ├── e2e_mask_rcnn_R_50_C4_1x.yaml
│ ├── e2e_mask_rcnn_R_50_FPN_1x.yaml
│ ├── e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml
│ ├── e2e_mask_rcnn_fbnet.yaml
│ ├── e2e_mask_rcnn_fbnet_600.yaml
│ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml
│ ├── e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml
│ ├── rpn_R_101_FPN_1x.yaml
│ ├── rpn_R_50_C4_1x.yaml
│ ├── rpn_R_50_FPN_1x.yaml
│ └── rpn_X_101_32x8d_FPN_1x.yaml
├── data_analysis
├── VisDetectionBbox.ipynb
├── bert.ipynb
├── data_analysis.ipynb
├── fast_rcnn
│ ├── __init__.py
│ ├── bbox_transform.py
│ ├── bbox_transform_pytorch.py
│ ├── config.py
│ ├── config2.py
│ └── nms_wrapper.py
├── flicker_recall_check.ipynb
├── flicker_recall_check.py
├── flickr_recall_check_v1.py
├── model_size.py
├── nms
│ ├── .gitignore
│ ├── __init__.py
│ ├── cpu_nms.pyx
│ ├── gpu_nms.hpp
│ ├── gpu_nms.pyx
│ ├── nms_kernel.cu
│ └── py_cpu_nms.py
├── pretrain_weight
│ ├── embed_ba_0.npy
│ ├── embed_ba_1.npy
│ ├── lstm1_0.npy
│ ├── lstm1_1.npy
│ ├── lstm1_2.npy
│ ├── query_bbox_pred_0.npy
│ ├── query_bbox_pred_1.npy
│ ├── query_score_fc_0.npy
│ ├── query_score_fc_1.npy
│ ├── qv_fc1_0.npy
│ └── qv_fc1_1.npy
├── sng_parser
│ ├── __init__.py
│ ├── _data
│ │ ├── phrasal-preps.txt
│ │ ├── phrasal-verbs.txt
│ │ └── scene-nouns.txt
│ ├── backends
│ │ ├── __init__.py
│ │ ├── backend.py
│ │ └── spacy_parser.py
│ ├── database.py
│ ├── parser.py
│ └── utils.py
├── tools
│ ├── .gitignore
│ ├── get-phrasal-verbs.sh
│ ├── get-scene-nouns.sh
│ ├── parse-eos.py
│ └── process-scene-nouns.py
├── untitled.txt
├── upper_bound.ipynb
└── utils
│ ├── __init__.py
│ ├── blob.py
│ ├── boxes-Copy1.py
│ ├── boxes.py
│ ├── cbam.py
│ ├── collections.py
│ ├── colormap.py
│ ├── cython_bbox.c
│ ├── cython_bbox.pyx
│ ├── cython_nms.c
│ ├── cython_nms.pyx
│ ├── detectron_weight_helper.py
│ ├── env.py
│ ├── fpn.py
│ ├── image.py
│ ├── io.py
│ ├── keypoints.py
│ ├── logging.py
│ ├── misc.py
│ ├── net.py
│ ├── resnet_weights_helper.py
│ ├── segms.py
│ ├── subprocess.py
│ ├── timer.py
│ ├── training_stats.py
│ └── vis.py
├── demo
├── README.md
├── demo_e2e_mask_rcnn_R_50_FPN_1x.png
├── demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
├── predictor.py
└── webcam.py
├── killpy.sh
├── maskrcnn_benchmark
├── __init__.py
├── config
│ ├── __init__.py
│ ├── defaults.py
│ └── paths_catalog.py
├── csrc
│ ├── ROIAlign.h
│ ├── ROIPool.h
│ ├── SigmoidFocalLoss.h
│ ├── cpu
│ │ ├── ROIAlign_cpu.cpp
│ │ ├── nms_cpu.cpp
│ │ └── vision.h
│ ├── cuda
│ │ ├── ROIAlign_cuda.cu
│ │ ├── ROIPool_cuda.cu
│ │ ├── SigmoidFocalLoss_cuda.cu
│ │ ├── nms.cu
│ │ └── vision.h
│ ├── nms.h
│ └── vision.cpp
├── data
│ ├── README.md
│ ├── __init__.py
│ ├── build.py
│ ├── collate_batch.py
│ ├── datasets
│ │ ├── __init__.py
│ │ ├── coco.py
│ │ ├── concat_dataset.py
│ │ ├── evaluation
│ │ │ ├── VG
│ │ │ │ ├── __init__.py
│ │ │ │ └── vg_eval.py
│ │ │ ├── __init__.py
│ │ │ ├── coco
│ │ │ │ ├── __init__.py
│ │ │ │ └── coco_eval.py
│ │ │ ├── flickr
│ │ │ │ ├── __init__.py
│ │ │ │ └── flickr_eval.py
│ │ │ └── voc
│ │ │ │ ├── __init__.py
│ │ │ │ └── voc_eval.py
│ │ ├── flickr.py
│ │ ├── list_dataset.py
│ │ ├── visual_genome.py
│ │ └── voc.py
│ ├── samplers
│ │ ├── __init__.py
│ │ ├── distributed.py
│ │ ├── grouped_batch_sampler.py
│ │ └── iteration_based_batch_sampler.py
│ └── transforms
│ │ ├── __init__.py
│ │ ├── build.py
│ │ ├── transforms.py
│ │ ├── transforms_vg.py
│ │ └── transforms_vg_bp.py
├── engine
│ ├── __init__.py
│ ├── inference.py
│ ├── trainer.py
│ └── trainer_bak.py
├── layers
│ ├── __init__.py
│ ├── _utils.py
│ ├── batch_norm.py
│ ├── generate_dense_relation.py
│ ├── generate_sample_relation.py
│ ├── generate_union_region.py
│ ├── misc.py
│ ├── nms.py
│ ├── numerical_stability_softmax.py
│ ├── roi_align.py
│ ├── roi_pool.py
│ ├── sigmoid_focal_loss.py
│ ├── smooth_l1_loss.py
│ └── spatial_coordinate.py
├── modeling
│ ├── __init__.py
│ ├── backbone
│ │ ├── __init__.py
│ │ ├── backbone.py
│ │ ├── bottom_up_resnet.py
│ │ ├── fbnet.py
│ │ ├── fbnet_builder.py
│ │ ├── fbnet_modeldef.py
│ │ ├── fpn.py
│ │ ├── resnet.py
│ │ └── vgg16.py
│ ├── balanced_positive_negative_sampler.py
│ ├── box_coder.py
│ ├── detector
│ │ ├── __init__.py
│ │ ├── detectors.py
│ │ ├── generalized_rcnn.py
│ │ └── generalized_rcnn_det.py
│ ├── make_layers.py
│ ├── matcher.py
│ ├── poolers.py
│ ├── registry.py
│ ├── relation
│ │ ├── __init__.py
│ │ ├── feature_refine.py
│ │ ├── loss.py
│ │ └── relation_detection.py
│ ├── roi_heads
│ │ ├── __init__.py
│ │ ├── box_head
│ │ │ ├── __init__.py
│ │ │ ├── box_head.py
│ │ │ ├── inference.py
│ │ │ ├── loss.py
│ │ │ ├── roi_box_feature_extractors.py
│ │ │ └── roi_box_predictors.py
│ │ ├── keypoint_head
│ │ │ ├── __init__.py
│ │ │ ├── inference.py
│ │ │ ├── keypoint_head.py
│ │ │ ├── loss.py
│ │ │ ├── roi_keypoint_feature_extractors.py
│ │ │ └── roi_keypoint_predictors.py
│ │ ├── mask_head
│ │ │ ├── __init__.py
│ │ │ ├── inference.py
│ │ │ ├── loss.py
│ │ │ ├── mask_head.py
│ │ │ ├── roi_mask_feature_extractors.py
│ │ │ └── roi_mask_predictors.py
│ │ └── roi_heads.py
│ ├── rpn
│ │ ├── __init__.py
│ │ ├── anchor_generator.py
│ │ ├── inference.py
│ │ ├── loss.py
│ │ ├── retinanet
│ │ │ ├── __init__.py
│ │ │ ├── inference.py
│ │ │ ├── loss.py
│ │ │ └── retinanet.py
│ │ ├── rpn.py
│ │ └── utils.py
│ ├── utils.py
│ └── vg
│ │ ├── FeatureRefinement.py
│ │ ├── VisualGraph.py
│ │ ├── VisualGraphUpdate.py
│ │ ├── __init__.py
│ │ ├── loss.py
│ │ ├── phrase_embedding.py
│ │ ├── vg_detection.py
│ │ └── vg_detection_2stage_sep_rel_const.py
├── solver
│ ├── __init__.py
│ ├── build.py
│ └── lr_scheduler.py
├── structures
│ ├── __init__.py
│ ├── bounding_box.py
│ ├── boxlist_ops.py
│ ├── image_list.py
│ ├── keypoint.py
│ ├── relation_triplet.py
│ └── segmentation_mask.py
└── utils
│ ├── README.md
│ ├── __init__.py
│ ├── c2_model_loading.py
│ ├── checkpoint.py
│ ├── collect_env.py
│ ├── comm.py
│ ├── cv2_util.py
│ ├── debugger.py
│ ├── direction_word_dict.py
│ ├── env.py
│ ├── imports.py
│ ├── logger.py
│ ├── metric_logger.py
│ ├── miscellaneous.py
│ ├── model_serialization.py
│ ├── model_zoo.py
│ ├── ops.py
│ ├── registry.py
│ └── timer.py
├── requirements.txt
├── scripts
└── train.sh
├── setup.py
├── skipthoughts
├── __init__.py
├── dropout.py
├── gru.py
├── skipthoughts.py
└── version.py
├── tests
├── checkpoint.py
├── env_tests
│ └── env.py
├── test_backbones.py
├── test_box_coder.py
├── test_configs.py
├── test_data_samplers.py
├── test_detectors.py
├── test_fbnet.py
├── test_feature_extractors.py
├── test_metric_logger.py
├── test_nms.py
├── test_predictors.py
├── test_rpn_heads.py
└── utils.py
└── tools
├── cityscapes
├── convert_cityscapes_to_coco.py
└── instances2dict_with_polygons.py
├── test_flickr.py
├── test_net.py
└── train_net.py
/.gitignore:
--------------------------------------------------------------------------------
1 | maskrcnn_benchmark/modeling/vg_bak
2 | maskrcnn_benchmark/structure_prediction
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/VisualGroundingR50.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/deployment.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/.idea/dictionaries/liuyf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | anno
5 | atte
6 | atten
7 | avgpool
8 | baidu
9 | bphr
10 | bvis
11 | conv
12 | datasets
13 | downsample
14 | flickr
15 | inds
16 | intra
17 | keypoint
18 | keypoints
19 | logits
20 | mscoco
21 | msra
22 | noent
23 | phrsbj
24 | phrtnobj
25 | phrtnsbj
26 | pooler
27 | precomp
28 | precompute
29 | pretrained
30 | rcnn
31 | relu
32 | resnet
33 | segm
34 | softmax
35 | topk
36 | xyxy
37 |
38 |
39 |
--------------------------------------------------------------------------------
/.idea/emacs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/sshConfigs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/webServers.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
22 |
23 |
--------------------------------------------------------------------------------
/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 |
3 | ### Requirements:
4 | - PyTorch 1.0 from a nightly release. It **will not** work with 1.0 nor 1.0.1. Installation instructions can be found in https://pytorch.org/get-started/locally/
5 | - torchvision from master
6 | - cocoapi
7 | - yacs
8 | - matplotlib
9 | - GCC >= 4.9
10 | - (optional) OpenCV for the webcam demo
11 |
12 |
13 | ### Option 1: Step-by-step installation
14 |
15 | ```bash
16 |
17 | # install pycocotools
18 | cd $INSTALL_DIR
19 | git clone https://github.com/cocodataset/cocoapi.git
20 | cd cocoapi/PythonAPI
21 | python setup.py build_ext install
22 |
23 |
24 | # install allennlp
25 | pip install allennlp
26 |
27 | # install PyTorch Detection
28 | cd $INSTALL_DIR
29 | git clone https://github.com/facebookresearch/maskrcnn-benchmark.git
30 | cd maskrcnn-benchmark
31 |
32 |
33 | # the following will install the lib with
34 | # symbolic links, so that you can modify
35 | # the files if you want and won't need to
36 | # re-build it
37 | python setup.py build develop
38 |
39 | # NOTE: allennlp must also be installed (see `pip install allennlp` above)
40 |
41 |
42 | ```
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LCMCG.Pytorch
2 |
3 | This repo is the official implementation of ["Learning Cross-Modal Context Graph for Visual Grounding"](https://arxiv.org/pdf/1911.09042.pdf) (AAAI2020)
4 | ## Installation
5 | Check [INSTALL.md](INSTALL.md) for installation instructions.
6 |
7 | ## pre-requirements
8 | 1. Download the flickr30k dataset from this [link](http://bryanplummer.com/Flickr30kEntities/)
9 | 2. Pre-computed bounding boxes are extracted by using [FasterRCNN](https://github.com/facebookresearch/maskrcnn-benchmark) \
10 | We use the config "e2e_faster_rcnn_R_50_C4_1x.yaml" to train the object detector on MSCOCO dataset and extract the feature map at C4 layer.
11 | 3. Language graphs are extracted using [SceneGraphParser](https://github.com/vacancy/SceneGraphParser). The resulting sg_anno.json has been uploaded to Google Drive and is available for download.
12 | 4. Some pre-processing data, like sentence annotations, box annotations.
13 | 5. Create the './flickr_datasets' folder and put all annotations in it. I highly recommend checking all
14 | the data paths used in this project. Refer to the two files "maskrcnn_benchmark/config/paths_catalog.py" and "maskrcnn_benchmark/data/datasets/flickr.py" for details.
15 |
16 | The pretrained object detector weights and annotations can be found here at baidu-disk (link:https://pan.baidu.com/s/1bYbGUsHcZJQHele87MzcMg password:5ie6) or [google drive](https://drive.google.com/drive/folders/1dRp61muWDNuFG-V9KMKcZ26zR8f2ujci?usp=sharing)
17 |
18 |
19 | ## training
20 |
21 | 1. You can train our model by running the scripts
22 | ```bash
23 | sh scripts/train.sh
24 | ```
25 |
26 | ""
27 |
28 | ## citation
29 | If you are interested in our paper, please cite it.
30 | ```bibtex
31 | @inproceedings{liu2019learning,
32 | title={Learning Cross-modal Context Graph for Visual Grounding},
33 | author={Liu, Yongfei and Wan, Bo and Zhu, Xiaodan and He, Xuming},
34 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
35 | year={2020}
36 | }
37 | ```
38 |
--------------------------------------------------------------------------------
/caffe_extract/extract.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3.6
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2019-06-03 16:46
4 | # @Author : Yongfei Liu
5 | # @Email : liuyf3@shanghaitech.edu.cn
6 |
7 | import caffe
8 | import numpy as np
9 | import argparse
10 | import os
11 |
def extract_caffe_model(model, weights, output_path):
    """Extract a caffe model's parameters to numpy arrays and write them to files.

    The network is built from ``model`` with ``caffe.TEST``, the trained
    weights are copied in from ``weights``, and every parameter array of
    every entry in ``net.params`` is saved with ``np.save`` under
    ``<output_path>/<name>_<index>`` (``np.save`` appends the ``.npy``
    suffix), where ``index`` counts the arrays of that entry from 0.

    Args:
        model: path of '.prototxt'
        weights: path of '.caffemodel'
        output_path: output directory of numpy params; created if missing
    Returns:
        None
    """
    net = caffe.Net(model, caffe.TEST)
    net.copy_from(weights)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_path, exist_ok=True)

    for name, params in net.params.items():
        print('convert layer: ' + name)

        # One file per parameter array, numbered in iteration order.
        for index, param in enumerate(params):
            target = os.path.join(output_path, '{}_{}'.format(name, index))
            np.save(target, param.data)
35 |
if __name__ == '__main__':
    # Command-line entry point: collect the three paths and run the extraction.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--model", "model prototxt path .prototxt"),
        ("--weights", "caffe model weights path .caffemodel"),
        ("--output", "output path"),
    ):
        cli.add_argument(flag, help=description)
    options = cli.parse_args()
    extract_caffe_model(options.model, options.weights, options.output)
43 |
44 |
45 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857890/e2e_faster_rcnn_R-101-FPN_1x"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 | TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 | SIZE_DIVISIBILITY: 32
27 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857197/e2e_faster_rcnn_R-50-C4_1x"
4 | DATASETS:
5 | TEST: ("coco_2014_minival",)
6 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35857345/e2e_faster_rcnn_R-50-FPN_1x"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 | TEST: ("coco_2014_minival",)
25 | DATALOADER:
26 | SIZE_DIVISIBILITY: 32
27 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_faster_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | STRIDE_IN_1X1: False
9 | NUM_GROUPS: 32
10 | WIDTH_PER_GROUP: 8
11 | RPN:
12 | USE_FPN: True
13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 | PRE_NMS_TOP_N_TRAIN: 2000
15 | PRE_NMS_TOP_N_TEST: 1000
16 | POST_NMS_TOP_N_TEST: 1000
17 | FPN_POST_NMS_TOP_N_TEST: 1000
18 | ROI_HEADS:
19 | USE_FPN: True
20 | ROI_BOX_HEAD:
21 | POOLER_RESOLUTION: 7
22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 | POOLER_SAMPLING_RATIO: 2
24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 | PREDICTOR: "FPNPredictor"
26 | DATASETS:
27 | TEST: ("coco_2014_minival",)
28 | DATALOADER:
29 | SIZE_DIVISIBILITY: 32
30 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_keypoint_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37697547/e2e_keypoint_rcnn_R-50-FPN_1x"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | NUM_CLASSES: 2
24 | ROI_KEYPOINT_HEAD:
25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 | PREDICTOR: "KeypointRCNNPredictor"
28 | POOLER_RESOLUTION: 14
29 | POOLER_SAMPLING_RATIO: 2
30 | RESOLUTION: 56
31 | SHARE_BOX_FEATURE_EXTRACTOR: False
32 | KEYPOINT_ON: True
33 | DATASETS:
34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 | TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 | SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 | BASE_LR: 0.02
42 | WEIGHT_DECAY: 0.0001
43 | STEPS: (60000, 80000)
44 | MAX_ITER: 90000
45 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35861795/e2e_mask_rcnn_R-101-FPN_1x"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | ROI_MASK_HEAD:
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 | PREDICTOR: "MaskRCNNC4Predictor"
27 | POOLER_RESOLUTION: 14
28 | POOLER_SAMPLING_RATIO: 2
29 | RESOLUTION: 28
30 | SHARE_BOX_FEATURE_EXTRACTOR: False
31 | MASK_ON: True
32 | DATASETS:
33 | TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 | SIZE_DIVISIBILITY: 32
36 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_C4_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858791/e2e_mask_rcnn_R-50-C4_1x"
4 | ROI_MASK_HEAD:
5 | PREDICTOR: "MaskRCNNC4Predictor"
6 | SHARE_BOX_FEATURE_EXTRACTOR: True
7 | MASK_ON: True
8 | DATASETS:
9 | TEST: ("coco_2014_minival",)
10 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/35858933/e2e_mask_rcnn_R-50-FPN_1x"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | ROI_MASK_HEAD:
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 | PREDICTOR: "MaskRCNNC4Predictor"
27 | POOLER_RESOLUTION: 14
28 | POOLER_SAMPLING_RATIO: 2
29 | RESOLUTION: 28
30 | SHARE_BOX_FEATURE_EXTRACTOR: False
31 | MASK_ON: True
32 | DATASETS:
33 | TEST: ("coco_2014_minival",)
34 | DATALOADER:
35 | SIZE_DIVISIBILITY: 32
36 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/37129812/e2e_mask_rcnn_X-152-32x8d-FPN-IN5k_1.44x"
4 | BACKBONE:
5 | CONV_BODY: "R-152-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | STRIDE_IN_1X1: False
9 | NUM_GROUPS: 32
10 | WIDTH_PER_GROUP: 8
11 | RPN:
12 | USE_FPN: True
13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 | PRE_NMS_TOP_N_TRAIN: 2000
15 | PRE_NMS_TOP_N_TEST: 1000
16 | POST_NMS_TOP_N_TEST: 1000
17 | FPN_POST_NMS_TOP_N_TEST: 1000
18 | ROI_HEADS:
19 | USE_FPN: True
20 | ROI_BOX_HEAD:
21 | POOLER_RESOLUTION: 7
22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 | POOLER_SAMPLING_RATIO: 2
24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 | PREDICTOR: "FPNPredictor"
26 | ROI_MASK_HEAD:
27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 | PREDICTOR: "MaskRCNNC4Predictor"
30 | POOLER_RESOLUTION: 14
31 | POOLER_SAMPLING_RATIO: 2
32 | RESOLUTION: 28
33 | SHARE_BOX_FEATURE_EXTRACTOR: False
34 | MASK_ON: True
35 | DATASETS:
36 | TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 | SIZE_DIVISIBILITY: 32
39 |
--------------------------------------------------------------------------------
/configs/caffe2/e2e_mask_rcnn_X_101_32x8d_FPN_1x_caffe2.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://Caffe2Detectron/COCO/36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | STRIDE_IN_1X1: False
9 | NUM_GROUPS: 32
10 | WIDTH_PER_GROUP: 8
11 | RPN:
12 | USE_FPN: True
13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 | PRE_NMS_TOP_N_TRAIN: 2000
15 | PRE_NMS_TOP_N_TEST: 1000
16 | POST_NMS_TOP_N_TEST: 1000
17 | FPN_POST_NMS_TOP_N_TEST: 1000
18 | ROI_HEADS:
19 | USE_FPN: True
20 | ROI_BOX_HEAD:
21 | POOLER_RESOLUTION: 7
22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 | POOLER_SAMPLING_RATIO: 2
24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 | PREDICTOR: "FPNPredictor"
26 | ROI_MASK_HEAD:
27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 | PREDICTOR: "MaskRCNNC4Predictor"
30 | POOLER_RESOLUTION: 14
31 | POOLER_SAMPLING_RATIO: 2
32 | RESOLUTION: 28
33 | SHARE_BOX_FEATURE_EXTRACTOR: False
34 | MASK_ON: True
35 | DATASETS:
36 | TEST: ("coco_2014_minival",)
37 | DATALOADER:
38 | SIZE_DIVISIBILITY: 32
39 |
--------------------------------------------------------------------------------
/configs/cityscapes/e2e_faster_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | NUM_CLASSES: 9
24 | DATASETS:
25 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
26 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
27 | DATALOADER:
28 | SIZE_DIVISIBILITY: 32
29 | SOLVER:
30 | BASE_LR: 0.01
31 | WEIGHT_DECAY: 0.0001
32 | STEPS: (18000,)
33 | MAX_ITER: 24000
34 |
--------------------------------------------------------------------------------
/configs/cityscapes/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | NUM_CLASSES: 9
24 | ROI_MASK_HEAD:
25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 | PREDICTOR: "MaskRCNNC4Predictor"
28 | POOLER_RESOLUTION: 14
29 | POOLER_SAMPLING_RATIO: 2
30 | RESOLUTION: 28
31 | SHARE_BOX_FEATURE_EXTRACTOR: False
32 | MASK_ON: True
33 | DATASETS:
34 | TRAIN: ("cityscapes_fine_instanceonly_seg_train_cocostyle",)
35 | TEST: ("cityscapes_fine_instanceonly_seg_val_cocostyle",)
36 | DATALOADER:
37 | SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 | BASE_LR: 0.01
40 | WEIGHT_DECAY: 0.0001
41 | STEPS: (18000,)
42 | MAX_ITER: 24000
43 |
--------------------------------------------------------------------------------
/configs/e2e_flickr_det_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNNDet"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | NMS: 0.4
18 | DETECTIONS_PER_IMG: 100
19 |
20 | VG_ON: True
21 | RELATION_ON: False
22 |
23 | VG:
24 | FIXED_BACKBONE: True
25 | FIXED_RPN: True
26 | FIXED_ROI_HEAD: True
27 | FG_IOU_THRESHOLD: 0.5
28 | BG_IOU_THRESHOLD: 0.2
29 | VOCAB_FILE: "datasets/skip-thoughts/vocab.json"
30 | SKIP_THROUGH_DATA_DIR: "datasets/skip-thoughts"
31 |
32 | ROI_BOX_HEAD:
33 | POOLER_RESOLUTION: 7
34 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
35 | POOLER_SAMPLING_RATIO: 2
36 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
37 | PREDICTOR: "FPNPredictor"
38 | NUM_CLASSES: 151
39 |
40 |
41 | DATASETS:
42 | TRAIN: ("flickr_train" ,)
43 | TEST: ("flickr_val" ,)
44 | DATALOADER:
45 | SIZE_DIVISIBILITY: 32
46 | NUM_WORKERS: 2
47 | SOLVER:
48 | BASE_LR: 0.05
49 | WEIGHT_DECAY: 0.0001
50 | STEPS: (40000, )
51 | CHECKPOINT_PERIOD: 4000
52 | START_SAVE_CHECKPOINT: 4000
53 | MAX_ITER: 80000
54 |
--------------------------------------------------------------------------------
/configs/e2e_flickr_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | NMS: 0.4
18 | DETECTIONS_PER_IMG: 100
19 |
20 | VG_ON: True
21 | RELATION_ON: False
22 |
23 | VG:
24 | FIXED_BACKBONE: True
25 | FIXED_RESNET: True
26 | FIXED_RPN: True
27 | FIXED_ROI_HEAD: True
28 | FG_IOU_THRESHOLD: 0.5
29 | BG_IOU_THRESHOLD: 0.2
30 | VOCAB_FILE: "datasets/skip-thoughts/vocab.json"
31 | SKIP_THROUGH_DATA_DIR: "datasets/skip-thoughts"
32 | RESNET_PARAMS_FILE: './outputs/bottom-up-pretrained/bottomup_pretrained_10_100.pth'
33 | FIXED_EMBEDDING: False
34 |
35 | ROI_BOX_HEAD:
36 | POOLER_RESOLUTION: 7
37 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
38 | POOLER_SAMPLING_RATIO: 2
39 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
40 | PREDICTOR: "FPNPredictor"
41 | NUM_CLASSES: 151
42 |
43 |
44 | DATASETS:
45 | TRAIN: ("flickr_train" ,)
46 | TEST: ("flickr_val" ,)
47 | DATALOADER:
48 | SIZE_DIVISIBILITY: 32
49 | NUM_WORKERS: 2
50 | SOLVER:
51 | BASE_LR: 0.05
52 | WEIGHT_DECAY: 0.0001
53 | STEPS: (60000, 100000, )
54 | CHECKPOINT_PERIOD: 5000
55 | START_SAVE_CHECKPOINT: 5000
56 | MAX_ITER: 120001
57 |
--------------------------------------------------------------------------------
/configs/gn_baselines/README.md:
--------------------------------------------------------------------------------
1 | ### Group Normalization
2 | 1. [Group Normalization](https://arxiv.org/abs/1803.08494)
3 | 2. [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883)
4 | 3. [Official code](https://github.com/facebookresearch/Detectron/blob/master/projects/GN/README.md)
5 |
6 |
7 | ### Performance
8 | | case | Type | lr schd | im/gpu | bbox AP | mask AP |
9 | |----------------------------|:------------:|:---------:|:-------:|:-------:|:-------:|
10 | | R-50-FPN, GN (paper) | finetune | 2x | 2 | 40.3 | 35.7 |
11 | | R-50-FPN, GN (this implementation) | finetune | 2x | 2 | 40.2 | 36.0 |
12 | | R-50-FPN, GN (paper) | from scratch | 3x | 2 | 39.5 | 35.2 |
13 | | R-50-FPN, GN (this implementation) | from scratch | 3x | 2 | 38.9 | 35.1 |
14 |
--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | RESNETS: # use GN for backbone
12 | BACKBONE_OUT_CHANNELS: 256
13 | STRIDE_IN_1X1: False
14 | TRANS_FUNC: "BottleneckWithGN"
15 | STEM_FUNC: "StemWithGN"
16 | FPN:
17 | USE_GN: True # use GN for FPN
18 | RPN:
19 | USE_FPN: True
20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 | PRE_NMS_TOP_N_TRAIN: 2000
22 | PRE_NMS_TOP_N_TEST: 1000
23 | POST_NMS_TOP_N_TEST: 1000
24 | FPN_POST_NMS_TOP_N_TEST: 1000
25 | ROI_HEADS:
26 | USE_FPN: True
27 | BATCH_SIZE_PER_IMAGE: 512
28 | POSITIVE_FRACTION: 0.25
29 | ROI_BOX_HEAD:
30 | USE_GN: True # use GN for bbox head
31 | POOLER_RESOLUTION: 7
32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 | POOLER_SAMPLING_RATIO: 2
34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 | PREDICTOR: "FPNPredictor"
36 | DATASETS:
37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 | TEST: ("coco_2014_minival",)
39 | DATALOADER:
40 | SIZE_DIVISIBILITY: 32
41 | SOLVER:
42 | # Assume 8 gpus
43 | BASE_LR: 0.02
44 | WEIGHT_DECAY: 0.0001
45 | STEPS: (60000, 80000)
46 | MAX_ITER: 90000
47 | IMS_PER_BATCH: 16
48 | TEST:
49 | IMS_PER_BATCH: 8
50 |
--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_faster_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | RESNETS: # use GN for backbone
12 | BACKBONE_OUT_CHANNELS: 256
13 | STRIDE_IN_1X1: False
14 | TRANS_FUNC: "BottleneckWithGN"
15 | STEM_FUNC: "StemWithGN"
16 | FPN:
17 | USE_GN: True # use GN for FPN
18 | RPN:
19 | USE_FPN: True
20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 | PRE_NMS_TOP_N_TRAIN: 2000
22 | PRE_NMS_TOP_N_TEST: 1000
23 | POST_NMS_TOP_N_TEST: 1000
24 | FPN_POST_NMS_TOP_N_TEST: 1000
25 | ROI_HEADS:
26 | USE_FPN: True
27 | BATCH_SIZE_PER_IMAGE: 512
28 | POSITIVE_FRACTION: 0.25
29 | ROI_BOX_HEAD:
30 | USE_GN: True # use GN for bbox head
31 | POOLER_RESOLUTION: 7
32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 | POOLER_SAMPLING_RATIO: 2
34 | CONV_HEAD_DIM: 256
35 | NUM_STACKED_CONVS: 4
36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 | PREDICTOR: "FPNPredictor"
38 | DATASETS:
39 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
40 | TEST: ("coco_2014_minival",)
41 | DATALOADER:
42 | SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 | # Assume 8 gpus
45 | BASE_LR: 0.02
46 | WEIGHT_DECAY: 0.0001
47 | STEPS: (60000, 80000)
48 | MAX_ITER: 90000
49 | IMS_PER_BATCH: 16
50 | TEST:
51 | IMS_PER_BATCH: 8
52 |
--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_1x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | RESNETS: # use GN for backbone
12 | BACKBONE_OUT_CHANNELS: 256
13 | STRIDE_IN_1X1: False
14 | TRANS_FUNC: "BottleneckWithGN"
15 | STEM_FUNC: "StemWithGN"
16 | FPN:
17 | USE_GN: True # use GN for FPN
18 | RPN:
19 | USE_FPN: True
20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 | PRE_NMS_TOP_N_TRAIN: 2000
22 | PRE_NMS_TOP_N_TEST: 1000
23 | POST_NMS_TOP_N_TEST: 1000
24 | FPN_POST_NMS_TOP_N_TEST: 1000
25 | ROI_HEADS:
26 | USE_FPN: True
27 | BATCH_SIZE_PER_IMAGE: 512
28 | POSITIVE_FRACTION: 0.25
29 | ROI_BOX_HEAD:
30 | USE_GN: True # use GN for bbox head
31 | POOLER_RESOLUTION: 7
32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 | POOLER_SAMPLING_RATIO: 2
34 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
35 | PREDICTOR: "FPNPredictor"
36 | ROI_MASK_HEAD:
37 | USE_GN: True # use GN for mask head
38 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
39 | CONV_LAYERS: (256, 256, 256, 256)
40 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
41 | PREDICTOR: "MaskRCNNC4Predictor"
42 | POOLER_RESOLUTION: 14
43 | POOLER_SAMPLING_RATIO: 2
44 | RESOLUTION: 28
45 | SHARE_BOX_FEATURE_EXTRACTOR: False
46 | MASK_ON: True
47 | DATASETS:
48 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
49 | TEST: ("coco_2014_minival",)
50 | DATALOADER:
51 | SIZE_DIVISIBILITY: 32
52 | SOLVER:
53 | # Assume 8 gpus
54 | BASE_LR: 0.02
55 | WEIGHT_DECAY: 0.0001
56 | STEPS: (60000, 80000)
57 | MAX_ITER: 90000
58 | IMS_PER_BATCH: 16
59 | TEST:
60 | IMS_PER_BATCH: 8
61 |
--------------------------------------------------------------------------------
/configs/gn_baselines/e2e_mask_rcnn_R_50_FPN_Xconv1fc_1x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50-GN"
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | RESNETS: # use GN for backbone
12 | BACKBONE_OUT_CHANNELS: 256
13 | STRIDE_IN_1X1: False
14 | TRANS_FUNC: "BottleneckWithGN"
15 | STEM_FUNC: "StemWithGN"
16 | FPN:
17 | USE_GN: True # use GN for FPN
18 | RPN:
19 | USE_FPN: True
20 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
21 | PRE_NMS_TOP_N_TRAIN: 2000
22 | PRE_NMS_TOP_N_TEST: 1000
23 | POST_NMS_TOP_N_TEST: 1000
24 | FPN_POST_NMS_TOP_N_TEST: 1000
25 | ROI_HEADS:
26 | USE_FPN: True
27 | BATCH_SIZE_PER_IMAGE: 512
28 | POSITIVE_FRACTION: 0.25
29 | ROI_BOX_HEAD:
30 | USE_GN: True # use GN for bbox head
31 | POOLER_RESOLUTION: 7
32 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
33 | POOLER_SAMPLING_RATIO: 2
34 | CONV_HEAD_DIM: 256
35 | NUM_STACKED_CONVS: 4
36 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
37 | PREDICTOR: "FPNPredictor"
38 | ROI_MASK_HEAD:
39 | USE_GN: True # use GN for mask head
40 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
41 | CONV_LAYERS: (256, 256, 256, 256)
42 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
43 | PREDICTOR: "MaskRCNNC4Predictor"
44 | POOLER_RESOLUTION: 14
45 | POOLER_SAMPLING_RATIO: 2
46 | RESOLUTION: 28
47 | SHARE_BOX_FEATURE_EXTRACTOR: False
48 | MASK_ON: True
49 | DATASETS:
50 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
51 | TEST: ("coco_2014_minival",)
52 | DATALOADER:
53 | SIZE_DIVISIBILITY: 32
54 | SOLVER:
55 | # Assume 8 gpus
56 | BASE_LR: 0.02
57 | WEIGHT_DECAY: 0.0001
58 | STEPS: (60000, 80000)
59 | MAX_ITER: 90000
60 | IMS_PER_BATCH: 16
61 | TEST:
62 | IMS_PER_BATCH: 8
63 |
--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "" # no pretrained model
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | FREEZE_CONV_BODY_AT: 0 # nothing frozen: train all layers from scratch
12 | RESNETS: # use GN for backbone
13 | BACKBONE_OUT_CHANNELS: 256
14 | STRIDE_IN_1X1: False
15 | TRANS_FUNC: "BottleneckWithGN"
16 | STEM_FUNC: "StemWithGN"
17 | FPN:
18 | USE_GN: True # use GN for FPN
19 | RPN:
20 | USE_FPN: True
21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 | PRE_NMS_TOP_N_TRAIN: 2000
23 | PRE_NMS_TOP_N_TEST: 1000
24 | POST_NMS_TOP_N_TEST: 1000
25 | FPN_POST_NMS_TOP_N_TEST: 1000
26 | ROI_HEADS:
27 | USE_FPN: True
28 | BATCH_SIZE_PER_IMAGE: 512
29 | POSITIVE_FRACTION: 0.25
30 | ROI_BOX_HEAD:
31 | USE_GN: True # use GN for bbox head
32 | POOLER_RESOLUTION: 7
33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 | POOLER_SAMPLING_RATIO: 2
35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 | PREDICTOR: "FPNPredictor"
37 | DATASETS:
38 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
39 | TEST: ("coco_2014_minival",)
40 | DATALOADER:
41 | SIZE_DIVISIBILITY: 32
42 | SOLVER:
43 | # Assume 8 gpus
44 | BASE_LR: 0.02
45 | WEIGHT_DECAY: 0.0001
46 | STEPS: (210000, 250000)
47 | MAX_ITER: 270000
48 | IMS_PER_BATCH: 16
49 | TEST:
50 | IMS_PER_BATCH: 8
51 |
--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_faster_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "" # no pretrained model
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | FREEZE_CONV_BODY_AT: 0 # nothing frozen: train all layers from scratch
12 | RESNETS: # use GN for backbone
13 | BACKBONE_OUT_CHANNELS: 256
14 | STRIDE_IN_1X1: False
15 | TRANS_FUNC: "BottleneckWithGN"
16 | STEM_FUNC: "StemWithGN"
17 | FPN:
18 | USE_GN: True # use GN for FPN
19 | RPN:
20 | USE_FPN: True
21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 | PRE_NMS_TOP_N_TRAIN: 2000
23 | PRE_NMS_TOP_N_TEST: 1000
24 | POST_NMS_TOP_N_TEST: 1000
25 | FPN_POST_NMS_TOP_N_TEST: 1000
26 | ROI_HEADS:
27 | USE_FPN: True
28 | BATCH_SIZE_PER_IMAGE: 512
29 | POSITIVE_FRACTION: 0.25
30 | ROI_BOX_HEAD:
31 | USE_GN: True # use GN for bbox head
32 | POOLER_RESOLUTION: 7
33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 | POOLER_SAMPLING_RATIO: 2
35 | CONV_HEAD_DIM: 256
36 | NUM_STACKED_CONVS: 4
37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 | PREDICTOR: "FPNPredictor"
39 | DATASETS:
40 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
41 | TEST: ("coco_2014_minival",)
42 | DATALOADER:
43 | SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 | # Assume 8 gpus
46 | BASE_LR: 0.02
47 | WEIGHT_DECAY: 0.0001
48 | STEPS: (210000, 250000)
49 | MAX_ITER: 270000
50 | IMS_PER_BATCH: 16
51 | TEST:
52 | IMS_PER_BATCH: 8
53 |
--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "" # no pretrained model
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | FREEZE_CONV_BODY_AT: 0 # nothing frozen: train all layers from scratch
12 | RESNETS: # use GN for backbone
13 | BACKBONE_OUT_CHANNELS: 256
14 | STRIDE_IN_1X1: False
15 | TRANS_FUNC: "BottleneckWithGN"
16 | STEM_FUNC: "StemWithGN"
17 | FPN:
18 | USE_GN: True # use GN for FPN
19 | RPN:
20 | USE_FPN: True
21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 | PRE_NMS_TOP_N_TRAIN: 2000
23 | PRE_NMS_TOP_N_TEST: 1000
24 | POST_NMS_TOP_N_TEST: 1000
25 | FPN_POST_NMS_TOP_N_TEST: 1000
26 | ROI_HEADS:
27 | USE_FPN: True
28 | BATCH_SIZE_PER_IMAGE: 512
29 | POSITIVE_FRACTION: 0.25
30 | ROI_BOX_HEAD:
31 | USE_GN: True # use GN for bbox head
32 | POOLER_RESOLUTION: 7
33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 | POOLER_SAMPLING_RATIO: 2
35 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
36 | PREDICTOR: "FPNPredictor"
37 | ROI_MASK_HEAD:
38 | USE_GN: True # use GN for mask head
39 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
40 | CONV_LAYERS: (256, 256, 256, 256)
41 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
42 | PREDICTOR: "MaskRCNNC4Predictor"
43 | POOLER_RESOLUTION: 14
44 | POOLER_SAMPLING_RATIO: 2
45 | RESOLUTION: 28
46 | SHARE_BOX_FEATURE_EXTRACTOR: False
47 | MASK_ON: True
48 | DATASETS:
49 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
50 | TEST: ("coco_2014_minival",)
51 | DATALOADER:
52 | SIZE_DIVISIBILITY: 32
53 | SOLVER:
54 | # Assume 8 gpus
55 | BASE_LR: 0.02
56 | WEIGHT_DECAY: 0.0001
57 | STEPS: (210000, 250000)
58 | MAX_ITER: 270000
59 | IMS_PER_BATCH: 16
60 | TEST:
61 | IMS_PER_BATCH: 8
62 |
--------------------------------------------------------------------------------
/configs/gn_baselines/scratch_e2e_mask_rcnn_R_50_FPN_Xconv1fc_3x_gn.yaml:
--------------------------------------------------------------------------------
1 | INPUT:
2 | MIN_SIZE_TRAIN: (800,)
3 | MAX_SIZE_TRAIN: 1333
4 | MIN_SIZE_TEST: 800
5 | MAX_SIZE_TEST: 1333
6 | MODEL:
7 | META_ARCHITECTURE: "GeneralizedRCNN"
8 | WEIGHT: "" # no pretrained model
9 | BACKBONE:
10 | CONV_BODY: "R-50-FPN"
11 | FREEZE_CONV_BODY_AT: 0 # nothing frozen: train all layers from scratch
12 | RESNETS: # use GN for backbone
13 | BACKBONE_OUT_CHANNELS: 256
14 | STRIDE_IN_1X1: False
15 | TRANS_FUNC: "BottleneckWithGN"
16 | STEM_FUNC: "StemWithGN"
17 | FPN:
18 | USE_GN: True # use GN for FPN
19 | RPN:
20 | USE_FPN: True
21 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
22 | PRE_NMS_TOP_N_TRAIN: 2000
23 | PRE_NMS_TOP_N_TEST: 1000
24 | POST_NMS_TOP_N_TEST: 1000
25 | FPN_POST_NMS_TOP_N_TEST: 1000
26 | ROI_HEADS:
27 | USE_FPN: True
28 | BATCH_SIZE_PER_IMAGE: 512
29 | POSITIVE_FRACTION: 0.25
30 | ROI_BOX_HEAD:
31 | USE_GN: True # use GN for bbox head
32 | POOLER_RESOLUTION: 7
33 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
34 | POOLER_SAMPLING_RATIO: 2
35 | CONV_HEAD_DIM: 256
36 | NUM_STACKED_CONVS: 4
37 | FEATURE_EXTRACTOR: "FPNXconv1fcFeatureExtractor"
38 | PREDICTOR: "FPNPredictor"
39 | ROI_MASK_HEAD:
40 | USE_GN: True # use GN for mask head
41 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
42 | CONV_LAYERS: (256, 256, 256, 256)
43 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
44 | PREDICTOR: "MaskRCNNC4Predictor"
45 | POOLER_RESOLUTION: 14
46 | POOLER_SAMPLING_RATIO: 2
47 | RESOLUTION: 28
48 | SHARE_BOX_FEATURE_EXTRACTOR: False
49 | MASK_ON: True
50 | DATASETS:
51 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
52 | TEST: ("coco_2014_minival",)
53 | DATALOADER:
54 | SIZE_DIVISIBILITY: 32
55 | SOLVER:
56 | # Assume 8 gpus
57 | BASE_LR: 0.02
58 | WEIGHT_DECAY: 0.0001
59 | STEPS: (210000, 250000)
60 | MAX_ITER: 270000
61 | IMS_PER_BATCH: 16
62 | TEST:
63 | IMS_PER_BATCH: 8
64 |
--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_1_gpu_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 300
7 | ANCHOR_SIZES: (128, 256, 512)
8 | ROI_BOX_HEAD:
9 | NUM_CLASSES: 21
10 | DATASETS:
11 | TRAIN: ("voc_2007_train", "voc_2007_val")
12 | TEST: ("voc_2007_test",)
13 | SOLVER:
14 | BASE_LR: 0.001
15 | WEIGHT_DECAY: 0.0001
16 | STEPS: (50000, )
17 | MAX_ITER: 70000
18 | IMS_PER_BATCH: 1
19 | TEST:
20 | IMS_PER_BATCH: 1
21 |
--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_faster_rcnn_R_50_C4_1x_4_gpu_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 300
7 | ANCHOR_SIZES: (128, 256, 512)
8 | ROI_BOX_HEAD:
9 | NUM_CLASSES: 21
10 | DATASETS:
11 | TRAIN: ("voc_2007_train", "voc_2007_val")
12 | TEST: ("voc_2007_test",)
13 | SOLVER:
14 | BASE_LR: 0.004
15 | WEIGHT_DECAY: 0.0001
16 | STEPS: (12500, )
17 | MAX_ITER: 17500
18 | IMS_PER_BATCH: 4
19 | TEST:
20 | IMS_PER_BATCH: 4
21 |
--------------------------------------------------------------------------------
/configs/pascal_voc/e2e_mask_rcnn_R_50_FPN_1x_cocostyle.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | NUM_CLASSES: 21
24 | ROI_MASK_HEAD:
25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 | PREDICTOR: "MaskRCNNC4Predictor"
28 | POOLER_RESOLUTION: 14
29 | POOLER_SAMPLING_RATIO: 2
30 | RESOLUTION: 28
31 | SHARE_BOX_FEATURE_EXTRACTOR: False
32 | MASK_ON: True
33 | DATASETS:
34 | TRAIN: ("voc_2012_train_cocostyle",)
35 | TEST: ("voc_2012_val_cocostyle",)
36 | DATALOADER:
37 | SIZE_DIVISIBILITY: 32
38 | SOLVER:
39 | BASE_LR: 0.01
40 | WEIGHT_DECAY: 0.0001
41 | STEPS: (18000,)
42 | MAX_ITER: 24000
43 |
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 1000
7 | ROI_HEADS:
8 | BATCH_SIZE_PER_IMAGE: 256
9 | DATASETS:
10 | TRAIN: ("coco_2014_minival",)
11 | TEST: ("coco_2014_minival",)
12 | INPUT:
13 | MIN_SIZE_TRAIN: (600,)
14 | MAX_SIZE_TRAIN: 1000
15 | MIN_SIZE_TEST: 800
16 | MAX_SIZE_TEST: 1000
17 | SOLVER:
18 | BASE_LR: 0.005
19 | WEIGHT_DECAY: 0.0001
20 | STEPS: (1500,)
21 | MAX_ITER: 2000
22 | IMS_PER_BATCH: 2
23 | TEST:
24 | IMS_PER_BATCH: 2
25 |
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | BATCH_SIZE_PER_IMAGE: 256
18 | ROI_BOX_HEAD:
19 | POOLER_RESOLUTION: 7
20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 | POOLER_SAMPLING_RATIO: 2
22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 | PREDICTOR: "FPNPredictor"
24 | DATASETS:
25 | TRAIN: ("coco_2014_minival",)
26 | TEST: ("coco_2014_minival",)
27 | INPUT:
28 | MIN_SIZE_TRAIN: (600,)
29 | MAX_SIZE_TRAIN: 1000
30 | MIN_SIZE_TEST: 800
31 | MAX_SIZE_TEST: 1000
32 | DATALOADER:
33 | SIZE_DIVISIBILITY: 32
34 | SOLVER:
35 | BASE_LR: 0.005
36 | WEIGHT_DECAY: 0.0001
37 | STEPS: (1500,)
38 | MAX_ITER: 2000
39 | IMS_PER_BATCH: 4
40 | TEST:
41 | IMS_PER_BATCH: 2
42 |
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_faster_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | STRIDE_IN_1X1: False
9 | NUM_GROUPS: 32
10 | WIDTH_PER_GROUP: 8
11 | RPN:
12 | USE_FPN: True
13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 | PRE_NMS_TOP_N_TRAIN: 2000
15 | PRE_NMS_TOP_N_TEST: 1000
16 | POST_NMS_TOP_N_TEST: 1000
17 | FPN_POST_NMS_TOP_N_TEST: 1000
18 | ROI_HEADS:
19 | USE_FPN: True
20 | BATCH_SIZE_PER_IMAGE: 256
21 | ROI_BOX_HEAD:
22 | POOLER_RESOLUTION: 7
23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 | POOLER_SAMPLING_RATIO: 2
25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 | PREDICTOR: "FPNPredictor"
27 | DATASETS:
28 | TRAIN: ("coco_2014_minival",)
29 | TEST: ("coco_2014_minival",)
30 | INPUT:
31 | MIN_SIZE_TRAIN: (600,)
32 | MAX_SIZE_TRAIN: 1000
33 | MIN_SIZE_TEST: 800
34 | MAX_SIZE_TEST: 1000
35 | DATALOADER:
36 | SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 | BASE_LR: 0.005
39 | WEIGHT_DECAY: 0.0001
40 | STEPS: (1500,)
41 | MAX_ITER: 2000
42 | IMS_PER_BATCH: 2
43 | TEST:
44 | IMS_PER_BATCH: 2
45 |
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_keypoint_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | BATCH_SIZE_PER_IMAGE: 256
18 | ROI_BOX_HEAD:
19 | POOLER_RESOLUTION: 7
20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 | POOLER_SAMPLING_RATIO: 2
22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 | PREDICTOR: "FPNPredictor"
24 | NUM_CLASSES: 2
25 | ROI_KEYPOINT_HEAD:
26 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
27 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
28 | PREDICTOR: "KeypointRCNNPredictor"
29 | POOLER_RESOLUTION: 14
30 | POOLER_SAMPLING_RATIO: 2
31 | RESOLUTION: 56
32 | SHARE_BOX_FEATURE_EXTRACTOR: False
33 | KEYPOINT_ON: True
34 | DATASETS:
35 | TRAIN: ("keypoints_coco_2014_minival",)
36 | TEST: ("keypoints_coco_2014_minival",)
37 | INPUT:
38 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
39 | MAX_SIZE_TRAIN: 1000
40 | MIN_SIZE_TEST: 800
41 | MAX_SIZE_TEST: 1000
42 | DATALOADER:
43 | SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 | BASE_LR: 0.005
46 | WEIGHT_DECAY: 0.0001
47 | STEPS: (1500,)
48 | MAX_ITER: 2000
49 | IMS_PER_BATCH: 4
50 | TEST:
51 | IMS_PER_BATCH: 2
52 |
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 1000
7 | ROI_HEADS:
8 | BATCH_SIZE_PER_IMAGE: 256
9 | ROI_MASK_HEAD:
10 | PREDICTOR: "MaskRCNNC4Predictor"
11 | SHARE_BOX_FEATURE_EXTRACTOR: True
12 | MASK_ON: True
13 | DATASETS:
14 | TRAIN: ("coco_2014_minival",)
15 | TEST: ("coco_2014_minival",)
16 | INPUT:
17 | MIN_SIZE_TRAIN: (600,)
18 | MAX_SIZE_TRAIN: 1000
19 | MIN_SIZE_TEST: 800
20 | MAX_SIZE_TEST: 1000
21 | SOLVER:
22 | BASE_LR: 0.005
23 | WEIGHT_DECAY: 0.0001
24 | STEPS: (1500,)
25 | MAX_ITER: 2000
26 | IMS_PER_BATCH: 4
27 | TEST:
28 | IMS_PER_BATCH: 2
29 |
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | BATCH_SIZE_PER_IMAGE: 256
18 | ROI_BOX_HEAD:
19 | POOLER_RESOLUTION: 7
20 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
21 | POOLER_SAMPLING_RATIO: 2
22 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
23 | PREDICTOR: "FPNPredictor"
24 | ROI_MASK_HEAD:
25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
27 | PREDICTOR: "MaskRCNNC4Predictor"
28 | POOLER_RESOLUTION: 14
29 | POOLER_SAMPLING_RATIO: 2
30 | RESOLUTION: 28
31 | SHARE_BOX_FEATURE_EXTRACTOR: False
32 | MASK_ON: True
33 | DATASETS:
34 | TRAIN: ("coco_2014_minival",)
35 | TEST: ("coco_2014_minival",)
36 | INPUT:
37 | MIN_SIZE_TRAIN: (600,)
38 | MAX_SIZE_TRAIN: 1000
39 | MIN_SIZE_TEST: 800
40 | MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 | SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 | BASE_LR: 0.005
45 | WEIGHT_DECAY: 0.0001
46 | STEPS: (1500,)
47 | MAX_ITER: 2000
48 | IMS_PER_BATCH: 4
49 | TEST:
50 | IMS_PER_BATCH: 2
51 |
--------------------------------------------------------------------------------
/configs/quick_schedules/e2e_mask_rcnn_X_101_32x8d_FPN_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | STRIDE_IN_1X1: False
9 | NUM_GROUPS: 32
10 | WIDTH_PER_GROUP: 8
11 | RPN:
12 | USE_FPN: True
13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 | PRE_NMS_TOP_N_TRAIN: 2000
15 | PRE_NMS_TOP_N_TEST: 1000
16 | POST_NMS_TOP_N_TEST: 1000
17 | FPN_POST_NMS_TOP_N_TEST: 1000
18 | ROI_HEADS:
19 | USE_FPN: True
20 | BATCH_SIZE_PER_IMAGE: 256
21 | ROI_BOX_HEAD:
22 | POOLER_RESOLUTION: 7
23 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
24 | POOLER_SAMPLING_RATIO: 2
25 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
26 | PREDICTOR: "FPNPredictor"
27 | ROI_MASK_HEAD:
28 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
29 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
30 | PREDICTOR: "MaskRCNNC4Predictor"
31 | POOLER_RESOLUTION: 14
32 | POOLER_SAMPLING_RATIO: 2
33 | RESOLUTION: 28
34 | SHARE_BOX_FEATURE_EXTRACTOR: False
35 | MASK_ON: True
36 | DATASETS:
37 | TRAIN: ("coco_2014_minival",)
38 | TEST: ("coco_2014_minival",)
39 | INPUT:
40 | MIN_SIZE_TRAIN: (600,)
41 | MAX_SIZE_TRAIN: 1000
42 | MIN_SIZE_TEST: 800
43 | MAX_SIZE_TEST: 1000
44 | DATALOADER:
45 | SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 | BASE_LR: 0.005
48 | WEIGHT_DECAY: 0.0001
49 | STEPS: (1500,)
50 | MAX_ITER: 2000
51 | IMS_PER_BATCH: 2
52 | TEST:
53 | IMS_PER_BATCH: 2
54 |
--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_C4_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN_ONLY: True
5 | RPN:
6 | PRE_NMS_TOP_N_TEST: 12000
7 | POST_NMS_TOP_N_TEST: 2000
8 | DATASETS:
9 | TRAIN: ("coco_2014_minival",)
10 | TEST: ("coco_2014_minival",)
11 | INPUT:
12 | MIN_SIZE_TRAIN: (600,)
13 | MAX_SIZE_TRAIN: 1000
14 | MIN_SIZE_TEST: 800
15 | MAX_SIZE_TEST: 1000
16 | SOLVER:
17 | BASE_LR: 0.005
18 | WEIGHT_DECAY: 0.0001
19 | STEPS: (1500,)
20 | MAX_ITER: 2000
21 | IMS_PER_BATCH: 4
22 | TEST:
23 | IMS_PER_BATCH: 2
24 |
--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN_ONLY: True
5 | BACKBONE:
6 | CONV_BODY: "R-50-FPN"
7 | RESNETS:
8 | BACKBONE_OUT_CHANNELS: 256
9 | RPN:
10 | USE_FPN: True
11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 2000
14 | FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 | TRAIN: ("coco_2014_minival",)
17 | TEST: ("coco_2014_minival",)
18 | INPUT:
19 | MIN_SIZE_TRAIN: (600,)
20 | MAX_SIZE_TRAIN: 1000
21 | MIN_SIZE_TEST: 800
22 | MAX_SIZE_TEST: 1000
23 | DATALOADER:
24 | SIZE_DIVISIBILITY: 32
25 | SOLVER:
26 | BASE_LR: 0.005
27 | WEIGHT_DECAY: 0.0001
28 | STEPS: (1500,)
29 | MAX_ITER: 2000
30 | IMS_PER_BATCH: 4
31 | TEST:
32 | IMS_PER_BATCH: 2
33 |
--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | RPN_ONLY: True
5 | RETINANET_ON: True
6 | BACKBONE:
7 | CONV_BODY: "R-101-FPN-RETINANET"
8 | RESNETS:
9 | BACKBONE_OUT_CHANNELS: 256
10 | RPN:
11 | USE_FPN: True
12 | FG_IOU_THRESHOLD: 0.5
13 | BG_IOU_THRESHOLD: 0.4
14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 | PRE_NMS_TOP_N_TRAIN: 2000
16 | PRE_NMS_TOP_N_TEST: 1000
17 | POST_NMS_TOP_N_TEST: 1000
18 | FPN_POST_NMS_TOP_N_TEST: 1000
19 | ROI_HEADS:
20 | USE_FPN: True
21 | BATCH_SIZE_PER_IMAGE: 256
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 7
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | POOLER_SAMPLING_RATIO: 2
26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 | PREDICTOR: "FPNPredictor"
28 | RETINANET:
29 | SCALES_PER_OCTAVE: 3
30 | STRADDLE_THRESH: -1
31 | FG_IOU_THRESHOLD: 0.5
32 | BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 | TEST: ("coco_2014_minival",)
36 | INPUT:
37 | MIN_SIZE_TRAIN: (800, )
38 | MAX_SIZE_TRAIN: 1333
39 | MIN_SIZE_TEST: 800
40 | MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 | SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 | # Assumes 4 GPUs
45 | BASE_LR: 0.005
46 | WEIGHT_DECAY: 0.0001
47 | STEPS: (120000, 160000)
48 | MAX_ITER: 180000
49 | IMS_PER_BATCH: 8
50 |
--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-101-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | RPN_ONLY: True
5 | RETINANET_ON: True
6 | BACKBONE:
7 | CONV_BODY: "R-101-FPN-RETINANET"
8 | RESNETS:
9 | BACKBONE_OUT_CHANNELS: 256
10 | RPN:
11 | USE_FPN: True
12 | FG_IOU_THRESHOLD: 0.5
13 | BG_IOU_THRESHOLD: 0.4
14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 | PRE_NMS_TOP_N_TRAIN: 2000
16 | PRE_NMS_TOP_N_TEST: 1000
17 | POST_NMS_TOP_N_TEST: 1000
18 | FPN_POST_NMS_TOP_N_TEST: 1000
19 | ROI_HEADS:
20 | USE_FPN: True
21 | BATCH_SIZE_PER_IMAGE: 256
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 7
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | POOLER_SAMPLING_RATIO: 2
26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 | PREDICTOR: "FPNPredictor"
28 | RETINANET:
29 | SCALES_PER_OCTAVE: 3
30 | STRADDLE_THRESH: -1
31 | USE_C5: False
32 | FG_IOU_THRESHOLD: 0.5
33 | BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 | TEST: ("coco_2014_minival",)
37 | INPUT:
38 | MIN_SIZE_TRAIN: (800, )
39 | MAX_SIZE_TRAIN: 1333
40 | MIN_SIZE_TEST: 800
41 | MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 | SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 | # Assumes 4 GPUs
46 | BASE_LR: 0.005
47 | WEIGHT_DECAY: 0.0001
48 | STEPS: (120000, 160000)
49 | MAX_ITER: 180000
50 | IMS_PER_BATCH: 8
51 |
--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN_ONLY: True
5 | RETINANET_ON: True
6 | BACKBONE:
7 | CONV_BODY: "R-50-FPN-RETINANET"
8 | RESNETS:
9 | BACKBONE_OUT_CHANNELS: 256
10 | RPN:
11 | USE_FPN: True
12 | FG_IOU_THRESHOLD: 0.5
13 | BG_IOU_THRESHOLD: 0.4
14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 | PRE_NMS_TOP_N_TRAIN: 2000
16 | PRE_NMS_TOP_N_TEST: 1000
17 | POST_NMS_TOP_N_TEST: 1000
18 | FPN_POST_NMS_TOP_N_TEST: 1000
19 | ROI_HEADS:
20 | USE_FPN: True
21 | BATCH_SIZE_PER_IMAGE: 256
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 7
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | POOLER_SAMPLING_RATIO: 2
26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 | PREDICTOR: "FPNPredictor"
28 | RETINANET:
29 | SCALES_PER_OCTAVE: 3
30 | STRADDLE_THRESH: -1
31 | FG_IOU_THRESHOLD: 0.5
32 | BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
35 | TEST: ("coco_2014_minival",)
36 | INPUT:
37 | MIN_SIZE_TRAIN: (800,)
38 | MAX_SIZE_TRAIN: 1333
39 | MIN_SIZE_TEST: 800
40 | MAX_SIZE_TEST: 1333
41 | DATALOADER:
42 | SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 | # Assumes 4 GPUs
45 | BASE_LR: 0.005
46 | WEIGHT_DECAY: 0.0001
47 | STEPS: (120000, 160000)
48 | MAX_ITER: 180000
49 | IMS_PER_BATCH: 8
50 |
--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_1x_quick.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN_ONLY: True
5 | RETINANET_ON: True
6 | BACKBONE:
7 | CONV_BODY: "R-50-FPN-RETINANET"
8 | RESNETS:
9 | BACKBONE_OUT_CHANNELS: 256
10 | RPN:
11 | USE_FPN: True
12 | FG_IOU_THRESHOLD: 0.5
13 | BG_IOU_THRESHOLD: 0.4
14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 | PRE_NMS_TOP_N_TRAIN: 2000
16 | PRE_NMS_TOP_N_TEST: 1000
17 | POST_NMS_TOP_N_TEST: 1000
18 | FPN_POST_NMS_TOP_N_TEST: 1000
19 | ROI_HEADS:
20 | USE_FPN: True
21 | BATCH_SIZE_PER_IMAGE: 256
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 7
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | POOLER_SAMPLING_RATIO: 2
26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 | PREDICTOR: "FPNPredictor"
28 | RETINANET:
29 | SCALES_PER_OCTAVE: 3
30 | STRADDLE_THRESH: -1
31 | FG_IOU_THRESHOLD: 0.5
32 | BG_IOU_THRESHOLD: 0.4
33 | DATASETS:
34 | TRAIN: ("coco_2014_minival",)
35 | TEST: ("coco_2014_minival",)
36 | INPUT:
37 | MIN_SIZE_TRAIN: (600,)
38 | MAX_SIZE_TRAIN: 1000
39 | MIN_SIZE_TEST: 800
40 | MAX_SIZE_TEST: 1000
41 | DATALOADER:
42 | SIZE_DIVISIBILITY: 32
43 | SOLVER:
44 | BASE_LR: 0.005
45 | WEIGHT_DECAY: 0.0001
46 | STEPS: (3500,)
47 | MAX_ITER: 4000
48 | IMS_PER_BATCH: 4
49 |
--------------------------------------------------------------------------------
/configs/retinanet/retinanet_R-50-FPN_P5_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN_ONLY: True
5 | RETINANET_ON: True
6 | BACKBONE:
7 | CONV_BODY: "R-50-FPN-RETINANET"
8 | RESNETS:
9 | BACKBONE_OUT_CHANNELS: 256
10 | RPN:
11 | USE_FPN: True
12 | FG_IOU_THRESHOLD: 0.5
13 | BG_IOU_THRESHOLD: 0.4
14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 | PRE_NMS_TOP_N_TRAIN: 2000
16 | PRE_NMS_TOP_N_TEST: 1000
17 | POST_NMS_TOP_N_TEST: 1000
18 | FPN_POST_NMS_TOP_N_TEST: 1000
19 | ROI_HEADS:
20 | USE_FPN: True
21 | BATCH_SIZE_PER_IMAGE: 256
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 7
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | POOLER_SAMPLING_RATIO: 2
26 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
27 | PREDICTOR: "FPNPredictor"
28 | RETINANET:
29 | SCALES_PER_OCTAVE: 3
30 | STRADDLE_THRESH: -1
31 | USE_C5: False
32 | FG_IOU_THRESHOLD: 0.5
33 | BG_IOU_THRESHOLD: 0.4
34 | DATASETS:
35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 | TEST: ("coco_2014_minival",)
37 | INPUT:
38 | MIN_SIZE_TRAIN: (800,)
39 | MAX_SIZE_TRAIN: 1333
40 | MIN_SIZE_TEST: 800
41 | MAX_SIZE_TEST: 1333
42 | DATALOADER:
43 | SIZE_DIVISIBILITY: 32
44 | SOLVER:
45 | # Assumes 4 GPUs
46 | BASE_LR: 0.005
47 | WEIGHT_DECAY: 0.0001
48 | STEPS: (120000, 160000)
49 | MAX_ITER: 180000
50 | IMS_PER_BATCH: 8
51 |
--------------------------------------------------------------------------------
/configs/retinanet/retinanet_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
4 | RPN_ONLY: True
5 | RETINANET_ON: True
6 | BACKBONE:
7 | CONV_BODY: "R-101-FPN-RETINANET"
8 | RESNETS:
9 | BACKBONE_OUT_CHANNELS: 256
10 | STRIDE_IN_1X1: False
11 | NUM_GROUPS: 32
12 | WIDTH_PER_GROUP: 8
13 | RPN:
14 | USE_FPN: True
15 | FG_IOU_THRESHOLD: 0.5
16 | BG_IOU_THRESHOLD: 0.4
17 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
18 | PRE_NMS_TOP_N_TRAIN: 2000
19 | PRE_NMS_TOP_N_TEST: 1000
20 | POST_NMS_TOP_N_TEST: 1000
21 | FPN_POST_NMS_TOP_N_TEST: 1000
22 | ROI_HEADS:
23 | USE_FPN: True
24 | BATCH_SIZE_PER_IMAGE: 256
25 | ROI_BOX_HEAD:
26 | POOLER_RESOLUTION: 7
27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 | POOLER_SAMPLING_RATIO: 2
29 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
30 | PREDICTOR: "FPNPredictor"
31 | RETINANET:
32 | SCALES_PER_OCTAVE: 3
33 | STRADDLE_THRESH: -1
34 | FG_IOU_THRESHOLD: 0.5
35 | BG_IOU_THRESHOLD: 0.4
36 | DATASETS:
37 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
38 | TEST: ("coco_2014_minival",)
39 | INPUT:
40 | MIN_SIZE_TRAIN: (800, )
41 | MAX_SIZE_TRAIN: 1333
42 | MIN_SIZE_TEST: 800
43 | MAX_SIZE_TEST: 1333
44 | DATALOADER:
45 | SIZE_DIVISIBILITY: 32
46 | SOLVER:
47 | # Assumes 4 GPUs
48 | BASE_LR: 0.0025
49 | WEIGHT_DECAY: 0.0001
50 | STEPS: (240000, 320000)
51 | MAX_ITER: 360000
52 | IMS_PER_BATCH: 4
53 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_VRD_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | NMS: 0.5
18 | DETECTIONS_PER_IMG: 128
19 |
20 |
21 | RELATION_ON: True
22 |
23 | RELATION:
24 | MAKE_PAIR_PROPOSAL_CNT: 128
25 | MAX_PROPOSAL_PAIR: 2048
26 | PHRASE_POOLED_SIZE: 8
27 | PHRASE_CLUSTER: True
28 | APPLY_REGRESSION: True
29 | USE_DETECTION_RESULT_FOR_RELATION: True
30 | FIXED_ROI_HEAD: True
31 | FIXED_RPN: True
32 | RELATION_CLASS: 51
33 | NEG_POS_PHRASE_PROP_RATE: 4
34 | SEPARATED_BACKBONE: True
35 | TOPK_TRIPLETS: (50, 100, )
36 | SAMPLE_DETECTION_BOX: False
37 |
38 | FEATURE_REFINE:
39 | MASSAGE_PASSING: 0
40 | MP_UNIT_OUTPUT_DIM: 128
41 |
42 | ROI_BOX_HEAD:
43 | POOLER_RESOLUTION: 7
44 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
45 | POOLER_SAMPLING_RATIO: 2
46 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
47 | PREDICTOR: "FPNPredictor"
48 | NUM_CLASSES: 151
49 |
50 |
51 | DATASETS:
52 | # TRAIN: ("vg_150_50_train_fat" ,)
53 | TRAIN: ("vg_150_50_train_small" ,)
54 | # TRAIN: ("vg_150_50_train" ,)
55 | TEST: ("vg_150_50_test_small" ,)
56 | DATALOADER:
57 | SIZE_DIVISIBILITY: 32
58 | NUM_WORKERS: 2
59 | SOLVER:
60 | BASE_LR: 0.001
61 | WEIGHT_DECAY: 0.0001
62 | STEPS: (100000, )
63 | CHECKPOINT_PERIOD: 5000
64 | START_SAVE_CHECKPOINT: 5000
65 | MAX_ITER: 200000
66 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_VRD_faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 1000
7 |
8 | RELATION_ON: True
9 |
10 | RELATION:
11 | MAKE_PAIR_PROPOSAL_CNT: 128
12 | MAX_PROPOSAL_PAIR: 2048
13 | PHRASE_POOLED_SIZE: 8
14 | PHRASE_CLUSTER: True
15 | APPLY_REGRESSION: True
16 | USE_DETECTION_RESULT_FOR_RELATION: True
17 | FIXED_ROI_HEAD: True
18 | FIXED_RPN: True
19 | RELATION_CLASS: 51
20 | NEG_POS_PHRASE_PROP_RATE: 4
21 | SEPARATED_BACKBONE: True
22 | TOPK_TRIPLETS: (50, 100, )
23 | SAMPLE_DETECTION_BOX: False
24 |
25 | ROI_BOX_HEAD:
26 | NUM_CLASSES: 151
27 | FEATURE_EXTRACTOR: "ResNet50Conv5ROIFeatureExtractorFlatten"
28 | PREDICTOR: "FastRCNNPredictorFlatten"
29 |
30 |
31 | DATASETS:
32 | TRAIN: ("vg_150_50_train" ,)
33 | TEST: ("vg_150_50_test_small" ,)
34 |
35 |
36 | SOLVER:
37 | BASE_LR: 0.001
38 | WEIGHT_DECAY: 0.0001
39 | STEPS: (21000, )
40 | CHECKPOINT_PERIOD: 3000
41 | START_SAVE_CHECKPOINT: 3000
42 | MAX_ITER: 50400
43 | IMS_PER_BATCH: 8
44 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_VRD_faster_rcnn_VGG16.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "VGG16"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 1000
7 |
8 | BACKBONE:
9 | CONV_BODY: "VGG16"
10 | FREEZE_CONV_BODY_AT: 5
11 |
12 | RELATION_ON: False
13 |
14 | RELATION:
15 | MAKE_PAIR_PROPOSAL_CNT: 128
16 | MAX_PROPOSAL_PAIR: 2048
17 | PHRASE_POOLED_SIZE: 8
18 | PHRASE_CLUSTER: True
19 | APPLY_REGRESSION: True
20 | USE_DETECTION_RESULT_FOR_RELATION: True
21 | FIXED_ROI_HEAD: True
22 | FIXED_RPN: True
23 | RELATION_CLASS: 51
24 | NEG_POS_PHRASE_PROP_RATE: 4
25 | SEPARATED_BACKBONE: True
26 | TOPK_TRIPLETS: (50, 100, )
27 | SAMPLE_DETECTION_BOX: False
28 |
29 | ROI_BOX_HEAD:
30 | NUM_CLASSES: 151
31 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor" # behaves like an FPN with a single feature layer
32 | PREDICTOR: "FPNPredictor"
33 |
34 |
35 | DATASETS:
36 | TRAIN: ("vg_150_50_train_fat" ,)
37 | TEST: ("vg_150_50_test_small" ,)
38 |
39 |
40 | SOLVER:
41 | BASE_LR: 0.001
42 | WEIGHT_DECAY: 0.0001
43 | STEPS: (90000, )
44 | CHECKPOINT_PERIOD: 3000
45 | START_SAVE_CHECKPOINT: 3000
46 | MAX_ITER: 200000
47 | IMS_PER_BATCH: 8
48 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 | TEST: ("coco_2014_val",)
26 | DATALOADER:
27 | SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 | BASE_LR: 0.02
30 | WEIGHT_DECAY: 0.0001
31 | STEPS: (60000, 80000)
32 | MAX_ITER: 90000
33 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 1000
7 | DATASETS:
8 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
9 | TEST: ("coco_2014_minival",)
10 | SOLVER:
11 | BASE_LR: 0.01
12 | WEIGHT_DECAY: 0.0001
13 | STEPS: (120000, 160000)
14 | MAX_ITER: 180000
15 | IMS_PER_BATCH: 8
16 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | DATASETS:
24 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
25 | TEST: ("coco_2014_minival",)
26 | DATALOADER:
27 | SIZE_DIVISIBILITY: 32
28 | SOLVER:
29 | BASE_LR: 0.02
30 | WEIGHT_DECAY: 0.0001
31 | STEPS: (60000, 80000)
32 | MAX_ITER: 90000
33 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RPN:
7 | USE_FPN: True
8 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
9 | PRE_NMS_TOP_N_TRAIN: 2000
10 | PRE_NMS_TOP_N_TEST: 1000
11 | POST_NMS_TOP_N_TEST: 1000
12 | FPN_POST_NMS_TOP_N_TEST: 1000
13 | ROI_HEADS:
14 | USE_FPN: True
15 | ROI_BOX_HEAD:
16 | POOLER_RESOLUTION: 7
17 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
18 | POOLER_SAMPLING_RATIO: 2
19 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
20 | PREDICTOR: "FPNPredictor"
21 | RESNETS:
22 | BACKBONE_OUT_CHANNELS: 256
23 | STRIDE_IN_1X1: False
24 | NUM_GROUPS: 32
25 | WIDTH_PER_GROUP: 8
26 | DATASETS:
27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 | TEST: ("coco_2014_minival",)
29 | DATALOADER:
30 | SIZE_DIVISIBILITY: 32
31 | SOLVER:
32 | BASE_LR: 0.01
33 | WEIGHT_DECAY: 0.0001
34 | STEPS: (120000, 160000)
35 | MAX_ITER: 180000
36 | IMS_PER_BATCH: 8
37 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | CONV_BODY: FBNet
5 | FBNET:
6 | ARCH: "default"
7 | BN_TYPE: "bn"
8 | WIDTH_DIVISOR: 8
9 | DW_CONV_SKIP_BN: True
10 | DW_CONV_SKIP_RELU: True
11 | RPN:
12 | ANCHOR_SIZES: (16, 32, 64, 128, 256)
13 | ANCHOR_STRIDE: (16, )
14 | BATCH_SIZE_PER_IMAGE: 256
15 | PRE_NMS_TOP_N_TRAIN: 6000
16 | PRE_NMS_TOP_N_TEST: 6000
17 | POST_NMS_TOP_N_TRAIN: 2000
18 | POST_NMS_TOP_N_TEST: 100
19 | RPN_HEAD: FBNet.rpn_head
20 | ROI_HEADS:
21 | BATCH_SIZE_PER_IMAGE: 512
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 6
24 | FEATURE_EXTRACTOR: FBNet.roi_head
25 | NUM_CLASSES: 81
26 | DATASETS:
27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 | TEST: ("coco_2014_minival",)
29 | SOLVER:
30 | BASE_LR: 0.06
31 | WARMUP_FACTOR: 0.1
32 | WEIGHT_DECAY: 0.0001
33 | STEPS: (60000, 80000)
34 | MAX_ITER: 90000
35 | IMS_PER_BATCH: 128 # for 8 GPUs
36 | # TEST:
37 | # IMS_PER_BATCH: 8
38 | INPUT:
39 | MIN_SIZE_TRAIN: (320, )
40 | MAX_SIZE_TRAIN: 640
41 | MIN_SIZE_TEST: 320
42 | MAX_SIZE_TEST: 640
43 | PIXEL_MEAN: [103.53, 116.28, 123.675]
44 | PIXEL_STD: [57.375, 57.12, 58.395]
45 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | CONV_BODY: FBNet
5 | FBNET:
6 | ARCH: "default"
7 | BN_TYPE: "bn"
8 | WIDTH_DIVISOR: 8
9 | DW_CONV_SKIP_BN: True
10 | DW_CONV_SKIP_RELU: True
11 | RPN:
12 | ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 | ANCHOR_STRIDE: (16, )
14 | BATCH_SIZE_PER_IMAGE: 256
15 | PRE_NMS_TOP_N_TRAIN: 6000
16 | PRE_NMS_TOP_N_TEST: 6000
17 | POST_NMS_TOP_N_TRAIN: 2000
18 | POST_NMS_TOP_N_TEST: 200
19 | RPN_HEAD: FBNet.rpn_head
20 | ROI_HEADS:
21 | BATCH_SIZE_PER_IMAGE: 256
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 6
24 | FEATURE_EXTRACTOR: FBNet.roi_head
25 | NUM_CLASSES: 81
26 | DATASETS:
27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 | TEST: ("coco_2014_minival",)
29 | SOLVER:
30 | BASE_LR: 0.06
31 | WARMUP_FACTOR: 0.1
32 | WEIGHT_DECAY: 0.0001
33 | STEPS: (60000, 80000)
34 | MAX_ITER: 90000
35 | IMS_PER_BATCH: 128 # for 8 GPUs
36 | # TEST:
37 | # IMS_PER_BATCH: 8
38 | INPUT:
39 | MIN_SIZE_TRAIN: (600, )
40 | MAX_SIZE_TRAIN: 1000
41 | MIN_SIZE_TEST: 600
42 | MAX_SIZE_TEST: 1000
43 | PIXEL_MEAN: [103.53, 116.28, 123.675]
44 | PIXEL_STD: [57.375, 57.12, 58.395]
45 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_faster_rcnn_fbnet_chamv1a_600.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | CONV_BODY: FBNet
5 | FBNET:
6 | ARCH: "cham_v1a"
7 | BN_TYPE: "bn"
8 | WIDTH_DIVISOR: 8
9 | DW_CONV_SKIP_BN: True
10 | DW_CONV_SKIP_RELU: True
11 | RPN:
12 | ANCHOR_SIZES: (32, 64, 128, 256, 512)
13 | ANCHOR_STRIDE: (16, )
14 | BATCH_SIZE_PER_IMAGE: 256
15 | PRE_NMS_TOP_N_TRAIN: 6000
16 | PRE_NMS_TOP_N_TEST: 6000
17 | POST_NMS_TOP_N_TRAIN: 2000
18 | POST_NMS_TOP_N_TEST: 200
19 | RPN_HEAD: FBNet.rpn_head
20 | ROI_HEADS:
21 | BATCH_SIZE_PER_IMAGE: 128
22 | ROI_BOX_HEAD:
23 | POOLER_RESOLUTION: 6
24 | FEATURE_EXTRACTOR: FBNet.roi_head
25 | NUM_CLASSES: 81
26 | DATASETS:
27 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
28 | TEST: ("coco_2014_minival",)
29 | SOLVER:
30 | BASE_LR: 0.045
31 | WARMUP_FACTOR: 0.1
32 | WEIGHT_DECAY: 0.0001
33 | STEPS: (90000, 120000)
34 | MAX_ITER: 135000
35 | IMS_PER_BATCH: 96 # for 8 GPUs
36 | # TEST:
37 | # IMS_PER_BATCH: 8
38 | INPUT:
39 | MIN_SIZE_TRAIN: (600, )
40 | MAX_SIZE_TRAIN: 1000
41 | MIN_SIZE_TEST: 600
42 | MAX_SIZE_TEST: 1000
43 | PIXEL_MEAN: [103.53, 116.28, 123.675]
44 | PIXEL_STD: [57.375, 57.12, 58.395]
45 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | NUM_CLASSES: 2
24 | ROI_KEYPOINT_HEAD:
25 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
26 | FEATURE_EXTRACTOR: "KeypointRCNNFeatureExtractor"
27 | PREDICTOR: "KeypointRCNNPredictor"
28 | POOLER_RESOLUTION: 14
29 | POOLER_SAMPLING_RATIO: 2
30 | RESOLUTION: 56
31 | SHARE_BOX_FEATURE_EXTRACTOR: False
32 | KEYPOINT_ON: True
33 | DATASETS:
34 | TRAIN: ("keypoints_coco_2014_train", "keypoints_coco_2014_valminusminival",)
35 | TEST: ("keypoints_coco_2014_minival",)
36 | INPUT:
37 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
38 | DATALOADER:
39 | SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 | BASE_LR: 0.02
42 | WEIGHT_DECAY: 0.0001
43 | STEPS: (60000, 80000)
44 | MAX_ITER: 90000
45 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | ROI_MASK_HEAD:
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 | PREDICTOR: "MaskRCNNC4Predictor"
27 | POOLER_RESOLUTION: 14
28 | POOLER_SAMPLING_RATIO: 2
29 | RESOLUTION: 28
30 | SHARE_BOX_FEATURE_EXTRACTOR: False
31 | MASK_ON: True
32 | DATASETS:
33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
34 | TEST: ("coco_2014_minival",)
35 | DATALOADER:
36 | SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 | BASE_LR: 0.02
39 | WEIGHT_DECAY: 0.0001
40 | STEPS: (60000, 80000)
41 | MAX_ITER: 90000
42 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN:
5 | PRE_NMS_TOP_N_TEST: 6000
6 | POST_NMS_TOP_N_TEST: 1000
7 | ROI_MASK_HEAD:
8 | PREDICTOR: "MaskRCNNC4Predictor"
9 | SHARE_BOX_FEATURE_EXTRACTOR: True
10 | MASK_ON: True
11 | DATASETS:
12 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
13 | TEST: ("coco_2014_minival",)
14 | SOLVER:
15 | BASE_LR: 0.01
16 | WEIGHT_DECAY: 0.0001
17 | STEPS: (120000, 160000)
18 | MAX_ITER: 180000
19 | IMS_PER_BATCH: 8
20 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | BACKBONE:
5 | CONV_BODY: "R-50-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | RPN:
9 | USE_FPN: True
10 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
11 | PRE_NMS_TOP_N_TRAIN: 2000
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 1000
14 | FPN_POST_NMS_TOP_N_TEST: 1000
15 | ROI_HEADS:
16 | USE_FPN: True
17 | ROI_BOX_HEAD:
18 | POOLER_RESOLUTION: 7
19 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
20 | POOLER_SAMPLING_RATIO: 2
21 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
22 | PREDICTOR: "FPNPredictor"
23 | ROI_MASK_HEAD:
24 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
25 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
26 | PREDICTOR: "MaskRCNNC4Predictor"
27 | POOLER_RESOLUTION: 14
28 | POOLER_SAMPLING_RATIO: 2
29 | RESOLUTION: 28
30 | SHARE_BOX_FEATURE_EXTRACTOR: False
31 | MASK_ON: True
32 | DATASETS:
33 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
34 | TEST: ("coco_2014_minival",)
35 | DATALOADER:
36 | SIZE_DIVISIBILITY: 32
37 | SOLVER:
38 | BASE_LR: 0.02
39 | WEIGHT_DECAY: 0.0001
40 | STEPS: (60000, 80000)
41 | MAX_ITER: 90000
42 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
4 | BACKBONE:
5 | CONV_BODY: "R-101-FPN"
6 | RESNETS:
7 | BACKBONE_OUT_CHANNELS: 256
8 | STRIDE_IN_1X1: False
9 | NUM_GROUPS: 32
10 | WIDTH_PER_GROUP: 8
11 | RPN:
12 | USE_FPN: True
13 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
14 | PRE_NMS_TOP_N_TRAIN: 2000
15 | PRE_NMS_TOP_N_TEST: 1000
16 | POST_NMS_TOP_N_TEST: 1000
17 | FPN_POST_NMS_TOP_N_TEST: 1000
18 | ROI_HEADS:
19 | USE_FPN: True
20 | ROI_BOX_HEAD:
21 | POOLER_RESOLUTION: 7
22 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
23 | POOLER_SAMPLING_RATIO: 2
24 | FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
25 | PREDICTOR: "FPNPredictor"
26 | ROI_MASK_HEAD:
27 | POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
28 | FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
29 | PREDICTOR: "MaskRCNNC4Predictor"
30 | POOLER_RESOLUTION: 14
31 | POOLER_SAMPLING_RATIO: 2
32 | RESOLUTION: 28
33 | SHARE_BOX_FEATURE_EXTRACTOR: False
34 | MASK_ON: True
35 | DATASETS:
36 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
37 | TEST: ("coco_2014_minival",)
38 | DATALOADER:
39 | SIZE_DIVISIBILITY: 32
40 | SOLVER:
41 | BASE_LR: 0.01
42 | WEIGHT_DECAY: 0.0001
43 | STEPS: (120000, 160000)
44 | MAX_ITER: 180000
45 | IMS_PER_BATCH: 8
46 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | CONV_BODY: FBNet
5 | FBNET:
6 | ARCH: "default"
7 | BN_TYPE: "bn"
8 | WIDTH_DIVISOR: 8
9 | DW_CONV_SKIP_BN: True
10 | DW_CONV_SKIP_RELU: True
11 | DET_HEAD_LAST_SCALE: 0.0
12 | RPN:
13 | ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 | ANCHOR_STRIDE: (16, )
15 | BATCH_SIZE_PER_IMAGE: 256
16 | PRE_NMS_TOP_N_TRAIN: 6000
17 | PRE_NMS_TOP_N_TEST: 6000
18 | POST_NMS_TOP_N_TRAIN: 2000
19 | POST_NMS_TOP_N_TEST: 100
20 | RPN_HEAD: FBNet.rpn_head
21 | ROI_HEADS:
22 | BATCH_SIZE_PER_IMAGE: 256
23 | ROI_BOX_HEAD:
24 | POOLER_RESOLUTION: 6
25 | FEATURE_EXTRACTOR: FBNet.roi_head
26 | NUM_CLASSES: 81
27 | ROI_MASK_HEAD:
28 | POOLER_RESOLUTION: 6
29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 | PREDICTOR: "MaskRCNNConv1x1Predictor"
31 | RESOLUTION: 12
32 | SHARE_BOX_FEATURE_EXTRACTOR: False
33 | MASK_ON: True
34 | DATASETS:
35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 | TEST: ("coco_2014_minival",)
37 | SOLVER:
38 | BASE_LR: 0.06
39 | WARMUP_FACTOR: 0.1
40 | WEIGHT_DECAY: 0.0001
41 | STEPS: (60000, 80000)
42 | MAX_ITER: 90000
43 | IMS_PER_BATCH: 128 # for 8 GPUs
44 | # TEST:
45 | # IMS_PER_BATCH: 8
46 | INPUT:
47 | MIN_SIZE_TRAIN: (320, )
48 | MAX_SIZE_TRAIN: 640
49 | MIN_SIZE_TEST: 320
50 | MAX_SIZE_TEST: 640
51 | PIXEL_MEAN: [103.53, 116.28, 123.675]
52 | PIXEL_STD: [57.375, 57.12, 58.395]
53 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet_600.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | CONV_BODY: FBNet
5 | FBNET:
6 | ARCH: "default"
7 | BN_TYPE: "bn"
8 | WIDTH_DIVISOR: 8
9 | DW_CONV_SKIP_BN: True
10 | DW_CONV_SKIP_RELU: True
11 | DET_HEAD_LAST_SCALE: 0.0
12 | RPN:
13 | ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 | ANCHOR_STRIDE: (16, )
15 | BATCH_SIZE_PER_IMAGE: 256
16 | PRE_NMS_TOP_N_TRAIN: 6000
17 | PRE_NMS_TOP_N_TEST: 6000
18 | POST_NMS_TOP_N_TRAIN: 2000
19 | POST_NMS_TOP_N_TEST: 200
20 | RPN_HEAD: FBNet.rpn_head
21 | ROI_HEADS:
22 | BATCH_SIZE_PER_IMAGE: 256
23 | ROI_BOX_HEAD:
24 | POOLER_RESOLUTION: 6
25 | FEATURE_EXTRACTOR: FBNet.roi_head
26 | NUM_CLASSES: 81
27 | ROI_MASK_HEAD:
28 | POOLER_RESOLUTION: 6
29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 | PREDICTOR: "MaskRCNNConv1x1Predictor"
31 | RESOLUTION: 12
32 | SHARE_BOX_FEATURE_EXTRACTOR: False
33 | MASK_ON: True
34 | DATASETS:
35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 | TEST: ("coco_2014_minival",)
37 | SOLVER:
38 | BASE_LR: 0.06
39 | WARMUP_FACTOR: 0.1
40 | WEIGHT_DECAY: 0.0001
41 | STEPS: (60000, 80000)
42 | MAX_ITER: 90000
43 | IMS_PER_BATCH: 128 # for 8 GPUs
44 | # TEST:
45 | # IMS_PER_BATCH: 8
46 | INPUT:
47 | MIN_SIZE_TRAIN: (600, )
48 | MAX_SIZE_TRAIN: 1000
49 | MIN_SIZE_TEST: 600
50 | MAX_SIZE_TEST: 1000
51 | PIXEL_MEAN: [103.53, 116.28, 123.675]
52 | PIXEL_STD: [57.375, 57.12, 58.395]
53 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet_xirb16d_dsmask.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | CONV_BODY: FBNet
5 | FBNET:
6 | ARCH: "xirb16d_dsmask"
7 | BN_TYPE: "bn"
8 | WIDTH_DIVISOR: 8
9 | DW_CONV_SKIP_BN: True
10 | DW_CONV_SKIP_RELU: True
11 | DET_HEAD_LAST_SCALE: -1.0
12 | RPN:
13 | ANCHOR_SIZES: (16, 32, 64, 128, 256)
14 | ANCHOR_STRIDE: (16, )
15 | BATCH_SIZE_PER_IMAGE: 256
16 | PRE_NMS_TOP_N_TRAIN: 6000
17 | PRE_NMS_TOP_N_TEST: 6000
18 | POST_NMS_TOP_N_TRAIN: 2000
19 | POST_NMS_TOP_N_TEST: 100
20 | RPN_HEAD: FBNet.rpn_head
21 | ROI_HEADS:
22 | BATCH_SIZE_PER_IMAGE: 512
23 | ROI_BOX_HEAD:
24 | POOLER_RESOLUTION: 6
25 | FEATURE_EXTRACTOR: FBNet.roi_head
26 | NUM_CLASSES: 81
27 | ROI_MASK_HEAD:
28 | POOLER_RESOLUTION: 6
29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 | PREDICTOR: "MaskRCNNConv1x1Predictor"
31 | RESOLUTION: 12
32 | SHARE_BOX_FEATURE_EXTRACTOR: False
33 | MASK_ON: True
34 | DATASETS:
35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 | TEST: ("coco_2014_minival",)
37 | SOLVER:
38 | BASE_LR: 0.06
39 | WARMUP_FACTOR: 0.1
40 | WEIGHT_DECAY: 0.0001
41 | STEPS: (60000, 80000)
42 | MAX_ITER: 90000
43 | IMS_PER_BATCH: 128 # for 8GPUs
44 | # TEST:
45 | # IMS_PER_BATCH: 8
46 | INPUT:
47 | MIN_SIZE_TRAIN: (320, )
48 | MAX_SIZE_TRAIN: 640
49 | MIN_SIZE_TEST: 320
50 | MAX_SIZE_TEST: 640
51 | PIXEL_MEAN: [103.53, 116.28, 123.675]
52 | PIXEL_STD: [57.375, 57.12, 58.395]
53 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/e2e_mask_rcnn_fbnet_xirb16d_dsmask_600.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | BACKBONE:
4 | CONV_BODY: FBNet
5 | FBNET:
6 | ARCH: "xirb16d_dsmask"
7 | BN_TYPE: "bn"
8 | WIDTH_DIVISOR: 8
9 | DW_CONV_SKIP_BN: True
10 | DW_CONV_SKIP_RELU: True
11 | DET_HEAD_LAST_SCALE: 0.0
12 | RPN:
13 | ANCHOR_SIZES: (32, 64, 128, 256, 512)
14 | ANCHOR_STRIDE: (16, )
15 | BATCH_SIZE_PER_IMAGE: 256
16 | PRE_NMS_TOP_N_TRAIN: 6000
17 | PRE_NMS_TOP_N_TEST: 6000
18 | POST_NMS_TOP_N_TRAIN: 2000
19 | POST_NMS_TOP_N_TEST: 200
20 | RPN_HEAD: FBNet.rpn_head
21 | ROI_HEADS:
22 | BATCH_SIZE_PER_IMAGE: 256
23 | ROI_BOX_HEAD:
24 | POOLER_RESOLUTION: 6
25 | FEATURE_EXTRACTOR: FBNet.roi_head
26 | NUM_CLASSES: 81
27 | ROI_MASK_HEAD:
28 | POOLER_RESOLUTION: 6
29 | FEATURE_EXTRACTOR: FBNet.roi_head_mask
30 | PREDICTOR: "MaskRCNNConv1x1Predictor"
31 | RESOLUTION: 12
32 | SHARE_BOX_FEATURE_EXTRACTOR: False
33 | MASK_ON: True
34 | DATASETS:
35 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
36 | TEST: ("coco_2014_minival",)
37 | SOLVER:
38 | BASE_LR: 0.06
39 | WARMUP_FACTOR: 0.1
40 | WEIGHT_DECAY: 0.0001
41 | STEPS: (60000, 80000)
42 | MAX_ITER: 90000
43 | IMS_PER_BATCH: 128 # for 8GPUs
44 | # TEST:
45 | # IMS_PER_BATCH: 8
46 | INPUT:
47 | MIN_SIZE_TRAIN: (600, )
48 | MAX_SIZE_TRAIN: 1000
49 | MIN_SIZE_TEST: 600
50 | MAX_SIZE_TEST: 1000
51 | PIXEL_MEAN: [103.53, 116.28, 123.675]
52 | PIXEL_STD: [57.375, 57.12, 58.395]
53 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
4 | RPN_ONLY: True
5 | BACKBONE:
6 | CONV_BODY: "R-101-FPN"
7 | RESNETS:
8 | BACKBONE_OUT_CHANNELS: 256
9 | RPN:
10 | USE_FPN: True
11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 2000
14 | FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 | TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 | SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 | BASE_LR: 0.02
22 | WEIGHT_DECAY: 0.0001
23 | STEPS: (60000, 80000)
24 | MAX_ITER: 90000
25 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN_ONLY: True
5 | RPN:
6 | PRE_NMS_TOP_N_TEST: 12000
7 | POST_NMS_TOP_N_TEST: 2000
8 | DATASETS:
9 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
10 | TEST: ("coco_2014_minival",)
11 | SOLVER:
12 | BASE_LR: 0.02
13 | WEIGHT_DECAY: 0.0001
14 | STEPS: (60000, 80000)
15 | MAX_ITER: 90000
16 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/MSRA/R-50"
4 | RPN_ONLY: True
5 | BACKBONE:
6 | CONV_BODY: "R-50-FPN"
7 | RESNETS:
8 | BACKBONE_OUT_CHANNELS: 256
9 | RPN:
10 | USE_FPN: True
11 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
12 | PRE_NMS_TOP_N_TEST: 1000
13 | POST_NMS_TOP_N_TEST: 2000
14 | FPN_POST_NMS_TOP_N_TEST: 2000
15 | DATASETS:
16 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
17 | TEST: ("coco_2014_minival",)
18 | DATALOADER:
19 | SIZE_DIVISIBILITY: 32
20 | SOLVER:
21 | BASE_LR: 0.02
22 | WEIGHT_DECAY: 0.0001
23 | STEPS: (60000, 80000)
24 | MAX_ITER: 90000
25 |
--------------------------------------------------------------------------------
/configs/yaml_hist_MaskRCNN/rpn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | META_ARCHITECTURE: "GeneralizedRCNN"
3 | WEIGHT: "catalog://ImageNetPretrained/FAIR/20171220/X-101-32x8d"
4 | RPN_ONLY: True
5 | BACKBONE:
6 | CONV_BODY: "R-101-FPN"
7 | RESNETS:
8 | BACKBONE_OUT_CHANNELS: 256
9 | STRIDE_IN_1X1: False
10 | NUM_GROUPS: 32
11 | WIDTH_PER_GROUP: 8
12 | RPN:
13 | USE_FPN: True
14 | ANCHOR_STRIDE: (4, 8, 16, 32, 64)
15 | PRE_NMS_TOP_N_TEST: 1000
16 | POST_NMS_TOP_N_TEST: 2000
17 | FPN_POST_NMS_TOP_N_TEST: 2000
18 | DATASETS:
19 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
20 | TEST: ("coco_2014_minival",)
21 | DATALOADER:
22 | SIZE_DIVISIBILITY: 32
23 | SOLVER:
24 | BASE_LR: 0.02
25 | WEIGHT_DECAY: 0.0001
26 | STEPS: (60000, 80000)
27 | MAX_ITER: 90000
28 |
--------------------------------------------------------------------------------
/data_analysis/fast_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 |
9 | from data_analysis.fast_rcnn import nms_wrapper
--------------------------------------------------------------------------------
/data_analysis/fast_rcnn/bbox_transform_pytorch.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
def bbox_transform_inv(boxes, deltas):
    """Decode regression deltas into boxes relative to reference boxes.

    Boxes follow the inclusive-pixel ``(x1, y1, x2, y2)`` convention used in
    this module, i.e. the width of a box is ``x2 - x1 + 1``.

    Args:
        boxes: 2-D tensor whose first four columns are ``x1, y1, x2, y2`` of
            the reference boxes, one box per row.
        deltas: 2-D tensor with the same number of rows as ``boxes`` and
            ``4 * K`` columns laid out as ``(dx, dy, dw, dh)`` repeated ``K``
            times (``K == 1`` is the plain one-box-per-row case).

    Returns:
        A new tensor shaped like ``deltas`` holding the decoded
        ``(x1, y1, x2, y2)`` values. Neither input is modified.
    """
    # Keep a trailing axis of size 1 on every per-box quantity so that it
    # broadcasts against the (N, K) strided slices of ``deltas``. Assigning a
    # 1-D (N,) value into an (N, 1) slice does not broadcast for N > 1.
    widths = (boxes[:, 2] - boxes[:, 0] + 1.0).unsqueeze(1)
    heights = (boxes[:, 3] - boxes[:, 1] + 1.0).unsqueeze(1)
    ctr_x = boxes[:, 0].unsqueeze(1) + 0.5 * widths
    ctr_y = boxes[:, 1].unsqueeze(1) + 0.5 * heights

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = torch.exp(dw) * widths
    pred_h = torch.exp(dh) * heights

    # Clone so the result has the dtype/device/shape of ``deltas`` while the
    # caller's tensor stays untouched.
    pred_boxes = deltas.clone()
    # x1
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    # y1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    # x2
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    # y2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h

    return pred_boxes
31 |
32 |
def clip_boxes(boxes, im_shape):
    """Return a copy of ``boxes`` clamped to the image bounds.

    Columns are read in groups of four as ``x1, y1, x2, y2``. x values are
    clamped to ``[0, im_shape[1] - 1]`` and y values to
    ``[0, im_shape[0] - 1]``. The input tensor is not modified.
    """
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    clipped = boxes.clone()
    # Offsets 0..3 inside each group of four are x1, y1, x2, y2.
    for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
        clipped[:, offset::4] = boxes[:, offset::4].clamp(0, upper)
    return clipped
41 |
def clip_rois(boxes, im_shape):
    """Return a copy of 5-column RoIs with their coordinates clamped.

    Columns are read in groups of five. Column 0 of each group is left as is;
    columns 1 and 3 are clamped to ``[0, im_shape[1] - 1]`` and columns 2 and
    4 to ``[0, im_shape[0] - 1]``. The input tensor is not modified.
    """
    max_x = im_shape[1] - 1
    max_y = im_shape[0] - 1
    clipped = boxes.clone()
    # Offsets 1..4 inside each group of five are x1, y1, x2, y2.
    for offset, upper in enumerate((max_x, max_y, max_x, max_y), start=1):
        clipped[:, offset::5] = boxes[:, offset::5].clamp(0, upper)
    return clipped
--------------------------------------------------------------------------------
/data_analysis/fast_rcnn/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from data_analysis.nms.cpu_nms import cpu_nms
9 | from data_analysis.nms.gpu_nms import gpu_nms
10 |
11 | from .config import cfg
12 |
13 |
def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression with the CPU or GPU implementation.

    Returns an empty list when ``dets`` has no rows. Otherwise the GPU
    implementation is used when ``cfg.USE_GPU_NMS`` is set and ``force_cpu``
    is false; in every other case the CPU implementation is used.
    """
    if dets.shape[0] == 0:
        return []

    use_gpu = cfg.USE_GPU_NMS and not force_cpu
    if not use_gpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
23 |
--------------------------------------------------------------------------------
/data_analysis/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/data_analysis/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/nms/__init__.py
--------------------------------------------------------------------------------
/data_analysis/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
# C-level float32 maximum, used in the inner loop without Python call overhead.
cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
    return a if a >= b else b

# C-level float32 minimum, used in the inner loop without Python call overhead.
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
    return a if a <= b else b

def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression on the CPU.

    Args:
        dets: float32 array whose columns 0-3 are ``x1, y1, x2, y2`` and whose
            column 4 is the score, one detection per row.
        thresh: a lower-scoring box is suppressed when its overlap
            (intersection over union) with a kept box is ``>= thresh``.

    Returns:
        List of row indices into ``dets`` that are kept, in descending score
        order.
    """
    # NOTE(review): `np.float` in the signature is resolved by Cython at
    # compile time; confirm it still builds with the project's Cython/NumPy.
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # Inclusive-pixel box areas (hence the +1).
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() yields intp indices; declaring the buffer as intp_t keeps the
    # dtype check valid on platforms where C long is narrower than a pointer.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # `np.int` was removed from NumPy (1.24); np.intp matches the buffer type.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # Only boxes ranked below box i can still be suppressed by it.
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep
69 |
--------------------------------------------------------------------------------
/data_analysis/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/data_analysis/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
# The extern declaration below passes an int32 buffer where the C++ prototype
# in gpu_nms.hpp takes `int*`, so both types must have the same size.
assert sizeof(int) == sizeof(np.int32_t)

cdef extern from "gpu_nms.hpp":
    void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)

def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
            np.int32_t device_id=0):
    """Run non-maximum suppression through the external ``_nms`` routine.

    Args:
        dets: float32 array with one detection per row; column 4 is read as
            the score used for ordering. The remaining columns are passed
            through to ``_nms`` unchanged.
        thresh: overlap threshold forwarded to ``_nms``.
        device_id: device index forwarded to ``_nms``.

    Returns:
        List of row indices into ``dets`` selected by ``_nms``; an empty list
        when ``dets`` has no rows.
    """
    cdef int boxes_num = dets.shape[0]
    cdef int boxes_dim = dets.shape[1]
    cdef int num_out
    # With zero rows, `&keep[0]` and `&sorted_dets[0, 0]` below would index
    # empty buffers, so answer directly.
    if boxes_num == 0:
        return []
    cdef np.ndarray[np.int32_t, ndim=1] \
        keep = np.zeros(boxes_num, dtype=np.int32)
    cdef np.ndarray[np.float32_t, ndim=1] \
        scores = dets[:, 4]
    # argsort() yields intp indices; intp_t keeps the buffer dtype check valid
    # on platforms where C long is narrower than a pointer.
    cdef np.ndarray[np.intp_t, ndim=1] \
        order = scores.argsort()[::-1]
    # Rows are handed to _nms in descending score order.
    cdef np.ndarray[np.float32_t, ndim=2] \
        sorted_dets = dets[order, :]
    _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
    keep = keep[:num_out]
    # `keep` indexes the sorted rows; map back to row indices of `dets`.
    return list(order[keep])
32 |
--------------------------------------------------------------------------------
/data_analysis/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
def py_cpu_nms(dets, thresh):
    """Reference NumPy implementation of greedy non-maximum suppression.

    ``dets`` has one detection per row with columns ``x1, y1, x2, y2, score``.
    Boxes are visited in descending score order; each visited box is kept and
    only boxes whose overlap with it is ``<= thresh`` stay in the queue.
    Returns the list of kept row indices.
    """
    left, top, right, bottom, conf = (dets[:, col] for col in range(5))

    # Inclusive-pixel box areas (hence the +1).
    box_area = (right - left + 1) * (bottom - top + 1)
    remaining = conf.argsort()[::-1]

    kept = []
    while remaining.size > 0:
        best = remaining[0]
        others = remaining[1:]
        kept.append(best)

        # Intersection rectangle of the best box with every other candidate.
        inter_left = np.maximum(left[best], left[others])
        inter_top = np.maximum(top[best], top[others])
        inter_right = np.minimum(right[best], right[others])
        inter_bottom = np.minimum(bottom[best], bottom[others])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        overlap = inter_w * inter_h
        iou = overlap / (box_area[best] + box_area[others] - overlap)

        survivors = np.where(iou <= thresh)[0]
        remaining = others[survivors]

    return kept
39 |
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/embed_ba_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/embed_ba_0.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/embed_ba_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/embed_ba_1.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/lstm1_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/lstm1_0.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/lstm1_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/lstm1_1.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/lstm1_2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/lstm1_2.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_bbox_pred_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_bbox_pred_0.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_bbox_pred_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_bbox_pred_1.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_score_fc_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_score_fc_0.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/query_score_fc_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/query_score_fc_1.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/qv_fc1_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/qv_fc1_0.npy
--------------------------------------------------------------------------------
/data_analysis/pretrain_weight/qv_fc1_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/pretrain_weight/qv_fc1_1.npy
--------------------------------------------------------------------------------
/data_analysis/sng_parser/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : __init__.py
4 | # Author : Jiayuan Mao
5 | # Email : maojiayuan@gmail.com
6 | # Date : 08/21/2018
7 | #
8 | # This file is part of SceneGraphParser.
9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 |
12 | from .parser import *
13 | from .backends import *
14 | from .utils import *
15 |
16 | __version__ = (0, 1, 0)
17 | __author__ = 'Jiayuan Mao'
18 | __email__ = 'maojiayuan@gmail.com'
19 |
20 |
--------------------------------------------------------------------------------
/data_analysis/sng_parser/_data/phrasal-preps.txt:
--------------------------------------------------------------------------------
1 | in addition to
2 | in front of
3 | in reference to
4 | in regard to
5 | in spite of
6 | on account of
7 | on top of
8 | on side of
9 | on the side of
10 | with regard to
11 |
12 |
--------------------------------------------------------------------------------
/data_analysis/sng_parser/backends/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : __init__.py
4 | # Author : Jiayuan Mao
5 | # Email : maojiayuan@gmail.com
6 | # Date : 08/21/2018
7 | #
8 | # This file is part of SceneGraphParser.
9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 |
12 | from .spacy_parser import *
13 |
14 |
--------------------------------------------------------------------------------
/data_analysis/sng_parser/backends/backend.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : backend.py
4 | # Author : Jiayuan Mao
5 | # Email : maojiayuan@gmail.com
6 | # Date : 08/21/2018
7 | #
8 | # This file is part of SceneGraphParser.
9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 |
12 |
class ParserBackend(object):
    """
    Base class for all parser backends. It lists the methods
    that subclasses are expected to override.
    """

    def parse(self, sentence):
        """Parse ``sentence``; always raises here, subclasses must override."""
        raise NotImplementedError()
21 |
22 |
--------------------------------------------------------------------------------
/data_analysis/sng_parser/database.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : database.py
4 | # Author : Jiayuan Mao
5 | # Email : maojiayuan@gmail.com
6 | # Date : 08/23/2018
7 | #
8 | # This file is part of SceneGraphParser.
9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 |
12 | import os.path as osp
13 |
14 |
# Maps a data-file name to the set of entries already loaded from it.
_caches = dict()


def load_list(filename):
    """Return the set of non-empty, stripped lines of a bundled data file.

    The file is looked up in the ``_data`` directory next to this module. The
    result is cached per ``filename``, so the file is read at most once and
    repeated calls return the same set object.
    """
    if filename not in _caches:
        path = osp.join(osp.dirname(__file__), '_data', filename)
        out = set()
        # `with` closes the handle even if reading fails; iterating a bare
        # open() call would leave it to the garbage collector.
        with open(path) as f:
            for x in f:
                x = x.strip()
                if len(x) > 0:
                    out.add(x)
        _caches[filename] = out
    return _caches[filename]
27 |
28 |
def is_phrasal_verb(verb):
    """Return True if ``verb`` is an entry of ``phrasal-verbs.txt``."""
    known_verbs = load_list('phrasal-verbs.txt')
    return verb in known_verbs
31 |
32 |
def is_phrasal_prep(prep):
    """Return True if ``prep`` is an entry of ``phrasal-preps.txt``."""
    known_preps = load_list('phrasal-preps.txt')
    return prep in known_preps
35 |
36 |
def is_scene_noun(noun):
    """Return True if ``noun`` or its last space-separated word is listed.

    The lookup list is ``scene-nouns.txt``.
    """
    scene_nouns = load_list('scene-nouns.txt')
    if noun in scene_nouns:
        return True
    # Fall back to the final word of a multi-word noun phrase.
    last_word = noun.rsplit(' ', 1)[-1]
    return last_word in scene_nouns
41 |
42 |
--------------------------------------------------------------------------------
/data_analysis/sng_parser/utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : utils.py
4 | # Author : Jiayuan Mao
5 | # Email : maojiayuan@gmail.com
6 | # Date : 08/21/2018
7 | #
8 | # This file is part of SceneGraphParser.
9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 |
12 | import functools
13 | import tabulate
14 |
15 |
16 | __all__ = ['tprint']
17 |
18 |
def tprint(graph, file=None, show_entities=True, show_relations=True):
    """
    Write a parsed scene graph as text tables.

    ``graph['entities']`` is rendered as a Head / Span / Modifiers table and
    ``graph['relations']`` as a Subject / Relation / Object table, with all
    cell text lower-cased. Output goes to ``file`` (``print``'s default when
    ``file`` is None). Either table can be switched off with the flags.
    """

    emit = functools.partial(print, file=file)

    if show_entities:
        emit('Entities:')

        entity_rows = []
        for entity in graph['entities']:
            head = entity['head'].lower()
            span = entity['span'].lower()
            modifiers = ','.join([m['span'].lower() for m in entity['modifiers']])
            entity_rows.append([head, span, modifiers])
        emit(tabulate.tabulate(entity_rows, headers=['Head', 'Span', 'Modifiers'], tablefmt=_tabulate_format))

    if show_relations:
        emit('Relations:')

        nodes = graph['entities']
        relation_rows = []
        for rel in graph['relations']:
            # 'subject' and 'object' are indices into the entity list.
            subject_head = nodes[rel['subject']]['head'].lower()
            predicate = rel['relation'].lower()
            object_head = nodes[rel['object']]['head'].lower()
            relation_rows.append([subject_head, predicate, object_head])
        emit(tabulate.tabulate(relation_rows, headers=['Subject', 'Relation', 'Object'], tablefmt=_tabulate_format))
49 |
50 |
# Table style passed as ``tablefmt`` to ``tabulate.tabulate`` by ``tprint``.
# It sets rule lines above the table, below the header and below the table,
# draws no rule between data rows, uses "|" in header and data rows, and pads
# cells by one character.
_tabulate_format = tabulate.TableFormat(
    lineabove=tabulate.Line("+", "-", "+", "+"),
    linebelowheader=tabulate.Line("|", "-", "+", "|"),
    linebetweenrows=None,
    linebelow=tabulate.Line("+", "-", "+", "+"),
    headerrow=tabulate.DataRow("|", "|", "|"),
    datarow=tabulate.DataRow("|", "|", "|"),
    padding=1, with_header_hide=None
)
60 |
--------------------------------------------------------------------------------
/data_analysis/tools/.gitignore:
--------------------------------------------------------------------------------
1 | /*.txt
2 |
3 |
--------------------------------------------------------------------------------
/data_analysis/tools/get-phrasal-verbs.sh:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | #
3 | # get-phrasal-verbs.sh
4 | # Copyright (C) 2018 Jiayuan Mao
5 | #
6 | # Distributed under terms of the MIT license.
7 |
# Trace each command as it runs.
set -x

# Start from an empty list; the loop below appends to it.
rm -f ../sng_parser/_data/phrasal-verbs.txt

# The letters are spelled out because `{A..Z}` brace expansion is a bash/zsh
# extension: a POSIX /bin/sh leaves it unexpanded and the loop would run once
# with the literal string "{A..Z}".
for alpha in A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
do
    echo curling $alpha
    curl \
        -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.89 Safari/537.36" \
        http://www.english-for-students.com/Phrasal-Verbs-$alpha.html | python3 parse-eos.py >> ../sng_parser/_data/phrasal-verbs.txt
    # Pause between requests.
    sleep 3
done
20 |
21 |
--------------------------------------------------------------------------------
/data_analysis/tools/get-scene-nouns.sh:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | #
3 | # get-scene-nouns.sh
4 | # Copyright (C) 2018 Jiayuan Mao
5 | #
6 | # Distributed under terms of the MIT license.
7 | #
8 |
9 | set -x
10 |
11 | curl https://raw.githubusercontent.com/CSAILVision/places365/master/IO_places365.txt | \
12 | python3 process-scene-nouns.py > \
13 | ../sng_parser/_data/scene-nouns.txt
14 |
--------------------------------------------------------------------------------
/data_analysis/tools/parse-eos.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : parse-eos.py
4 | # Author : Jiayuan Mao
5 | # Email : maojiayuan@gmail.com
6 | # Date : 08/23/2018
7 | #
8 | # This file is part of SceneGraphParser.
9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 |
12 | import re
13 |
14 |
def main():
    """Filter phrasal-verb entries out of an HTML page read from stdin.

    Every run of text that directly precedes a ``</b>`` on one line is
    collected. The first character of the entry two thirds of the way through
    that list is taken as the page's letter. Entries starting with that letter
    are printed lower-cased to stdout; all others are reported on stderr.
    If nothing matches, an error is reported on stderr and nothing is printed.
    """
    import sys

    text = '\n'.join(sys.stdin.readlines())
    # Raw string: same pattern as before, without the invalid "\/" escape.
    groups = re.findall(r"(.*?)<\/b>", text, re.MULTILINE)

    if not groups:
        # stdout is redirected into the data file by the calling script, so
        # the diagnostic goes to stderr. Stop here: indexing an empty list
        # below would raise IndexError.
        print('Error to get the page.', file=sys.stderr)
        return

    current = groups[len(groups) // 3 * 2][0].lower()
    print('Current', current, file=sys.stderr)

    for g in groups:
        g = g.lower()
        if not g.startswith(current):
            print('Filter: ', g, file=sys.stderr)
        else:
            print(g)
33 |
34 |
35 | if __name__ == '__main__':
36 | main()
37 |
38 |
--------------------------------------------------------------------------------
/data_analysis/tools/process-scene-nouns.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : process-scene-nouns.py
4 | # Author : Jiayuan Mao
5 | # Email : maojiayuan@gmail.com
6 | # Date : 08/23/2018
7 | #
8 | # This file is part of SceneGraphParser.
9 | # Distributed under terms of the MIT license.
10 | # https://github.com/vacancy/SceneGraphParser
11 |
12 | import sys
13 | import spacy
14 |
# NOTE(review): `nlp` is not referenced anywhere else in this script, so this
# model load looks unnecessary -- confirm before removing.
nlp = spacy.load('en')
16 |
# Nouns that are always added to the output, whatever arrives on stdin.
extra_nouns = ['cabin', 'airport', 'terminal', 'arcade', 'park', 'apartment', 'gallery', 'school', 'studio', 'loft', 'field', 'factory', 'showroom', 'bank', 'banquet', 'court', 'salon', 'laboratory', 'station', 'store', 'lab', 'room', 'conference', 'dorm', 'lobby', 'entrance', 'restaurant', 'market', 'office', 'theater', 'skating', 'jail', 'kindergarden', 'dock', 'gym', 'cubicles', 'residential', 'mall', 'resort', 'hole', 'hostel']


def main():
    """Turn category lines read from stdin into a sorted noun list on stdout.

    Each input line is expected to look like ``/<letter>/<name>[/<sub>] <id>``.
    The first whitespace-separated field is split on ``/``, the first two
    pieces are dropped, and underscores in the remaining pieces become spaces.
    The result is merged with ``extra_nouns`` and printed one noun per line in
    sorted order. Blank lines are skipped.
    """
    nouns = set()
    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # A strict two-way unpack would raise ValueError on a blank line.
            continue
        category = fields[0]
        for x in category.split('/')[2:]:
            parts = x.split('_')
            nouns.add(' '.join(parts))

    nouns.update(set(extra_nouns))

    for n in sorted(nouns):
        print(n)
33 |
34 |
35 | if __name__ == '__main__':
36 | main()
37 |
38 |
--------------------------------------------------------------------------------
/data_analysis/untitled.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/untitled.txt
--------------------------------------------------------------------------------
/data_analysis/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/data_analysis/utils/__init__.py
--------------------------------------------------------------------------------
/data_analysis/utils/collections.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 |
16 | """A simple attribute dictionary used for representing configuration options."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 |
class AttrDict(dict):
    """A dict whose items can also be read and written as attributes.

    An immutability flag is kept in the instance ``__dict__`` (not among the
    dict items); while it is set, attribute assignment raises AttributeError.
    """

    IMMUTABLE = '__immutable__'

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__[AttrDict.IMMUTABLE] = False

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails: check the instance
        # __dict__ first, then the dict items.
        own_attrs = self.__dict__
        if name in own_attrs:
            return own_attrs[name]
        if name in self:
            return self[name]
        raise AttributeError(name)

    def __setattr__(self, name, value):
        if self.__dict__[AttrDict.IMMUTABLE]:
            raise AttributeError(
                'Attempted to set "{}" to "{}", but AttrDict is immutable'.
                format(name, value)
            )
        # Names already in the instance __dict__ stay there; everything else
        # is stored as a dict item.
        target = self.__dict__ if name in self.__dict__ else self
        target[name] = value

    def immutable(self, is_immutable):
        """Set the immutability flag here and on every nested AttrDict.

        Nested AttrDicts are looked for among both the instance attributes
        and the dict values.
        """
        self.__dict__[AttrDict.IMMUTABLE] = is_immutable
        for container in (self.__dict__, self):
            for child in container.values():
                if isinstance(child, AttrDict):
                    child.immutable(is_immutable)

    def is_immutable(self):
        """Return the current immutability flag."""
        return self.__dict__[AttrDict.IMMUTABLE]
67 |
--------------------------------------------------------------------------------
/data_analysis/utils/detectron_weight_helper.py:
--------------------------------------------------------------------------------
1 | """Helper functions for loading pretrained weights from Detectron pickle files
2 | """
3 |
4 | import pickle
5 | import re
6 | import torch
7 |
8 |
def load_detectron_weight(net, detectron_weight_file):
    """Copy weights from a Detectron pickle file into ``net`` in place.

    ``net.detectron_weight_mapping`` supplies a mapping from each parameter
    name of ``net.state_dict()`` to a blob name in the pickle. Parameters whose
    mapped value is not a string, or whose blob is absent from the file, are
    left unchanged. A parameter name missing from the mapping raises KeyError.
    """
    blob_name_for, _orphans = net.detectron_weight_mapping

    # NOTE: pickle.load executes arbitrary code from the file; only load
    # weight files from a trusted source.
    with open(detectron_weight_file, 'rb') as handle:
        blobs = pickle.load(handle, encoding='latin1')
    # Some files wrap the weights in a top-level 'blobs' entry.
    if 'blobs' in blobs:
        blobs = blobs['blobs']

    for param_name, param in net.state_dict().items():
        blob_name = blob_name_for[param_name]
        if not isinstance(blob_name, str):  # maybe str, None or True
            continue
        values = blobs.get(blob_name)
        if values is None:
            continue
        param.copy_(torch.Tensor(values))
24 |
def resnet_weights_name_pattern():
    """Return the compiled regex used to recognise ResNet weight blob names."""
    return re.compile(r"conv1_w|conv1_gn_[sb]|res_conv1_.+|res\d+_\d+_.+")
28 |
29 |
if __name__ == '__main__':
    # Self-check: the Detectron weight mapping of a freshly built model must
    # cover exactly the keys of its state dict.
    from pprint import pprint
    import sys
    sys.path.insert(0, '..')
    from modeling.model_builder import Generalized_RCNN
    from core.config import cfg, cfg_from_file

    cfg.MODEL.NUM_CLASSES = 81
    cfg_from_file('../../cfgs/res50_mask.yml')
    net = Generalized_RCNN()

    # pprint(list(net.state_dict().keys()), width=1)

    mapping, orphans = net.detectron_weight_mapping
    state_dict = net.state_dict()

    # Every mapped name must be a real parameter ...
    for mapped_name in mapping:
        assert mapped_name in state_dict, '%s' % mapped_name

    # ... and no parameter may be missing from the mapping.
    rest = set(state_dict) - set(mapping)
    assert len(rest) == 0
52 |
--------------------------------------------------------------------------------
/data_analysis/utils/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 |
16 | """Environment helper functions."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import os
24 | import sys
25 |
26 | # Default value of the CMake install prefix
27 | _CMAKE_INSTALL_PREFIX = '/usr/local'
28 |
29 |
def get_runtime_dir():
    """Return the runtime directory, i.e. the current working directory."""
    runtime_dir = os.getcwd()
    return runtime_dir
33 |
34 |
def get_py_bin_ext():
    """Return the file extension used for python binaries (``'.py'``)."""
    extension = '.py'
    return extension
38 |
39 |
def set_up_matplotlib():
    """Select matplotlib's non-interactive 'Agg' backend."""
    # Imported lazily so that importing this module does not pull in matplotlib.
    import matplotlib as mpl
    mpl.use('Agg')
45 |
46 |
def exit_on_error():
    """Terminate the process with exit status 1 (raises ``SystemExit``)."""
    failure_status = 1
    sys.exit(failure_status)
50 |
--------------------------------------------------------------------------------
/data_analysis/utils/image.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 |
16 | """Image helper functions."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | from __future__ import unicode_literals
22 |
23 | import cv2
24 | import numpy as np
25 |
26 |
def aspect_ratio_rel(im, aspect_ratio):
    """Resize ``im`` so its width is scaled by ``aspect_ratio``.

    The height (``im.shape[0]``) is kept; the new width is the rounded
    product of ``aspect_ratio`` and the original width (``im.shape[1]``).
    """
    height, width = im.shape[:2]
    scaled_width = int(round(aspect_ratio * width))
    return cv2.resize(im, dsize=(scaled_width, height))
33 |
34 |
def aspect_ratio_abs(im, aspect_ratio):
    """Resize ``im`` to width/height == ``aspect_ratio`` at (about) equal area.

    The target width and height are the square roots of ``area * ratio`` and
    ``area / ratio``; both are truncated to integers before resizing.
    """
    height, width = im.shape[:2]
    area = height * width

    target_w = np.sqrt(area * aspect_ratio)
    target_h = np.sqrt(area / aspect_ratio)
    # Sanity check on the un-truncated dimensions.
    assert np.isclose(target_w / target_h, aspect_ratio)

    return cv2.resize(im, dsize=(int(target_w), int(target_h)))
46 |
--------------------------------------------------------------------------------
/data_analysis/utils/timer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import time
7 |
8 |
class Timer(object):
    """Wall-clock stopwatch that accumulates a running average over calls."""

    def __init__(self):
        self.reset()

    def tic(self):
        """Record the start of a timed interval."""
        # time.time is used rather than time.clock because time.clock
        # does not normalize for multithreading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Close the interval opened by ``tic`` and update the statistics.

        Returns the running average duration when ``average`` is true,
        otherwise the duration of the interval just closed.
        """
        now = time.time()
        self.diff = now - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff

    def reset(self):
        """Zero every counter and statistic."""
        self.total_time = self.start_time = self.diff = self.average_time = 0.
        self.calls = 0
36 |
--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
1 | ## Webcam and Jupyter notebook demo
2 |
3 | This folder contains a simple webcam demo that illustrates how you can use `maskrcnn_benchmark` for inference.
4 |
5 |
6 | ### With your preferred environment
7 |
8 | You can start it by running it from this folder, using one of the following commands:
9 | ```bash
10 | # by default, it runs on the GPU
11 | # for best results, use min-image-size 800
12 | python webcam.py --min-image-size 800
13 | # can also run it on the CPU
14 | python webcam.py --min-image-size 300 MODEL.DEVICE cpu
15 | # or change the model that you want to use
16 | python webcam.py --config-file ../configs/caffe2/e2e_mask_rcnn_R_101_FPN_1x_caffe2.yaml --min-image-size 300 MODEL.DEVICE cpu
17 | # in order to see the probability heatmaps, pass --show-mask-heatmaps
18 | python webcam.py --min-image-size 300 --show-mask-heatmaps MODEL.DEVICE cpu
19 | ```
20 |
21 | ### With Docker
22 |
23 | Build the image with the tag `maskrcnn-benchmark` (check [INSTALL.md](../INSTALL.md) for instructions)
24 |
25 | Adjust permissions of the X server host (be careful with this step, refer to
26 | [here](http://wiki.ros.org/docker/Tutorials/GUI) for alternatives)
27 |
28 | ```bash
29 | xhost +
30 | ```
31 |
32 | Then run a container with the demo:
33 |
34 | ```
35 | docker run --rm -it \
36 | -e DISPLAY=${DISPLAY} \
37 | --privileged \
38 | -v /tmp/.X11-unix:/tmp/.X11-unix \
39 | --device=/dev/video0:/dev/video0 \
40 | --ipc=host maskrcnn-benchmark \
41 | python demo/webcam.py --min-image-size 300
42 | ```
43 |
44 | **DISCLAIMER:** *This was tested on an Ubuntu 16.04 machine;
45 | the volume mapping may vary depending on your platform.*
46 |
--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/demo/demo_e2e_mask_rcnn_R_50_FPN_1x.png
--------------------------------------------------------------------------------
/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/demo/demo_e2e_mask_rcnn_X_101_32x8d_FPN_1x.png
--------------------------------------------------------------------------------
/demo/webcam.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import argparse
3 | import cv2
4 |
5 | from maskrcnn_benchmark.config import cfg
6 | from predictor import COCODemo
7 |
8 | import time
9 |
10 |
def main():
    """Run the webcam demo.

    Parses the command line, merges the chosen config file and overrides
    into ``cfg``, builds a ``COCODemo`` predictor and then shows annotated
    frames from camera 0 until ESC is pressed or a frame cannot be read.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="../configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.7,
        help="Minimum score for the prediction to be shown",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=224,
        help="Smallest size of the image to feed to the model. "
            "Model was trained with 800, which gives best results",
    )
    parser.add_argument(
        "--show-mask-heatmaps",
        dest="show_mask_heatmaps",
        help="Show a heatmap probability for the top masks-per-dim masks",
        action="store_true",
    )
    parser.add_argument(
        "--masks-per-dim",
        type=int,
        default=2,
        help="Number of heatmaps per dimension to show",
    )
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        confidence_threshold=args.confidence_threshold,
        show_mask_heatmaps=args.show_mask_heatmaps,
        masks_per_dim=args.masks_per_dim,
        min_image_size=args.min_image_size,
    )

    cam = cv2.VideoCapture(0)
    try:
        while True:
            start_time = time.time()
            ret_val, img = cam.read()
            if not ret_val:
                # read() reports failure through its first return value; the
                # frame is then unusable, so stop instead of running inference.
                print("Failed to read a frame from the camera; stopping.")
                break
            composite = coco_demo.run_on_opencv_image(img)
            print("Time: {:.2f} s / img".format(time.time() - start_time))
            cv2.imshow("COCO detections", composite)
            if cv2.waitKey(1) == 27:
                break  # esc to quit
    finally:
        # Always give the capture device back, even if inference raised.
        cam.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
81 |
--------------------------------------------------------------------------------
/killpy.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Force-kill (SIGKILL) every process whose `ps -ef` line mentions "python".
#
# The PID is taken from the second whitespace-separated field rather than
# from fixed character columns, so it stays correct whatever the width of
# the PID column. Nothing is executed when no process matches.
pids=$(ps -ef | grep python | grep -v grep | awk '{print $2}')
if [ -n "$pids" ]; then
    # Intentionally unquoted: $pids holds one PID per line.
    kill -9 $pids
fi
3 |
4 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .defaults import _C as cfg
3 |
4 |
def adjustment_for_relation(cfg):
    """Adjust ROI-head settings in ``cfg`` for relation models; return ``cfg``.

    When ``cfg.MODEL.RELATION_ON`` is set, the per-image ROI batch size
    becomes twice ``MAKE_PAIR_PROPOSAL_CNT``; if detection results are used
    for relations, the detections-per-image limit is set to that count too.
    ``cfg`` is modified in place.
    """
    model = cfg.MODEL
    if not model.RELATION_ON:
        return cfg

    relation = model.RELATION
    model.ROI_HEADS.BATCH_SIZE_PER_IMAGE = relation.MAKE_PAIR_PROPOSAL_CNT * 2
    # NOTE(review): assumes the detection-result override only applies while
    # relations are enabled -- confirm against the intended nesting.
    if relation.USE_DETECTION_RESULT_FOR_RELATION:
        model.ROI_HEADS.DETECTIONS_PER_IMG = relation.MAKE_PAIR_PROPOSAL_CNT
    return cfg
--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIAlign.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 |
4 | #include "cpu/vision.h"
5 |
6 | #ifdef WITH_CUDA
7 | #include "cuda/vision.h"
8 | #endif
9 |
10 | // Interface for Python
// ROIAlign forward pass: dispatches to the CUDA or the CPU implementation
// depending on whether `input` is a CUDA tensor. All arguments are forwarded
// unchanged to the selected implementation.
at::Tensor ROIAlign_forward(const at::Tensor& input,
                            const at::Tensor& rois,
                            const float spatial_scale,
                            const int pooled_height,
                            const int pooled_width,
                            const int sampling_ratio) {
  if (input.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
#else
    // A CUDA tensor was passed to a build made without WITH_CUDA.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
}
26 |
// ROIAlign backward pass. Only a CUDA implementation is wired up here:
// a CUDA `grad` is forwarded to ROIAlign_backward_cuda (when built with
// WITH_CUDA); any other tensor raises an error.
at::Tensor ROIAlign_backward(const at::Tensor& grad,
                             const at::Tensor& rois,
                             const float spatial_scale,
                             const int pooled_height,
                             const int pooled_width,
                             const int batch_size,
                             const int channels,
                             const int height,
                             const int width,
                             const int sampling_ratio) {
  if (grad.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
#else
    // A CUDA tensor was passed to a build made without WITH_CUDA.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // No CPU backward implementation is available.
  AT_ERROR("Not implemented on the CPU");
}
46 |
47 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/ROIPool.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 |
4 | #include "cpu/vision.h"
5 |
6 | #ifdef WITH_CUDA
7 | #include "cuda/vision.h"
8 | #endif
9 |
10 |
11 | std::tuple ROIPool_forward(const at::Tensor& input,
12 | const at::Tensor& rois,
13 | const float spatial_scale,
14 | const int pooled_height,
15 | const int pooled_width) {
16 | if (input.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
19 | #else
20 | AT_ERROR("Not compiled with GPU support");
21 | #endif
22 | }
23 | AT_ERROR("Not implemented on the CPU");
24 | }
25 |
// ROIPool backward pass. Only a CUDA implementation is wired up here:
// a CUDA `grad` is forwarded to ROIPool_backward_cuda (when built with
// WITH_CUDA); any other tensor raises an error.
at::Tensor ROIPool_backward(const at::Tensor& grad,
                            const at::Tensor& input,
                            const at::Tensor& rois,
                            const at::Tensor& argmax,
                            const float spatial_scale,
                            const int pooled_height,
                            const int pooled_width,
                            const int batch_size,
                            const int channels,
                            const int height,
                            const int width) {
  if (grad.type().is_cuda()) {
#ifdef WITH_CUDA
    return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
#else
    // A CUDA tensor was passed to a build made without WITH_CUDA.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // No CPU backward implementation is available.
  AT_ERROR("Not implemented on the CPU");
}
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 | // Interface for Python
// Sigmoid focal loss forward pass. Only a CUDA implementation is wired up
// here: CUDA `logits` are forwarded to SigmoidFocalLoss_forward_cuda (when
// built with WITH_CUDA); any other tensor raises an error.
at::Tensor SigmoidFocalLoss_forward(
    const at::Tensor& logits,
    const at::Tensor& targets,
    const int num_classes,
    const float gamma,
    const float alpha) {
  if (logits.type().is_cuda()) {
#ifdef WITH_CUDA
    return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
#else
    // A CUDA tensor was passed to a build made without WITH_CUDA.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // No CPU implementation is available.
  AT_ERROR("Not implemented on the CPU");
}
25 |
// Sigmoid focal loss backward pass. Only a CUDA implementation is wired up
// here: CUDA `logits` are forwarded to SigmoidFocalLoss_backward_cuda (when
// built with WITH_CUDA); any other tensor raises an error.
at::Tensor SigmoidFocalLoss_backward(
    const at::Tensor& logits,
    const at::Tensor& targets,
    const at::Tensor& d_losses,
    const int num_classes,
    const float gamma,
    const float alpha) {
  if (logits.type().is_cuda()) {
#ifdef WITH_CUDA
    return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
#else
    // A CUDA tensor was passed to a build made without WITH_CUDA.
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  // No CPU implementation is available.
  AT_ERROR("Not implemented on the CPU");
}
42 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include "cpu/vision.h"
3 |
4 |
5 | template
6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
7 | const at::Tensor& scores,
8 | const float threshold) {
9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
12 |
13 | if (dets.numel() == 0) {
14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
15 | }
16 |
17 | auto x1_t = dets.select(1, 0).contiguous();
18 | auto y1_t = dets.select(1, 1).contiguous();
19 | auto x2_t = dets.select(1, 2).contiguous();
20 | auto y2_t = dets.select(1, 3).contiguous();
21 |
22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
23 |
24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
25 |
26 | auto ndets = dets.size(0);
27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
28 |
29 | auto suppressed = suppressed_t.data();
30 | auto order = order_t.data();
31 | auto x1 = x1_t.data();
32 | auto y1 = y1_t.data();
33 | auto x2 = x2_t.data();
34 | auto y2 = y2_t.data();
35 | auto areas = areas_t.data();
36 |
37 | for (int64_t _i = 0; _i < ndets; _i++) {
38 | auto i = order[_i];
39 | if (suppressed[i] == 1)
40 | continue;
41 | auto ix1 = x1[i];
42 | auto iy1 = y1[i];
43 | auto ix2 = x2[i];
44 | auto iy2 = y2[i];
45 | auto iarea = areas[i];
46 |
47 | for (int64_t _j = _i + 1; _j < ndets; _j++) {
48 | auto j = order[_j];
49 | if (suppressed[j] == 1)
50 | continue;
51 | auto xx1 = std::max(ix1, x1[j]);
52 | auto yy1 = std::max(iy1, y1[j]);
53 | auto xx2 = std::min(ix2, x2[j]);
54 | auto yy2 = std::min(iy2, y2[j]);
55 |
56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1);
57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1);
58 | auto inter = w * h;
59 | auto ovr = inter / (iarea + areas[j] - inter);
60 | if (ovr >= threshold)
61 | suppressed[j] = 1;
62 | }
63 | }
64 | return at::nonzero(suppressed_t == 0).squeeze(1);
65 | }
66 |
67 | at::Tensor nms_cpu(const at::Tensor& dets,
68 | const at::Tensor& scores,
69 | const float threshold) {
70 | at::Tensor result;
71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {
72 | result = nms_cpu_kernel(dets, scores, threshold);
73 | });
74 | return result;
75 | }
76 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include
4 |
5 |
6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
7 | const at::Tensor& rois,
8 | const float spatial_scale,
9 | const int pooled_height,
10 | const int pooled_width,
11 | const int sampling_ratio);
12 |
13 |
14 | at::Tensor nms_cpu(const at::Tensor& dets,
15 | const at::Tensor& scores,
16 | const float threshold);
17 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/nms.h:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 |
// Non-maximum suppression entry point: dispatches to the CUDA or the CPU
// implementation depending on whether `dets` is a CUDA tensor.
at::Tensor nms(const at::Tensor& dets,
               const at::Tensor& scores,
               const float threshold) {

  if (dets.type().is_cuda()) {
#ifdef WITH_CUDA
    // Empty input: return an empty Long tensor (created on the CPU device).
    if (dets.numel() == 0)
      return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
    // The CUDA kernel takes boxes and scores as one tensor: scores are
    // appended to `dets` as an extra column.
    auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
    return nms_cuda(b, threshold);
#else
    // A CUDA tensor was passed to a build made without WITH_CUDA.
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  at::Tensor result = nms_cpu(dets, scores, threshold);
  return result;
}
29 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/csrc/vision.cpp:
--------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include "nms.h"
3 | #include "ROIAlign.h"
4 | #include "ROIPool.h"
5 | #include "SigmoidFocalLoss.h"
6 |
// Python bindings: exposes the dispatch functions declared in nms.h,
// ROIAlign.h, ROIPool.h and SigmoidFocalLoss.h under snake_case names.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
  m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
  m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
}
16 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_data_loader
3 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/collate_batch.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from maskrcnn_benchmark.modeling import registry
3 | from maskrcnn_benchmark.structures.image_list import to_image_list
4 |
5 |
@registry.BATCH_COLLATOR.register("DetectionOnlyCollator")
class BatchCollator(object):
    """
    Collate function for a DataLoader: turns a list of dataset samples
    into batched images plus the per-sample targets and image ids.
    """

    def __init__(self, size_divisible=0):
        self.size_divisible = size_divisible

    def __call__(self, batch):
        # Regroup per-sample tuples into per-field tuples.
        fields = list(zip(*batch))
        # Field 0 holds the images, field 1 the targets, field 2 the image ids.
        return (
            to_image_list(fields[0], self.size_divisible),
            fields[1],
            fields[2],
        )
23 |
24 |
@registry.BATCH_COLLATOR.register("RelationCollator")
class RelationBatchCollator:
    """Collate samples carrying both detection and relation targets.

    Returns the batched images, a ``(det_targets, rel_targets)`` pair and
    the image ids.
    """

    def __init__(self, size_divisible=0):
        self.size_divisible = size_divisible

    def __call__(self, batch):
        # Regroup per-sample tuples into per-field tuples.
        fields = list(zip(*batch))
        images = to_image_list(fields[0], self.size_divisible)
        # Fields 1 and 2 are the detection and relation targets.
        paired_targets = (fields[1], fields[2])
        return images, paired_targets, fields[3]
38 |
39 |
@registry.BATCH_COLLATOR.register("VGCollator")
class VGBatchCollator:
    """Collate visual-grounding samples by regrouping them field by field.

    Images are passed through as a tuple (they are not merged with
    ``to_image_list``), so ``size_divisible`` is stored but not used here.
    """

    # Number of per-sample fields forwarded, in order: images, targets,
    # img_id, phrase_ids, sent_id, sentence, precompute_bbox,
    # precompute_score, feature_map, vocab_label_elmo, sent_sg, topN_box.
    def __init__(self, size_divisible=0):
        self.size_divisible = size_divisible

    def __call__(self, batch):
        fields = list(zip(*batch))
        # Index explicitly so that a sample with too few fields still fails
        # with IndexError.
        return tuple(fields[position] for position in range(12))
63 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from .coco import COCODataset
3 | # from .concat_dataset import ConcatDataset
4 | # from .visual_genome import VGDataset
5 | # from .voc import PascalVOCDataset
6 | from .flickr import Flickr
7 |
8 | # __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "VGDataset", "Flickr"]
9 | __all__ = ["Flickr"]
10 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import bisect
3 |
4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
5 |
6 |
class ConcatDataset(_ConcatDataset):
    """
    torch.utils.data.dataset.ConcatDataset with two extra helpers: one that
    maps a global index to (dataset, local index) and one that forwards
    ``get_img_info`` to the dataset owning that index.
    """

    def get_idxs(self, idx):
        """Return ``(dataset_idx, sample_idx)`` for the global index ``idx``."""
        boundaries = self.cumulative_sizes
        dataset_idx = bisect.bisect_right(boundaries, idx)
        # Subtract the number of samples held by all preceding datasets.
        offset = boundaries[dataset_idx - 1] if dataset_idx != 0 else 0
        return dataset_idx, idx - offset

    def get_img_info(self, idx):
        """Delegate ``get_img_info`` to the dataset that owns ``idx``."""
        dataset_idx, sample_idx = self.get_idxs(idx)
        owner = self.datasets[dataset_idx]
        return owner.get_img_info(sample_idx)
24 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/VG/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from maskrcnn_benchmark.config import cfg
4 | from .vg_eval import eval_detection, eval_relation
5 |
6 |
def vg_evaluation(
        dataset,
        predictions,
        output_folder,
        box_only,
        iou_types,
        expected_results,
        expected_results_sigma_tol, ):
    """Run detection (and, if enabled, relation) evaluation.

    ``predictions`` is iterated as ``(prop, res)`` pairs. When
    ``cfg.MODEL.RELATION_ON`` is set, ``res[0]`` is treated as the detection
    result and ``res[1]`` as the relation result; otherwise ``res`` is the
    detection result.

    Returns:
        ``(proposal_eval_res, det_eval_results, rel_eval_results,
        coco_results)``; ``rel_eval_results`` is ``None`` when relation
        evaluation is disabled.
    """
    logger = logging.getLogger(__name__)

    # Split the combined predictions into detection and relation parts.
    det_predictions, rel_predictions = [], []
    for prop, res in predictions:
        if cfg.MODEL.RELATION_ON:
            det_predictions.append((prop, res[0]))
            rel_predictions.append(res[1])
        else:
            det_predictions.append((prop, res))

    detection_outputs = eval_detection(
        dataset=dataset,
        predictions=det_predictions,
        box_only=box_only,
        output_folder=output_folder,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
    proposal_eval_res, det_eval_results, coco_results = detection_outputs

    rel_eval_results = None
    if cfg.MODEL.RELATION_ON:
        # relation evaluations
        rel_eval_results = eval_relation(
            dataset=dataset,
            predictions=rel_predictions,
            output_folder=output_folder,
        )

    logger.info("vg evaluation done")
    return proposal_eval_res, det_eval_results, rel_eval_results, coco_results
45 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from maskrcnn_benchmark.data import datasets
2 | # from .VG import vg_evaluation
3 | # from .coco import coco_evaluation
4 | # from .voc import voc_evaluation
5 | from .flickr import flick_evaluation
6 |
7 |
def evaluate(dataset, predictions, image_ids, curr_iter, output_folder, **kwargs):
    """Evaluate ``predictions`` with the method matching the dataset type.

    Args:
        dataset: Dataset object; only ``datasets.Flickr`` is supported here.
        predictions: prediction results, forwarded to the evaluator.
        image_ids: forwarded to the evaluator.
        curr_iter: forwarded to the evaluator.
        output_folder: output folder, to save evaluation files or results.
        **kwargs: accepted but not used.
    Returns:
        evaluation result
    Raises:
        NotImplementedError: for any other dataset type.
    """
    if not isinstance(dataset, datasets.Flickr):
        dataset_name = dataset.__class__.__name__
        raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))

    # COCO, Visual Genome and Pascal VOC dispatch is disabled in this module.
    return flick_evaluation(
        dataset=dataset,
        predictions=predictions,
        image_ids=image_ids,
        curr_iter=curr_iter,
        output_folder=output_folder,
    )
36 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py:
--------------------------------------------------------------------------------
1 | from .coco_eval import do_coco_evaluation
2 |
3 |
def coco_evaluation(
    dataset,
    predictions,
    output_folder,
    box_only,
    iou_types,
    expected_results,
    expected_results_sigma_tol,
):
    """Forward every argument, by keyword, to ``do_coco_evaluation``."""
    eval_kwargs = dict(
        dataset=dataset,
        predictions=predictions,
        box_only=box_only,
        output_folder=output_folder,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
    return do_coco_evaluation(**eval_kwargs)
22 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/flickr/__init__.py:
--------------------------------------------------------------------------------
1 | from .flickr_eval import eval_recall
2 |
3 |
def flick_evaluation(dataset, predictions, image_ids,curr_iter, output_folder):
    """Evaluate Flickr predictions by delegating to ``eval_recall``."""
    recall_result = eval_recall(
        dataset, predictions, image_ids, curr_iter, output_folder
    )
    return recall_result
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from .voc_eval import do_voc_evaluation
4 |
5 |
def voc_evaluation(dataset, predictions, output_folder, box_only, **_):
    """Run the Pascal VOC evaluation.

    ``box_only`` is not supported (a warning is logged when it is set) and
    any extra keyword arguments are ignored.
    """
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    if box_only:
        logger.warning("voc evaluation doesn't support box_only, ignored.")
    logger.info("performing voc evaluation, ignored iou_types.")

    eval_kwargs = dict(
        dataset=dataset,
        predictions=predictions,
        output_folder=output_folder,
        logger=logger,
    )
    return do_voc_evaluation(**eval_kwargs)
17 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/datasets/list_dataset.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """
3 | Simple dataset class that wraps a list of path names
4 | """
5 |
6 | from PIL import Image
7 |
8 | from maskrcnn_benchmark.structures.bounding_box import BoxList
9 |
10 |
class ListDataset(object):
    """Dataset backed by a plain list of image paths."""

    def __init__(self, image_lists, transforms=None):
        self.image_lists, self.transforms = image_lists, transforms

    def __getitem__(self, item):
        """Load image ``item`` as RGB and pair it with a full-image box."""
        image_path = self.image_lists[item]
        img = Image.open(image_path).convert("RGB")

        # dummy target: a single box spanning the whole image
        width, height = img.size
        target = BoxList([[0, 0, width, height]], img.size, mode="xyxy")

        if self.transforms is None:
            return img, target

        img, target = self.transforms(img, target)
        return img, target

    def __len__(self):
        return len(self.image_lists)

    def get_img_info(self, item):
        """
        Intended to return the image dimensions without loading and
        pre-processing the image; currently a stub that returns None.
        """
        return None
37 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .distributed import DistributedSampler
3 | from .grouped_batch_sampler import GroupedBatchSampler
4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
5 |
6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
7 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from torch.utils.data.sampler import BatchSampler
3 |
4 |
class IterationBasedBatchSampler(BatchSampler):
    """
    Re-iterates a wrapped BatchSampler until a fixed number of
    iterations has been produced.

    Arguments:
        batch_sampler: the sampler to draw batches from; it must expose a
            ``sampler`` attribute.
        num_iterations: iteration count at which sampling stops.
        start_iter: iteration counter to start from (e.g. when resuming).
    """

    def __init__(self, batch_sampler, num_iterations, start_iter=0):
        self.batch_sampler = batch_sampler
        self.num_iterations = num_iterations
        self.start_iter = start_iter

    def __iter__(self):
        step = self.start_iter
        limit = self.num_iterations
        while step <= limit:
            # Samplers that expose set_epoch (like DistributedSampler) are
            # told the current iteration before each pass over the wrapped
            # sampler.
            inner_sampler = self.batch_sampler.sampler
            if hasattr(inner_sampler, "set_epoch"):
                inner_sampler.set_epoch(step)
            for batch in self.batch_sampler:
                step += 1
                if step > limit:
                    break
                yield batch

    def __len__(self):
        return self.num_iterations
32 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import build_transforms
3 | from .transforms import Compose
4 | from .transforms import Normalize
5 | from .transforms import RandomHorizontalFlip
6 | from .transforms import Resize
7 | from .transforms import ToTensor
8 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/data/transforms/build.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from . import transforms_vg_bp as TBP
3 |
4 |
def build_transforms(cfg, is_train=True):
    """Build the image/target preprocessing pipeline.

    Arguments:
        cfg: config node; ``cfg.INPUT`` supplies the min/max sizes for the
            selected split as well as ``PIXEL_MEAN`` and ``PIXEL_STD``.
        is_train: pick the ``*_TRAIN`` sizes when true, ``*_TEST`` otherwise.

    Returns:
        A ``TBP.Compose`` that runs ``ResizeAndNormalize`` then ``ToTensor``.

    Note: no horizontal flip is applied for either split, and the pipeline
    is identical for both apart from the resize bounds.
    """
    if is_train:
        min_size = cfg.INPUT.MIN_SIZE_TRAIN
        max_size = cfg.INPUT.MAX_SIZE_TRAIN
    else:
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST

    return TBP.Compose(
        [
            TBP.ResizeAndNormalize(
                min_size, max_size, cfg.INPUT.PIXEL_MEAN, cfg.INPUT.PIXEL_STD
            ),
            TBP.ToTensor(),
        ]
    )
30 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 |
4 | from .batch_norm import FrozenBatchNorm2d
5 | from .misc import Conv2d
6 | from .misc import ConvTranspose2d
7 | from .misc import BatchNorm2d
8 | from .misc import interpolate
9 | from .nms import nms
10 | from .roi_align import ROIAlign
11 | from .roi_align import roi_align
12 | from .roi_pool import ROIPool
13 | from .roi_pool import roi_pool
14 | from .smooth_l1_loss import smooth_l1_loss
15 | from .sigmoid_focal_loss import SigmoidFocalLoss
16 |
17 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool",
18 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate",
19 | "BatchNorm2d", "FrozenBatchNorm2d", "SigmoidFocalLoss"
20 | ]
21 |
22 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import glob
3 | import os.path
4 |
5 | import torch
6 |
7 | try:
8 | from torch.utils.cpp_extension import load as load_ext
9 | from torch.utils.cpp_extension import CUDA_HOME
10 | except ImportError:
11 | raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher")
12 |
13 |
def _load_C_extensions():
    """Collect the sources under ``../csrc`` and load them via ``load_ext``.

    CUDA sources are added, and ``-DWITH_CUDA`` is defined, only when CUDA is
    available at runtime and ``CUDA_HOME`` is set.
    """
    package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    csrc_dir = os.path.join(package_dir, "csrc")

    def _find(*pattern):
        return glob.glob(os.path.join(csrc_dir, *pattern))

    sources = _find("*.cpp") + _find("cpu", "*.cpp")
    cuda_sources = _find("cuda", "*.cu")

    cflags = []
    if torch.cuda.is_available() and CUDA_HOME is not None:
        sources.extend(cuda_sources)
        cflags = ["-DWITH_CUDA"]

    sources = [os.path.join(csrc_dir, path) for path in sources]
    return load_ext(
        "torchvision",
        sources,
        extra_cflags=cflags,
        extra_include_paths=[csrc_dir],
    )
37 |
38 |
# Loaded once at import time: importing this module runs the extension loader.
_C = _load_C_extensions()
40 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/batch_norm.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 | from torch import nn
4 |
5 |
class FrozenBatchNorm2d(nn.Module):
    """
    BatchNorm2d variant whose statistics and affine terms are stored as
    buffers (not parameters), so they are never updated by an optimizer.
    """

    def __init__(self, n):
        super(FrozenBatchNorm2d, self).__init__()
        for buf_name, init in (
            ("weight", torch.ones),
            ("bias", torch.zeros),
            ("running_mean", torch.zeros),
            ("running_var", torch.ones),
        ):
            self.register_buffer(buf_name, init(n))

    def forward(self, x):
        # Fold normalisation and affine transform into one multiply-add.
        per_channel_scale = self.weight * self.running_var.rsqrt()
        per_channel_shift = self.bias - self.running_mean * per_channel_scale
        return (
            x * per_channel_scale.reshape(1, -1, 1, 1)
            + per_channel_shift.reshape(1, -1, 1, 1)
        )
25 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/nms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from ._utils import _C
3 | from maskrcnn_benchmark import _C
4 |
5 | nms = _C.nms
6 | # nms.__doc__ = """
7 | # This function performs Non-maximum suppression"""
8 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_align.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 | from torch import nn
4 | from torch.autograd import Function
5 | from torch.autograd.function import once_differentiable
6 | from torch.nn.modules.utils import _pair
7 |
8 | from maskrcnn_benchmark import _C
9 |
10 |
class _ROIAlign(Function):
    """Autograd function around the compiled ``roi_align`` kernels."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
        """Pool ``input`` over ``roi`` with the compiled forward kernel.

        ``output_size`` may be an int or an ``(h, w)`` pair.
        """
        ctx.save_for_backward(roi)
        # Normalise once and use the normalised pair everywhere, so an int
        # output_size works instead of failing on ``output_size[0]``.
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.sampling_ratio = sampling_ratio
        ctx.input_shape = input.size()
        pooled_h, pooled_w = ctx.output_size
        output = _C.roi_align_forward(
            input, roi, spatial_scale, pooled_h, pooled_w, sampling_ratio
        )
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; other inputs get ``None``."""
        rois, = ctx.saved_tensors
        pooled_h, pooled_w = ctx.output_size
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_align_backward(
            grad_output,
            rois,
            ctx.spatial_scale,
            pooled_h,
            pooled_w,
            bs,
            ch,
            h,
            w,
            ctx.sampling_ratio,
        )
        # One entry per forward input: input, roi, output_size,
        # spatial_scale, sampling_ratio.
        return grad_input, None, None, None, None


roi_align = _ROIAlign.apply
48 |
49 |
class ROIAlign(nn.Module):
    """Module wrapper that stores the pooling settings for ``roi_align``."""

    def __init__(self, output_size, spatial_scale, sampling_ratio):
        super(ROIAlign, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio

    def forward(self, input, rois):
        pooled = roi_align(
            input, rois, self.output_size, self.spatial_scale, self.sampling_ratio
        )
        return pooled

    def __repr__(self):
        fields = (
            "output_size=" + str(self.output_size),
            "spatial_scale=" + str(self.spatial_scale),
            "sampling_ratio=" + str(self.sampling_ratio),
        )
        return self.__class__.__name__ + "(" + ", ".join(fields) + ")"
69 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/roi_pool.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 | from torch import nn
4 | from torch.autograd import Function
5 | from torch.autograd.function import once_differentiable
6 | from torch.nn.modules.utils import _pair
7 |
8 | from maskrcnn_benchmark import _C
9 |
10 |
class _ROIPool(Function):
    """Autograd function around the compiled ``roi_pool`` kernels."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale):
        """Pool ``input`` over ``roi`` with the compiled forward kernel.

        ``output_size`` may be an int or an ``(h, w)`` pair.
        """
        # Normalise once and use the normalised pair everywhere, so an int
        # output_size works instead of failing on ``output_size[0]``.
        ctx.output_size = _pair(output_size)
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()
        pooled_h, pooled_w = ctx.output_size
        output, argmax = _C.roi_pool_forward(
            input, roi, spatial_scale, pooled_h, pooled_w
        )
        # argmax is only known after the kernel ran, hence saving it here.
        ctx.save_for_backward(input, roi, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient w.r.t. ``input``; other inputs get ``None``."""
        input, rois, argmax = ctx.saved_tensors
        pooled_h, pooled_w = ctx.output_size
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_pool_backward(
            grad_output,
            input,
            rois,
            argmax,
            ctx.spatial_scale,
            pooled_h,
            pooled_w,
            bs,
            ch,
            h,
            w,
        )
        # One entry per forward input: input, roi, output_size, spatial_scale.
        return grad_input, None, None, None


roi_pool = _ROIPool.apply
47 |
48 |
class ROIPool(nn.Module):
    """Module wrapper that stores the pooling settings for ``roi_pool``."""

    def __init__(self, output_size, spatial_scale):
        super(ROIPool, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale

    def forward(self, input, rois):
        pooled = roi_pool(input, rois, self.output_size, self.spatial_scale)
        return pooled

    def __repr__(self):
        fields = (
            "output_size=" + str(self.output_size),
            "spatial_scale=" + str(self.spatial_scale),
        )
        return self.__class__.__name__ + "(" + ", ".join(fields) + ")"
64 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.autograd import Function
4 | from torch.autograd.function import once_differentiable
5 |
6 | from maskrcnn_benchmark import _C
7 |
8 | # TODO: Use JIT to replace CUDA implementation in the future.
class _SigmoidFocalLoss(Function):
    """Autograd function around the compiled sigmoid focal loss kernels."""

    @staticmethod
    def forward(ctx, logits, targets, gamma, alpha):
        """Compute the losses with the compiled forward kernel.

        The class count is taken from ``logits.shape[1]``.
        """
        ctx.save_for_backward(logits, targets)
        num_classes = logits.shape[1]
        ctx.num_classes = num_classes
        ctx.gamma = gamma
        ctx.alpha = alpha

        losses = _C.sigmoid_focalloss_forward(
            logits, targets, num_classes, gamma, alpha
        )
        return losses

    @staticmethod
    @once_differentiable
    def backward(ctx, d_loss):
        """Return the gradient w.r.t. ``logits``; other inputs get ``None``."""
        logits, targets = ctx.saved_tensors
        d_loss = d_loss.contiguous()
        d_logits = _C.sigmoid_focalloss_backward(
            logits, targets, d_loss, ctx.num_classes, ctx.gamma, ctx.alpha
        )
        # Exactly one entry per forward input: logits, targets, gamma, alpha.
        return d_logits, None, None, None


sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply
38 |
39 |
def _first_or_self(value):
    """Return ``value[0]`` when ``value`` is indexable, else ``value`` itself."""
    try:
        return value[0]
    except (TypeError, IndexError):
        # Plain numbers raise TypeError, 0-dim tensors raise IndexError.
        return value


def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
    """Pure-PyTorch sigmoid focal loss, returned element-wise (no reduction).

    Arguments:
        logits: tensor whose second dimension indexes the classes.
        targets: per-row class labels; column ``j`` of ``logits`` is compared
            against label ``j + 1``. Rows with a negative label contribute
            zero loss.
        gamma: focusing exponent, as a scalar or a one-element sequence/tensor.
        alpha: weight of the positive term, scalar or one-element
            sequence/tensor; the negative term is weighted by ``1 - alpha``.

    Returns:
        Tensor with the same shape as ``logits``.
    """
    num_classes = logits.shape[1]
    # Accept plain scalars as well as the one-element containers the
    # original indexing required.
    gamma = _first_or_self(gamma)
    alpha = _first_or_self(alpha)
    class_range = torch.arange(
        1, num_classes + 1, dtype=targets.dtype, device=targets.device
    ).unsqueeze(0)

    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    term1 = (1 - p) ** gamma * torch.log(p)
    term2 = p ** gamma * torch.log(1 - p)

    is_positive = (t == class_range).float()
    is_negative = ((t != class_range) * (t >= 0)).float()
    return -is_positive * term1 * alpha - is_negative * term2 * (1 - alpha)
53 |
54 |
class SigmoidFocalLoss(nn.Module):
    """Sigmoid focal loss module that returns the summed loss.

    Arguments:
        gamma: focusing exponent passed to the loss function.
        alpha: positive/negative balancing weight passed to the loss function.
    """

    def __init__(self, gamma, alpha):
        super(SigmoidFocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, targets):
        """Use the CUDA kernel for CUDA logits and the PyTorch fallback otherwise."""
        if logits.is_cuda:
            loss_func = sigmoid_focal_loss_cuda
        else:
            loss_func = sigmoid_focal_loss_cpu

        loss = loss_func(logits, targets, self.gamma, self.alpha)
        return loss.sum()

    def __repr__(self):
        tmpstr = self.__class__.__name__ + "("
        tmpstr += "gamma=" + str(self.gamma)
        tmpstr += ", alpha=" + str(self.alpha)
        tmpstr += ")"
        return tmpstr
77 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 |
4 |
5 | # TODO maybe push this to nn?
def smooth_l1_loss(input, target, beta=1. / 9, size_average=True):
    """
    Smooth L1 loss with a configurable transition point ``beta``.

    Absolute errors below ``beta`` are penalised quadratically
    (``0.5 * e**2 / beta``), larger ones linearly (``e - 0.5 * beta``).
    The result is averaged when ``size_average`` is true and summed
    otherwise.
    """
    abs_err = (input - target).abs()
    quadratic = 0.5 * abs_err ** 2 / beta
    linear = abs_err - 0.5 * beta
    per_element = torch.where(abs_err < beta, quadratic, linear)
    return per_element.mean() if size_average else per_element.sum()
17 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/layers/spatial_coordinate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3.6
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2019-07-09 13:40
4 | # @Author : Yongfei Liu
5 | # @Email : liuyf3@shanghaitech.edu.cn
6 |
7 | import numpy as np
8 | import torch
9 |
10 |
11 |
def meshgrid_generation(feat):
    """Build a 2-channel map of normalised (row, column) coordinates.

    Arguments:
        feat: 4-D tensor; only its last two dimensions ``(h, w)`` and its
            device are used.

    Returns:
        Float tensor of shape ``(1, 2, h, w)`` on ``feat``'s device. Channel 0
        holds ``row / (h / 2) - 1`` and channel 1 holds ``col / (w / 2) - 1``.
    """
    _, _, h, w = feat.shape

    # Use the device object rather than get_device(): the latter yields -1
    # for CPU tensors, which is not a valid target for ``.to``.
    device = feat.device

    rows = torch.arange(h, dtype=torch.float32, device=device) / (h / 2) - 1
    cols = torch.arange(w, dtype=torch.float32, device=device) / (w / 2) - 1

    grid_h = rows.view(h, 1).expand(h, w)
    grid_w = cols.view(1, w).expand(h, w)

    spatial_coord = torch.stack((grid_h, grid_w), dim=0).unsqueeze(0)
    return spatial_coord
29 |
30 |
31 | def get_spatial_feat(precomp_boxes):
32 |
33 | bbox = precomp_boxes.bbox
34 | bbox_size = [precomp_boxes.size[0], precomp_boxes.size[1]] ## width, height
35 | bbox_area = (bbox[:, 2] - bbox[:, 0]) * (bbox[:, 3] - bbox[:, 1])
36 | bbox_area_ratio = bbox_area / (bbox_size[0] * bbox_area[1])
37 | bbox_area_ratio = bbox_area_ratio.unsqueeze(1) # 100 * 1
38 | device_id = precomp_boxes.bbox.get_device()
39 | bbox_size.extend(bbox_size)
40 | bbox_size = torch.FloatTensor(np.array(bbox_size).astype(np.float32)).to(device_id)
41 | bbox = bbox / bbox_size
42 | vis_spatial = torch.cat((bbox, bbox_area_ratio), 1)
43 | return vis_spatial
44 |
if __name__ == '__main__':
    # Quick manual check on a small dummy feature map.
    dummy_feat = torch.ones(3, 1, 50, 50)
    meshgrid_generation(feat=dummy_feat)
49 |
50 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .backbone import build_backbone
3 | from . import fbnet
4 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/backbone/vgg16.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torchvision.models as models
3 |
4 |
class VGG16(nn.Module):
    """VGG16-BN feature extractor built from torchvision's pretrained model.

    The leading ``cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT`` entries of the
    ``features`` sequence have their parameters frozen.
    """

    def __init__(self, cfg):
        super(VGG16, self).__init__()
        self.features = models.vgg16_bn(pretrained=True).features
        self.out_channels = 512

        freeze_upto = cfg.MODEL.BACKBONE.FREEZE_CONV_BODY_AT
        for idx, layer in enumerate(self.features):
            if idx >= freeze_upto:
                break
            set_trainable(layer, requires_grad=False)

    def forward(self, im_data):
        feature_map = self.features(im_data)
        # Wrapped in a list for the following process.
        return [feature_map]
20 |
21 |
def set_trainable(model, requires_grad):
    """Set ``requires_grad`` on every parameter of ``model``."""
    for weight in model.parameters():
        weight.requires_grad = requires_grad
24 |
25 |
def set_trainable_param(parameters, requires_grad):
    """Set ``requires_grad`` on each item of the ``parameters`` iterable."""
    for weight in parameters:
        weight.requires_grad = requires_grad
29 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .detectors import build_detection_model
3 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/detector/detectors.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .generalized_rcnn import GeneralizedRCNN
3 | from .generalized_rcnn_det import GeneralizedRCNNDet
4 |
5 |
# Maps the value of cfg.MODEL.META_ARCHITECTURE to the class implementing it.
_DETECTION_META_ARCHITECTURES = {
    "GeneralizedRCNN": GeneralizedRCNN,
    "GeneralizedRCNNDet": GeneralizedRCNNDet,
}


def build_detection_model(cfg):
    """Instantiate the detector named by ``cfg.MODEL.META_ARCHITECTURE``.

    Raises ``KeyError`` when the name is not a registered architecture.
    """
    arch_name = cfg.MODEL.META_ARCHITECTURE
    detector_cls = _DETECTION_META_ARCHITECTURES[arch_name]
    return detector_cls(cfg)
12 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/registry.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 | from maskrcnn_benchmark.utils.registry import Registry
4 |
# One independent Registry per family of pluggable components. Other modules
# add implementations with ``@registry.<NAME>.register("key")`` and look them
# up by subscripting with a name taken from the config.
BACKBONES = Registry()
RPN_HEADS = Registry()
ROI_BOX_FEATURE_EXTRACTORS = Registry()
ROI_BOX_PREDICTOR = Registry()
ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry()
ROI_KEYPOINT_PREDICTOR = Registry()
ROI_MASK_FEATURE_EXTRACTORS = Registry()
ROI_MASK_PREDICTOR = Registry()

BATCH_COLLATOR = Registry()
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/relation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/relation/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .inference import make_roi_keypoint_post_processor
4 | from .loss import make_roi_keypoint_loss_evaluator
5 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor
6 | from .roi_keypoint_predictors import make_roi_keypoint_predictor
7 |
8 |
class ROIKeypointHead(torch.nn.Module):
    """Keypoint head: feature extractor, predictor, post-processor and loss."""

    def __init__(self, cfg, in_channels):
        super(ROIKeypointHead, self).__init__()
        self.cfg = cfg.clone()
        extractor = make_roi_keypoint_feature_extractor(cfg, in_channels)
        self.feature_extractor = extractor
        self.predictor = make_roi_keypoint_predictor(cfg, extractor.out_channels)
        self.post_processor = make_roi_keypoint_post_processor(cfg)
        self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg)

    def forward(self, features, proposals, targets=None):
        """
        Arguments:
            features (list[Tensor]): feature-maps from possibly several levels
            proposals (list[BoxList]): proposal boxes
            targets (list[BoxList], optional): the ground-truth targets.

        Returns:
            x (Tensor): the result of the feature extractor
            proposals (list[BoxList]): during training, the (subsampled)
                proposals are returned. During testing, the output of the
                post-processor is returned instead.
            losses (dict[Tensor]): During training, ``{"loss_kp": ...}``.
                During testing, an empty dict.
        """
        is_training = self.training
        if is_training:
            # Subsampling is bookkeeping only; keep it out of the graph.
            with torch.no_grad():
                proposals = self.loss_evaluator.subsample(proposals, targets)

        x = self.feature_extractor(features, proposals)
        kp_logits = self.predictor(x)

        if is_training:
            loss_kp = self.loss_evaluator(proposals, kp_logits)
            return x, proposals, dict(loss_kp=loss_kp)

        result = self.post_processor(kp_logits, proposals)
        return x, result, {}
48 |
49 |
def build_roi_keypoint_head(cfg, in_channels):
    """Factory returning a ``ROIKeypointHead`` for ``cfg`` and ``in_channels``."""
    head = ROIKeypointHead(cfg, in_channels)
    return head
52 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torch.nn import functional as F
3 |
4 | from maskrcnn_benchmark.layers import Conv2d
5 | from maskrcnn_benchmark.modeling import registry
6 | from maskrcnn_benchmark.modeling.poolers import Pooler
7 |
8 |
@registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor")
class KeypointRCNNFeatureExtractor(nn.Module):
    """RoI pooling followed by a stack of 3x3 conv + ReLU layers.

    The pooler settings and the per-layer channel counts come from
    ``cfg.MODEL.ROI_KEYPOINT_HEAD``. ``out_channels`` is the channel count
    of the last conv layer, or ``in_channels`` when no conv layers are
    configured.
    """

    def __init__(self, cfg, in_channels):
        super(KeypointRCNNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        self.pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )

        layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
        next_feature = in_channels
        # Layers are registered as attributes; ``blocks`` keeps their names
        # in execution order.
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "conv_fcn{}".format(layer_idx)
            module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # next_feature always holds the current output width, so this is
        # also well-defined when CONV_LAYERS is empty (the loop variable
        # would be unbound in that case).
        self.out_channels = next_feature

    def forward(self, x, proposals):
        x = self.pooler(x, proposals)
        for layer_name in self.blocks:
            x = F.relu(getattr(self, layer_name)(x))
        return x
43 |
44 |
def make_roi_keypoint_feature_extractor(cfg, in_channels):
    """Build the extractor named by ``cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR``."""
    extractor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR
    extractor_factory = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[extractor_name]
    return extractor_factory(cfg, in_channels)
50 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from maskrcnn_benchmark import layers
4 | from maskrcnn_benchmark.modeling import registry
5 |
6 |
@registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor")
class KeypointRCNNPredictor(nn.Module):
    """Keypoint score head: one stride-2 transposed conv, then 2x bilinear upsampling."""

    def __init__(self, cfg, in_channels):
        super(KeypointRCNNPredictor, self).__init__()
        num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
        kernel = 4
        deconv = layers.ConvTranspose2d(
            in_channels,
            num_keypoints,
            kernel,
            stride=2,
            padding=kernel // 2 - 1,
        )
        self.kps_score_lowres = deconv
        nn.init.kaiming_normal_(deconv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(deconv.bias, 0)
        self.up_scale = 2
        self.out_channels = num_keypoints

    def forward(self, x):
        lowres_scores = self.kps_score_lowres(x)
        return layers.interpolate(
            lowres_scores,
            scale_factor=self.up_scale,
            mode="bilinear",
            align_corners=False,
        )
34 |
35 |
def make_roi_keypoint_predictor(cfg, in_channels):
    """Build the predictor named by ``cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR``."""
    predictor_name = cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR
    predictor_factory = registry.ROI_KEYPOINT_PREDICTOR[predictor_name]
    return predictor_factory(cfg, in_channels)
39 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from torch import nn
3 | from torch.nn import functional as F
4 |
5 | from maskrcnn_benchmark.layers import Conv2d
6 | from maskrcnn_benchmark.layers import ConvTranspose2d
7 | from maskrcnn_benchmark.modeling import registry
8 |
9 |
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor")
class MaskRCNNC4Predictor(nn.Module):
    """Mask head: 2x2 stride-2 transposed conv + ReLU, then a 1x1 conv to class logits."""

    def __init__(self, cfg, in_channels):
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        self.conv5_mask = ConvTranspose2d(in_channels, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        upsampled = F.relu(self.conv5_mask(x))
        return self.mask_fcn_logits(upsampled)
32 |
33 |
@registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor")
class MaskRCNNConv1x1Predictor(nn.Module):
    """Mask head consisting of a single 1x1 conv producing per-class logits."""

    def __init__(self, cfg, in_channels):
        super(MaskRCNNConv1x1Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES

        self.mask_fcn_logits = Conv2d(in_channels, num_classes, 1, 1, 0)

        for param_name, tensor in self.named_parameters():
            if "bias" in param_name:
                nn.init.constant_(tensor, 0)
                continue
            if "weight" in param_name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")

    def forward(self, x):
        logits = self.mask_fcn_logits(x)
        return logits
53 |
54 |
def make_roi_mask_predictor(cfg, in_channels):
    """Build the predictor named by ``cfg.MODEL.ROI_MASK_HEAD.PREDICTOR``."""
    predictor_name = cfg.MODEL.ROI_MASK_HEAD.PREDICTOR
    predictor_factory = registry.ROI_MASK_PREDICTOR[predictor_name]
    return predictor_factory(cfg, in_channels)
58 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from .rpn import build_rpn
3 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/rpn/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """
3 | Utility functions manipulating the prediction layers
4 | """
5 |
6 | from ..utils import cat
7 |
8 | import torch
9 |
def permute_and_flatten(layer, N, A, C, H, W):
    """
    Rearrange a ``(N, A*C, H, W)`` prediction map into ``(N, H*W*A, C)``.

    The channel axis is split into ``(-1, C)``, the spatial axes are moved
    in front of the split channel axes, and everything between the batch
    axis and ``C`` is flattened. ``A`` is accepted for symmetry with the
    callers but is not used: the split size is inferred from ``C``.
    """
    split_channels = layer.view(N, -1, C, H, W)
    spatial_first = split_channels.permute(0, 3, 4, 1, 2)
    return spatial_first.reshape(N, -1, C)
15 |
16 |
def concat_box_prediction_layers(box_cls, box_regression):
    """
    Flatten per-level classification and regression maps and concatenate them.

    Arguments:
        box_cls: iterable of per-feature-level tensors of shape
            ``(N, A*C, H, W)``.
        box_regression: iterable of per-feature-level tensors of shape
            ``(N, A*4, H, W)``.

    Returns:
        ``(box_cls, box_regression)`` reshaped to ``(-1, C)`` and ``(-1, 4)``.
    """
    cls_per_level = []
    regression_per_level = []
    # Every feature level is permuted into the same layout as the labels.
    # The labels are computed for all feature levels concatenated, so the
    # objectness and the box regression keep that same representation.
    for cls_map, regression_map in zip(box_cls, box_regression):
        N, AxC, H, W = cls_map.shape
        # Number of anchors is recovered from the regression channels (4 per
        # anchor); the class count follows from the classification channels.
        A = regression_map.shape[1] // 4
        C = AxC // A

        cls_per_level.append(permute_and_flatten(cls_map, N, A, C, H, W))
        regression_per_level.append(
            permute_and_flatten(regression_map, N, A, 4, H, W)
        )

    # Concatenate along dim 1 (the flattened anchors of all feature levels),
    # matching the way the labels were generated with all feature maps
    # concatenated as well.
    flat_cls = cat(cls_per_level, dim=1).reshape(-1, C)
    flat_regression = cat(regression_per_level, dim=1).reshape(-1, 4)
    return flat_cls, flat_regression
46 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """
3 | Miscellaneous utility functions
4 | """
5 |
6 | import torch
7 |
8 |
def cat(tensors, dim=0):
    """
    Concatenate ``tensors`` along ``dim`` like ``torch.cat``.

    When the list or tuple holds exactly one tensor, that tensor is returned
    as-is, which avoids the copy ``torch.cat`` would make.
    """
    assert isinstance(tensors, (list, tuple))
    return tensors[0] if len(tensors) == 1 else torch.cat(tensors, dim)
17 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/modeling/vg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/modeling/vg/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .build import make_optimizer
3 | from .build import make_lr_scheduler
4 | from .lr_scheduler import WarmupMultiStepLR
5 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/build.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 |
4 | from .lr_scheduler import WarmupMultiStepLR
5 |
def make_optimizer(cfg, model):
    """
    Build the optimizer with one parameter group per trainable parameter.

    Each group gets its own learning rate and weight decay:

    * names containing ``"body"`` or ``"head"`` use
      ``BASE_LR * RESNET_LR_FACTOR``;
    * otherwise names containing ``"fpn"`` use ``BASE_LR * FPN_LR_FACTOR``;
    * otherwise names containing ``"phrase_embed"`` use
      ``BASE_LR * PHRASE_EMBEDDING_LR_FACTOR``;
    * everything else uses ``BASE_LR``;
    * names containing ``"bias"`` additionally multiply the rate by
      ``BIAS_LR_FACTOR`` and use ``WEIGHT_DECAY_BIAS``.

    Arguments:
        cfg: config object exposing the ``cfg.SOLVER.*`` values used above,
            plus ``TYPE`` and (for SGD) ``MOMENTUM``.
        model: module whose ``named_parameters()`` are optimized.

    Returns:
        A ``torch.optim.SGD`` or ``torch.optim.Adam`` instance.

    Raises:
        NotImplementedError: if ``cfg.SOLVER.TYPE`` is neither ``"SGD"`` nor
            ``"Adam"``.
    """
    base_lr = cfg.SOLVER.BASE_LR

    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        print('gradient', key)

        lr = base_lr
        if "body" in key or "head" in key:
            lr = base_lr * cfg.SOLVER.RESNET_LR_FACTOR
        elif "fpn" in key:
            lr = base_lr * cfg.SOLVER.FPN_LR_FACTOR
        elif 'phrase_embed' in key:
            lr = base_lr * cfg.SOLVER.PHRASE_EMBEDDING_LR_FACTOR

        weight_decay = cfg.SOLVER.WEIGHT_DECAY
        if "bias" in key:
            # The bias factor scales the per-module rate chosen above.
            lr = lr * cfg.SOLVER.BIAS_LR_FACTOR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS

        params.append({"params": [value], "lr": lr, "weight_decay": weight_decay})

    # Every group carries an explicit "lr", so the optimizer-wide default only
    # matters for groups added later. Use BASE_LR for it instead of whatever
    # rate the last loop iteration happened to leave behind.
    solver_type = cfg.SOLVER.TYPE
    if solver_type == "SGD":
        return torch.optim.SGD(params, base_lr, momentum=cfg.SOLVER.MOMENTUM)
    if solver_type == 'Adam':
        return torch.optim.Adam(params, base_lr)
    raise NotImplementedError(
        "Unsupported cfg.SOLVER.TYPE: {!r}".format(solver_type)
    )
40 |
41 |
def make_lr_scheduler(cfg, optimizer):
    """
    Wrap ``optimizer`` in a ``WarmupMultiStepLR`` configured from ``cfg.SOLVER``.

    ``STEPS`` and ``GAMMA`` are passed as milestones and decay factor;
    ``WARMUP_FACTOR``, ``WARMUP_ITERS`` and ``WARMUP_METHOD`` configure the
    warmup phase.
    """
    solver = cfg.SOLVER
    scheduler = WarmupMultiStepLR(
        optimizer,
        solver.STEPS,
        solver.GAMMA,
        warmup_factor=solver.WARMUP_FACTOR,
        warmup_iters=solver.WARMUP_ITERS,
        warmup_method=solver.WARMUP_METHOD,
    )
    return scheduler
51 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/solver/lr_scheduler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from bisect import bisect_right
3 |
4 | import torch
5 |
6 |
7 | # FIXME ideally this would be achieved with a CombinedLRScheduler,
8 | # separating MultiStepLR with WarmupLR
9 | # but the current LRScheduler design doesn't allow it
class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    """
    Multi-step learning-rate decay with an initial warmup phase.

    While ``last_epoch < warmup_iters`` the base rates are scaled by a warmup
    factor: the constant ``warmup_factor`` for ``"constant"`` warmup, or a
    linear ramp from ``warmup_factor`` up to 1 for ``"linear"`` warmup.
    Independently, the rate is multiplied by ``gamma`` once for every
    milestone that ``last_epoch`` has reached.

    Arguments:
        optimizer: wrapped optimizer.
        milestones: increasing sequence of step indices at which to decay.
        gamma: multiplicative decay applied at each milestone.
        warmup_factor: scale applied at the start of warmup.
        warmup_iters: number of steps the warmup lasts.
        warmup_method: ``"constant"`` or ``"linear"``.
        last_epoch: index of the last step, forwarded to the base class.

    Raises:
        ValueError: if ``milestones`` is not sorted in increasing order or
            ``warmup_method`` is not one of the accepted values.
    """

    def __init__(
        self,
        optimizer,
        milestones,
        gamma=0.1,
        warmup_factor=1.0 / 3,
        warmup_iters=500,
        warmup_method="linear",
        last_epoch=-1,
    ):
        if not list(milestones) == sorted(milestones):
            raise ValueError(
                "Milestones should be a list of increasing integers. "
                "Got {}".format(milestones)
            )

        if warmup_method not in ("constant", "linear"):
            raise ValueError(
                "Only 'constant' or 'linear' warmup_method accepted, "
                "got {}".format(warmup_method)
            )
        # These attributes must exist before the base-class constructor runs,
        # because it already calls get_lr() through an initial step.
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the learning rate of every parameter group for ``last_epoch``."""
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                # Interpolate from warmup_factor (alpha == 0) towards 1.
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        # bisect_right counts the milestones that are <= last_epoch.
        decay = self.gamma ** bisect_right(self.milestones, self.last_epoch)
        return [base_lr * warmup_factor * decay for base_lr in self.base_lrs]
53 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/structures/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/image_list.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from __future__ import division
3 |
4 | import torch
5 |
6 |
class ImageList(object):
    """
    Holds a batch of images, possibly of different sizes, as one tensor.

    The images are expected to be padded to a common size; the size of every
    image before padding is kept alongside in ``image_sizes``.
    """

    def __init__(self, tensors, image_sizes):
        """
        Arguments:
            tensors (tensor)
            image_sizes (list[tuple[int, int]])
        """
        self.image_sizes = image_sizes
        self.tensors = tensors

    def to(self, *args, **kwargs):
        """
        Return a new ImageList whose tensor is ``self.tensors.to(*args, **kwargs)``.

        The ``image_sizes`` object is shared with the original, not copied.
        """
        return ImageList(self.tensors.to(*args, **kwargs), self.image_sizes)
27 |
28 |
def to_image_list(tensors, size_divisible=0):
    """
    Convert ``tensors`` into an ImageList.

    ``tensors`` can be an ImageList (returned unchanged), a torch.Tensor or a
    tuple/list of Tensors. It can't be a numpy array. A tuple/list of Tensors
    is zero-padded so that all entries share the same shape. When
    ``size_divisible`` is positive, dimensions 1 and 2 of the padded shape
    are rounded up to a multiple of it.

    Raises:
        TypeError: for any other input type.
    """
    # A bare tensor that needs divisibility padding goes through the list path.
    if isinstance(tensors, torch.Tensor) and size_divisible > 0:
        tensors = [tensors]

    if isinstance(tensors, ImageList):
        return tensors

    if isinstance(tensors, torch.Tensor):
        # A single tensor carries its own shape; add a batch axis if missing.
        if tensors.dim() == 3:
            tensors = tensors[None]
        assert tensors.dim() == 4
        return ImageList(tensors, [single.shape[-2:] for single in tensors])

    if not isinstance(tensors, (tuple, list)):
        raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))

    # Per-dimension maximum over all images.
    max_size = tuple(max(extent) for extent in zip(*[img.shape for img in tensors]))

    # TODO Ideally, just remove this and let the model handle arbitrary
    # input sizes
    if size_divisible > 0:
        import math

        stride = size_divisible
        rounded = list(max_size)
        for axis in (1, 2):
            rounded[axis] = int(math.ceil(rounded[axis] / stride) * stride)
        max_size = tuple(rounded)

    batch_shape = (len(tensors),) + max_size
    batched_imgs = tensors[0].new(*batch_shape).zero_()
    for source, target in zip(tensors, batched_imgs):
        # Copy into the leading corner; the remainder stays zero padding.
        target[: source.shape[0], : source.shape[1], : source.shape[2]].copy_(source)

    return ImageList(batched_imgs, [im.shape[-2:] for im in tensors])
72 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/structures/relation_triplet.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .bounding_box import BoxList
4 |
class RelationTriplet:
    """
    A set of instances together with pairwise relations between them.

    Each row of ``pair_mat`` indexes two entries of ``instance``
    (column 0: subject, column 1: object); ``phrase_l`` and ``phrase_s`` hold
    one label and one score per row. Extra per-relation data can be attached
    through ``add_field``.
    """

    def __init__(self, instance:BoxList,
                 pair_mat:torch.Tensor, phrase_label:torch.Tensor, phrase_score:torch.Tensor):
        """
        :param instance: boxlist holding the instances referenced by pair_mat
        :param pair_mat: shape (connection_num, 2) [sub, obj]
        :param phrase_label: phrase label_id, one per row of pair_mat
        :param phrase_score: phrase score, one per row of pair_mat
        """
        self.instance = instance

        # All per-relation containers must have the same length.
        assert len(pair_mat) == len(phrase_label)
        assert len(phrase_label) == len(phrase_score)
        self.pair_mat = pair_mat
        self.phrase_l = phrase_label
        self.phrase_s = phrase_score

        self.extra_fields = {}

    def to(self, device):
        """Return a copy with instances, tensors and movable extra fields on ``device``."""
        moved = RelationTriplet(self.instance.to(device),
                                self.pair_mat.to(device),
                                self.phrase_l.to(device),
                                self.phrase_s.to(device))

        for name, data in self.extra_fields.items():
            # Values without a ``to`` method are carried over unchanged.
            moved.add_field(name, data.to(device) if hasattr(data, "to") else data)
        return moved

    def get_instance_list(self, side):
        """Return the instances on the ``'sub'`` or ``'obj'`` side of every relation."""
        assert side in ['sub', 'obj']
        column = 0 if side == 'sub' else 1
        return self.instance[self.pair_mat[:, column]]

    # extra per-relation information

    def add_field(self, field, field_data):
        """Attach ``field_data`` under ``field``; it needs one entry per relation."""
        assert len(field_data) == len(self.pair_mat)
        self.extra_fields[field] = field_data

    def get_field(self, field):
        """Return the data stored under ``field`` (KeyError if absent)."""
        return self.extra_fields[field]

    def has_field(self, field):
        """Tell whether ``field`` has been added."""
        return field in self.extra_fields

    def fields(self):
        """Return the names of all extra fields as a list."""
        return list(self.extra_fields)

    def __getitem__(self, item):
        """Select relations by ``item``; the instance container is shared, not sliced."""
        subset = RelationTriplet(self.instance,
                                 self.pair_mat[item],
                                 self.phrase_l[item],
                                 self.phrase_s[item])
        for name, data in self.extra_fields.items():
            subset.add_field(name, data[item])
        return subset

    def __repr__(self):
        pieces = [
            self.__class__.__name__ + "(",
            "num_relation={}, ".format(len(self.pair_mat)),
            "instance_num={}, ".format(len(self.instance)),
            ")",
        ]
        return "".join(pieces)
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 |
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
6 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/youngfly11/LCMCG-PyTorch/e95299b9a9f1b13e21750ef0dcde0941d703d009/maskrcnn_benchmark/utils/__init__.py
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import PIL
3 |
4 | from torch.utils.collect_env import get_pretty_env_info
5 |
6 |
def get_pil_version():
    """Return a line naming the installed Pillow version, prefixed by a newline."""
    template = "\n Pillow ({})"
    return template.format(PIL.__version__)
9 |
10 |
def collect_env_info():
    """Return torch's pretty-printed environment report followed by the Pillow version."""
    return get_pretty_env_info() + get_pil_version()
15 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/cv2_util.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for cv2 utility functions and maintaining version compatibility
3 | between 3.x and 4.x
4 | """
5 | import cv2
6 |
7 |
def findContours(*args, **kwargs):
    """
    Wraps cv2.findContours to maintain compatibility between OpenCV versions.

    OpenCV 3.x returns ``(image, contours, hierarchy)`` whereas 2.x and 4.x
    return ``(contours, hierarchy)``. In every layout the last two items are
    the contours and the hierarchy, so they are taken from the end of the
    result. This avoids parsing ``cv2.__version__`` and keeps working for
    releases other than 3 and 4.

    All arguments are forwarded unchanged to ``cv2.findContours``.

    Returns:
        contours, hierarchy
    """
    result = cv2.findContours(*args, **kwargs)
    contours, hierarchy = result[-2:]
    return contours, hierarchy
25 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/debugger.py:
--------------------------------------------------------------------------------
1 | import ipdb
2 |
3 | from maskrcnn_benchmark.config import cfg
4 |
def set_trace():
    """Drop into the ipdb debugger, but only when ``cfg.DEBUG`` is truthy."""
    if not cfg.DEBUG:
        return
    ipdb.set_trace(context=10)
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/direction_word_dict.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3.6
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2019-06-16 21:21
4 | # @Author : Yongfei Liu
5 | # @Email : liuyf3@shanghaitech.edu.cn
6 | """
7 | Flickr:
8 | sent level, left:575, right:645
9 | phrase level, left:535, right 572
10 | """
11 |
12 | left_word_dict = ['left-hand', 'left', 'left-turning', 'camera-left']
13 | right_word_dict = ['right', 'upright', 'right-handed']
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import os
3 |
4 | from maskrcnn_benchmark.utils.imports import import_file
5 |
6 |
def setup_environment():
    """Run optional, user-provided environment setup.

    By default nothing happens. If the TORCH_DETECTRON_ENV_MODULE environment
    variable is set to a non-empty value, it is treated as the path of a
    Python source file and handed to ``setup_custom_environment``.
    """
    custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE")
    if not custom_module_path:
        # The default setup is a no-op
        return
    setup_custom_environment(custom_module_path)
18 |
19 |
def setup_custom_environment(custom_module_path):
    """Load a Python source file and call its ``setup_environment`` function.

    The file is imported under the module name
    ``maskrcnn_benchmark.utils.env.custom_module``. An assertion fails if the
    loaded module has no callable ``setup_environment`` attribute.
    """
    module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path)
    entry_point = getattr(module, "setup_environment", None)
    assert callable(entry_point), (
        "Custom environment module defined in {} does not have the "
        "required callable attribute 'setup_environment'."
    ).format(
        custom_module_path
    )
    entry_point()
34 |
35 |
36 | # Force environment setup when this module is imported
37 | setup_environment()
38 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/imports.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch
3 |
import sys

# ``torch._six`` is a private compatibility shim that recent PyTorch releases
# no longer ship, so the interpreter version is checked directly instead.
if sys.version_info[0] >= 3:
    import importlib
    import importlib.util

    # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
    def import_file(module_name, file_path, make_importable=False):
        """
        Import the Python source file at ``file_path`` as ``module_name``.

        Arguments:
            module_name: name given to the created module object.
            file_path: path of the source file to execute.
            make_importable: when true, also register the module in
                ``sys.modules`` so later ``import module_name`` finds it.

        Returns:
            The executed module object.

        Raises:
            ImportError: if no loader can be determined for ``file_path``.
        """
        spec = importlib.util.spec_from_file_location(module_name, file_path)
        if spec is None or spec.loader is None:
            raise ImportError(
                "Cannot import {!r} from {!r}: no loader for this file".format(
                    module_name, file_path
                )
            )
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        if make_importable:
            sys.modules[module_name] = module
        return module
else:
    import imp

    def import_file(module_name, file_path, make_importable=None):
        """Python 2 fallback: load ``file_path`` as ``module_name`` via ``imp``."""
        module = imp.load_source(module_name, file_path)
        return module
24 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/metric_logger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from collections import defaultdict
3 | from collections import deque
4 |
5 | import torch
6 |
7 |
class SmoothedValue(object):
    """Keep a running series of values.

    Exposes the median and mean over the most recent ``window_size`` values,
    as well as the average over every value seen so far.
    """

    def __init__(self, window_size=100):
        self.count = 0
        self.total = 0.0
        self.series = []
        # Bounded window: the oldest value drops out once it is full.
        self.deque = deque(maxlen=window_size)

    def update(self, value):
        """Record ``value`` in the window, the full series and the running totals."""
        self.total += value
        self.count += 1
        self.series.append(value)
        self.deque.append(value)

    def _window_tensor(self):
        """Return the current window contents as a tensor."""
        return torch.tensor(list(self.deque))

    @property
    def median(self):
        window = self._window_tensor()
        return window.median().item()

    @property
    def avg(self):
        window = self._window_tensor()
        return window.mean().item()

    @property
    def global_avg(self):
        return self.total / self.count
38 |
39 |
class MetricLogger(object):
    """
    Collection of named ``SmoothedValue`` meters.

    Meters are created on first use by ``update`` and can be read either
    through ``self.meters[name]`` or as attributes (``logger.name``).
    """

    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed every keyword value into the meter of the same name.

        Tensors are converted with ``.item()``; the resulting value must be a
        float or an int.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Resolve unknown attributes as meter names.

        Raises:
            AttributeError: if ``attr`` is neither a meter nor an instance
                attribute.
        """
        # __getattr__ only runs after normal lookup failed. ``meters`` is read
        # straight from the instance dict: going through ``self.meters`` would
        # re-enter this method and recurse without bound whenever the
        # attribute is not set yet (e.g. while copy/pickle rebuilds the
        # object without calling __init__).
        instance_dict = self.__dict__
        meters = instance_dict.get("meters")
        if meters is not None and attr in meters:
            return meters[attr]
        if attr in instance_dict:
            return instance_dict[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        """Render every meter as ``name: median (avg)`` joined by the delimiter."""
        loss_str = [
            "{}: {:.4f} ({:.4f})\n".format(name, meter.median, meter.avg)
            for name, meter in self.meters.items()
        ]
        return self.delimiter.join(loss_str)
67 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/miscellaneous.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import errno
3 | import os
4 |
5 |
def mkdir(path):
    """Create ``path`` including missing parents; an already existing path is not an error.

    Any ``OSError`` other than EEXIST is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        raise
12 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/ops.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3.6
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2019/8/11 15:31
4 | # @Author : Yongfei Liu
5 | # @Email : liuyf3@shanghaitech.edu.cn
6 |
7 | import numpy as np
8 | import torch
9 | import torch.nn as nn
10 |
class Linear(nn.Linear):
    """
    ``nn.Linear`` with a different default initialization.

    Weights are drawn uniformly from ``[-b, b]`` with
    ``b = sqrt(3 / fan_avg)`` and ``fan_avg = (in_features + out_features) / 2``;
    the bias, when present, is set to zero.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # compatible with xavier_initializer in TensorFlow
        mean_fan = (self.in_features + self.out_features) / 2.
        limit = np.sqrt(3. / mean_fan)
        nn.init.uniform_(self.weight, -limit, limit)
        if self.bias is None:
            return
        nn.init.constant_(self.bias, 0.)
21 |
22 |
def apply_mask1d(attention, image_locs):
    """
    Fill positions at or beyond a per-row limit with ``-1e30``.

    Arguments:
        attention: 2-D tensor of shape ``(batch_size, num_loc)``.
        image_locs: one limit per batch row (presumably the number of valid
            locations -- confirm against the caller).

    Returns:
        A new tensor where ``attention[b, i]`` is replaced by ``-1e30``
        whenever ``i >= image_locs[b]``.
    """
    batch_size, num_loc = attention.size()

    # Location indices 0..num_loc-1 with the dtype and device of `attention`.
    positions = attention.new_zeros(num_loc)
    positions.copy_(torch.arange(0, num_loc, dtype=attention.dtype))
    position_grid = positions.expand(batch_size, num_loc)

    limits = image_locs.type(position_grid.type())
    limit_grid = limits.unsqueeze(dim=1).expand(batch_size, num_loc)

    beyond_limit = position_grid >= limit_grid
    return attention.masked_fill(beyond_limit, -1e30)
35 |
def apply_mask2d(attention, image_locs):
    """
    Fill entries whose row or column index reaches a per-batch limit with ``-1e30``.

    Arguments:
        attention: 3-D tensor; the first two sizes are read as
            ``(batch_size, num_loc)`` and the mask is ``num_loc x num_loc``
            per batch element.
        image_locs: one limit per batch element (presumably the number of
            valid locations -- confirm against the caller).

    Returns:
        A new tensor where ``attention[b, i, j]`` is replaced by ``-1e30``
        whenever ``i >= image_locs[b]`` or ``j >= image_locs[b]``.
    """
    batch_size, num_loc, _ = attention.size()

    # Location indices 0..num_loc-1 with the dtype and device of `attention`.
    positions = attention.new_zeros(num_loc)
    positions.copy_(torch.arange(0, num_loc, dtype=attention.dtype))
    position_grid = positions.expand(batch_size, num_loc)

    limits = image_locs.type(position_grid.type())
    limit_grid = limits.unsqueeze(dim=1).expand(batch_size, num_loc)

    beyond_limit = position_grid >= limit_grid
    # An entry is masked if either of its two indices is beyond the limit.
    column_mask = beyond_limit[:, None, :]
    row_mask = beyond_limit[:, :, None]
    return attention.masked_fill(column_mask | row_mask, -1e30)
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/registry.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 |
4 | def _register_generic(module_dict, module_name, module):
5 | assert module_name not in module_dict
6 | module_dict[module_name] = module
7 |
8 |
class Registry(dict):
    '''
    A helper class for registering modules. It extends a dictionary and adds
    a ``register`` function.

    E.g. creating a registry:
        some_registry = Registry({"default": default_module})

    There are two ways of registering new modules:
    1): the normal way is calling the register function directly:
        def foo():
            ...
        some_registry.register("foo_module", foo)
    2): as a decorator when declaring the module:
        @some_registry.register("foo_module")
        @some_registry.register("foo_module_nickname")
        def foo():
            ...

    Modules are accessed just like dictionary entries, e.g.:
        f = some_registry["foo_module"]
    '''
    def __init__(self, *args, **kwargs):
        super(Registry, self).__init__(*args, **kwargs)

    def register(self, module_name, module=None):
        '''
        Register ``module`` under ``module_name``.

        With ``module`` given, registers immediately and returns None.
        Without it, returns a decorator that registers the decorated object
        and hands it back unchanged.
        '''
        if module is None:
            # used as decorator
            def register_fn(fn):
                _register_generic(self, module_name, fn)
                return fn

            return register_fn

        # used as function call
        _register_generic(self, module_name, module)
        return
46 |
--------------------------------------------------------------------------------
/maskrcnn_benchmark/utils/timer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 |
4 | import datetime
5 | import time
6 |
7 | import torch
8 |
9 |
class Timer(object):
    """Accumulates durations and reports the last or the average one."""

    def __init__(self):
        self.reset()

    @property
    def average_time(self):
        """Mean of all recorded durations, or 0.0 before the first one."""
        if self.calls > 0:
            return self.total_time / self.calls
        return 0.0

    def tic(self):
        """Mark the start of a measurement."""
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, average=True):
        """Record the time since ``tic``; return the average or this duration."""
        elapsed = time.time() - self.start_time
        self.add(elapsed)
        return self.average_time if average else self.diff

    def add(self, time_diff):
        """Record ``time_diff`` as one more measured duration."""
        self.diff = time_diff
        self.calls += 1
        self.total_time += self.diff

    def reset(self):
        """Clear every counter and timestamp."""
        self.start_time = 0.0
        self.diff = 0.0
        self.total_time = 0.0
        self.calls = 0

    def avg_time_str(self):
        """Return the average duration formatted via ``datetime.timedelta``."""
        return str(datetime.timedelta(seconds=self.average_time))
44 |
45 |
def get_time_str(time_diff):
    """Format a duration given in seconds using ``datetime.timedelta``'s string form."""
    return str(datetime.timedelta(seconds=time_diff))
49 |
50 |
class SimpleTimer(object):
    """A simple timer that waits for pending CUDA work before reading the clock."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        """Mark the start of a measurement."""
        # using time.time instead of time.clock because time.clock
        # does not normalize for multithreading
        self.start_time = time.time()

    def toc(self, event="", average=False):
        """
        Record the time elapsed since ``tic``.

        Arguments:
            event: label prepended to the printed message when ``average`` is
                false.
            average: when true, return the running average silently;
                otherwise print the elapsed time and return it.

        Returns:
            The running average or the last elapsed time, in seconds.
        """
        # Wait for queued GPU work so it is included in the measurement.
        # Synchronizing is skipped when CUDA is unavailable, where the call
        # would otherwise fail and make the timer unusable on CPU-only hosts.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        if average:
            return self.average_time
        else:
            print("%-28s %f" % (event + "cost time:", self.diff))
            return self.diff
77 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | ninja
2 | yacs
3 | cython
4 | matplotlib
5 | tqdm
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #!/usr/bin/env python
3 |
4 | import glob
5 | import os
6 |
7 | import torch
8 | from setuptools import find_packages
9 | from setuptools import setup
10 | from torch.utils.cpp_extension import CUDA_HOME
11 | from torch.utils.cpp_extension import CppExtension
12 | from torch.utils.cpp_extension import CUDAExtension
13 |
14 | requirements = ["torch", "torchvision"]
15 |
16 |
def get_extensions():
    """
    Describe the ``maskrcnn_benchmark._C`` extension module for setuptools.

    C++ sources are collected from ``maskrcnn_benchmark/csrc`` and its
    ``cpu`` subdirectory. When CUDA is available and ``CUDA_HOME`` is set,
    the ``cuda/*.cu`` sources are added, ``WITH_CUDA`` is defined and a
    CUDAExtension is built instead of a CppExtension.

    Returns:
        A one-element list holding the extension object.
    """
    root_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(root_dir, "maskrcnn_benchmark", "csrc")

    def _find(*pattern_parts):
        # Glob relative to the csrc directory.
        return glob.glob(os.path.join(extensions_dir, *pattern_parts))

    sources = _find("*.cpp") + _find("cpu", "*.cpp")
    cuda_sources = _find("cuda", "*.cu")

    extension = CppExtension
    define_macros = []
    extra_compile_args = {"cxx": []}

    if torch.cuda.is_available() and CUDA_HOME is not None:
        extension = CUDAExtension
        sources = sources + cuda_sources
        define_macros.append(("WITH_CUDA", None))
        extra_compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    sources = [os.path.join(extensions_dir, src) for src in sources]

    return [
        extension(
            "maskrcnn_benchmark._C",
            sources,
            include_dirs=[extensions_dir],
            define_macros=define_macros,
            extra_compile_args=extra_compile_args,
        )
    ]
57 |
58 |
59 | setup(
60 | name="maskrcnn_benchmark",
61 | version="0.1",
62 | author="fmassa",
63 | url="https://github.com/facebookresearch/maskrcnn-benchmark",
64 | description="object detection in pytorch",
65 | packages=find_packages(exclude=("configs", "tests",)),
66 | # install_requires=requirements,
67 | ext_modules=get_extensions(),
68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
69 | )
70 |
--------------------------------------------------------------------------------
/skipthoughts/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__
2 |
3 | from .skipthoughts import AbstractSkipThoughts
4 |
5 | from .skipthoughts import AbstractUniSkip
6 | from .skipthoughts import UniSkip
7 | from .skipthoughts import DropUniSkip
8 | from .skipthoughts import BayesianUniSkip
9 |
10 | from .skipthoughts import AbstractBiSkip
11 | from .skipthoughts import BiSkip
12 |
13 | from .gru import AbstractGRUCell
14 | from .gru import GRUCell
15 | from .gru import BayesianGRUCell
16 |
17 | from .gru import AbstractGRU
18 | from .gru import GRU
19 | from .gru import BayesianGRU
20 |
21 | from .dropout import EmbeddingDropout
22 | from .dropout import SequentialDropout
--------------------------------------------------------------------------------
/skipthoughts/version.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.0.0'
--------------------------------------------------------------------------------
/tests/env_tests/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 | import os
4 |
5 |
def get_config_root_path():
    ''' Path to configs for unit tests '''
    path = os.path.abspath(os.path.realpath(__file__))
    # Three levels up from this file: its own directory (root/tests/env_tests
    # per the original note), then that directory's parent, then the root.
    for _ in range(3):
        path = os.path.dirname(path)
    return os.path.join(path, "configs")
13 |
--------------------------------------------------------------------------------
/tests/test_backbones.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 | import unittest
4 | import copy
5 | import torch
6 | # import modules to to register backbones
7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
8 | from maskrcnn_benchmark.modeling import registry
9 | from maskrcnn_benchmark.config import cfg as g_cfg
10 | from .utils import load_config
11 |
12 |
13 | # overwrite configs if specified, otherwise default config is used
14 | BACKBONE_CFGS = {
15 | "R-50-FPN": "e2e_faster_rcnn_R_50_FPN_1x.yaml",
16 | "R-101-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",
17 | "R-152-FPN": "e2e_faster_rcnn_R_101_FPN_1x.yaml",
18 | "R-50-FPN-RETINANET": "retinanet/retinanet_R-50-FPN_1x.yaml",
19 | "R-101-FPN-RETINANET": "retinanet/retinanet_R-101-FPN_1x.yaml",
20 | }
21 |
22 |
class TestBackbones(unittest.TestCase):
    def test_build_backbones(self):
        ''' Make sure backbones run '''

        self.assertGreater(len(registry.BACKBONES), 0)

        N, C_in, H, W = 2, 3, 224, 256

        for name, backbone_builder in registry.BACKBONES.items():
            print('Testing {}...'.format(name))
            # Use the dedicated config when one is listed, otherwise a copy
            # of the default config.
            cfg = (
                load_config(BACKBONE_CFGS[name])
                if name in BACKBONE_CFGS
                else copy.deepcopy(g_cfg)
            )
            backbone = backbone_builder(cfg)

            # every backbone has to expose `out_channels`
            self.assertIsNotNone(
                getattr(backbone, 'out_channels', None),
                'Need to provide out_channels for backbone {}'.format(name)
            )

            images = torch.rand([N, C_in, H, W], dtype=torch.float32)
            expected_prefix = torch.Size([N, backbone.out_channels])
            for feature_map in backbone(images):
                self.assertEqual(feature_map.shape[:2], expected_prefix)
52 |
53 |
54 | if __name__ == "__main__":
55 | unittest.main()
56 |
--------------------------------------------------------------------------------
/tests/test_configs.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 | import unittest
4 | import glob
5 | import os
6 | import utils
7 |
8 |
class TestConfigs(unittest.TestCase):
    ''' Checks that the yaml configs under the config root can be loaded '''

    def test_configs_load(self):
        ''' Make sure configs are loadable

        Every `*.yaml` file found recursively under the config root is
        merged into a copy of the default config; any exception raised
        while loading fails the sub-test for that file.
        '''

        cfg_root_path = utils.get_config_root_path()
        # Sorted so the run order does not depend on filesystem listing order.
        files = sorted(glob.glob(
            os.path.join(cfg_root_path, "./**/*.yaml"), recursive=True))
        self.assertGreater(len(files), 0)

        for fn in files:
            # One sub-test per file: a single unloadable config no longer
            # stops the remaining configs from being checked.
            with self.subTest(config=fn):
                print('Loading {}...'.format(fn))
                utils.load_config_from_file(fn)
22 |
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
25 |
--------------------------------------------------------------------------------
/tests/test_metric_logger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import unittest
3 |
4 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger
5 |
6 |
class TestMetricLogger(unittest.TestCase):
    ''' Unit tests for MetricLogger '''

    def test_update(self):
        ''' Updating one key with 0.0 .. 9.0 yields the expected statistics '''
        metrics = MetricLogger()
        for value in range(10):
            metrics.update(metric=float(value))

        tracked = metrics.meters["metric"]
        self.assertEqual(tracked.count, 10)
        self.assertEqual(tracked.total, 45)
        self.assertEqual(tracked.median, 4)
        self.assertEqual(tracked.avg, 4.5)

    def test_no_attr(self):
        ''' Known attributes resolve; an unknown one raises AttributeError '''
        metrics = MetricLogger()
        # These two lookups must succeed without raising.
        _ = metrics.meters
        _ = metrics.delimiter

        with self.assertRaises(AttributeError):
            _ = metrics.not_existent
26 |
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
29 |
--------------------------------------------------------------------------------
/tests/test_rpn_heads.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 |
3 | import unittest
4 | import copy
5 | import torch
6 | # import modules to register rpn heads
7 | from maskrcnn_benchmark.modeling.backbone import build_backbone # NoQA
8 | from maskrcnn_benchmark.modeling.rpn.rpn import build_rpn # NoQA
9 | from maskrcnn_benchmark.modeling import registry
10 | from maskrcnn_benchmark.config import cfg as g_cfg
11 | from utils import load_config
12 |
13 |
# Config file (path relative to the config root) to load for specific RPN
# head names; a head without an entry here is built from the default config.
# The mapping is currently empty.
RPN_CFGS = {
}
17 |
18 |
class TestRPNHeads(unittest.TestCase):
    ''' Smoke tests for every RPN head registered in registry.RPN_HEADS '''

    def test_build_rpn_heads(self):
        ''' Make sure rpn heads run

        Each registered head is built from the config listed for it in
        RPN_CFGS (or from a copy of the default config when it has no
        entry), called on a list of identical random inputs, and the shapes
        of the two outputs it returns are checked for every list entry.
        '''

        self.assertGreater(len(registry.RPN_HEADS), 0)

        in_channels = 64
        num_anchors = 10
        # Number of inputs passed to the head in one call.
        num_levels = 3
        batch_size, height, width = 2, 24, 32

        for name, builder in registry.RPN_HEADS.items():
            # subTest keeps one broken head from aborting the loop and
            # hiding the results of the remaining ones; it also labels any
            # failure with the head name.
            with self.subTest(rpn_head=name):
                print('Testing {}...'.format(name))
                if name in RPN_CFGS:
                    cfg = load_config(RPN_CFGS[name])
                else:
                    # Use default config if config file is not specified
                    cfg = copy.deepcopy(g_cfg)

                rpn = builder(cfg, in_channels, num_anchors)

                features = torch.rand(
                    [batch_size, in_channels, height, width],
                    dtype=torch.float32,
                )
                out = rpn([features] * num_levels)
                self.assertEqual(len(out), 2)
                logits, bbox_reg = out
                for idx in range(num_levels):
                    # Expect num_anchors channels at the input resolution.
                    self.assertEqual(
                        logits[idx].shape,
                        torch.Size([batch_size, num_anchors, height, width])
                    )
                    # Expect four values per anchor, same batch/spatial size.
                    self.assertEqual(
                        bbox_reg[idx].shape,
                        torch.Size([
                            batch_size, num_anchors * 4, height, width,
                        ]),
                    )
59 |
60 |
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
63 |
--------------------------------------------------------------------------------
/tests/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function, unicode_literals
2 |
3 | # Set up custom environment before nearly anything else is imported
4 | # NOTE: this should be the first import (do not reorder)
5 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip
6 | import env_tests.env as env_tests
7 |
8 | import os
9 | import copy
10 |
11 | from maskrcnn_benchmark.config import cfg as g_cfg
12 |
13 |
def get_config_root_path():
    ''' Return the config root path as reported by env_tests.get_config_root_path() '''
    return env_tests.get_config_root_path()
16 |
17 |
def load_config(rel_path):
    '''
    Load config from a file path given relative to the config root.

    The path is joined onto the config root and handed to
    load_config_from_file, whose result is returned.
    '''
    config_root = env_tests.get_config_root_path()
    return load_config_from_file(os.path.join(config_root, rel_path))
22 |
23 |
def load_config_from_file(file_path):
    '''
    Load config from file path specified as absolute path.

    The file is merged into a deep copy of the global default config, so
    the global config object itself is not modified; the merged copy is
    returned.
    '''
    merged_cfg = copy.deepcopy(g_cfg)
    merged_cfg.merge_from_file(file_path)
    return merged_cfg
29 |
--------------------------------------------------------------------------------