├── .gitignore ├── DEVELOP_GUIDE.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── TROUBLESHOOTING.md ├── configs ├── efficient_net_b3_ssd300_voc0712.yaml ├── mobilenet_v2_ssd320_voc0712.yaml ├── mobilenet_v3_ssd320_voc0712.yaml ├── vgg_ssd300_coco_trainval35k.yaml ├── vgg_ssd300_voc0712.yaml ├── vgg_ssd512_coco_trainval35k.yaml └── vgg_ssd512_voc0712.yaml ├── demo.py ├── demo ├── 000342.jpg ├── 000542.jpg ├── 003123.jpg ├── 004101.jpg └── 008591.jpg ├── figures ├── 004545.jpg ├── losses.png ├── lr.png └── metrics.png ├── outputs └── .gitignore ├── requirements.txt ├── setup.py ├── ssd ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── path_catlog.py ├── data │ ├── __init__.py │ ├── build.py │ ├── datasets │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ └── __init__.py │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── eval_detection_voc.py │ │ └── voc.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── target_transform.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── inference.py │ └── trainer.py ├── layers │ ├── __init__.py │ └── separable_conv.py ├── modeling │ ├── __init__.py │ ├── anchors │ │ ├── __init__.py │ │ └── prior_box.py │ ├── backbone │ │ ├── __init__.py │ │ ├── efficient_net │ │ │ ├── __init__.py │ │ │ ├── efficient_net.py │ │ │ └── utils.py │ │ ├── mobilenet.py │ │ ├── mobilenetv3.py │ │ └── vgg.py │ ├── box_head │ │ ├── __init__.py │ │ ├── box_head.py │ │ ├── box_predictor.py │ │ ├── inference.py │ │ └── loss.py │ ├── detector │ │ ├── __init__.py │ │ └── ssd_detector.py │ └── registry.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ └── container.py └── utils │ ├── __init__.py │ ├── box_utils.py │ ├── checkpoint.py │ ├── dist_util.py │ ├── logger.py │ ├── metric_logger.py │ ├── misc.py │ ├── model_zoo.py │ ├── nms.py │ └── registry.py ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | *.pyc 4 | *.so 5 | ext/build/ 6 | ext/torch_extension.egg-info/ 7 | dist/ 8 | *.egg-info 9 | 10 | # pytorch/python/numpy formats 11 | *.pth 12 | *.pkl 13 | *.npy 14 | 15 | # ipython/jupyter notebooks 16 | *.ipynb 17 | **/.ipynb_checkpoints/ 18 | 19 | # Editor temporaries 20 | *.swn 21 | *.swo 22 | *.swp 23 | *~ 24 | 25 | # Pycharm editor settings 26 | .idea 27 | .DS_Store 28 | -------------------------------------------------------------------------------- /DEVELOP_GUIDE.md: -------------------------------------------------------------------------------- 1 | # Develop Guide 2 | 3 | ## Custom Dataset 4 | Adding your custom dataset is simple and flexible. 5 | For example, create `ssd/data/datasets/my_dataset.py`: 6 | ```python 7 | import torch.utils.data 8 | import numpy as np 9 | from ssd.structures.container import Container 10 | 11 | class MyDataset(torch.utils.data.Dataset): 12 | def __init__(self, ..., transform=None, target_transform=None): 13 | # as you would do normally 14 | ... 15 | self.transform = transform 16 | self.target_transform = target_transform 17 | 18 | def __getitem__(self, index): 19 | # load the image as a PIL Image 20 | image = ... 21 | 22 | # load the bounding boxes in x1, y1, x2, y2 order.
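# (a minimal sketch of what loading might look like — `self.annotations`
# is a hypothetical per-image list of dicts, not part of this repo:)
#   anns = self.annotations[index]
#   boxes = np.array([a["bbox"] for a in anns], dtype=np.float32)
#   labels = np.array([a["label"] for a in anns], dtype=np.int64)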
23 | boxes = np.zeros((N, 4), dtype=np.float32) 24 | # and labels 25 | labels = np.zeros((N, ), dtype=np.int64) 26 | 27 | if self.transform: 28 | image, boxes, labels = self.transform(image, boxes, labels) 29 | if self.target_transform: 30 | boxes, labels = self.target_transform(boxes, labels) 31 | targets = Container( 32 | boxes=boxes, 33 | labels=labels, 34 | ) 35 | # return the image, the targets and the index in your dataset 36 | return image, targets, index 37 | ``` 38 | 39 | in `ssd/data/datasets/__init__.py` 40 | ```python 41 | from .my_dataset import MyDataset 42 | 43 | _DATASETS = { 44 | 'VOCDataset': VOCDataset, 45 | 'COCODataset': COCODataset, 46 | 'MyDataset': MyDataset, 47 | } 48 | ``` 49 | 50 | in `ssd/config/path_catlog.py`: 51 | ```python 52 | DATASETS = { 53 | ... 54 | 'my_custom_dataset': { 55 | "arg1": "your/arg", 56 | "arg2": "your/arg", 57 | }, 58 | ... 59 | } 60 | 61 | @staticmethod 62 | def get(name): 63 | ... 64 | if name == 'my_custom_dataset': 65 | attrs = DatasetCatalog.DATASETS[name] 66 | return dict(factory="MyDataset", args=attrs) 67 | ... 68 | ``` 69 | 70 | in your `config.yaml`: 71 | ```yaml 72 | DATASETS: 73 | TRAIN: ("my_custom_dataset", ) 74 | TEST: ("my_custom_test_dataset", ) 75 | ``` 76 | 77 | ### Test 78 | While the aforementioned example should work for training, it's also easy to add your custom test code: 79 | in `ssd/data/datasets/evaluation/__init__.py` 80 | ```python 81 | if isinstance(dataset, MyDataset): 82 | return my_own_evaluation(**args) 83 | ``` 84 | 85 | ## Custom Backbone 86 | 87 | It is very simple to add your own backbone for SSD. 88 | For example, create `ssd/modeling/backbone/my_backbone.py`: 89 | ```python 90 | import torch.nn as nn 91 | 92 | from ssd.modeling import registry 93 | from ssd.utils.model_zoo import load_state_dict_from_url 94 | 95 | 96 | class MyBackbone(nn.Module): 97 | def __init__(self, cfg): 98 | super().__init__() 99 | ... 100 | 101 | def forward(self, x): 102 | features = [] 103 | 104 | # forward your network 105 | 106 | # append each feature map you want to do prediction on.
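# (a sketch under assumed names — `self.stage1` ... `self.stage4` are
# hypothetical sub-modules; each appended map must match, in order, one
# entry of MODEL.BACKBONE.OUT_CHANNELS:)
#   x = self.stage1(x); feature1 = x
#   x = self.stage2(x); feature2 = x
#   x = self.stage3(x); feature3 = x
#   x = self.stage4(x); feature4 = x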
107 | 108 | features.append(feature1) 109 | features.append(feature2) 110 | features.append(feature3) 111 | features.append(feature4) 112 | 113 | # return them as a tuple 114 | return tuple(features) 115 | 116 | @registry.BACKBONES.register('my_backbone') 117 | def my_backbone(cfg, pretrained=True): 118 | model = MyBackbone(cfg) 119 | model_url = 'your_model_url' 120 | if pretrained: 121 | model.init_from_pretrain(load_state_dict_from_url(model_url)) 122 | return model 123 | ``` 124 | in `ssd/modeling/backbone/__init__.py`: 125 | ```python 126 | from .my_backbone import MyBackbone 127 | ``` 128 | 129 | in your `config.yaml`: 130 | ```yaml 131 | MODEL: 132 | BACKBONE: 133 | NAME: 'my_backbone' 134 | OUT_CHANNELS: (-, -, -, -) # should match feature1 - feature4's out_channels in MyBackbone 135 | PRIORS: 136 | FEATURE_MAPS: [-, -, -, -] # feature1 - feature4's size 137 | STRIDES: [-, -, -, -] # feature1 - feature4's output stride 138 | MIN_SIZES: [21, 45, 99, 153] # your custom anchor settings 139 | MAX_SIZES: [45, 99, 153, 207] 140 | ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3]] 141 | BOXES_PER_LOCATION: [6, 6, 6, 6] 142 | ``` -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 lufficc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include configs *.yaml 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # High quality, fast, modular reference implementation of SSD in PyTorch 1.0 2 | 3 | 4 | This repository implements [SSD (Single Shot MultiBox Detector)](https://arxiv.org/abs/1512.02325). The implementation is heavily influenced by the projects [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch), [pytorch-ssd](https://github.com/qfgaohao/pytorch-ssd) and [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark). This repository aims to be the code base for research based on SSD. 5 | 6 |
7 | ![Example SSD output (vgg_ssd300_voc0712).](figures/004545.jpg) 8 | 9 |
10 | 11 | | Losses | Learning rate | Metrics | 12 | | :-----------: |:-------------:| :------:| 13 | | ![losses](figures/losses.png) | ![lr](figures/lr.png) | ![metric](figures/metrics.png) | 14 | 15 | ## Highlights 16 | 17 | - **PyTorch 1.0**: Supports PyTorch 1.0 or higher. 18 | - **Multi-GPU training and inference**: We use `DistributedDataParallel`; you can train or test with an arbitrary number of GPUs, and the training schedule will change accordingly. 19 | - **Modular**: Add your own modules without pain. We abstract `backbone`, `Detector`, `BoxHead`, `BoxPredictor`, etc., so you can replace every component with your own code without changing the code base. For example, you can add [EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch) as a backbone: just add `efficient_net.py` (already added), register it, and specify it in the config file. It's done! 20 | - **CPU support for inference**: runs on the CPU at inference time. 21 | - **Smooth and enjoyable training procedure**: we save the state of the model, optimizer, scheduler, and training iteration, so you can stop your training and resume exactly from the saved point without changing your training `CMD`. 22 | - **Batched inference**: can perform inference using multiple images per batch per GPU. 23 | - **Evaluating during training**: evaluate your model every `eval_step` iterations to check whether performance is improving. 24 | - **Metrics Visualization**: visualize metric details in TensorBoard, such as AP, APl, APm and APs for the COCO dataset, or mAP and the 20 per-category APs for the VOC dataset. 25 | - **Auto download**: load pre-trained weights from a URL and cache them. 26 | ## Installation 27 | ### Requirements 28 | 29 | 1. Python3 30 | 1. PyTorch 1.0 or higher 31 | 1. yacs 32 | 1. [Vizer](https://github.com/lufficc/Vizer) 33 | 1. GCC >= 4.9 34 | 1. OpenCV 35 | 36 | 37 | ### Step-by-step installation 38 | 39 | ```bash 40 | git clone https://github.com/lufficc/SSD.git 41 | cd SSD 42 | # Required packages: torch torchvision yacs tqdm opencv-python vizer 43 | pip install -r requirements.txt 44 | 45 | # Done! That's ALL! No BUILD! No bothering SETUP! 46 | 47 | # It's recommended to install the latest release of torch and torchvision. 48 | ``` 49 | 50 | 51 | ## Train 52 | 53 | ### Setting Up Datasets 54 | #### Pascal VOC 55 | 56 | For the Pascal VOC dataset, make the folder structure like this: 57 | ``` 58 | VOC_ROOT 59 | |__ VOC2007 60 | |_ JPEGImages 61 | |_ Annotations 62 | |_ ImageSets 63 | |_ SegmentationClass 64 | |__ VOC2012 65 | |_ JPEGImages 66 | |_ Annotations 67 | |_ ImageSets 68 | |_ SegmentationClass 69 | |__ ... 70 | ``` 71 | `VOC_ROOT` defaults to the `datasets` folder in the current project; you can create symlinks inside `datasets` or `export VOC_ROOT="/path/to/voc_root"`. 72 | 73 | #### COCO 74 | 75 | For the COCO dataset, make the folder structure like this: 76 | ``` 77 | COCO_ROOT 78 | |__ annotations 79 | |_ instances_valminusminival2014.json 80 | |_ instances_minival2014.json 81 | |_ instances_train2014.json 82 | |_ instances_val2014.json 83 | |_ ... 84 | |__ train2014 85 | |_ .jpg 86 | |_ ... 87 | |_ .jpg 88 | |__ val2014 89 | |_ .jpg 90 | |_ ... 91 | |_ .jpg 92 | |__ ... 93 | ``` 94 | `COCO_ROOT` defaults to the `datasets` folder in the current project; you can create symlinks inside `datasets` or `export COCO_ROOT="/path/to/coco_root"`.
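For example, a minimal setup sketch (the `/data/...` paths below are illustrative, not part of this repo):
```bash
# link existing datasets into the default `datasets` folder
mkdir -p datasets
ln -s /data/VOCdevkit/VOC2007 datasets/VOC2007
ln -s /data/VOCdevkit/VOC2012 datasets/VOC2012
# or point the dataset roots at the data directly
export VOC_ROOT="/data/VOCdevkit"
export COCO_ROOT="/data/coco"
```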
95 | 96 | ### Single GPU training 97 | 98 | ```bash 99 | # for example, train SSD300: 100 | python train.py --config-file configs/vgg_ssd300_voc0712.yaml 101 | ``` 102 | ### Multi-GPU training 103 | 104 | ```bash 105 | # for example, train SSD300 with 4 GPUs: 106 | export NGPUS=4 107 | python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py --config-file configs/vgg_ssd300_voc0712.yaml SOLVER.WARMUP_FACTOR 0.03333 SOLVER.WARMUP_ITERS 1000 108 | ``` 109 | The provided configuration files assume that we are running on a single GPU. When changing the number of GPUs, the hyper-parameters (lr, max_iter, ...) should also change according to this paper: [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). 110 | 111 | ## Evaluate 112 | 113 | ### Single GPU evaluating 114 | 115 | ```bash 116 | # for example, evaluate SSD300: 117 | python test.py --config-file configs/vgg_ssd300_voc0712.yaml 118 | ``` 119 | 120 | ### Multi-GPU evaluating 121 | 122 | ```bash 123 | # for example, evaluate SSD300 with 4 GPUs: 124 | export NGPUS=4 125 | python -m torch.distributed.launch --nproc_per_node=$NGPUS test.py --config-file configs/vgg_ssd300_voc0712.yaml 126 | ``` 127 | 128 | ## Demo 129 | 130 | Predicting images in a folder is simple: 131 | ```bash 132 | python demo.py --config-file configs/vgg_ssd300_voc0712.yaml --images_dir demo --ckpt https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_voc0712.pth 133 | ``` 134 | It will download and cache `vgg_ssd300_voc0712.pth` automatically, and the predicted images with boxes, scores and label names will be saved to the `demo/result` folder by default. 135 | 136 | You will see a similar output: 137 | ```text 138 | (0001/0005) 004101.jpg: objects 01 | load 010ms | inference 033ms | FPS 31 139 | (0002/0005) 003123.jpg: objects 05 | load 009ms | inference 019ms | FPS 53 140 | (0003/0005) 000342.jpg: objects 02 | load 009ms | inference 019ms | FPS 51 141 | (0004/0005) 008591.jpg: objects 02 | load 008ms | inference 020ms | FPS 50 142 | (0005/0005) 000542.jpg: objects 01 | load 011ms | inference 019ms | FPS 53 143 | ``` 144 | 145 | ## MODEL ZOO 146 | ### Original Paper: 147 | 148 | | | VOC2007 test | coco test-dev2015 | 149 | | :-----: | :----------: | :----------: | 150 | | SSD300* | 77.2 | 25.1 | 151 | | SSD512* | 79.8 | 28.8 | 152 | 153 | ### COCO: 154 | 155 | | Backbone | Input Size | box AP | Model Size | Download | 156 | | :------------: | :----------:| :--------------------------: | :--------: | :-------: | 157 | | VGG16 | 300 | 25.2 | 262MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_coco_trainval35k.pth) | 158 | | VGG16 | 512 | 29.0 | 275MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd512_coco_trainval35k.pth) | 159 | 160 | ### PASCAL VOC: 161 | 162 | | Backbone | Input Size | mAP | Model Size | Download | 163 | | :--------------: | :----------:| :--------------------------: | :--------: | :-------: | 164 | | VGG16 | 300 | 77.7 | 201MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_voc0712.pth) | 165 | | VGG16 | 512 | 80.7 | 207MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd512_voc0712.pth) | 166 | | Mobilenet V2 | 320 | 68.9 | 25.5MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/mobilenet_v2_ssd320_voc0712_v2.pth) | 167 | | Mobilenet V3 | 320 | 69.5 | 29.9MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/mobilenet_v3_ssd320_voc0712.pth) | 168 | | EfficientNet-B3 | 300 | 
73.9 | 97.1MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/efficient_net_b3_ssd300_voc0712.pth) | 169 | 170 | ## Develop Guide 171 | 172 | If you want to add your custom components, please see [DEVELOP_GUIDE.md](DEVELOP_GUIDE.md) for more details. 173 | 174 | 175 | ## Troubleshooting 176 | If you have issues running or compiling this code, we have compiled a list of common issues in [TROUBLESHOOTING.md](TROUBLESHOOTING.md). If your issue is not present there, please feel free to open a new issue. 177 | 178 | ## Citations 179 | If you use this project in your research, please cite it. 180 | ```text 181 | @misc{lufficc2018ssd, 182 | author = {Congcong Li}, 183 | title = {{High quality, fast, modular reference implementation of SSD in PyTorch}}, 184 | year = {2018}, 185 | howpublished = {\url{https://github.com/lufficc/SSD}} 186 | } 187 | ``` -------------------------------------------------------------------------------- /TROUBLESHOOTING.md: -------------------------------------------------------------------------------- 1 | # Troubleshooting 2 | 3 | ## RuntimeError: merge_sort: failed to synchronize: an illegal memory access was encountered 4 | 5 | This occurs in the multi-box loss: the sort fails because of NaN values. It may be a bug in `log_softmax`: https://github.com/pytorch/pytorch/issues/14335. Three ways to solve it: 6 | 1. Use a smaller warmup factor, like 0.1 (append `SOLVER.WARMUP_FACTOR 0.1` to the end of your train cmd). 7 | 1. Use longer warmup iterations, like 1000 (append `SOLVER.WARMUP_ITERS 1000` to the end of your train cmd). 8 | 1. Try the workaround [described in the forums by Jinserk Baik](https://discuss.pytorch.org/t/ctcloss-performance-of-pytorch-1-0-0/27524/29) -------------------------------------------------------------------------------- /configs/efficient_net_b3_ssd300_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | BACKBONE: 4 | NAME: 'efficient_net-b3' 5 | OUT_CHANNELS: (48, 136, 384, 256, 256, 256) 6 | INPUT: 7 | IMAGE_SIZE: 300 8 | DATASETS: 9 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 10 | TEST: ("voc_2007_test", ) 11 | SOLVER: 12 | MAX_ITER: 160000 13 | LR_STEPS: [105000, 135000] 14 | GAMMA: 0.1 15 | BATCH_SIZE: 24 16 | LR: 1e-3 17 | 18 | OUTPUT_DIR: 'outputs/efficient_net_b3_ssd300_voc0712' -------------------------------------------------------------------------------- /configs/mobilenet_v2_ssd320_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | BOX_HEAD: 4 | PREDICTOR: 'SSDLiteBoxPredictor' 5 | BACKBONE: 6 | NAME: 'mobilenet_v2' 7 | OUT_CHANNELS: (96, 1280, 512, 256, 256, 64) 8 | PRIORS: 9 | FEATURE_MAPS: [20, 10, 5, 3, 2, 1] 10 | STRIDES: [16, 32, 64, 107, 160, 320] 11 | MIN_SIZES: [60, 105, 150, 195, 240, 285] 12 | MAX_SIZES: [105, 150, 195, 240, 285, 330] 13 | ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]] 14 | BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6] 15 | INPUT: 16 | IMAGE_SIZE: 320 17 | DATASETS: 18 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 19 | TEST: ("voc_2007_test", ) 20 | SOLVER: 21 | MAX_ITER: 120000 22 | LR_STEPS: [80000, 100000] 23 | GAMMA: 0.1 24 | BATCH_SIZE: 32 25 | LR: 1e-3 26 | 27 | OUTPUT_DIR: 'outputs/mobilenet_v2_ssd320_voc0712' -------------------------------------------------------------------------------- /configs/mobilenet_v3_ssd320_voc0712.yaml: -------------------------------------------------------------------------------- 1 | 
MODEL: 2 | NUM_CLASSES: 21 3 | BOX_HEAD: 4 | PREDICTOR: 'SSDLiteBoxPredictor' 5 | BACKBONE: 6 | NAME: 'mobilenet_v3' 7 | OUT_CHANNELS: (112, 960, 512, 256, 256, 64) 8 | PRIORS: 9 | FEATURE_MAPS: [20, 10, 5, 3, 2, 1] 10 | STRIDES: [16, 32, 64, 107, 160, 320] 11 | MIN_SIZES: [60, 105, 150, 195, 240, 285] 12 | MAX_SIZES: [105, 150, 195, 240, 285, 330] 13 | ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]] 14 | BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6] 15 | INPUT: 16 | IMAGE_SIZE: 320 17 | DATASETS: 18 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 19 | TEST: ("voc_2007_test", ) 20 | SOLVER: 21 | MAX_ITER: 120000 22 | LR_STEPS: [80000, 100000] 23 | GAMMA: 0.1 24 | BATCH_SIZE: 32 25 | LR: 1e-3 26 | 27 | OUTPUT_DIR: 'outputs/mobilenet_v3_ssd320_voc0712' 28 | -------------------------------------------------------------------------------- /configs/vgg_ssd300_coco_trainval35k.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 81 3 | PRIORS: 4 | FEATURE_MAPS: [38, 19, 10, 5, 3, 1] 5 | STRIDES: [8, 16, 32, 64, 100, 300] 6 | MIN_SIZES: [21, 45, 99, 153, 207, 261] 7 | MAX_SIZES: [45, 99, 153, 207, 261, 315] 8 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2], [2]] 9 | BOXES_PER_LOCATION: [4, 6, 6, 6, 4, 4] 10 | INPUT: 11 | IMAGE_SIZE: 300 12 | DATASETS: 13 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 14 | TEST: ("coco_2014_minival", ) 15 | SOLVER: 16 | MAX_ITER: 400000 17 | LR_STEPS: [280000, 360000] 18 | GAMMA: 0.1 19 | BATCH_SIZE: 32 20 | LR: 1e-3 21 | 22 | OUTPUT_DIR: 'outputs/vgg_ssd300_coco_trainval35k' -------------------------------------------------------------------------------- /configs/vgg_ssd300_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | INPUT: 4 | IMAGE_SIZE: 300 5 | DATASETS: 6 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 7 | TEST: ("voc_2007_test", ) 8 | SOLVER: 9 | MAX_ITER: 120000 10 | LR_STEPS: [80000, 100000] 11 | GAMMA: 0.1 12 | BATCH_SIZE: 32 13 | LR: 1e-3 14 | 15 | OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712' -------------------------------------------------------------------------------- /configs/vgg_ssd512_coco_trainval35k.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 81 3 | BACKBONE: 4 | OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) 5 | PRIORS: 6 | FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1] 7 | STRIDES: [8, 16, 32, 64, 128, 256, 512] 8 | MIN_SIZES: [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8] 9 | MAX_SIZES: [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72] 10 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]] 11 | BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4] 12 | INPUT: 13 | IMAGE_SIZE: 512 14 | DATASETS: 15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 16 | TEST: ("coco_2014_minival", ) 17 | SOLVER: 18 | MAX_ITER: 520000 19 | LR_STEPS: [360000, 480000] 20 | GAMMA: 0.1 21 | BATCH_SIZE: 24 22 | LR: 1e-3 23 | 24 | OUTPUT_DIR: 'outputs/vgg_ssd512_coco_trainval35k' -------------------------------------------------------------------------------- /configs/vgg_ssd512_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | NUM_CLASSES: 21 3 | BACKBONE: 4 | OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) 5 | PRIORS: 6 | FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1] 7 | STRIDES: [8, 16, 32, 64, 128, 256, 512] 8 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 
307.2, 384.0, 460.8] 9 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.65] 10 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]] 11 | BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4] 12 | INPUT: 13 | IMAGE_SIZE: 512 14 | DATASETS: 15 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval") 16 | TEST: ("voc_2007_test", ) 17 | SOLVER: 18 | MAX_ITER: 120000 19 | LR_STEPS: [80000, 100000] 20 | GAMMA: 0.1 21 | BATCH_SIZE: 24 22 | LR: 1e-3 23 | 24 | OUTPUT_DIR: 'outputs/vgg_ssd512_voc0712' -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import time 4 | 5 | import torch 6 | from PIL import Image 7 | from vizer.draw import draw_boxes 8 | 9 | from ssd.config import cfg 10 | from ssd.data.datasets import COCODataset, VOCDataset 11 | import argparse 12 | import numpy as np 13 | 14 | from ssd.data.transforms import build_transforms 15 | from ssd.modeling.detector import build_detection_model 16 | from ssd.utils import mkdir 17 | from ssd.utils.checkpoint import CheckPointer 18 | 19 | 20 | @torch.no_grad() 21 | def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): 22 | if dataset_type == "voc": 23 | class_names = VOCDataset.class_names 24 | elif dataset_type == 'coco': 25 | class_names = COCODataset.class_names 26 | else: 27 | raise NotImplementedError('Not implemented now.') 28 | device = torch.device(cfg.MODEL.DEVICE) 29 | 30 | model = build_detection_model(cfg) 31 | model = model.to(device) 32 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR) 33 | checkpointer.load(ckpt, use_latest=ckpt is None) 34 | weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file() 35 | print('Loaded weights from {}'.format(weight_file)) 36 | 37 | image_paths = glob.glob(os.path.join(images_dir, '*.jpg')) 38 | mkdir(output_dir) 39 | 40 | cpu_device = torch.device("cpu") 41 | transforms = build_transforms(cfg, is_train=False) 42 | model.eval() 43 | for i, image_path in enumerate(image_paths): 44 | start = time.time() 45 | image_name = os.path.basename(image_path) 46 | 47 | image = np.array(Image.open(image_path).convert("RGB")) 48 | height, width = image.shape[:2] 49 | images = transforms(image)[0].unsqueeze(0) 50 | load_time = time.time() - start 51 | 52 | start = time.time() 53 | result = model(images.to(device))[0] 54 | inference_time = time.time() - start 55 | 56 | result = result.resize((width, height)).to(cpu_device).numpy() 57 | boxes, labels, scores = result['boxes'], result['labels'], result['scores'] 58 | 59 | indices = scores > score_threshold 60 | boxes = boxes[indices] 61 | labels = labels[indices] 62 | scores = scores[indices] 63 | meters = ' | '.join( 64 | [ 65 | 'objects {:02d}'.format(len(boxes)), 66 | 'load {:03d}ms'.format(round(load_time * 1000)), 67 | 'inference {:03d}ms'.format(round(inference_time * 1000)), 68 | 'FPS {}'.format(round(1.0 / inference_time)) 69 | ] 70 | ) 71 | print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters)) 72 | 73 | drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) 74 | Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name)) 75 | 76 | 77 | def main(): 78 | parser = argparse.ArgumentParser(description="SSD Demo.") 79 | parser.add_argument( 80 | "--config-file", 81 | default="", 82 | metavar="FILE", 83 | help="path to config file", 84 | type=str, 85 | ) 86 | parser.add_argument("--ckpt", 
type=str, default=None, help="Trained weights.") 87 | parser.add_argument("--score_threshold", type=float, default=0.7) 88 | parser.add_argument("--images_dir", default='demo', type=str, help='Specify an image dir to run prediction on.') 89 | parser.add_argument("--output_dir", default='demo/result', type=str, help='Specify a dir to save the predicted images.') 90 | parser.add_argument("--dataset_type", default="voc", type=str, help='Specify the dataset type. Currently supports voc and coco.') 91 | 92 | parser.add_argument( 93 | "opts", 94 | help="Modify config options using the command-line", 95 | default=None, 96 | nargs=argparse.REMAINDER, 97 | ) 98 | args = parser.parse_args() 99 | print(args) 100 | 101 | cfg.merge_from_file(args.config_file) 102 | cfg.merge_from_list(args.opts) 103 | cfg.freeze() 104 | 105 | print("Loaded configuration file {}".format(args.config_file)) 106 | with open(args.config_file, "r") as cf: 107 | config_str = "\n" + cf.read() 108 | print(config_str) 109 | print("Running with config:\n{}".format(cfg)) 110 | 111 | run_demo(cfg=cfg, 112 | ckpt=args.ckpt, 113 | score_threshold=args.score_threshold, 114 | images_dir=args.images_dir, 115 | output_dir=args.output_dir, 116 | dataset_type=args.dataset_type) 117 | 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /demo/000342.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/000342.jpg -------------------------------------------------------------------------------- /demo/000542.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/000542.jpg -------------------------------------------------------------------------------- /demo/003123.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/003123.jpg -------------------------------------------------------------------------------- /demo/004101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/004101.jpg -------------------------------------------------------------------------------- /demo/008591.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/008591.jpg -------------------------------------------------------------------------------- /figures/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/004545.jpg -------------------------------------------------------------------------------- /figures/losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/losses.png -------------------------------------------------------------------------------- /figures/lr.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/lr.png -------------------------------------------------------------------------------- /figures/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/metrics.png -------------------------------------------------------------------------------- /outputs/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/outputs/.gitignore -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.3 2 | torchvision>=0.3 3 | yacs 4 | tqdm 5 | opencv-python 6 | vizer -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="torch-ssd", 8 | version="1.2.0", 9 | packages=find_packages(exclude=['ext']), 10 | install_requires=[ 11 | "torch>=1.3", 12 | "torchvision>=0.3", 13 | "opencv-python~=4.0", 14 | "yacs==0.1.6", 15 | "Vizer~=0.1.4", 16 | ], 17 | author="Congcong Li", 18 | author_email="luffy.lcc@gmail.com", 19 | description="High quality, fast, modular reference implementation of SSD in PyTorch", 20 | long_description=long_description, 21 | long_description_content_type="text/markdown", 22 | url="https://github.com/lufficc/SSD", 23 | classifiers=[ 24 | "Programming Language :: Python :: 3", 25 | "License :: OSI Approved :: MIT License", 26 | "Operating System :: OS Independent", 27 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 28 | ], 29 | license="MIT", 30 | python_requires=">=3.6", 31 | include_package_data=True, 32 | ) 33 | -------------------------------------------------------------------------------- /ssd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/__init__.py -------------------------------------------------------------------------------- /ssd/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import _C as cfg 2 | -------------------------------------------------------------------------------- /ssd/config/defaults.py: -------------------------------------------------------------------------------- 1 | from yacs.config import CfgNode as CN 2 | 3 | _C = CN() 4 | 5 | _C.MODEL = CN() 6 | _C.MODEL.META_ARCHITECTURE = 'SSDDetector' 7 | _C.MODEL.DEVICE = "cuda" 8 | # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5) 9 | _C.MODEL.THRESHOLD = 0.5 10 | _C.MODEL.NUM_CLASSES = 21 11 | # Hard negative mining 12 | _C.MODEL.NEG_POS_RATIO = 3 13 | _C.MODEL.CENTER_VARIANCE = 0.1 14 | _C.MODEL.SIZE_VARIANCE = 0.2 15 | 16 | # ---------------------------------------------------------------------------- # 17 | # Backbone 18 | # ---------------------------------------------------------------------------- # 19 | _C.MODEL.BACKBONE = CN() 20 | _C.MODEL.BACKBONE.NAME = 'vgg' 21 | _C.MODEL.BACKBONE.OUT_CHANNELS = (512, 
1024, 512, 256, 256, 256) 22 | _C.MODEL.BACKBONE.PRETRAINED = True 23 | 24 | # ----------------------------------------------------------------------------- 25 | # PRIORS 26 | # ----------------------------------------------------------------------------- 27 | _C.MODEL.PRIORS = CN() 28 | _C.MODEL.PRIORS.FEATURE_MAPS = [38, 19, 10, 5, 3, 1] 29 | _C.MODEL.PRIORS.STRIDES = [8, 16, 32, 64, 100, 300] 30 | _C.MODEL.PRIORS.MIN_SIZES = [30, 60, 111, 162, 213, 264] 31 | _C.MODEL.PRIORS.MAX_SIZES = [60, 111, 162, 213, 264, 315] 32 | _C.MODEL.PRIORS.ASPECT_RATIOS = [[2], [2, 3], [2, 3], [2, 3], [2], [2]] 33 | # With 1 extra aspect ratio, every location has 4 boxes; with 2 ratios, 6 boxes. 34 | # #boxes = 2 + #ratios * 2 35 | _C.MODEL.PRIORS.BOXES_PER_LOCATION = [4, 6, 6, 6, 4, 4] # number of boxes per feature map location 36 | _C.MODEL.PRIORS.CLIP = True 37 | 38 | # ----------------------------------------------------------------------------- 39 | # Box Head 40 | # ----------------------------------------------------------------------------- 41 | _C.MODEL.BOX_HEAD = CN() 42 | _C.MODEL.BOX_HEAD.NAME = 'SSDBoxHead' 43 | _C.MODEL.BOX_HEAD.PREDICTOR = 'SSDBoxPredictor' 44 | 45 | # ----------------------------------------------------------------------------- 46 | # INPUT 47 | # ----------------------------------------------------------------------------- 48 | _C.INPUT = CN() 49 | # Image size 50 | _C.INPUT.IMAGE_SIZE = 300 51 | # Values to be used for image normalization, RGB layout 52 | _C.INPUT.PIXEL_MEAN = [123, 117, 104] 53 | 54 | # ----------------------------------------------------------------------------- 55 | # Dataset 56 | # ----------------------------------------------------------------------------- 57 | _C.DATASETS = CN() 58 | # List of the dataset names for training, as present in path_catlog.py 59 | _C.DATASETS.TRAIN = () 60 | # List of the dataset names for testing, as present in path_catlog.py 61 | _C.DATASETS.TEST = () 62 | 63 | # ----------------------------------------------------------------------------- 64 | # DataLoader 65 | # ----------------------------------------------------------------------------- 66 | _C.DATA_LOADER = CN() 67 | # Number of data loading threads 68 | _C.DATA_LOADER.NUM_WORKERS = 8 69 | _C.DATA_LOADER.PIN_MEMORY = True 70 | 71 | # ---------------------------------------------------------------------------- # 72 | # Solver 73 | # ---------------------------------------------------------------------------- # 74 | _C.SOLVER = CN() 75 | # train configs 76 | _C.SOLVER.MAX_ITER = 120000 77 | _C.SOLVER.LR_STEPS = [80000, 100000] 78 | _C.SOLVER.GAMMA = 0.1 79 | _C.SOLVER.BATCH_SIZE = 32 80 | _C.SOLVER.LR = 1e-3 81 | _C.SOLVER.MOMENTUM = 0.9 82 | _C.SOLVER.WEIGHT_DECAY = 5e-4 83 | _C.SOLVER.WARMUP_FACTOR = 1.0 / 3 84 | _C.SOLVER.WARMUP_ITERS = 500 85 | 86 | # ---------------------------------------------------------------------------- # 87 | # Specific test options 88 | # ---------------------------------------------------------------------------- # 89 | _C.TEST = CN() 90 | _C.TEST.NMS_THRESHOLD = 0.45 91 | _C.TEST.CONFIDENCE_THRESHOLD = 0.01 92 | _C.TEST.MAX_PER_CLASS = -1 93 | _C.TEST.MAX_PER_IMAGE = 100 94 | _C.TEST.BATCH_SIZE = 10 95 | 96 | _C.OUTPUT_DIR = 'outputs' 97 | -------------------------------------------------------------------------------- /ssd/config/path_catlog.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class DatasetCatalog: 5 | DATA_DIR = 'datasets' 6 | DATASETS = { 7 | 'voc_2007_train': { 8 
| "data_dir": "VOC2007", 9 | "split": "train" 10 | }, 11 | 'voc_2007_val': { 12 | "data_dir": "VOC2007", 13 | "split": "val" 14 | }, 15 | 'voc_2007_trainval': { 16 | "data_dir": "VOC2007", 17 | "split": "trainval" 18 | }, 19 | 'voc_2007_test': { 20 | "data_dir": "VOC2007", 21 | "split": "test" 22 | }, 23 | 'voc_2012_train': { 24 | "data_dir": "VOC2012", 25 | "split": "train" 26 | }, 27 | 'voc_2012_val': { 28 | "data_dir": "VOC2012", 29 | "split": "val" 30 | }, 31 | 'voc_2012_trainval': { 32 | "data_dir": "VOC2012", 33 | "split": "trainval" 34 | }, 35 | 'voc_2012_test': { 36 | "data_dir": "VOC2012", 37 | "split": "test" 38 | }, 39 | 'coco_2014_valminusminival': { 40 | "data_dir": "val2014", 41 | "ann_file": "annotations/instances_valminusminival2014.json" 42 | }, 43 | 'coco_2014_minival': { 44 | "data_dir": "val2014", 45 | "ann_file": "annotations/instances_minival2014.json" 46 | }, 47 | 'coco_2014_train': { 48 | "data_dir": "train2014", 49 | "ann_file": "annotations/instances_train2014.json" 50 | }, 51 | 'coco_2014_val': { 52 | "data_dir": "val2014", 53 | "ann_file": "annotations/instances_val2014.json" 54 | }, 55 | } 56 | 57 | @staticmethod 58 | def get(name): 59 | if "voc" in name: 60 | voc_root = DatasetCatalog.DATA_DIR 61 | if 'VOC_ROOT' in os.environ: 62 | voc_root = os.environ['VOC_ROOT'] 63 | 64 | attrs = DatasetCatalog.DATASETS[name] 65 | args = dict( 66 | data_dir=os.path.join(voc_root, attrs["data_dir"]), 67 | split=attrs["split"], 68 | ) 69 | return dict(factory="VOCDataset", args=args) 70 | elif "coco" in name: 71 | coco_root = DatasetCatalog.DATA_DIR 72 | if 'COCO_ROOT' in os.environ: 73 | coco_root = os.environ['COCO_ROOT'] 74 | 75 | attrs = DatasetCatalog.DATASETS[name] 76 | args = dict( 77 | data_dir=os.path.join(coco_root, attrs["data_dir"]), 78 | ann_file=os.path.join(coco_root, attrs["ann_file"]), 79 | ) 80 | return dict(factory="COCODataset", args=args) 81 | 82 | raise RuntimeError("Dataset not available: {}".format(name)) 83 | -------------------------------------------------------------------------------- /ssd/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/data/__init__.py -------------------------------------------------------------------------------- /ssd/data/build.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import DataLoader 3 | from torch.utils.data.dataloader import default_collate 4 | 5 | from ssd.data import samplers 6 | from ssd.data.datasets import build_dataset 7 | from ssd.data.transforms import build_transforms, build_target_transform 8 | from ssd.structures.container import Container 9 | 10 | 11 | class BatchCollator: 12 | def __init__(self, is_train=True): 13 | self.is_train = is_train 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = default_collate(transposed_batch[0]) 18 | img_ids = default_collate(transposed_batch[2]) 19 | 20 | if self.is_train: 21 | list_targets = transposed_batch[1] 22 | targets = Container( 23 | {key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]} 24 | ) 25 | else: 26 | targets = None 27 | return images, targets, img_ids 28 | 29 | 30 | def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0): 31 | train_transform = build_transforms(cfg, is_train=is_train) 32 | target_transform = 
build_target_transform(cfg) if is_train else None 33 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 34 | datasets = build_dataset(dataset_list, transform=train_transform, target_transform=target_transform, is_train=is_train) 35 | 36 | shuffle = is_train 37 | 38 | data_loaders = [] 39 | 40 | for dataset in datasets: 41 | if distributed: 42 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 43 | elif shuffle: 44 | sampler = torch.utils.data.RandomSampler(dataset) 45 | else: 46 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 47 | 48 | batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE 49 | batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False) 50 | if max_iter is not None: 51 | batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=max_iter, start_iter=start_iter) 52 | 53 | data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS, batch_sampler=batch_sampler, 54 | pin_memory=cfg.DATA_LOADER.PIN_MEMORY, collate_fn=BatchCollator(is_train)) 55 | data_loaders.append(data_loader) 56 | 57 | if is_train: 58 | # during training, a single (possibly concatenated) data_loader is returned 59 | assert len(data_loaders) == 1 60 | return data_loaders[0] 61 | return data_loaders 62 | -------------------------------------------------------------------------------- /ssd/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import ConcatDataset 2 | 3 | from ssd.config.path_catlog import DatasetCatalog 4 | from .voc import VOCDataset 5 | from .coco import COCODataset 6 | 7 | _DATASETS = { 8 | 'VOCDataset': VOCDataset, 9 | 'COCODataset': COCODataset, 10 | } 11 | 12 | 13 | def build_dataset(dataset_list, transform=None, target_transform=None, is_train=True): 14 | assert len(dataset_list) > 0 15 | datasets = [] 16 | for dataset_name in dataset_list: 17 | data = DatasetCatalog.get(dataset_name) 18 | args = data['args'] 19 | factory = _DATASETS[data['factory']] 20 | args['transform'] = transform 21 | args['target_transform'] = target_transform 22 | if factory == VOCDataset: 23 | args['keep_difficult'] = not is_train 24 | elif factory == COCODataset: 25 | args['remove_empty'] = is_train 26 | dataset = factory(**args) 27 | datasets.append(dataset) 28 | # for testing, return a list of datasets 29 | if not is_train: 30 | return datasets 31 | dataset = datasets[0] 32 | if len(datasets) > 1: 33 | dataset = ConcatDataset(datasets) 34 | 35 | return [dataset] 36 | -------------------------------------------------------------------------------- /ssd/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch.utils.data 3 | import numpy as np 4 | from PIL import Image 5 | 6 | from ssd.structures.container import Container 7 | 8 | 9 | class COCODataset(torch.utils.data.Dataset): 10 | class_names = ('__background__', 11 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 12 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 13 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 14 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 15 | 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 16 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 17 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 18 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 
'cup', 19 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 20 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 21 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 22 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 23 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 24 | 'refrigerator', 'book', 'clock', 'vase', 'scissors', 25 | 'teddy bear', 'hair drier', 'toothbrush') 26 | 27 | def __init__(self, data_dir, ann_file, transform=None, target_transform=None, remove_empty=False): 28 | from pycocotools.coco import COCO 29 | self.coco = COCO(ann_file) 30 | self.data_dir = data_dir 31 | self.transform = transform 32 | self.target_transform = target_transform 33 | self.remove_empty = remove_empty 34 | if self.remove_empty: 35 | # when training, images without annotations are removed. 36 | self.ids = list(self.coco.imgToAnns.keys()) 37 | else: 38 | # when testing, all images used. 39 | self.ids = list(self.coco.imgs.keys()) 40 | coco_categories = sorted(self.coco.getCatIds()) 41 | self.coco_id_to_contiguous_id = {coco_id: i + 1 for i, coco_id in enumerate(coco_categories)} 42 | self.contiguous_id_to_coco_id = {v: k for k, v in self.coco_id_to_contiguous_id.items()} 43 | 44 | def __getitem__(self, index): 45 | image_id = self.ids[index] 46 | boxes, labels = self._get_annotation(image_id) 47 | image = self._read_image(image_id) 48 | if self.transform: 49 | image, boxes, labels = self.transform(image, boxes, labels) 50 | if self.target_transform: 51 | boxes, labels = self.target_transform(boxes, labels) 52 | targets = Container( 53 | boxes=boxes, 54 | labels=labels, 55 | ) 56 | return image, targets, index 57 | 58 | def get_annotation(self, index): 59 | image_id = self.ids[index] 60 | return image_id, self._get_annotation(image_id) 61 | 62 | def __len__(self): 63 | return len(self.ids) 64 | 65 | def _get_annotation(self, image_id): 66 | ann_ids = self.coco.getAnnIds(imgIds=image_id) 67 | ann = self.coco.loadAnns(ann_ids) 68 | # filter crowd annotations 69 | ann = [obj for obj in ann if obj["iscrowd"] == 0] 70 | boxes = np.array([self._xywh2xyxy(obj["bbox"]) for obj in ann], np.float32).reshape((-1, 4)) 71 | labels = np.array([self.coco_id_to_contiguous_id[obj["category_id"]] for obj in ann], np.int64).reshape((-1,)) 72 | # remove invalid boxes 73 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 74 | boxes = boxes[keep] 75 | labels = labels[keep] 76 | return boxes, labels 77 | 78 | def _xywh2xyxy(self, box): 79 | x1, y1, w, h = box 80 | return [x1, y1, x1 + w, y1 + h] 81 | 82 | def get_img_info(self, index): 83 | image_id = self.ids[index] 84 | img_data = self.coco.imgs[image_id] 85 | return img_data 86 | 87 | def _read_image(self, image_id): 88 | file_name = self.coco.loadImgs(image_id)[0]['file_name'] 89 | image_file = os.path.join(self.data_dir, file_name) 90 | image = Image.open(image_file).convert("RGB") 91 | image = np.array(image) 92 | return image 93 | -------------------------------------------------------------------------------- /ssd/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.data.datasets import VOCDataset, COCODataset 2 | from .coco import coco_evaluation 3 | from .voc import voc_evaluation 4 | 5 | 6 | def evaluate(dataset, predictions, output_dir, **kwargs): 7 | """evaluate dataset using different methods based on dataset type. 
8 | Args: 9 | dataset: Dataset object 10 | predictions(list[(boxes, labels, scores)]): Each item in the list represents the 11 | prediction results for one image. And the index should match the dataset index. 12 | output_dir: output folder, to save evaluation files or results. 13 | Returns: 14 | evaluation result 15 | """ 16 | args = dict( 17 | dataset=dataset, predictions=predictions, output_dir=output_dir, **kwargs, 18 | ) 19 | if isinstance(dataset, VOCDataset): 20 | return voc_evaluation(**args) 21 | elif isinstance(dataset, COCODataset): 22 | return coco_evaluation(**args) 23 | else: 24 | raise NotImplementedError 25 | -------------------------------------------------------------------------------- /ssd/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | from datetime import datetime 5 | 6 | 7 | def coco_evaluation(dataset, predictions, output_dir, iteration=None): 8 | coco_results = [] 9 | for i, prediction in enumerate(predictions): 10 | img_info = dataset.get_img_info(i) 11 | prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() 12 | boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] 13 | 14 | image_id, annotation = dataset.get_annotation(i) 15 | class_mapper = dataset.contiguous_id_to_coco_id 16 | if labels.shape[0] == 0: 17 | continue 18 | 19 | boxes = boxes.tolist() 20 | labels = labels.tolist() 21 | scores = scores.tolist() 22 | coco_results.extend( 23 | [ 24 | { 25 | "image_id": image_id, 26 | "category_id": class_mapper[labels[k]], 27 | "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]], # to xywh format 28 | "score": scores[k], 29 | } 30 | for k, box in enumerate(boxes) 31 | ] 32 | ) 33 | iou_type = 'bbox' 34 | json_result_file = os.path.join(output_dir, iou_type + ".json") 35 | logger = logging.getLogger("SSD.inference") 36 | logger.info('Writing results to {}...'.format(json_result_file)) 37 | with open(json_result_file, "w") as f: 38 | json.dump(coco_results, f) 39 | from pycocotools.cocoeval import COCOeval 40 | coco_gt = dataset.coco 41 | coco_dt = coco_gt.loadRes(json_result_file) 42 | coco_eval = COCOeval(coco_gt, coco_dt, iou_type) 43 | coco_eval.evaluate() 44 | coco_eval.accumulate() 45 | coco_eval.summarize() 46 | 47 | result_strings = [] 48 | keys = ["AP", "AP50", "AP75", "APs", "APm", "APl"] 49 | metrics = {} 50 | for i, key in enumerate(keys): 51 | metrics[key] = coco_eval.stats[i] 52 | logger.info('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3))) 53 | result_strings.append('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3))) 54 | 55 | if iteration is not None: 56 | result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration)) 57 | else: 58 | result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) 59 | with open(result_path, "w") as f: 60 | f.write('\n'.join(result_strings)) 61 | 62 | return dict(metrics=metrics) 63 | -------------------------------------------------------------------------------- /ssd/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from datetime import datetime 4 | 5 | import numpy as np 6 | 7 | from .eval_detection_voc import eval_detection_voc 8 | 9 | 10 | def voc_evaluation(dataset, predictions, output_dir, iteration=None): 11 | class_names = dataset.class_names 
12 | 13 | pred_boxes_list = [] 14 | pred_labels_list = [] 15 | pred_scores_list = [] 16 | gt_boxes_list = [] 17 | gt_labels_list = [] 18 | gt_difficults = [] 19 | 20 | for i in range(len(dataset)): 21 | image_id, annotation = dataset.get_annotation(i) 22 | gt_boxes, gt_labels, is_difficult = annotation 23 | gt_boxes_list.append(gt_boxes) 24 | gt_labels_list.append(gt_labels) 25 | gt_difficults.append(is_difficult.astype(bool)) 26 | 27 | img_info = dataset.get_img_info(i) 28 | prediction = predictions[i] 29 | prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() 30 | boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] 31 | 32 | pred_boxes_list.append(boxes) 33 | pred_labels_list.append(labels) 34 | pred_scores_list.append(scores) 35 | result = eval_detection_voc(pred_bboxes=pred_boxes_list, 36 | pred_labels=pred_labels_list, 37 | pred_scores=pred_scores_list, 38 | gt_bboxes=gt_boxes_list, 39 | gt_labels=gt_labels_list, 40 | gt_difficults=gt_difficults, 41 | iou_thresh=0.5, 42 | use_07_metric=True) 43 | logger = logging.getLogger("SSD.inference") 44 | result_str = "mAP: {:.4f}\n".format(result["map"]) 45 | metrics = {'mAP': result["map"]} 46 | for i, ap in enumerate(result["ap"]): 47 | if i == 0: # skip background 48 | continue 49 | metrics[class_names[i]] = ap 50 | result_str += "{:<16}: {:.4f}\n".format(class_names[i], ap) 51 | logger.info(result_str) 52 | 53 | if iteration is not None: 54 | result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration)) 55 | else: 56 | result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) 57 | with open(result_path, "w") as f: 58 | f.write(result_str) 59 | 60 | return dict(metrics=metrics) 61 | -------------------------------------------------------------------------------- /ssd/data/datasets/evaluation/voc/eval_detection_voc.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from collections import defaultdict 4 | import itertools 5 | import numpy as np 6 | import six 7 | 8 | 9 | def bbox_iou(bbox_a, bbox_b): 10 | """Calculate the Intersection of Unions (IoUs) between bounding boxes. 11 | IoU is calculated as a ratio of area of the intersection 12 | and area of the union. 13 | This function accepts both :obj:`numpy.ndarray` and :obj:`cupy.ndarray` as 14 | inputs. Please note that both :obj:`bbox_a` and :obj:`bbox_b` need to be 15 | the same type. 16 | The output is the same type as the inputs. 17 | Args: 18 | bbox_a (array): An array whose shape is :math:`(N, 4)`. 19 | :math:`N` is the number of bounding boxes. 20 | The dtype should be :obj:`numpy.float32`. 21 | bbox_b (array): An array similar to :obj:`bbox_a`, 22 | whose shape is :math:`(K, 4)`. 23 | The dtype should be :obj:`numpy.float32`. 24 | Returns: 25 | array: 26 | An array whose shape is :math:`(N, K)`. \ 27 | An element at index :math:`(n, k)` contains IoUs between \ 28 | :math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \ 29 | box in :obj:`bbox_b`. 
30 | """ 31 | if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4: 32 | raise IndexError 33 | 34 | # top left 35 | tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2]) 36 | # bottom right 37 | br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:]) 38 | 39 | area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2) 40 | area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1) 41 | area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1) 42 | return area_i / (area_a[:, None] + area_b - area_i) 43 | 44 | 45 | def eval_detection_voc( 46 | pred_bboxes, 47 | pred_labels, 48 | pred_scores, 49 | gt_bboxes, 50 | gt_labels, 51 | gt_difficults=None, 52 | iou_thresh=0.5, 53 | use_07_metric=False): 54 | """Calculate average precisions based on evaluation code of PASCAL VOC. 55 | 56 | This function evaluates predicted bounding boxes obtained from a dataset 57 | which has :math:`N` images by using average precision for each class. 58 | The code is based on the evaluation code used in PASCAL VOC Challenge. 59 | 60 | Args: 61 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` 62 | sets of bounding boxes. 63 | Its index corresponds to an index for the base dataset. 64 | Each element of :obj:`pred_bboxes` is a set of coordinates 65 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`, 66 | where :math:`R` corresponds 67 | to the number of bounding boxes, which may vary among images. 68 | The second axis corresponds to 69 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. 70 | pred_labels (iterable of numpy.ndarray): An iterable of labels. 71 | Similar to :obj:`pred_bboxes`, its index corresponds to an 72 | index for the base dataset. Its length is :math:`N`. 73 | pred_scores (iterable of numpy.ndarray): An iterable of confidence 74 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, 75 | its index corresponds to an index for the base dataset. 76 | Its length is :math:`N`. 77 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth 78 | bounding boxes 79 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a 80 | bounding box whose shape is :math:`(R, 4)`. Note that the number of 81 | bounding boxes in each image does not need to be the same as the number 82 | of corresponding predicted boxes. 83 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth 84 | labels which are organized similarly to :obj:`gt_bboxes`. 85 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean 86 | arrays which is organized similarly to :obj:`gt_bboxes`. 87 | This tells whether the 88 | corresponding ground truth bounding box is difficult or not. 89 | By default, this is :obj:`None`. In that case, this function 90 | considers all bounding boxes to be not difficult. 91 | iou_thresh (float): A prediction is correct if its Intersection over 92 | Union with the ground truth is above this value. 93 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric 94 | for calculating average precision. The default value is 95 | :obj:`False`. 96 | 97 | Returns: 98 | dict: 99 | 100 | The keys, value-types and the description of the values are listed 101 | below. 102 | 103 | * **ap** (*numpy.ndarray*): An array of average precisions. \ 104 | The :math:`l`-th value corresponds to the average precision \ 105 | for class :math:`l`. If class :math:`l` does not exist in \ 106 | either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \ 107 | value is set to :obj:`numpy.nan`. 
108 | * **map** (*float*): The average of Average Precisions over classes. 109 | 110 | """ 111 | 112 | prec, rec = calc_detection_voc_prec_rec(pred_bboxes, 113 | pred_labels, 114 | pred_scores, 115 | gt_bboxes, 116 | gt_labels, 117 | gt_difficults, 118 | iou_thresh=iou_thresh) 119 | 120 | ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric) 121 | 122 | return {'ap': ap, 'map': np.nanmean(ap)} 123 | 124 | 125 | def calc_detection_voc_prec_rec( 126 | pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, 127 | gt_difficults=None, 128 | iou_thresh=0.5): 129 | """Calculate precision and recall based on evaluation code of PASCAL VOC. 130 | 131 | This function calculates precision and recall of 132 | predicted bounding boxes obtained from a dataset which has :math:`N` 133 | images. 134 | The code is based on the evaluation code used in the PASCAL VOC Challenge. 135 | 136 | Args: 137 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` 138 | sets of bounding boxes. 139 | Its index corresponds to an index for the base dataset. 140 | Each element of :obj:`pred_bboxes` is a set of coordinates 141 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`, 142 | where :math:`R` corresponds 143 | to the number of bounding boxes, which may vary among images. 144 | The second axis corresponds to 145 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. 146 | pred_labels (iterable of numpy.ndarray): An iterable of labels. 147 | Similar to :obj:`pred_bboxes`, its index corresponds to an 148 | index for the base dataset. Its length is :math:`N`. 149 | pred_scores (iterable of numpy.ndarray): An iterable of confidence 150 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, 151 | its index corresponds to an index for the base dataset. 152 | Its length is :math:`N`. 153 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth 154 | bounding boxes 155 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is an 156 | array of bounding boxes whose shape is :math:`(R, 4)`. Note that the number of 157 | bounding boxes in each image does not need to be the same as the number 158 | of corresponding predicted boxes. 159 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth 160 | labels which are organized similarly to :obj:`gt_bboxes`. 161 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean 162 | arrays which are organized similarly to :obj:`gt_bboxes`. 163 | This tells whether the 164 | corresponding ground truth bounding box is difficult or not. 165 | By default, this is :obj:`None`. In that case, this function 166 | treats all bounding boxes as not difficult. 167 | iou_thresh (float): A prediction is correct if its Intersection over 168 | Union with the ground truth is above this value. 169 | 170 | Returns: 171 | tuple of two lists: 172 | This function returns two lists: :obj:`prec` and :obj:`rec`. 173 | 174 | * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \ 175 | for class :math:`l`. If class :math:`l` does not exist in \ 176 | either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \ 177 | set to :obj:`None`. 178 | * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \ 179 | for class :math:`l`. If no ground truth of class :math:`l` that is not \ 180 | marked as difficult exists in \ 181 | :obj:`gt_labels`, :obj:`rec[l]` is \ 182 | set to :obj:`None`. 
183 | 184 | """ 185 | 186 | pred_bboxes = iter(pred_bboxes) 187 | pred_labels = iter(pred_labels) 188 | pred_scores = iter(pred_scores) 189 | gt_bboxes = iter(gt_bboxes) 190 | gt_labels = iter(gt_labels) 191 | if gt_difficults is None: 192 | gt_difficults = itertools.repeat(None) 193 | else: 194 | gt_difficults = iter(gt_difficults) 195 | 196 | n_pos = defaultdict(int) 197 | score = defaultdict(list) 198 | match = defaultdict(list) 199 | 200 | for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \ 201 | six.moves.zip( 202 | pred_bboxes, pred_labels, pred_scores, 203 | gt_bboxes, gt_labels, gt_difficults): 204 | 205 | if gt_difficult is None: 206 | gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool) 207 | 208 | for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)): 209 | pred_mask_l = pred_label == l 210 | pred_bbox_l = pred_bbox[pred_mask_l] 211 | pred_score_l = pred_score[pred_mask_l] 212 | # sort by score 213 | order = pred_score_l.argsort()[::-1] 214 | pred_bbox_l = pred_bbox_l[order] 215 | pred_score_l = pred_score_l[order] 216 | 217 | gt_mask_l = gt_label == l 218 | gt_bbox_l = gt_bbox[gt_mask_l] 219 | gt_difficult_l = gt_difficult[gt_mask_l] 220 | 221 | n_pos[l] += np.logical_not(gt_difficult_l).sum() 222 | score[l].extend(pred_score_l) 223 | 224 | if len(pred_bbox_l) == 0: 225 | continue 226 | if len(gt_bbox_l) == 0: 227 | match[l].extend((0,) * pred_bbox_l.shape[0]) 228 | continue 229 | 230 | # The VOC evaluation protocol treats box coordinates as inclusive integers, hence the +1 on the max corners. 231 | pred_bbox_l = pred_bbox_l.copy() 232 | pred_bbox_l[:, 2:] += 1 233 | gt_bbox_l = gt_bbox_l.copy() 234 | gt_bbox_l[:, 2:] += 1 235 | 236 | iou = bbox_iou(pred_bbox_l, gt_bbox_l) 237 | gt_index = iou.argmax(axis=1) 238 | # set -1 if there is no matching ground truth 239 | gt_index[iou.max(axis=1) < iou_thresh] = -1 240 | del iou 241 | 242 | selec = np.zeros(gt_bbox_l.shape[0], dtype=bool) 243 | for gt_idx in gt_index: 244 | if gt_idx >= 0: 245 | if gt_difficult_l[gt_idx]: 246 | match[l].append(-1) 247 | else: 248 | if not selec[gt_idx]: 249 | match[l].append(1) 250 | else: 251 | match[l].append(0) 252 | selec[gt_idx] = True 253 | else: 254 | match[l].append(0) 255 | 256 | for iter_ in ( 257 | pred_bboxes, pred_labels, pred_scores, 258 | gt_bboxes, gt_labels, gt_difficults): 259 | if next(iter_, None) is not None: 260 | raise ValueError('Lengths of input iterables need to be the same.') 261 | 262 | n_fg_class = max(n_pos.keys()) + 1 263 | prec = [None] * n_fg_class 264 | rec = [None] * n_fg_class 265 | 266 | for l in n_pos.keys(): 267 | score_l = np.array(score[l]) 268 | match_l = np.array(match[l], dtype=np.int8) 269 | 270 | order = score_l.argsort()[::-1] 271 | match_l = match_l[order] 272 | 273 | tp = np.cumsum(match_l == 1) 274 | fp = np.cumsum(match_l == 0) 275 | 276 | # If an element of fp + tp is 0, 277 | # the corresponding element of prec[l] is nan. 278 | prec[l] = tp / (fp + tp) 279 | # If n_pos[l] is 0, rec[l] is None. 280 | if n_pos[l] > 0: 281 | rec[l] = tp / n_pos[l] 282 | 283 | return prec, rec 284 | 285 | 286 | def calc_detection_voc_ap(prec, rec, use_07_metric=False): 287 | """Calculate average precisions based on evaluation code of PASCAL VOC. 288 | 289 | This function calculates average precisions 290 | from given precisions and recalls. 291 | The code is based on the evaluation code used in the PASCAL VOC Challenge. 292 | 293 | Args: 294 | prec (list of numpy.array): A list of arrays. 295 | :obj:`prec[l]` indicates precision for class :math:`l`. 
296 | If :obj:`prec[l]` is :obj:`None`, this function returns 297 | :obj:`numpy.nan` for class :math:`l`. 298 | rec (list of numpy.array): A list of arrays. 299 | :obj:`rec[l]` indicates recall for class :math:`l`. 300 | If :obj:`rec[l]` is :obj:`None`, this function returns 301 | :obj:`numpy.nan` for class :math:`l`. 302 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric 303 | for calculating average precision. The default value is 304 | :obj:`False`. 305 | 306 | Returns: 307 | ~numpy.ndarray: 308 | This function returns an array of average precisions. 309 | The :math:`l`-th value corresponds to the average precision 310 | for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is 311 | :obj:`None`, the corresponding value is set to :obj:`numpy.nan`. 312 | 313 | """ 314 | 315 | n_fg_class = len(prec) 316 | ap = np.empty(n_fg_class) 317 | for l in six.moves.range(n_fg_class): 318 | if prec[l] is None or rec[l] is None: 319 | ap[l] = np.nan 320 | continue 321 | 322 | if use_07_metric: 323 | # 11 point metric 324 | ap[l] = 0 325 | for t in np.arange(0., 1.1, 0.1): 326 | if np.sum(rec[l] >= t) == 0: 327 | p = 0 328 | else: 329 | p = np.max(np.nan_to_num(prec[l])[rec[l] >= t]) 330 | ap[l] += p / 11 331 | else: 332 | # correct AP calculation 333 | # first append sentinel values at the end 334 | mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0])) 335 | mrec = np.concatenate(([0], rec[l], [1])) 336 | 337 | mpre = np.maximum.accumulate(mpre[::-1])[::-1] 338 | 339 | # to calculate area under PR curve, look for points 340 | # where X axis (recall) changes value 341 | i = np.where(mrec[1:] != mrec[:-1])[0] 342 | 343 | # and sum (\Delta recall) * prec 344 | ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 345 | 346 | return ap 347 | -------------------------------------------------------------------------------- /ssd/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch.utils.data 3 | import numpy as np 4 | import xml.etree.ElementTree as ET 5 | from PIL import Image 6 | 7 | from ssd.structures.container import Container 8 | 9 | 10 | class VOCDataset(torch.utils.data.Dataset): 11 | class_names = ('__background__', 12 | 'aeroplane', 'bicycle', 'bird', 'boat', 13 | 'bottle', 'bus', 'car', 'cat', 'chair', 14 | 'cow', 'diningtable', 'dog', 'horse', 15 | 'motorbike', 'person', 'pottedplant', 16 | 'sheep', 'sofa', 'train', 'tvmonitor') 17 | 18 | def __init__(self, data_dir, split, transform=None, target_transform=None, keep_difficult=False): 19 | """Dataset for VOC data. 20 | Args: 21 | data_dir: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories: 22 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject. 
23 | """ 24 | self.data_dir = data_dir 25 | self.split = split 26 | self.transform = transform 27 | self.target_transform = target_transform 28 | image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "%s.txt" % self.split) 29 | self.ids = VOCDataset._read_image_ids(image_sets_file) 30 | self.keep_difficult = keep_difficult 31 | 32 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)} 33 | 34 | def __getitem__(self, index): 35 | image_id = self.ids[index] 36 | boxes, labels, is_difficult = self._get_annotation(image_id) 37 | if not self.keep_difficult: 38 | boxes = boxes[is_difficult == 0] 39 | labels = labels[is_difficult == 0] 40 | image = self._read_image(image_id) 41 | if self.transform: 42 | image, boxes, labels = self.transform(image, boxes, labels) 43 | if self.target_transform: 44 | boxes, labels = self.target_transform(boxes, labels) 45 | targets = Container( 46 | boxes=boxes, 47 | labels=labels, 48 | ) 49 | return image, targets, index 50 | 51 | def get_annotation(self, index): 52 | image_id = self.ids[index] 53 | return image_id, self._get_annotation(image_id) 54 | 55 | def __len__(self): 56 | return len(self.ids) 57 | 58 | @staticmethod 59 | def _read_image_ids(image_sets_file): 60 | ids = [] 61 | with open(image_sets_file) as f: 62 | for line in f: 63 | ids.append(line.rstrip()) 64 | return ids 65 | 66 | def _get_annotation(self, image_id): 67 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % image_id) 68 | objects = ET.parse(annotation_file).findall("object") 69 | boxes = [] 70 | labels = [] 71 | is_difficult = [] 72 | for obj in objects: 73 | class_name = obj.find('name').text.lower().strip() 74 | bbox = obj.find('bndbox') 75 | # VOC annotations follow the MATLAB convention, in which indexes start from 1, so subtract 1 to get 0-based coordinates 76 | x1 = float(bbox.find('xmin').text) - 1 77 | y1 = float(bbox.find('ymin').text) - 1 78 | x2 = float(bbox.find('xmax').text) - 1 79 | y2 = float(bbox.find('ymax').text) - 1 80 | boxes.append([x1, y1, x2, y2]) 81 | labels.append(self.class_dict[class_name]) 82 | is_difficult_str = obj.find('difficult').text 83 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0) 84 | 85 | return (np.array(boxes, dtype=np.float32), 86 | np.array(labels, dtype=np.int64), 87 | np.array(is_difficult, dtype=np.uint8)) 88 | 89 | def get_img_info(self, index): 90 | img_id = self.ids[index] 91 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % img_id) 92 | anno = ET.parse(annotation_file).getroot() 93 | size = anno.find("size") 94 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 95 | return {"height": im_info[0], "width": im_info[1]} 96 | 97 | def _read_image(self, image_id): 98 | image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id) 99 | image = Image.open(image_file).convert("RGB") 100 | image = np.array(image) 101 | return image 102 | -------------------------------------------------------------------------------- /ssd/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 2 | from .distributed import DistributedSampler 3 | 4 | __all__ = ['IterationBasedBatchSampler', 'DistributedSampler'] 5 | -------------------------------------------------------------------------------- /ssd/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset: offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /ssd/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import BatchSampler 2 | 3 | 4 | class IterationBasedBatchSampler(BatchSampler): 5 | """ 6 | Wraps a BatchSampler, re-sampling from it until 7 | a specified number of iterations have been sampled 8 | """ 9 | 10 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 11 | self.batch_sampler = batch_sampler 12 | self.num_iterations = num_iterations 13 | self.start_iter = start_iter 14 | 15 | def __iter__(self): 16 | iteration = self.start_iter 17 | while iteration <= self.num_iterations: 18 | # if the underlying sampler has a set_epoch method, like 19 | # DistributedSampler, used for making each process see 20 | # a different split of the dataset, then set it 21 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 22 | self.batch_sampler.sampler.set_epoch(iteration) 23 | for batch in self.batch_sampler: 24 | iteration += 1 25 | if 
iteration > self.num_iterations: 26 | break 27 | yield batch 28 | 29 | def __len__(self): 30 | return self.num_iterations 31 | -------------------------------------------------------------------------------- /ssd/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling.anchors.prior_box import PriorBox 2 | from .target_transform import SSDTargetTransform 3 | from .transforms import * 4 | 5 | 6 | def build_transforms(cfg, is_train=True): 7 | if is_train: 8 | transform = [ 9 | ConvertFromInts(), 10 | PhotometricDistort(), 11 | Expand(cfg.INPUT.PIXEL_MEAN), 12 | RandomSampleCrop(), 13 | RandomMirror(), 14 | ToPercentCoords(), 15 | Resize(cfg.INPUT.IMAGE_SIZE), 16 | SubtractMeans(cfg.INPUT.PIXEL_MEAN), 17 | ToTensor(), 18 | ] 19 | else: 20 | transform = [ 21 | Resize(cfg.INPUT.IMAGE_SIZE), 22 | SubtractMeans(cfg.INPUT.PIXEL_MEAN), 23 | ToTensor() 24 | ] 25 | transform = Compose(transform) 26 | return transform 27 | 28 | 29 | def build_target_transform(cfg): 30 | transform = SSDTargetTransform(PriorBox(cfg)(), 31 | cfg.MODEL.CENTER_VARIANCE, 32 | cfg.MODEL.SIZE_VARIANCE, 33 | cfg.MODEL.THRESHOLD) 34 | return transform 35 | -------------------------------------------------------------------------------- /ssd/data/transforms/target_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ssd.utils import box_utils 5 | 6 | 7 | class SSDTargetTransform: 8 | def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold): 9 | self.center_form_priors = center_form_priors 10 | self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors) 11 | self.center_variance = center_variance 12 | self.size_variance = size_variance 13 | self.iou_threshold = iou_threshold 14 | 15 | def __call__(self, gt_boxes, gt_labels): 16 | if type(gt_boxes) is np.ndarray: 17 | gt_boxes = torch.from_numpy(gt_boxes) 18 | if type(gt_labels) is np.ndarray: 19 | gt_labels = torch.from_numpy(gt_labels) 20 | boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels, 21 | self.corner_form_priors, self.iou_threshold) 22 | boxes = box_utils.corner_form_to_center_form(boxes) 23 | locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance) 24 | 25 | return locations, labels 26 | -------------------------------------------------------------------------------- /ssd/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # from https://github.com/amdegroot/ssd.pytorch 2 | 3 | 4 | import torch 5 | from torchvision import transforms 6 | import cv2 7 | import numpy as np 8 | import types 9 | from numpy import random 10 | 11 | 12 | def intersect(box_a, box_b): 13 | max_xy = np.minimum(box_a[:, 2:], box_b[2:]) 14 | min_xy = np.maximum(box_a[:, :2], box_b[:2]) 15 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf) 16 | return inter[:, 0] * inter[:, 1] 17 | 18 | 19 | def jaccard_numpy(box_a, box_b): 20 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 21 | is simply the intersection over union of two boxes. 
22 | E.g.: 23 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 24 | Args: 25 | box_a: Multiple bounding boxes, Shape: [num_boxes, 4] 26 | box_b: Single bounding box, Shape: [4] 27 | Return: 28 | jaccard overlap: Shape: [box_a.shape[0]] 29 | """ 30 | inter = intersect(box_a, box_b) 31 | area_a = ((box_a[:, 2] - box_a[:, 0]) * 32 | (box_a[:, 3] - box_a[:, 1])) # [A] 33 | area_b = ((box_b[2] - box_b[0]) * 34 | (box_b[3] - box_b[1])) # scalar 35 | union = area_a + area_b - inter 36 | return inter / union # [A] 37 | 38 | 39 | def remove_empty_boxes(boxes, labels): 40 | """Removes bounding boxes whose width or height is 0, together with their labels 41 | 42 | Args: 43 | boxes (ndarray): NP Array with bounding boxes as lines 44 | * BBOX[x1, y1, x2, y2] 45 | labels (ndarray): Labels corresponding to the boxes 46 | 47 | Returns: 48 | ndarray: Valid bounding boxes 49 | ndarray: Corresponding labels 50 | """ 51 | del_boxes = [] 52 | for idx, box in enumerate(boxes): 53 | if box[0] == box[2] or box[1] == box[3]: 54 | del_boxes.append(idx) 55 | 56 | return np.delete(boxes, del_boxes, 0), np.delete(labels, del_boxes) 57 | 58 | 59 | class Compose(object): 60 | """Composes several augmentations together. 61 | Args: 62 | transforms (List[Transform]): list of transforms to compose. 63 | Example: 64 | >>> augmentations.Compose([ 65 | >>> transforms.CenterCrop(10), 66 | >>> transforms.ToTensor(), 67 | >>> ]) 68 | """ 69 | 70 | def __init__(self, transforms): 71 | self.transforms = transforms 72 | 73 | def __call__(self, img, boxes=None, labels=None): 74 | for t in self.transforms: 75 | img, boxes, labels = t(img, boxes, labels) 76 | if boxes is not None: 77 | boxes, labels = remove_empty_boxes(boxes, labels) 78 | return img, boxes, labels 79 | 80 | 81 | class Lambda(object): 82 | """Applies a lambda as a transform.""" 83 | 84 | def __init__(self, lambd): 85 | assert isinstance(lambd, types.LambdaType) 86 | self.lambd = lambd 87 | 88 | def __call__(self, img, boxes=None, labels=None): 89 | return self.lambd(img, boxes, labels) 90 | 91 | 92 | class ConvertFromInts(object): 93 | def __call__(self, image, boxes=None, labels=None): 94 | return image.astype(np.float32), boxes, labels 95 | 96 | 97 | class SubtractMeans(object): 98 | def __init__(self, mean): 99 | self.mean = np.array(mean, dtype=np.float32) 100 | 101 | def __call__(self, image, boxes=None, labels=None): 102 | image = image.astype(np.float32) 103 | image -= self.mean 104 | return image.astype(np.float32), boxes, labels 105 | 106 | 107 | class ToAbsoluteCoords(object): 108 | def __call__(self, image, boxes=None, labels=None): 109 | height, width, channels = image.shape 110 | boxes[:, 0] *= width 111 | boxes[:, 2] *= width 112 | boxes[:, 1] *= height 113 | boxes[:, 3] *= height 114 | 115 | return image, boxes, labels 116 | 117 | 118 | class ToPercentCoords(object): 119 | def __call__(self, image, boxes=None, labels=None): 120 | height, width, channels = image.shape 121 | boxes[:, 0] /= width 122 | boxes[:, 2] /= width 123 | boxes[:, 1] /= height 124 | boxes[:, 3] /= height 125 | 126 | return image, boxes, labels 127 | 128 | 129 | class Resize(object): 130 | def __init__(self, size=300): 131 | self.size = size 132 | 133 | def __call__(self, image, boxes=None, labels=None): 134 | image = cv2.resize(image, (self.size, 135 | self.size)) 136 | return image, boxes, labels 137 | 138 | 139 | class RandomSaturation(object): 140 | def __init__(self, lower=0.5, upper=1.5): 141 | self.lower = lower 142 | self.upper = upper 143 | assert self.upper >= self.lower, "saturation upper must be >= lower." 144 | assert self.lower >= 0, "saturation lower must be non-negative." 145 | 146 | def __call__(self, image, boxes=None, labels=None): 147 | if random.randint(2): 148 | image[:, :, 1] *= random.uniform(self.lower, self.upper) 149 | 150 | return image, boxes, labels 151 | 152 | 153 | class RandomHue(object): 154 | def __init__(self, delta=18.0): 155 | assert delta >= 0.0 and delta <= 360.0 156 | self.delta = delta 157 | 158 | def __call__(self, image, boxes=None, labels=None): 159 | if random.randint(2): 160 | image[:, :, 0] += random.uniform(-self.delta, self.delta) 161 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0 162 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0 163 | return image, boxes, labels 164 | 165 | 166 | class RandomLightingNoise(object): 167 | def __init__(self): 168 | self.perms = ((0, 1, 2), (0, 2, 1), 169 | (1, 0, 2), (1, 2, 0), 170 | (2, 0, 1), (2, 1, 0)) 171 | 172 | def __call__(self, image, boxes=None, labels=None): 173 | if random.randint(2): 174 | swap = self.perms[random.randint(len(self.perms))] 175 | shuffle = SwapChannels(swap) # shuffle channels 176 | image = shuffle(image) 177 | return image, boxes, labels 178 | 179 | 180 | class ConvertColor(object): 181 | def __init__(self, current, transform): 182 | self.transform = transform 183 | self.current = current 184 | 185 | def __call__(self, image, boxes=None, labels=None): 186 | if self.current == 'BGR' and self.transform == 'HSV': 187 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 188 | elif self.current == 'RGB' and self.transform == 'HSV': 189 | image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV) 190 | elif self.current == 'BGR' and self.transform == 'RGB': 191 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 192 | elif self.current == 'HSV' and self.transform == 'BGR': 193 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 194 | elif self.current == 'HSV' and self.transform == "RGB": 195 | image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB) 196 | else: 197 | raise NotImplementedError 198 | return image, boxes, labels 199 | 200 | 201 | class RandomContrast(object): 202 | def __init__(self, lower=0.5, upper=1.5): 203 | self.lower = lower 204 | self.upper = upper 205 | assert self.upper >= self.lower, "contrast upper must be >= lower." 206 | assert self.lower >= 0, "contrast lower must be non-negative."
207 | 208 | # expects float image 209 | def __call__(self, image, boxes=None, labels=None): 210 | if random.randint(2): 211 | alpha = random.uniform(self.lower, self.upper) 212 | image *= alpha 213 | return image, boxes, labels 214 | 215 | 216 | class RandomBrightness(object): 217 | def __init__(self, delta=32): 218 | assert delta >= 0.0 219 | assert delta <= 255.0 220 | self.delta = delta 221 | 222 | def __call__(self, image, boxes=None, labels=None): 223 | if random.randint(2): 224 | delta = random.uniform(-self.delta, self.delta) 225 | image += delta 226 | return image, boxes, labels 227 | 228 | 229 | class ToCV2Image(object): 230 | def __call__(self, tensor, boxes=None, labels=None): 231 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels 232 | 233 | 234 | class ToTensor(object): 235 | def __call__(self, cvimage, boxes=None, labels=None): 236 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels 237 | 238 | 239 | class RandomSampleCrop(object): 240 | """Crop 241 | Arguments: 242 | img (Image): the image being input during training 243 | boxes (Tensor): the original bounding boxes in pt form 244 | labels (Tensor): the class labels for each bbox 245 | mode (float tuple): the min and max jaccard overlaps 246 | Return: 247 | (img, boxes, classes) 248 | img (Image): the cropped image 249 | boxes (Tensor): the adjusted bounding boxes in pt form 250 | labels (Tensor): the class labels for each bbox 251 | """ 252 | 253 | def __init__(self): 254 | self.sample_options = ( 255 | # using entire original input image 256 | None, 257 | # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9 258 | (0.1, None), 259 | (0.3, None), 260 | (0.7, None), 261 | (0.9, None), 262 | # randomly sample a patch 263 | (None, None), 264 | ) 265 | 266 | def __call__(self, image, boxes=None, labels=None): 267 | # guard against no boxes 268 | if boxes is not None and boxes.shape[0] == 0: 269 | return image, boxes, labels 270 | height, width, _ = image.shape 271 | while True: 272 | # randomly choose a mode 273 | mode = self.sample_options[random.randint(0, len(self.sample_options))] 274 | if mode is None: 275 | return image, boxes, labels 276 | 277 | min_iou, max_iou = mode 278 | if min_iou is None: 279 | min_iou = float('-inf') 280 | if max_iou is None: 281 | max_iou = float('inf') 282 | 283 | # max trials (50) 284 | for _ in range(50): 285 | current_image = image 286 | 287 | w = random.uniform(0.3 * width, width) 288 | h = random.uniform(0.3 * height, height) 289 | 290 | # aspect ratio constraint b/t .5 & 2 291 | if h / w < 0.5 or h / w > 2: 292 | continue 293 | 294 | left = random.uniform(width - w) 295 | top = random.uniform(height - h) 296 | 297 | # convert to integer rect x1,y1,x2,y2 298 | rect = np.array([int(left), int(top), int(left + w), int(top + h)]) 299 | 300 | # calculate IoU (jaccard overlap) b/t the cropped and gt boxes 301 | overlap = jaccard_numpy(boxes, rect) 302 | 303 | # is the min and max overlap constraint satisfied? 
if not, try again 304 | if overlap.max() < min_iou or overlap.min() > max_iou: 305 | continue 306 | 307 | # cut the crop from the image 308 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], 309 | :] 310 | 311 | # keep overlap with gt box IF center in sampled patch 312 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0 313 | 314 | # mask in all gt boxes whose centers are below and to the right of the crop's top-left corner 315 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1]) 316 | 317 | # mask in all gt boxes whose centers are above and to the left of the crop's bottom-right corner 318 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1]) 319 | 320 | # mask in boxes where both m1 and m2 are true, i.e. the center lies inside the crop 321 | mask = m1 * m2 322 | 323 | # have any valid boxes? try again if not 324 | if not mask.any(): 325 | continue 326 | 327 | # take only matching gt boxes 328 | current_boxes = boxes[mask, :].copy() 329 | 330 | # take only matching gt labels 331 | current_labels = labels[mask] 332 | 333 | # clip the box's top-left corner to the crop's 334 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2], 335 | rect[:2]) 336 | # adjust to crop (by subtracting crop's left,top) 337 | current_boxes[:, :2] -= rect[:2] 338 | 339 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], 340 | rect[2:]) 341 | # adjust to crop (by subtracting crop's left,top) 342 | current_boxes[:, 2:] -= rect[:2] 343 | 344 | return current_image, current_boxes, current_labels 345 | 346 | 347 | class Expand(object): 348 | def __init__(self, mean): 349 | self.mean = mean 350 | 351 | def __call__(self, image, boxes, labels): 352 | if random.randint(2): 353 | return image, boxes, labels 354 | 355 | height, width, depth = image.shape 356 | ratio = random.uniform(1, 4) 357 | left = random.uniform(0, width * ratio - width) 358 | top = random.uniform(0, height * ratio - height) 359 | 360 | expand_image = np.zeros( 361 | (int(height * ratio), int(width * ratio), depth), 362 | dtype=image.dtype) 363 | expand_image[:, :, :] = self.mean 364 | expand_image[int(top):int(top + height), 365 | int(left):int(left + width)] = image 366 | image = expand_image 367 | 368 | boxes = boxes.copy() 369 | boxes[:, :2] += (int(left), int(top)) 370 | boxes[:, 2:] += (int(left), int(top)) 371 | 372 | return image, boxes, labels 373 | 374 | 375 | class RandomMirror(object): 376 | def __call__(self, image, boxes, classes): 377 | _, width, _ = image.shape 378 | if random.randint(2): 379 | image = image[:, ::-1] 380 | boxes = boxes.copy() 381 | boxes[:, 0::2] = width - boxes[:, 2::-2] 382 | return image, boxes, classes 383 | 384 | 385 | class SwapChannels(object): 386 | """Transforms a tensorized image by swapping the channels in the order 387 | specified in the swap tuple. 
388 | Args: 389 | swaps (int triple): final order of channels 390 | eg: (2, 1, 0) 391 | """ 392 | 393 | def __init__(self, swaps): 394 | self.swaps = swaps 395 | 396 | def __call__(self, image): 397 | """ 398 | Args: 399 | image (Tensor): image tensor to be transformed 400 | Return: 401 | a tensor with channels swapped according to swap 402 | """ 403 | # if torch.is_tensor(image): 404 | # image = image.data.cpu().numpy() 405 | # else: 406 | # image = np.array(image) 407 | image = image[:, :, self.swaps] 408 | return image 409 | 410 | 411 | class PhotometricDistort(object): 412 | def __init__(self): 413 | self.pd = [ 414 | RandomContrast(), # RGB 415 | ConvertColor(current="RGB", transform='HSV'), # HSV 416 | RandomSaturation(), # HSV 417 | RandomHue(), # HSV 418 | ConvertColor(current='HSV', transform='RGB'), # RGB 419 | RandomContrast() # RGB 420 | ] 421 | self.rand_brightness = RandomBrightness() 422 | self.rand_light_noise = RandomLightingNoise() 423 | 424 | def __call__(self, image, boxes, labels): 425 | im = image.copy() 426 | im, boxes, labels = self.rand_brightness(im, boxes, labels) 427 | if random.randint(2): 428 | distort = Compose(self.pd[:-1]) 429 | else: 430 | distort = Compose(self.pd[1:]) 431 | im, boxes, labels = distort(im, boxes, labels) 432 | return self.rand_light_noise(im, boxes, labels) 433 | -------------------------------------------------------------------------------- /ssd/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/engine/__init__.py -------------------------------------------------------------------------------- /ssd/engine/inference.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import torch 5 | import torch.utils.data 6 | from tqdm import tqdm 7 | 8 | from ssd.data.build import make_data_loader 9 | from ssd.data.datasets.evaluation import evaluate 10 | 11 | from ssd.utils import dist_util, mkdir 12 | from ssd.utils.dist_util import synchronize, is_main_process 13 | 14 | 15 | def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu): 16 | all_predictions = dist_util.all_gather(predictions_per_gpu) 17 | if not dist_util.is_main_process(): 18 | return 19 | # merge the list of dicts 20 | predictions = {} 21 | for p in all_predictions: 22 | predictions.update(p) 23 | # the merged dict is keyed by image index; sort the keys so it can be flattened into a list 24 | image_ids = list(sorted(predictions.keys())) 25 | if len(image_ids) != image_ids[-1] + 1: 26 | logger = logging.getLogger("SSD.inference") 27 | logger.warning( 28 | "Number of images that were gathered from multiple processes is not " 29 | "a contiguous set. 
Some images might be missing from the evaluation" 30 | ) 31 | 32 | # convert to a list 33 | predictions = [predictions[i] for i in image_ids] 34 | return predictions 35 | 36 | 37 | def compute_on_dataset(model, data_loader, device): 38 | results_dict = {} 39 | for batch in tqdm(data_loader): 40 | images, targets, image_ids = batch 41 | cpu_device = torch.device("cpu") 42 | with torch.no_grad(): 43 | outputs = model(images.to(device)) 44 | 45 | outputs = [o.to(cpu_device) for o in outputs] 46 | results_dict.update( 47 | {int(img_id): result for img_id, result in zip(image_ids, outputs)} 48 | ) 49 | return results_dict 50 | 51 | 52 | def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False, **kwargs): 53 | dataset = data_loader.dataset 54 | logger = logging.getLogger("SSD.inference") 55 | logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(dataset))) 56 | predictions_path = os.path.join(output_folder, 'predictions.pth') 57 | if use_cached and os.path.exists(predictions_path): 58 | predictions = torch.load(predictions_path, map_location='cpu') 59 | else: 60 | predictions = compute_on_dataset(model, data_loader, device) 61 | synchronize() 62 | predictions = _accumulate_predictions_from_multiple_gpus(predictions) 63 | if not is_main_process(): 64 | return 65 | if output_folder: 66 | torch.save(predictions, predictions_path) 67 | return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder, **kwargs) 68 | 69 | 70 | @torch.no_grad() 71 | def do_evaluation(cfg, model, distributed, **kwargs): 72 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 73 | model = model.module 74 | model.eval() 75 | device = torch.device(cfg.MODEL.DEVICE) 76 | data_loaders_val = make_data_loader(cfg, is_train=False, distributed=distributed) 77 | eval_results = [] 78 | for dataset_name, data_loader in zip(cfg.DATASETS.TEST, data_loaders_val): 79 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 80 | if not os.path.exists(output_folder): 81 | mkdir(output_folder) 82 | eval_result = inference(model, data_loader, dataset_name, device, output_folder, **kwargs) 83 | eval_results.append(eval_result) 84 | return eval_results 85 | -------------------------------------------------------------------------------- /ssd/engine/trainer.py: -------------------------------------------------------------------------------- 1 | import collections.abc 2 | import datetime 3 | import logging 4 | import os 5 | import time 6 | import torch 7 | import torch.distributed as dist 8 | 9 | from ssd.engine.inference import do_evaluation 10 | from ssd.utils import dist_util 11 | from ssd.utils.metric_logger import MetricLogger 12 | 13 | 14 | def write_metric(eval_result, prefix, summary_writer, global_step): 15 | for key in eval_result: 16 | value = eval_result[key] 17 | tag = '{}/{}'.format(prefix, key) 18 | if isinstance(value, collections.abc.Mapping): 19 | write_metric(value, tag, summary_writer, global_step) 20 | else: 21 | summary_writer.add_scalar(tag, value, global_step=global_step) 22 | 23 | 24 | def reduce_loss_dict(loss_dict): 25 | """ 26 | Reduce the loss dictionary from all processes so that process with rank 27 | 0 has the averaged results. Returns a dict with the same fields as 28 | loss_dict, after reduction. 
29 | """ 30 | world_size = dist_util.get_world_size() 31 | if world_size < 2: 32 | return loss_dict 33 | with torch.no_grad(): 34 | loss_names = [] 35 | all_losses = [] 36 | for k in sorted(loss_dict.keys()): 37 | loss_names.append(k) 38 | all_losses.append(loss_dict[k]) 39 | all_losses = torch.stack(all_losses, dim=0) 40 | dist.reduce(all_losses, dst=0) 41 | if dist.get_rank() == 0: 42 | # only main process gets accumulated, so only divide by 43 | # world_size in this case 44 | all_losses /= world_size 45 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 46 | return reduced_losses 47 | 48 | 49 | def do_train(cfg, model, 50 | data_loader, 51 | optimizer, 52 | scheduler, 53 | checkpointer, 54 | device, 55 | arguments, 56 | args): 57 | logger = logging.getLogger("SSD.trainer") 58 | logger.info("Start training ...") 59 | meters = MetricLogger() 60 | 61 | model.train() 62 | save_to_disk = dist_util.get_rank() == 0 63 | if args.use_tensorboard and save_to_disk: 64 | try: 65 | from torch.utils.tensorboard import SummaryWriter 66 | except ImportError: 67 | from tensorboardX import SummaryWriter 68 | summary_writer = SummaryWriter(log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs')) 69 | else: 70 | summary_writer = None 71 | 72 | max_iter = len(data_loader) 73 | start_iter = arguments["iteration"] 74 | start_training_time = time.time() 75 | end = time.time() 76 | for iteration, (images, targets, _) in enumerate(data_loader, start_iter): 77 | iteration = iteration + 1 78 | arguments["iteration"] = iteration 79 | 80 | images = images.to(device) 81 | targets = targets.to(device) 82 | loss_dict = model(images, targets=targets) 83 | loss = sum(loss for loss in loss_dict.values()) 84 | 85 | # reduce losses over all GPUs for logging purposes 86 | loss_dict_reduced = reduce_loss_dict(loss_dict) 87 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 88 | meters.update(total_loss=losses_reduced, **loss_dict_reduced) 89 | 90 | optimizer.zero_grad() 91 | loss.backward() 92 | optimizer.step() 93 | scheduler.step() 94 | 95 | batch_time = time.time() - end 96 | end = time.time() 97 | meters.update(time=batch_time) 98 | if iteration % args.log_step == 0: 99 | eta_seconds = meters.time.global_avg * (max_iter - iteration) 100 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 101 | if device == "cuda": 102 | logger.info( 103 | meters.delimiter.join([ 104 | "iter: {iter:06d}", 105 | "lr: {lr:.5f}", 106 | '{meters}', 107 | "eta: {eta}", 108 | 'mem: {mem}M', 109 | ]).format( 110 | iter=iteration, 111 | lr=optimizer.param_groups[0]['lr'], 112 | meters=str(meters), 113 | eta=eta_string, 114 | mem=round(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0), 115 | ) 116 | ) 117 | else: 118 | logger.info( 119 | meters.delimiter.join([ 120 | "iter: {iter:06d}", 121 | "lr: {lr:.5f}", 122 | '{meters}', 123 | "eta: {eta}", 124 | ]).format( 125 | iter=iteration, 126 | lr=optimizer.param_groups[0]['lr'], 127 | meters=str(meters), 128 | eta=eta_string, 129 | ) 130 | ) 131 | if summary_writer: 132 | global_step = iteration 133 | summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step) 134 | for loss_name, loss_item in loss_dict_reduced.items(): 135 | summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step) 136 | summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step) 137 | 138 | if iteration % args.save_step == 0: 139 | checkpointer.save("model_{:06d}".format(iteration), **arguments) 140 | 
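# Periodic evaluation: every process enters do_evaluation (it synchronizes
# and gathers predictions across GPUs internally), but only rank 0 receives
# the merged results, so the TensorBoard writes below are guarded by rank.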
141 | if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter: 142 | eval_results = do_evaluation(cfg, model, distributed=args.distributed, iteration=iteration) 143 | if dist_util.get_rank() == 0 and summary_writer: 144 | for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST): 145 | write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration) 146 | model.train() # *IMPORTANT*: change to train mode after eval. 147 | 148 | checkpointer.save("model_final", **arguments) 149 | # compute training time 150 | total_training_time = int(time.time() - start_training_time) 151 | total_time_str = str(datetime.timedelta(seconds=total_training_time)) 152 | logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter)) 153 | return model 154 | -------------------------------------------------------------------------------- /ssd/layers/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.init as init 4 | from .separable_conv import SeparableConv2d 5 | 6 | __all__ = ['L2Norm', 'SeparableConv2d'] 7 | 8 | 9 | class L2Norm(nn.Module): 10 | def __init__(self, n_channels, scale): 11 | super(L2Norm, self).__init__() 12 | self.n_channels = n_channels 13 | self.gamma = scale or None 14 | self.eps = 1e-10 15 | self.weight = nn.Parameter(torch.Tensor(self.n_channels)) 16 | self.reset_parameters() 17 | 18 | def reset_parameters(self): 19 | init.constant_(self.weight, self.gamma) 20 | 21 | def forward(self, x): 22 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps 23 | x = torch.div(x, norm) 24 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x 25 | return out 26 | -------------------------------------------------------------------------------- /ssd/layers/separable_conv.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SeparableConv2d(nn.Module): 5 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False): 6 | super().__init__() 7 | ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 8 | self.conv = nn.Sequential( 9 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 10 | groups=in_channels, stride=stride, padding=padding), 11 | nn.BatchNorm2d(in_channels), 12 | ReLU(), 13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), 14 | ) 15 | 16 | def forward(self, x): 17 | return self.conv(x) 18 | -------------------------------------------------------------------------------- /ssd/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/modeling/__init__.py -------------------------------------------------------------------------------- /ssd/modeling/anchors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/modeling/anchors/__init__.py -------------------------------------------------------------------------------- /ssd/modeling/anchors/prior_box.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | import torch 4 | from math import sqrt 5 | 6 | 7 | 
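# A quick sanity check on prior counts (a sketch assuming the standard SSD300
# settings: FEATURE_MAPS = [38, 19, 10, 5, 3, 1] and
# ASPECT_RATIOS = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]). Each location
# emits 2 square boxes plus 2 boxes per aspect ratio, i.e. [4, 6, 6, 6, 4, 4]
# boxes per location, for a total of
#   38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 = 8732 priors.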
class PriorBox: 8 | def __init__(self, cfg): 9 | self.image_size = cfg.INPUT.IMAGE_SIZE 10 | prior_config = cfg.MODEL.PRIORS 11 | self.feature_maps = prior_config.FEATURE_MAPS 12 | self.min_sizes = prior_config.MIN_SIZES 13 | self.max_sizes = prior_config.MAX_SIZES 14 | self.strides = prior_config.STRIDES 15 | self.aspect_ratios = prior_config.ASPECT_RATIOS 16 | self.clip = prior_config.CLIP 17 | 18 | def __call__(self): 19 | """Generate SSD Prior Boxes. 20 | It returns the center, height and width of the priors. The values are relative to the image size 21 | Returns: 22 | priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values 23 | are relative to the image size. 24 | """ 25 | priors = [] 26 | for k, f in enumerate(self.feature_maps): 27 | scale = self.image_size / self.strides[k] 28 | for i, j in product(range(f), repeat=2): 29 | # unit center x,y 30 | cx = (j + 0.5) / scale 31 | cy = (i + 0.5) / scale 32 | 33 | # small sized square box 34 | size = self.min_sizes[k] 35 | h = w = size / self.image_size 36 | priors.append([cx, cy, w, h]) 37 | 38 | # big sized square box 39 | size = sqrt(self.min_sizes[k] * self.max_sizes[k]) 40 | h = w = size / self.image_size 41 | priors.append([cx, cy, w, h]) 42 | 43 | # change h/w ratio of the small sized box 44 | size = self.min_sizes[k] 45 | h = w = size / self.image_size 46 | for ratio in self.aspect_ratios[k]: 47 | ratio = sqrt(ratio) 48 | priors.append([cx, cy, w * ratio, h / ratio]) 49 | priors.append([cx, cy, w / ratio, h * ratio]) 50 | 51 | priors = torch.tensor(priors) 52 | if self.clip: 53 | priors.clamp_(max=1, min=0) 54 | return priors 55 | -------------------------------------------------------------------------------- /ssd/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling import registry 2 | from .vgg import VGG 3 | from .mobilenet import MobileNetV2 4 | from .efficient_net import EfficientNet 5 | from .mobilenetv3 import MobileNetV3 6 | 7 | __all__ = ['build_backbone', 'VGG', 'MobileNetV2', 'EfficientNet', 'MobileNetV3'] 8 | 9 | 10 | def build_backbone(cfg): 11 | return registry.BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg, cfg.MODEL.BACKBONE.PRETRAINED) 12 | -------------------------------------------------------------------------------- /ssd/modeling/backbone/efficient_net/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling import registry 2 | from .efficient_net import EfficientNet 3 | 4 | __all__ = ['efficient_net_b3', 'EfficientNet'] 5 | 6 | 7 | @registry.BACKBONES.register('efficient_net-b3') 8 | def efficient_net_b3(cfg, pretrained=True): 9 | if pretrained: 10 | model = EfficientNet.from_pretrained('efficientnet-b3') 11 | else: 12 | model = EfficientNet.from_name('efficientnet-b3') 13 | 14 | return model 15 | -------------------------------------------------------------------------------- /ssd/modeling/backbone/efficient_net/efficient_net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from .utils import ( 5 | relu_fn, 6 | round_filters, 7 | round_repeats, 8 | drop_connect, 9 | Conv2dSamePadding, 10 | get_model_params, 11 | efficientnet_params, 12 | load_pretrained_weights, 13 | ) 14 | 15 | INDICES = { 16 | 'efficientnet-b3': [7, 17, 25] 17 | } 18 | 19 | EXTRAS = { 20 | 'efficientnet-b3': [ 21 | # in, out, k, s, p 22 | [(384, 
128, 1, 1, 0), (128, 256, 3, 2, 1)], # 5 x 5 23 | [(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 3 x 3 24 | [(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 1 x 1 25 | 26 | ] 27 | } 28 | 29 | 30 | def add_extras(cfgs): 31 | extras = nn.ModuleList() 32 | for cfg in cfgs: 33 | extra = [] 34 | for params in cfg: 35 | in_channels, out_channels, kernel_size, stride, padding = params 36 | extra.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)) 37 | extra.append(nn.ReLU()) 38 | extras.append(nn.Sequential(*extra)) 39 | return extras 40 | 41 | 42 | class MBConvBlock(nn.Module): 43 | """ 44 | Mobile Inverted Residual Bottleneck Block 45 | 46 | Args: 47 | block_args (namedtuple): BlockArgs, see above 48 | global_params (namedtuple): GlobalParam, see above 49 | 50 | Attributes: 51 | has_se (bool): Whether the block contains a Squeeze and Excitation layer. 52 | """ 53 | 54 | def __init__(self, block_args, global_params): 55 | super().__init__() 56 | self._block_args = block_args 57 | self._bn_mom = 1 - global_params.batch_norm_momentum 58 | self._bn_eps = global_params.batch_norm_epsilon 59 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) 60 | self.id_skip = block_args.id_skip # skip connection and drop connect 61 | 62 | # Expansion phase 63 | inp = self._block_args.input_filters # number of input channels 64 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels 65 | if self._block_args.expand_ratio != 1: 66 | self._expand_conv = Conv2dSamePadding(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) 67 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 68 | 69 | # Depthwise convolution phase 70 | k = self._block_args.kernel_size 71 | s = self._block_args.stride 72 | self._depthwise_conv = Conv2dSamePadding( 73 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise 74 | kernel_size=k, stride=s, bias=False) 75 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) 76 | 77 | # Squeeze and Excitation layer, if desired 78 | if self.has_se: 79 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) 80 | self._se_reduce = Conv2dSamePadding(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) 81 | self._se_expand = Conv2dSamePadding(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) 82 | 83 | # Output phase 84 | final_oup = self._block_args.output_filters 85 | self._project_conv = Conv2dSamePadding(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) 86 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) 87 | 88 | def forward(self, inputs, drop_connect_rate=None): 89 | """ 90 | :param inputs: input tensor 91 | :param drop_connect_rate: drop connect rate (float, between 0 and 1) 92 | :return: output of block 93 | """ 94 | 95 | # Expansion and Depthwise Convolution 96 | x = inputs 97 | if self._block_args.expand_ratio != 1: 98 | x = relu_fn(self._bn0(self._expand_conv(inputs))) 99 | x = relu_fn(self._bn1(self._depthwise_conv(x))) 100 | 101 | # Squeeze and Excitation 102 | if self.has_se: 103 | x_squeezed = F.adaptive_avg_pool2d(x, 1) 104 | x_squeezed = self._se_expand(relu_fn(self._se_reduce(x_squeezed))) 105 | x = torch.sigmoid(x_squeezed) * x 106 | 107 | x = self._bn2(self._project_conv(x)) 108 | 109 | # Skip connection and drop connect 110 | input_filters, 
output_filters = self._block_args.input_filters, self._block_args.output_filters 111 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: 112 | if drop_connect_rate: 113 | x = drop_connect(x, p=drop_connect_rate, training=self.training) 114 | x = x + inputs # skip connection 115 | return x 116 | 117 | 118 | class EfficientNet(nn.Module): 119 | """ 120 | An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods 121 | 122 | Args: 123 | blocks_args (list): A list of BlockArgs to construct blocks 124 | global_params (namedtuple): A set of GlobalParams shared between blocks 125 | 126 | Example: 127 | model = EfficientNet.from_pretrained('efficientnet-b0') 128 | 129 | """ 130 | 131 | def __init__(self, model_name, blocks_args=None, global_params=None): 132 | super().__init__() 133 | self.indices = INDICES[model_name] 134 | self.extras = add_extras(EXTRAS[model_name]) 135 | assert isinstance(blocks_args, list), 'blocks_args should be a list' 136 | assert len(blocks_args) > 0, 'blocks_args must not be empty' 137 | self._global_params = global_params 138 | self._blocks_args = blocks_args 139 | 140 | # Batch norm parameters 141 | bn_mom = 1 - self._global_params.batch_norm_momentum 142 | bn_eps = self._global_params.batch_norm_epsilon 143 | 144 | # Stem 145 | in_channels = 3 # rgb 146 | out_channels = round_filters(32, self._global_params) # number of output channels 147 | self._conv_stem = Conv2dSamePadding(in_channels, out_channels, kernel_size=3, stride=2, bias=False) 148 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) 149 | 150 | # Build blocks 151 | self._blocks = nn.ModuleList([]) 152 | for block_args in self._blocks_args: 153 | 154 | # Update block input and output filters based on depth multiplier. 155 | block_args = block_args._replace( 156 | input_filters=round_filters(block_args.input_filters, self._global_params), 157 | output_filters=round_filters(block_args.output_filters, self._global_params), 158 | num_repeat=round_repeats(block_args.num_repeat, self._global_params) 159 | ) 160 | 161 | # The first block needs to take care of stride and filter size increase. 162 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 163 | if block_args.num_repeat > 1: 164 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) 165 | for _ in range(block_args.num_repeat - 1): 166 | self._blocks.append(MBConvBlock(block_args, self._global_params)) 167 | self.reset_parameters() 168 | 169 | def reset_parameters(self): 170 | for m in self.extras.modules(): 171 | if isinstance(m, nn.Conv2d): 172 | nn.init.xavier_uniform_(m.weight) 173 | nn.init.zeros_(m.bias) 174 | 175 | def extract_features(self, inputs): 176 | """ Returns the final convolutional feature map together with the intermediate feature maps selected by self.indices """ 177 | 178 | # Stem 179 | x = relu_fn(self._bn0(self._conv_stem(inputs))) 180 | 181 | features = [] 182 | 183 | # Blocks 184 | for idx, block in enumerate(self._blocks): 185 | drop_connect_rate = self._global_params.drop_connect_rate 186 | if drop_connect_rate: 187 | drop_connect_rate *= float(idx) / len(self._blocks) 188 | x = block(x, drop_connect_rate) 189 | if idx in self.indices: 190 | features.append(x) 191 | 192 | return x, features 193 | 194 | def forward(self, inputs): 195 | """ Calls extract_features to extract backbone feature maps, then applies the extra layers and returns the tuple of feature maps consumed by the SSD box head. 
""" 196 | 197 | # Convolution layers 198 | x, features = self.extract_features(inputs) 199 | 200 | for layer in self.extras: 201 | x = layer(x) 202 | features.append(x) 203 | 204 | return tuple(features) 205 | 206 | @classmethod 207 | def from_name(cls, model_name, override_params=None): 208 | cls._check_model_name_is_valid(model_name) 209 | blocks_args, global_params = get_model_params(model_name, override_params) 210 | return EfficientNet(model_name, blocks_args, global_params) 211 | 212 | @classmethod 213 | def from_pretrained(cls, model_name): 214 | model = EfficientNet.from_name(model_name) 215 | load_pretrained_weights(model, model_name) 216 | return model 217 | 218 | @classmethod 219 | def get_image_size(cls, model_name): 220 | cls._check_model_name_is_valid(model_name) 221 | _, _, res, _ = efficientnet_params(model_name) 222 | return res 223 | 224 | @classmethod 225 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): 226 | """ Validates model name. None that pretrained weights are only available for 227 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ 228 | num_models = 4 if also_need_pretrained_weights else 8 229 | valid_models = ['efficientnet_b' + str(i) for i in range(num_models)] 230 | if model_name.replace('-', '_') not in valid_models: 231 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) 232 | -------------------------------------------------------------------------------- /ssd/modeling/backbone/efficient_net/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains helper functions for building the model and for loading model parameters. 3 | These helper functions are built to mirror those in the official TensorFlow implementation. 4 | """ 5 | 6 | import re 7 | import math 8 | import collections 9 | import torch 10 | from torch import nn 11 | from torch.nn import functional as F 12 | from ssd.utils.model_zoo import load_state_dict_from_url 13 | 14 | ######################################################################## 15 | ############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### 16 | ######################################################################## 17 | 18 | 19 | # Parameters for the entire model (stem, all blocks, and head) 20 | 21 | GlobalParams = collections.namedtuple('GlobalParams', [ 22 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 23 | 'num_classes', 'width_coefficient', 'depth_coefficient', 24 | 'depth_divisor', 'min_depth', 'drop_connect_rate', ]) 25 | 26 | # Parameters for an individual model block 27 | BlockArgs = collections.namedtuple('BlockArgs', [ 28 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', 29 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) 30 | 31 | # Change namedtuple defaults 32 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) 33 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) 34 | 35 | 36 | def relu_fn(x): 37 | """ Swish activation function """ 38 | return x * torch.sigmoid(x) 39 | 40 | 41 | def round_filters(filters, global_params): 42 | """ Calculate and round number of filters based on depth multiplier. 
""" 43 | multiplier = global_params.width_coefficient 44 | if not multiplier: 45 | return filters 46 | divisor = global_params.depth_divisor 47 | min_depth = global_params.min_depth 48 | filters *= multiplier 49 | min_depth = min_depth or divisor 50 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) 51 | if new_filters < 0.9 * filters: # prevent rounding by more than 10% 52 | new_filters += divisor 53 | return int(new_filters) 54 | 55 | 56 | def round_repeats(repeats, global_params): 57 | """ Round number of filters based on depth multiplier. """ 58 | multiplier = global_params.depth_coefficient 59 | if not multiplier: 60 | return repeats 61 | return int(math.ceil(multiplier * repeats)) 62 | 63 | 64 | def drop_connect(inputs, p, training): 65 | """ Drop connect. """ 66 | if not training: return inputs 67 | batch_size = inputs.shape[0] 68 | keep_prob = 1 - p 69 | random_tensor = keep_prob 70 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) 71 | binary_tensor = torch.floor(random_tensor) 72 | output = inputs / keep_prob * binary_tensor 73 | return output 74 | 75 | 76 | class Conv2dSamePadding(nn.Conv2d): 77 | """ 2D Convolutions like TensorFlow """ 78 | 79 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): 80 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 81 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 82 | 83 | def forward(self, x): 84 | ih, iw = x.size()[-2:] 85 | kh, kw = self.weight.size()[-2:] 86 | sh, sw = self.stride 87 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) 88 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) 89 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) 90 | if pad_h > 0 or pad_w > 0: 91 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) 92 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 93 | 94 | 95 | ######################################################################## 96 | ############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## 97 | ######################################################################## 98 | 99 | 100 | def efficientnet_params(model_name): 101 | """ Map EfficientNet model name to parameter coefficients. """ 102 | params_dict = { 103 | # Coefficients: width,depth,res,dropout 104 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2), 105 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2), 106 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3), 107 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3), 108 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4), 109 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4), 110 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5), 111 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5), 112 | } 113 | return params_dict[model_name] 114 | 115 | 116 | class BlockDecoder(object): 117 | """ Block Decoder for readability, straight from the official TensorFlow repository """ 118 | 119 | @staticmethod 120 | def _decode_block_string(block_string): 121 | """ Gets a block through a string notation of arguments. 
""" 122 | assert isinstance(block_string, str) 123 | 124 | ops = block_string.split('_') 125 | options = {} 126 | for op in ops: 127 | splits = re.split(r'(\d.*)', op) 128 | if len(splits) >= 2: 129 | key, value = splits[:2] 130 | options[key] = value 131 | 132 | # Check stride 133 | assert (('s' in options and len(options['s']) == 1) or 134 | (len(options['s']) == 2 and options['s'][0] == options['s'][1])) 135 | 136 | return BlockArgs( 137 | kernel_size=int(options['k']), 138 | num_repeat=int(options['r']), 139 | input_filters=int(options['i']), 140 | output_filters=int(options['o']), 141 | expand_ratio=int(options['e']), 142 | id_skip=('noskip' not in block_string), 143 | se_ratio=float(options['se']) if 'se' in options else None, 144 | stride=[int(options['s'][0])]) 145 | 146 | @staticmethod 147 | def _encode_block_string(block): 148 | """Encodes a block to a string.""" 149 | args = [ 150 | 'r%d' % block.num_repeat, 151 | 'k%d' % block.kernel_size, 152 | 's%d%d' % (block.strides[0], block.strides[1]), 153 | 'e%s' % block.expand_ratio, 154 | 'i%d' % block.input_filters, 155 | 'o%d' % block.output_filters 156 | ] 157 | if 0 < block.se_ratio <= 1: 158 | args.append('se%s' % block.se_ratio) 159 | if block.id_skip is False: 160 | args.append('noskip') 161 | return '_'.join(args) 162 | 163 | @staticmethod 164 | def decode(string_list): 165 | """ 166 | Decodes a list of string notations to specify blocks inside the network. 167 | 168 | :param string_list: a list of strings, each string is a notation of block 169 | :return: a list of BlockArgs namedtuples of block args 170 | """ 171 | assert isinstance(string_list, list) 172 | blocks_args = [] 173 | for block_string in string_list: 174 | blocks_args.append(BlockDecoder._decode_block_string(block_string)) 175 | return blocks_args 176 | 177 | @staticmethod 178 | def encode(blocks_args): 179 | """ 180 | Encodes a list of BlockArgs to a list of strings. 181 | 182 | :param blocks_args: a list of BlockArgs namedtuples of block args 183 | :return: a list of strings, each string is a notation of block 184 | """ 185 | block_strings = [] 186 | for block in blocks_args: 187 | block_strings.append(BlockDecoder._encode_block_string(block)) 188 | return block_strings 189 | 190 | 191 | def efficientnet(width_coefficient=None, depth_coefficient=None, 192 | dropout_rate=0.2, drop_connect_rate=0.2): 193 | """ Creates a efficientnet model. 
""" 194 | 195 | blocks_args = [ 196 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', 197 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', 198 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', 199 | 'r1_k3_s11_e6_i192_o320_se0.25', 200 | ] 201 | blocks_args = BlockDecoder.decode(blocks_args) 202 | 203 | global_params = GlobalParams( 204 | batch_norm_momentum=0.99, 205 | batch_norm_epsilon=1e-3, 206 | dropout_rate=dropout_rate, 207 | drop_connect_rate=drop_connect_rate, 208 | # data_format='channels_last', # removed, this is always true in PyTorch 209 | num_classes=1000, 210 | width_coefficient=width_coefficient, 211 | depth_coefficient=depth_coefficient, 212 | depth_divisor=8, 213 | min_depth=None 214 | ) 215 | 216 | return blocks_args, global_params 217 | 218 | 219 | def get_model_params(model_name, override_params): 220 | """ Get the block args and global params for a given model """ 221 | if model_name.startswith('efficientnet'): 222 | w, d, _, p = efficientnet_params(model_name) 223 | # note: all models have drop connect rate = 0.2 224 | blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) 225 | else: 226 | raise NotImplementedError('model name is not pre-defined: %s' % model_name) 227 | if override_params: 228 | # ValueError will be raised here if override_params has fields not included in global_params. 229 | global_params = global_params._replace(**override_params) 230 | return blocks_args, global_params 231 | 232 | 233 | url_map = { 234 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet-b0-08094119.pth', 235 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet-b1-dbc7070a.pth', 236 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet-b2-27687264.pth', 237 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet-b3-c8376fa2.pth', 238 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet-b4-e116e8b3.pth', 239 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet-b5-586e6cc6.pth', 240 | } 241 | 242 | 243 | def load_pretrained_weights(model, model_name): 244 | """ Loads pretrained weights, and downloads if loading for the first time. 
""" 245 | state_dict = load_state_dict_from_url(url_map[model_name]) 246 | model.load_state_dict(state_dict, strict=False) 247 | print('Loaded pretrained weights for {}'.format(model_name)) 248 | -------------------------------------------------------------------------------- /ssd/modeling/backbone/mobilenet.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ssd.modeling import registry 4 | from ssd.utils.model_zoo import load_state_dict_from_url 5 | 6 | model_urls = { 7 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 8 | } 9 | 10 | 11 | class ConvBNReLU(nn.Sequential): 12 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): 13 | padding = (kernel_size - 1) // 2 14 | super(ConvBNReLU, self).__init__( 15 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 16 | nn.BatchNorm2d(out_planes), 17 | nn.ReLU6(inplace=True) 18 | ) 19 | 20 | 21 | class InvertedResidual(nn.Module): 22 | def __init__(self, inp, oup, stride, expand_ratio): 23 | super(InvertedResidual, self).__init__() 24 | self.stride = stride 25 | assert stride in [1, 2] 26 | 27 | hidden_dim = int(round(inp * expand_ratio)) 28 | self.use_res_connect = self.stride == 1 and inp == oup 29 | 30 | layers = [] 31 | if expand_ratio != 1: 32 | # pw 33 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) 34 | layers.extend([ 35 | # dw 36 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), 37 | # pw-linear 38 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 39 | nn.BatchNorm2d(oup), 40 | ]) 41 | self.conv = nn.Sequential(*layers) 42 | 43 | def forward(self, x): 44 | if self.use_res_connect: 45 | return x + self.conv(x) 46 | else: 47 | return self.conv(x) 48 | 49 | 50 | class MobileNetV2(nn.Module): 51 | def __init__(self, width_mult=1.0, inverted_residual_setting=None): 52 | super(MobileNetV2, self).__init__() 53 | block = InvertedResidual 54 | input_channel = 32 55 | last_channel = 1280 56 | 57 | if inverted_residual_setting is None: 58 | inverted_residual_setting = [ 59 | # t, c, n, s 60 | [1, 16, 1, 1], 61 | [6, 24, 2, 2], 62 | [6, 32, 3, 2], 63 | [6, 64, 4, 2], 64 | [6, 96, 3, 1], 65 | [6, 160, 3, 2], 66 | [6, 320, 1, 1], 67 | ] 68 | 69 | # only check the first element, assuming user knows t,c,n,s are required 70 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 71 | raise ValueError("inverted_residual_setting should be non-empty " 72 | "or a 4-element list, got {}".format(inverted_residual_setting)) 73 | 74 | # building first layer 75 | input_channel = int(input_channel * width_mult) 76 | self.last_channel = int(last_channel * max(1.0, width_mult)) 77 | features = [ConvBNReLU(3, input_channel, stride=2)] 78 | # building inverted residual blocks 79 | for t, c, n, s in inverted_residual_setting: 80 | output_channel = int(c * width_mult) 81 | for i in range(n): 82 | stride = s if i == 0 else 1 83 | features.append(block(input_channel, output_channel, stride, expand_ratio=t)) 84 | input_channel = output_channel 85 | # building last several layers 86 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) 87 | # make it nn.Sequential 88 | self.features = nn.Sequential(*features) 89 | self.extras = nn.ModuleList([ 90 | InvertedResidual(1280, 512, 2, 0.2), 91 | InvertedResidual(512, 256, 2, 0.25), 92 | InvertedResidual(256, 256, 2, 0.5), 93 | InvertedResidual(256, 64, 2, 0.25) 94 | ]) 95 | 
96 | self.reset_parameters() 97 | 98 | def reset_parameters(self): 99 | # weight initialization 100 | for m in self.modules(): 101 | if isinstance(m, nn.Conv2d): 102 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 103 | if m.bias is not None: 104 | nn.init.zeros_(m.bias) 105 | elif isinstance(m, nn.BatchNorm2d): 106 | nn.init.ones_(m.weight) 107 | nn.init.zeros_(m.bias) 108 | elif isinstance(m, nn.Linear): 109 | nn.init.normal_(m.weight, 0, 0.01) 110 | nn.init.zeros_(m.bias) 111 | 112 | def forward(self, x): 113 | features = [] 114 | for i in range(14): 115 | x = self.features[i](x) 116 | features.append(x) 117 | 118 | for i in range(14, len(self.features)): 119 | x = self.features[i](x) 120 | features.append(x) 121 | 122 | for i in range(len(self.extras)): 123 | x = self.extras[i](x) 124 | features.append(x) 125 | 126 | return tuple(features) 127 | 128 | 129 | @registry.BACKBONES.register('mobilenet_v2') 130 | def mobilenet_v2(cfg, pretrained=True): 131 | model = MobileNetV2() 132 | if pretrained: 133 | model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v2']), strict=False) 134 | return model 135 | -------------------------------------------------------------------------------- /ssd/modeling/backbone/mobilenetv3.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creates a MobileNetV3 Model as defined in: 3 | Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. (2019). 4 | Searching for MobileNetV3 5 | arXiv preprint arXiv:1905.02244. 6 | 7 | 8 | @ Credit from https://github.com/d-li14/mobilenetv3.pytorch 9 | @ Modified by Chakkrit Termritthikun (https://github.com/chakkritte) 10 | 11 | """ 12 | 13 | import torch.nn as nn 14 | import math 15 | 16 | from ssd.modeling import registry 17 | from ssd.utils.model_zoo import load_state_dict_from_url 18 | 19 | model_urls = { 20 | 'mobilenet_v3': 'https://github.com/d-li14/mobilenetv3.pytorch/raw/master/pretrained/mobilenetv3-large-1cd25616.pth', 21 | } 22 | 23 | 24 | def _make_divisible(v, divisor, min_value=None): 25 | """ 26 | This function is taken from the original tf repo. 27 | It ensures that all layers have a channel number that is divisible by 8 28 | It can be seen here: 29 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 30 | :param v: 31 | :param divisor: 32 | :param min_value: 33 | :return: 34 | """ 35 | if min_value is None: 36 | min_value = divisor 37 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 38 | # Make sure that round down does not go down by more than 10%. 
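# e.g. _make_divisible(38.4, 8) -> 40, but _make_divisible(9, 8) -> 16:
# 9 first rounds down to 8, which is more than 10% below 9, so it is bumped up a divisor.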
39 | if new_v < 0.9 * v: 40 | new_v += divisor 41 | return new_v 42 | 43 | 44 | class h_sigmoid(nn.Module): 45 | def __init__(self, inplace=True): 46 | super(h_sigmoid, self).__init__() 47 | self.relu = nn.ReLU6(inplace=inplace) 48 | 49 | def forward(self, x): 50 | return self.relu(x + 3) / 6 51 | 52 | 53 | class h_swish(nn.Module): 54 | def __init__(self, inplace=True): 55 | super(h_swish, self).__init__() 56 | self.sigmoid = h_sigmoid(inplace=inplace) 57 | 58 | def forward(self, x): 59 | return x * self.sigmoid(x) 60 | 61 | 62 | class SELayer(nn.Module): 63 | def __init__(self, channel, reduction=4): 64 | super(SELayer, self).__init__() 65 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 66 | self.fc = nn.Sequential( 67 | nn.Linear(channel, _make_divisible(channel // reduction, 8)), 68 | nn.ReLU(inplace=True), 69 | nn.Linear(_make_divisible(channel // reduction, 8), channel), 70 | h_sigmoid() 71 | ) 72 | 73 | def forward(self, x): 74 | b, c, _, _ = x.size() 75 | y = self.avg_pool(x).view(b, c) 76 | y = self.fc(y).view(b, c, 1, 1) 77 | return x * y 78 | 79 | 80 | def conv_3x3_bn(inp, oup, stride): 81 | return nn.Sequential( 82 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 83 | nn.BatchNorm2d(oup), 84 | h_swish() 85 | ) 86 | 87 | 88 | def conv_1x1_bn(inp, oup): 89 | return nn.Sequential( 90 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 91 | nn.BatchNorm2d(oup), 92 | h_swish() 93 | ) 94 | 95 | 96 | class InvertedResidual(nn.Module): 97 | def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs): 98 | super(InvertedResidual, self).__init__() 99 | assert stride in [1, 2] 100 | 101 | self.identity = stride == 1 and inp == oup 102 | 103 | if inp == hidden_dim: 104 | self.conv = nn.Sequential( 105 | # dw 106 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), 107 | nn.BatchNorm2d(hidden_dim), 108 | h_swish() if use_hs else nn.ReLU(inplace=True), 109 | # Squeeze-and-Excite 110 | SELayer(hidden_dim) if use_se else nn.Identity(), 111 | # pw-linear 112 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 113 | nn.BatchNorm2d(oup), 114 | ) 115 | else: 116 | self.conv = nn.Sequential( 117 | # pw 118 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 119 | nn.BatchNorm2d(hidden_dim), 120 | h_swish() if use_hs else nn.ReLU(inplace=True), 121 | # dw 122 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), 123 | nn.BatchNorm2d(hidden_dim), 124 | # Squeeze-and-Excite 125 | SELayer(hidden_dim) if use_se else nn.Identity(), 126 | h_swish() if use_hs else nn.ReLU(inplace=True), 127 | # pw-linear 128 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 129 | nn.BatchNorm2d(oup), 130 | ) 131 | 132 | def forward(self, x): 133 | if self.identity: 134 | return x + self.conv(x) 135 | else: 136 | return self.conv(x) 137 | 138 | 139 | class MobileNetV3(nn.Module): 140 | def __init__(self, mode='large', num_classes=1000, width_mult=1.): 141 | super(MobileNetV3, self).__init__() 142 | # setting of inverted residual blocks 143 | self.cfgs = [ 144 | # k, t, c, SE, HS, s 145 | [3, 1, 16, 0, 0, 1], 146 | [3, 4, 24, 0, 0, 2], 147 | [3, 3, 24, 0, 0, 1], 148 | [5, 3, 40, 1, 0, 2], 149 | [5, 3, 40, 1, 0, 1], 150 | [5, 3, 40, 1, 0, 1], 151 | [3, 6, 80, 0, 1, 2], 152 | [3, 2.5, 80, 0, 1, 1], 153 | [3, 2.3, 80, 0, 1, 1], 154 | [3, 2.3, 80, 0, 1, 1], 155 | [3, 6, 112, 1, 1, 1], 156 | [3, 6, 112, 1, 1, 1], 157 | [5, 6, 160, 1, 1, 2], 158 | [5, 6, 160, 1, 1, 1], 159 | [5, 6, 160, 1, 1, 1]] 160 | 161 | 
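# Each row above is one MobileNetV3-Large block:
# (kernel k, expansion ratio t, output channels c, use SE, use h-swish HS, stride s).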
assert mode in ['large', 'small'] 162 | 163 | # building first layer 164 | input_channel = _make_divisible(16 * width_mult, 8) 165 | 166 | layers = [conv_3x3_bn(3, input_channel, 2)] 167 | # building inverted residual blocks 168 | block = InvertedResidual 169 | for k, t, c, use_se, use_hs, s in self.cfgs: 170 | output_channel = _make_divisible(c * width_mult, 8) 171 | exp_size = _make_divisible(input_channel * t, 8) 172 | layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs)) 173 | input_channel = output_channel 174 | # building last several layers 175 | layers.append(conv_1x1_bn(input_channel, exp_size)) 176 | self.features = nn.Sequential(*layers) 177 | self.extras = nn.ModuleList([ 178 | InvertedResidual(960, _make_divisible(960 * 0.2, 8), 512, 3, 2, True, True), 179 | InvertedResidual(512, _make_divisible(512 * 0.25, 8), 256, 3, 2, True, True), 180 | InvertedResidual(256, _make_divisible(256 * 0.5, 8), 256, 3, 2, True, True), 181 | InvertedResidual(256, _make_divisible(256 * 0.25, 8), 64, 3, 2, True, True), 182 | ]) 183 | 184 | self.reset_parameters() 185 | 186 | def forward(self, x): 187 | features = [] 188 | for i in range(13): 189 | x = self.features[i](x) 190 | features.append(x) 191 | 192 | for i in range(13, len(self.features)): 193 | x = self.features[i](x) 194 | features.append(x) 195 | 196 | for i in range(len(self.extras)): 197 | x = self.extras[i](x) 198 | features.append(x) 199 | 200 | return tuple(features) 201 | 202 | def reset_parameters(self): 203 | for m in self.modules(): 204 | if isinstance(m, nn.Conv2d): 205 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 206 | m.weight.data.normal_(0, math.sqrt(2. / n)) 207 | if m.bias is not None: 208 | m.bias.data.zero_() 209 | elif isinstance(m, nn.BatchNorm2d): 210 | m.weight.data.fill_(1) 211 | m.bias.data.zero_() 212 | elif isinstance(m, nn.Linear): 213 | n = m.weight.size(1) 214 | m.weight.data.normal_(0, 0.01) 215 | m.bias.data.zero_() 216 | 217 | 218 | @registry.BACKBONES.register('mobilenet_v3') 219 | def mobilenet_v3(cfg, pretrained=True): 220 | model = MobileNetV3() 221 | if pretrained: 222 | model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v3']), strict=False) 223 | return model 224 | -------------------------------------------------------------------------------- /ssd/modeling/backbone/vgg.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ssd.layers import L2Norm 5 | from ssd.modeling import registry 6 | from ssd.utils.model_zoo import load_state_dict_from_url 7 | 8 | model_urls = { 9 | 'vgg': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth', 10 | } 11 | 12 | 13 | # borrowed from https://github.com/amdegroot/ssd.pytorch/blob/master/ssd.py 14 | def add_vgg(cfg, batch_norm=False): 15 | layers = [] 16 | in_channels = 3 17 | for v in cfg: 18 | if v == 'M': 19 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 20 | elif v == 'C': 21 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] 22 | else: 23 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) 24 | if batch_norm: 25 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] 26 | else: 27 | layers += [conv2d, nn.ReLU(inplace=True)] 28 | in_channels = v 29 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1) 30 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6) 31 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1) 32 | layers += [pool5, 
conv6, 33 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)] 34 | return layers 35 | 36 | 37 | def add_extras(cfg, i, size=300): 38 | # Extra layers added to VGG for feature scaling 39 | layers = [] 40 | in_channels = i 41 | flag = False 42 | for k, v in enumerate(cfg): 43 | if in_channels != 'S': 44 | if v == 'S': 45 | layers += [nn.Conv2d(in_channels, cfg[k + 1], kernel_size=(1, 3)[flag], stride=2, padding=1)] 46 | else: 47 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])] 48 | flag = not flag 49 | in_channels = v 50 | if size == 512: 51 | layers.append(nn.Conv2d(in_channels, 128, kernel_size=1, stride=1)) 52 | layers.append(nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=1)) 53 | return layers 54 | 55 | 56 | vgg_base = { 57 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 58 | 512, 512, 512], 59 | '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', 60 | 512, 512, 512], 61 | } 62 | extras_base = { 63 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 64 | '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256], 65 | } 66 | 67 | 68 | class VGG(nn.Module): 69 | def __init__(self, cfg): 70 | super().__init__() 71 | size = cfg.INPUT.IMAGE_SIZE 72 | vgg_config = vgg_base[str(size)] 73 | extras_config = extras_base[str(size)] 74 | 75 | self.vgg = nn.ModuleList(add_vgg(vgg_config)) 76 | self.extras = nn.ModuleList(add_extras(extras_config, i=1024, size=size)) 77 | self.l2_norm = L2Norm(512, scale=20) 78 | self.reset_parameters() 79 | 80 | def reset_parameters(self): 81 | for m in self.extras.modules(): 82 | if isinstance(m, nn.Conv2d): 83 | nn.init.xavier_uniform_(m.weight) 84 | nn.init.zeros_(m.bias) 85 | 86 | def init_from_pretrain(self, state_dict): 87 | self.vgg.load_state_dict(state_dict) 88 | 89 | def forward(self, x): 90 | features = [] 91 | for i in range(23): 92 | x = self.vgg[i](x) 93 | s = self.l2_norm(x) # Conv4_3 L2 normalization 94 | features.append(s) 95 | 96 | # apply vgg up to fc7 97 | for i in range(23, len(self.vgg)): 98 | x = self.vgg[i](x) 99 | features.append(x) 100 | 101 | for k, v in enumerate(self.extras): 102 | x = F.relu(v(x), inplace=True) 103 | if k % 2 == 1: 104 | features.append(x) 105 | 106 | return tuple(features) 107 | 108 | 109 | @registry.BACKBONES.register('vgg') 110 | def vgg(cfg, pretrained=True): 111 | model = VGG(cfg) 112 | if pretrained: 113 | model.init_from_pretrain(load_state_dict_from_url(model_urls['vgg'])) 114 | return model 115 | -------------------------------------------------------------------------------- /ssd/modeling/box_head/__init__.py: -------------------------------------------------------------------------------- 1 | from ssd.modeling import registry 2 | from .box_head import SSDBoxHead 3 | 4 | __all__ = ['build_box_head', 'SSDBoxHead'] 5 | 6 | 7 | def build_box_head(cfg): 8 | return registry.BOX_HEADS[cfg.MODEL.BOX_HEAD.NAME](cfg) 9 | -------------------------------------------------------------------------------- /ssd/modeling/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | 4 | from ssd.modeling import registry 5 | from ssd.modeling.anchors.prior_box import PriorBox 6 | from ssd.modeling.box_head.box_predictor import make_box_predictor 7 | from ssd.utils import box_utils 8 | from .inference import PostProcessor 9 | from .loss import MultiBoxLoss 10 | 11 | 12 | @registry.BOX_HEADS.register('SSDBoxHead') 13 | class 
SSDBoxHead(nn.Module): 14 | def __init__(self, cfg): 15 | super().__init__() 16 | self.cfg = cfg 17 | self.predictor = make_box_predictor(cfg) 18 | self.loss_evaluator = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO) 19 | self.post_processor = PostProcessor(cfg) 20 | self.priors = None 21 | 22 | def forward(self, features, targets=None): 23 | cls_logits, bbox_pred = self.predictor(features) 24 | if self.training: 25 | return self._forward_train(cls_logits, bbox_pred, targets) 26 | else: 27 | return self._forward_test(cls_logits, bbox_pred) 28 | 29 | def _forward_train(self, cls_logits, bbox_pred, targets): 30 | gt_boxes, gt_labels = targets['boxes'], targets['labels'] 31 | reg_loss, cls_loss = self.loss_evaluator(cls_logits, bbox_pred, gt_labels, gt_boxes) 32 | loss_dict = dict( 33 | reg_loss=reg_loss, 34 | cls_loss=cls_loss, 35 | ) 36 | detections = (cls_logits, bbox_pred) 37 | return detections, loss_dict 38 | 39 | def _forward_test(self, cls_logits, bbox_pred): 40 | if self.priors is None: 41 | self.priors = PriorBox(self.cfg)().to(bbox_pred.device) 42 | scores = F.softmax(cls_logits, dim=2) 43 | boxes = box_utils.convert_locations_to_boxes( 44 | bbox_pred, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE 45 | ) 46 | boxes = box_utils.center_form_to_corner_form(boxes) 47 | detections = (scores, boxes) 48 | detections = self.post_processor(detections) 49 | return detections, {} 50 | -------------------------------------------------------------------------------- /ssd/modeling/box_head/box_predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from ssd.layers import SeparableConv2d 5 | from ssd.modeling import registry 6 | 7 | 8 | class BoxPredictor(nn.Module): 9 | def __init__(self, cfg): 10 | super().__init__() 11 | self.cfg = cfg 12 | self.cls_headers = nn.ModuleList() 13 | self.reg_headers = nn.ModuleList() 14 | for level, (boxes_per_location, out_channels) in enumerate(zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS)): 15 | self.cls_headers.append(self.cls_block(level, out_channels, boxes_per_location)) 16 | self.reg_headers.append(self.reg_block(level, out_channels, boxes_per_location)) 17 | self.reset_parameters() 18 | 19 | def cls_block(self, level, out_channels, boxes_per_location): 20 | raise NotImplementedError 21 | 22 | def reg_block(self, level, out_channels, boxes_per_location): 23 | raise NotImplementedError 24 | 25 | def reset_parameters(self): 26 | for m in self.modules(): 27 | if isinstance(m, nn.Conv2d): 28 | nn.init.xavier_uniform_(m.weight) 29 | nn.init.zeros_(m.bias) 30 | 31 | def forward(self, features): 32 | cls_logits = [] 33 | bbox_pred = [] 34 | for feature, cls_header, reg_header in zip(features, self.cls_headers, self.reg_headers): 35 | cls_logits.append(cls_header(feature).permute(0, 2, 3, 1).contiguous()) 36 | bbox_pred.append(reg_header(feature).permute(0, 2, 3, 1).contiguous()) 37 | 38 | batch_size = features[0].shape[0] 39 | cls_logits = torch.cat([c.view(c.shape[0], -1) for c in cls_logits], dim=1).view(batch_size, -1, self.cfg.MODEL.NUM_CLASSES) 40 | bbox_pred = torch.cat([l.view(l.shape[0], -1) for l in bbox_pred], dim=1).view(batch_size, -1, 4) 41 | 42 | return cls_logits, bbox_pred 43 | 44 | 45 | @registry.BOX_PREDICTORS.register('SSDBoxPredictor') 46 | class SSDBoxPredictor(BoxPredictor): 47 | def cls_block(self, level, out_channels, boxes_per_location): 48 | return nn.Conv2d(out_channels, 
boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) 49 | 50 | def reg_block(self, level, out_channels, boxes_per_location): 51 | return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) 52 | 53 | 54 | @registry.BOX_PREDICTORS.register('SSDLiteBoxPredictor') 55 | class SSDLiteBoxPredictor(BoxPredictor): 56 | def cls_block(self, level, out_channels, boxes_per_location): 57 | num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS) 58 | if level == num_levels - 1: 59 | return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=1) 60 | return SeparableConv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) 61 | 62 | def reg_block(self, level, out_channels, boxes_per_location): 63 | num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS) 64 | if level == num_levels - 1: 65 | return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=1) 66 | return SeparableConv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) 67 | 68 | 69 | def make_box_predictor(cfg): 70 | return registry.BOX_PREDICTORS[cfg.MODEL.BOX_HEAD.PREDICTOR](cfg) 71 | -------------------------------------------------------------------------------- /ssd/modeling/box_head/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ssd.structures.container import Container 4 | from ssd.utils.nms import batched_nms 5 | 6 | 7 | class PostProcessor: 8 | def __init__(self, cfg): 9 | super().__init__() 10 | self.cfg = cfg 11 | self.width = cfg.INPUT.IMAGE_SIZE 12 | self.height = cfg.INPUT.IMAGE_SIZE 13 | 14 | def __call__(self, detections): 15 | batches_scores, batches_boxes = detections 16 | device = batches_scores.device 17 | batch_size = batches_scores.size(0) 18 | results = [] 19 | for batch_id in range(batch_size): 20 | scores, boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4) 21 | num_boxes = scores.shape[0] 22 | num_classes = scores.shape[1] 23 | 24 | boxes = boxes.view(num_boxes, 1, 4).expand(num_boxes, num_classes, 4) 25 | labels = torch.arange(num_classes, device=device) 26 | labels = labels.view(1, num_classes).expand_as(scores) 27 | 28 | # remove predictions with the background label 29 | boxes = boxes[:, 1:] 30 | scores = scores[:, 1:] 31 | labels = labels[:, 1:] 32 | 33 | # batch everything, by making every class prediction be a separate instance 34 | boxes = boxes.reshape(-1, 4) 35 | scores = scores.reshape(-1) 36 | labels = labels.reshape(-1) 37 | 38 | # remove low scoring boxes 39 | indices = torch.nonzero(scores > self.cfg.TEST.CONFIDENCE_THRESHOLD).squeeze(1) 40 | boxes, scores, labels = boxes[indices], scores[indices], labels[indices] 41 | 42 | boxes[:, 0::2] *= self.width 43 | boxes[:, 1::2] *= self.height 44 | 45 | keep = batched_nms(boxes, scores, labels, self.cfg.TEST.NMS_THRESHOLD) 46 | # keep only topk scoring predictions 47 | keep = keep[:self.cfg.TEST.MAX_PER_IMAGE] 48 | boxes, scores, labels = boxes[keep], scores[keep], labels[keep] 49 | 50 | container = Container(boxes=boxes, labels=labels, scores=scores) 51 | container.img_width = self.width 52 | container.img_height = self.height 53 | results.append(container) 54 | return results 55 | -------------------------------------------------------------------------------- /ssd/modeling/box_head/loss.py: -------------------------------------------------------------------------------- 1 | import 
torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch
4 |
5 | from ssd.utils import box_utils
6 |
7 |
8 | class MultiBoxLoss(nn.Module):
9 | def __init__(self, neg_pos_ratio):
10 | """Implement SSD MultiBox Loss.
11 |
12 | Basically, MultiBox loss combines classification loss
13 | and Smooth L1 regression loss.
14 | """
15 | super(MultiBoxLoss, self).__init__()
16 | self.neg_pos_ratio = neg_pos_ratio
17 |
18 | def forward(self, confidence, predicted_locations, labels, gt_locations):
19 | """Compute classification loss and smooth l1 loss.
20 |
21 | Args:
22 | confidence (batch_size, num_priors, num_classes): class predictions.
23 | predicted_locations (batch_size, num_priors, 4): predicted locations.
24 | labels (batch_size, num_priors): real labels of all the priors.
25 | gt_locations (batch_size, num_priors, 4): real boxes corresponding to all the priors.
26 | """
27 | num_classes = confidence.size(2)
28 | with torch.no_grad():
29 | # derived from cross_entropy = -sum(log(p)); rank negatives by the background log-probability
30 | loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
31 | mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)
32 |
33 | confidence = confidence[mask, :]
34 | classification_loss = F.cross_entropy(confidence.view(-1, num_classes), labels[mask], reduction='sum')
35 |
36 | pos_mask = labels > 0
37 | predicted_locations = predicted_locations[pos_mask, :].view(-1, 4)
38 | gt_locations = gt_locations[pos_mask, :].view(-1, 4)
39 | smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum')
40 | num_pos = gt_locations.size(0)
41 | return smooth_l1_loss / num_pos, classification_loss / num_pos
42 |
-------------------------------------------------------------------------------- /ssd/modeling/detector/__init__.py: --------------------------------------------------------------------------------
1 | from .ssd_detector import SSDDetector
2 |
3 | _DETECTION_META_ARCHITECTURES = {
4 | "SSDDetector": SSDDetector
5 | }
6 |
7 |
8 | def build_detection_model(cfg):
9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
10 | return meta_arch(cfg)
11 |
-------------------------------------------------------------------------------- /ssd/modeling/detector/ssd_detector.py: --------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from ssd.modeling.backbone import build_backbone
4 | from ssd.modeling.box_head import build_box_head
5 |
6 |
7 | class SSDDetector(nn.Module):
8 | def __init__(self, cfg):
9 | super().__init__()
10 | self.cfg = cfg
11 | self.backbone = build_backbone(cfg)
12 | self.box_head = build_box_head(cfg)
13 |
14 | def forward(self, images, targets=None):
15 | features = self.backbone(images)
16 | detections, detector_losses = self.box_head(features, targets)
17 | if self.training:
18 | return detector_losses
19 | return detections
20 |
-------------------------------------------------------------------------------- /ssd/modeling/registry.py: --------------------------------------------------------------------------------
1 | from ssd.utils.registry import Registry
2 |
3 | BACKBONES = Registry()
4 | BOX_HEADS = Registry()
5 | BOX_PREDICTORS = Registry()
6 |
-------------------------------------------------------------------------------- /ssd/solver/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/solver/__init__.py
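(Putting the modeling pieces above together: a minimal inference sketch, assuming `images` is a
normalized NCHW batch sized per cfg.INPUT.IMAGE_SIZE; see test.py below for the full pipeline.)

    from ssd.config import cfg
    from ssd.modeling.detector import build_detection_model

    cfg.merge_from_file('configs/vgg_ssd300_voc0712.yaml')
    cfg.freeze()
    model = build_detection_model(cfg)  # SSDDetector = backbone + box head
    model.eval()
    detections = model(images)  # one Container(boxes, labels, scores) per image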
-------------------------------------------------------------------------------- /ssd/solver/build.py: --------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .lr_scheduler import WarmupMultiStepLR
4 |
5 |
6 | def make_optimizer(cfg, model, lr=None):
7 | lr = cfg.SOLVER.BASE_LR if lr is None else lr
8 | return torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.SOLVER.MOMENTUM, weight_decay=cfg.SOLVER.WEIGHT_DECAY)
9 |
10 |
11 | def make_lr_scheduler(cfg, optimizer, milestones=None):
12 | return WarmupMultiStepLR(optimizer=optimizer,
13 | milestones=cfg.SOLVER.LR_STEPS if milestones is None else milestones,
14 | gamma=cfg.SOLVER.GAMMA,
15 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
16 | warmup_iters=cfg.SOLVER.WARMUP_ITERS)
17 |
-------------------------------------------------------------------------------- /ssd/solver/lr_scheduler.py: --------------------------------------------------------------------------------
1 | from bisect import bisect_right
2 |
3 | from torch.optim.lr_scheduler import _LRScheduler
4 |
5 |
6 | class WarmupMultiStepLR(_LRScheduler):
7 | def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3,
8 | warmup_iters=500, last_epoch=-1):
9 | if not list(milestones) == sorted(milestones):
10 | raise ValueError(
11 | "Milestones should be a list of"
12 | " increasing integers. Got {}".format(milestones)
13 | )
14 |
15 | self.milestones = milestones
16 | self.gamma = gamma
17 | self.warmup_factor = warmup_factor
18 | self.warmup_iters = warmup_iters
19 | super().__init__(optimizer, last_epoch)
20 |
21 | def get_lr(self):
22 | warmup_factor = 1
23 | if self.last_epoch < self.warmup_iters:
24 | alpha = float(self.last_epoch) / self.warmup_iters
25 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha
26 | return [
27 | base_lr
28 | * warmup_factor
29 | * self.gamma ** bisect_right(self.milestones, self.last_epoch)
30 | for base_lr in self.base_lrs
31 | ]
32 |
-------------------------------------------------------------------------------- /ssd/structures/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/structures/__init__.py
-------------------------------------------------------------------------------- /ssd/structures/container.py: --------------------------------------------------------------------------------
1 | class Container:
2 | """
3 | Helper class for managing boxes, labels, etc.
4 | It does not inherit from dict because `default_collate` converts dict subclasses back to plain dicts.
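Example (mirroring how the box head consumes targets):
    targets = Container(boxes=boxes, labels=labels)
    targets['boxes']    # dict-style access
    targets.to(device)  # forwards .to() to every value that supports it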
5 | """ 6 | 7 | def __init__(self, *args, **kwargs): 8 | self._data_dict = dict(*args, **kwargs) 9 | 10 | def __setattr__(self, key, value): 11 | object.__setattr__(self, key, value) 12 | 13 | def __getitem__(self, key): 14 | return self._data_dict[key] 15 | 16 | def __iter__(self): 17 | return self._data_dict.__iter__() 18 | 19 | def __setitem__(self, key, value): 20 | self._data_dict[key] = value 21 | 22 | def _call(self, name, *args, **kwargs): 23 | keys = list(self._data_dict.keys()) 24 | for key in keys: 25 | value = self._data_dict[key] 26 | if hasattr(value, name): 27 | self._data_dict[key] = getattr(value, name)(*args, **kwargs) 28 | return self 29 | 30 | def to(self, *args, **kwargs): 31 | return self._call('to', *args, **kwargs) 32 | 33 | def numpy(self): 34 | return self._call('numpy') 35 | 36 | def resize(self, size): 37 | """resize boxes 38 | Args: 39 | size: (width, height) 40 | Returns: 41 | self 42 | """ 43 | img_width = getattr(self, 'img_width', -1) 44 | img_height = getattr(self, 'img_height', -1) 45 | assert img_width > 0 and img_height > 0 46 | assert 'boxes' in self._data_dict 47 | boxes = self._data_dict['boxes'] 48 | new_width, new_height = size 49 | boxes[:, 0::2] *= (new_width / img_width) 50 | boxes[:, 1::2] *= (new_height / img_height) 51 | return self 52 | 53 | def __repr__(self): 54 | return self._data_dict.__repr__() 55 | -------------------------------------------------------------------------------- /ssd/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import * 2 | -------------------------------------------------------------------------------- /ssd/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | 5 | def convert_locations_to_boxes(locations, priors, center_variance, 6 | size_variance): 7 | """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w). 8 | 9 | The conversion: 10 | $$predicted\_center * center_variance = \frac {real\_center - prior\_center} {prior\_hw}$$ 11 | $$exp(predicted\_hw * size_variance) = \frac {real\_hw} {prior\_hw}$$ 12 | We do it in the inverse direction here. 13 | Args: 14 | locations (batch_size, num_priors, 4): the regression output of SSD. It will contain the outputs as well. 15 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes. 16 | center_variance: a float used to change the scale of center. 17 | size_variance: a float used to change of scale of size. 18 | Returns: 19 | boxes: priors: [[center_x, center_y, w, h]]. All the values 20 | are relative to the image size. 21 | """ 22 | # priors can have one dimension less. 
23 | if priors.dim() + 1 == locations.dim():
24 | priors = priors.unsqueeze(0)
25 | return torch.cat([
26 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
27 | torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
28 | ], dim=locations.dim() - 1)
29 |
30 |
31 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
32 | # priors can have one dimension less
33 | if center_form_priors.dim() + 1 == center_form_boxes.dim():
34 | center_form_priors = center_form_priors.unsqueeze(0)
35 | return torch.cat([
36 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance,
37 | torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance
38 | ], dim=center_form_boxes.dim() - 1)
39 |
40 |
41 | def area_of(left_top, right_bottom) -> torch.Tensor:
42 | """Compute the areas of rectangles given two corners.
43 |
44 | Args:
45 | left_top (N, 2): left top corner.
46 | right_bottom (N, 2): right bottom corner.
47 |
48 | Returns:
49 | area (N): return the area.
50 | """
51 | hw = torch.clamp(right_bottom - left_top, min=0.0)
52 | return hw[..., 0] * hw[..., 1]
53 |
54 |
55 | def iou_of(boxes0, boxes1, eps=1e-5):
56 | """Return intersection-over-union (Jaccard index) of boxes.
57 |
58 | Args:
59 | boxes0 (N, 4): ground truth boxes.
60 | boxes1 (N or 1, 4): predicted boxes.
61 | eps: a small number to avoid 0 as denominator.
62 | Returns:
63 | iou (N): IoU values.
64 | """
65 | overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
66 | overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])
67 |
68 | overlap_area = area_of(overlap_left_top, overlap_right_bottom)
69 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
70 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
71 | return overlap_area / (area0 + area1 - overlap_area + eps)
72 |
73 |
74 | def assign_priors(gt_boxes, gt_labels, corner_form_priors,
75 | iou_threshold):
76 | """Assign ground truth boxes and targets to priors.
77 |
78 | Args:
79 | gt_boxes (num_targets, 4): ground truth boxes.
80 | gt_labels (num_targets): labels of targets.
81 | corner_form_priors (num_priors, 4): corner form priors.
82 | Returns:
83 | boxes (num_priors, 4): real values for priors.
84 | labels (num_priors): labels for priors.
85 | """
86 | # size: num_priors x num_targets
87 | ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))
88 | # size: num_priors
89 | best_target_per_prior, best_target_per_prior_index = ious.max(1)
90 | # size: num_targets
91 | best_prior_per_target, best_prior_per_target_index = ious.max(0)
92 |
93 | for target_index, prior_index in enumerate(best_prior_per_target_index):
94 | best_target_per_prior_index[prior_index] = target_index
95 | # 2.0 is used to make sure every target has a prior assigned
96 | best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)
97 | # size: num_priors
98 | labels = gt_labels[best_target_per_prior_index]
99 | labels[best_target_per_prior < iou_threshold] = 0  # the background id
100 | boxes = gt_boxes[best_target_per_prior_index]
101 | return boxes, labels
102 |
103 |
104 | def hard_negative_mining(loss, labels, neg_pos_ratio):
105 | """
106 | It is used to suppress the presence of a large number of negative predictions.
107 | It works on image level, not batch level.
108 | For any example/image, it keeps all the positive predictions and
109 | cuts the number of negative predictions to make sure the ratio
110 | between the negative examples and positive examples is no more
111 | than the given ratio for an image.
112 |
113 | Args:
114 | loss (N, num_priors): the loss for each example.
115 | labels (N, num_priors): the labels.
116 | neg_pos_ratio: the ratio between the negative examples and positive examples.
117 | """
118 | pos_mask = labels > 0
119 | num_pos = pos_mask.long().sum(dim=1, keepdim=True)
120 | num_neg = num_pos * neg_pos_ratio
121 |
122 | loss[pos_mask] = -math.inf
123 | _, indexes = loss.sort(dim=1, descending=True)  # sort losses per image
124 | _, orders = indexes.sort(dim=1)  # double argsort yields each prior's rank
125 | neg_mask = orders < num_neg
126 | return pos_mask | neg_mask
127 |
128 |
129 | def center_form_to_corner_form(locations):
130 | return torch.cat([locations[..., :2] - locations[..., 2:] / 2,
131 | locations[..., :2] + locations[..., 2:] / 2], locations.dim() - 1)
132 |
133 |
134 | def corner_form_to_center_form(boxes):
135 | return torch.cat([
136 | (boxes[..., :2] + boxes[..., 2:]) / 2,
137 | boxes[..., 2:] - boxes[..., :2]
138 | ], boxes.dim() - 1)
139 |
-------------------------------------------------------------------------------- /ssd/utils/checkpoint.py: --------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import torch
5 | from torch.nn.parallel import DistributedDataParallel
6 |
7 | from ssd.utils.model_zoo import cache_url
8 |
9 |
10 | class CheckPointer:
11 | _last_checkpoint_name = 'last_checkpoint.txt'
12 |
13 | def __init__(self,
14 | model,
15 | optimizer=None,
16 | scheduler=None,
17 | save_dir="",
18 | save_to_disk=None,
19 | logger=None):
20 | self.model = model
21 | self.optimizer = optimizer
22 | self.scheduler = scheduler
23 | self.save_dir = save_dir
24 | self.save_to_disk = save_to_disk
25 | if logger is None:
26 | logger = logging.getLogger(__name__)
27 | self.logger = logger
28 |
29 | def save(self, name, **kwargs):
30 | if not self.save_dir:
31 | return
32 |
33 | if not self.save_to_disk:
34 | return
35 |
36 | data = {}
37 | if isinstance(self.model, DistributedDataParallel):
38 | data['model'] = self.model.module.state_dict()
39 | else:
40 | data['model'] = self.model.state_dict()
41 | if self.optimizer is not None:
42 | data["optimizer"] = self.optimizer.state_dict()
43 | if self.scheduler is not None:
44 | data["scheduler"] = self.scheduler.state_dict()
45 | data.update(kwargs)
46 |
47 | save_file = os.path.join(self.save_dir, "{}.pth".format(name))
48 | self.logger.info("Saving checkpoint to {}".format(save_file))
49 | torch.save(data, save_file)
50 |
51 | self.tag_last_checkpoint(save_file)
52 |
53 | def load(self, f=None, use_latest=True):
54 | if self.has_checkpoint() and use_latest:
55 | # override argument with existing checkpoint
56 | f = self.get_checkpoint_file()
57 | if not f:
58 | # no checkpoint could be found
59 | self.logger.info("No checkpoint found.")
60 | return {}
61 |
62 | self.logger.info("Loading checkpoint from {}".format(f))
63 | checkpoint = self._load_file(f)
64 | model = self.model
65 | if isinstance(model, DistributedDataParallel):
66 | model = self.model.module
67 |
68 | model.load_state_dict(checkpoint.pop("model"))
69 | if "optimizer" in checkpoint and self.optimizer:
70 | self.logger.info("Loading optimizer from {}".format(f))
71 | self.optimizer.load_state_dict(checkpoint.pop("optimizer"))
72 | if "scheduler" in checkpoint and self.scheduler:
73 |
self.logger.info("Loading scheduler from {}".format(f)) 74 | self.scheduler.load_state_dict(checkpoint.pop("scheduler")) 75 | 76 | # return any further checkpoint data 77 | return checkpoint 78 | 79 | def get_checkpoint_file(self): 80 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name) 81 | try: 82 | with open(save_file, "r") as f: 83 | last_saved = f.read() 84 | last_saved = last_saved.strip() 85 | except IOError: 86 | # if file doesn't exist, maybe because it has just been 87 | # deleted by a separate process 88 | last_saved = "" 89 | return last_saved 90 | 91 | def has_checkpoint(self): 92 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name) 93 | return os.path.exists(save_file) 94 | 95 | def tag_last_checkpoint(self, last_filename): 96 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name) 97 | with open(save_file, "w") as f: 98 | f.write(last_filename) 99 | 100 | def _load_file(self, f): 101 | # download url files 102 | if f.startswith("http"): 103 | # if the file is a url path, download it and cache it 104 | cached_f = cache_url(f) 105 | self.logger.info("url {} cached in {}".format(f, cached_f)) 106 | f = cached_f 107 | return torch.load(f, map_location=torch.device("cpu")) 108 | -------------------------------------------------------------------------------- /ssd/utils/dist_util.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import torch 4 | import torch.distributed as dist 5 | 6 | 7 | def get_world_size(): 8 | if not dist.is_available(): 9 | return 1 10 | if not dist.is_initialized(): 11 | return 1 12 | return dist.get_world_size() 13 | 14 | 15 | def get_rank(): 16 | if not dist.is_available(): 17 | return 0 18 | if not dist.is_initialized(): 19 | return 0 20 | return dist.get_rank() 21 | 22 | 23 | def is_main_process(): 24 | return get_rank() == 0 25 | 26 | 27 | def synchronize(): 28 | """ 29 | Helper function to synchronize (barrier) among all processes when 30 | using distributed training 31 | """ 32 | if not dist.is_available(): 33 | return 34 | if not dist.is_initialized(): 35 | return 36 | world_size = dist.get_world_size() 37 | if world_size == 1: 38 | return 39 | dist.barrier() 40 | 41 | 42 | def _encode(encoded_data, data): 43 | # gets a byte representation for the data 44 | encoded_bytes = pickle.dumps(data) 45 | # convert this byte string into a byte tensor 46 | storage = torch.ByteStorage.from_buffer(encoded_bytes) 47 | tensor = torch.ByteTensor(storage).to("cuda") 48 | # encoding: first byte is the size and then rest is the data 49 | s = tensor.numel() 50 | assert s <= 255, "Can't encode data greater than 255 bytes" 51 | # put the encoded data in encoded_data 52 | encoded_data[0] = s 53 | encoded_data[1: (s + 1)] = tensor 54 | 55 | 56 | def all_gather(data): 57 | """ 58 | Run all_gather on arbitrary picklable data (not necessarily tensors) 59 | Args: 60 | data: any picklable object 61 | Returns: 62 | list[data]: list of data gathered from each rank 63 | """ 64 | world_size = get_world_size() 65 | if world_size == 1: 66 | return [data] 67 | 68 | # serialized to a Tensor 69 | buffer = pickle.dumps(data) 70 | storage = torch.ByteStorage.from_buffer(buffer) 71 | tensor = torch.ByteTensor(storage).to("cuda") 72 | 73 | # obtain Tensor size of each rank 74 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 75 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 76 | dist.all_gather(size_list, local_size) 77 | size_list = 
[int(size.item()) for size in size_list] 78 | max_size = max(size_list) 79 | 80 | # receiving Tensor from all ranks 81 | # we pad the tensor because torch all_gather does not support 82 | # gathering tensors of different shapes 83 | tensor_list = [] 84 | for _ in size_list: 85 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 86 | if local_size != max_size: 87 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 88 | tensor = torch.cat((tensor, padding), dim=0) 89 | dist.all_gather(tensor_list, tensor) 90 | 91 | data_list = [] 92 | for size, tensor in zip(size_list, tensor_list): 93 | buffer = tensor.cpu().numpy().tobytes()[:size] 94 | data_list.append(pickle.loads(buffer)) 95 | 96 | return data_list 97 | -------------------------------------------------------------------------------- /ssd/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | 5 | 6 | def setup_logger(name, distributed_rank, save_dir=None): 7 | logger = logging.getLogger(name) 8 | logger.setLevel(logging.DEBUG) 9 | # don't log results for the non-master process 10 | if distributed_rank > 0: 11 | return logger 12 | stream_handler = logging.StreamHandler(stream=sys.stdout) 13 | stream_handler.setLevel(logging.DEBUG) 14 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 15 | stream_handler.setFormatter(formatter) 16 | logger.addHandler(stream_handler) 17 | if save_dir: 18 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) 19 | fh.setLevel(logging.DEBUG) 20 | fh.setFormatter(formatter) 21 | logger.addHandler(fh) 22 | return logger 23 | -------------------------------------------------------------------------------- /ssd/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | from collections import deque, defaultdict 2 | import numpy as np 3 | import torch 4 | 5 | 6 | class SmoothedValue: 7 | """Track a series of values and provide access to smoothed values over a 8 | window or the global series average. 
9 | """ 10 | 11 | def __init__(self, window_size=10): 12 | self.deque = deque(maxlen=window_size) 13 | self.value = np.nan 14 | self.series = [] 15 | self.total = 0.0 16 | self.count = 0 17 | 18 | def update(self, value): 19 | self.deque.append(value) 20 | self.series.append(value) 21 | self.count += 1 22 | self.total += value 23 | self.value = value 24 | 25 | @property 26 | def median(self): 27 | values = np.array(self.deque) 28 | return np.median(values) 29 | 30 | @property 31 | def avg(self): 32 | values = np.array(self.deque) 33 | return np.mean(values) 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger: 41 | def __init__(self, delimiter=", "): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.3f} ({:.3f})".format(name, meter.avg, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /ssd/utils/misc.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | 4 | 5 | def str2bool(s): 6 | return s.lower() in ('true', '1') 7 | 8 | 9 | def mkdir(path): 10 | try: 11 | os.makedirs(path) 12 | except OSError as e: 13 | if e.errno != errno.EEXIST: 14 | raise 15 | -------------------------------------------------------------------------------- /ssd/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | import torch 6 | 7 | from ssd.utils.dist_util import is_main_process, synchronize 8 | 9 | from torch.hub import download_url_to_file 10 | from torch.hub import urlparse 11 | from torch.hub import HASH_REGEX 12 | 13 | 14 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 15 | # but with a few improvements and modifications 16 | def cache_url(url, model_dir=None, progress=True): 17 | r"""Loads the Torch serialized object at the given URL. 18 | If the object is already present in `model_dir`, it's deserialized and 19 | returned. The filename part of the URL should follow the naming convention 20 | ``filename-.ext`` where ```` is the first eight or more 21 | digits of the SHA256 hash of the contents of the file. The hash is used to 22 | ensure unique names and to verify the contents of the file. 23 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 24 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 25 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 
26 | Args:
27 | url (string): URL of the object to download
28 | model_dir (string, optional): directory in which to save the object
29 | progress (bool, optional): whether or not to display a progress bar to stderr
30 | Example:
31 | >>> cached_file = ssd.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
32 | """
33 | if model_dir is None:
34 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch"))
35 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models"))
36 | if not os.path.exists(model_dir):
37 | os.makedirs(model_dir)
38 | parts = urlparse(url)
39 | filename = os.path.basename(parts.path)
40 | if filename == "model_final.pkl":
41 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename
42 | # so make the full path the filename by replacing / with _
43 | filename = parts.path.replace("/", "_")
44 | cached_file = os.path.join(model_dir, filename)
45 | if not os.path.exists(cached_file) and is_main_process():
46 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
47 | hash_prefix = HASH_REGEX.search(filename)
48 | if hash_prefix is not None:
49 | hash_prefix = hash_prefix.group(1)
50 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
51 | # which matches the hash PyTorch uses. So we skip the hash matching
52 | # if the hash_prefix is less than 6 characters
53 | if len(hash_prefix) < 6:
54 | hash_prefix = None
55 | download_url_to_file(url, cached_file, hash_prefix, progress=progress)
56 | synchronize()
57 | return cached_file
58 |
59 |
60 | def load_state_dict_from_url(url, map_location='cpu'):
61 | cached_file = cache_url(url)
62 | return torch.load(cached_file, map_location=map_location)
63 |
-------------------------------------------------------------------------------- /ssd/utils/nms.py: --------------------------------------------------------------------------------
1 | import sys
2 | import warnings
3 |
4 | import torch
5 | import torchvision
6 |
7 | try:  # a string comparison on torchvision.__version__ breaks for '0.10.0'+, so probe the op directly
8 | _nms = torchvision.ops.nms
9 | except AttributeError:
10 | warnings.warn('No NMS is available. Please upgrade torchvision to 0.3.0+')
11 | sys.exit(-1)
12 |
13 |
14 | def nms(boxes, scores, nms_thresh):
15 | """ Performs non-maximum suppression, run on GPU or CPU according to
16 | boxes's device.
17 | Args:
18 | boxes(Tensor[N, 4]): boxes in (x1, y1, x2, y2) format, use absolute coordinates (or relative coordinates)
19 | scores(Tensor[N]): scores
20 | nms_thresh(float): thresh
21 | Returns:
22 | indices kept.
23 | """
24 | keep = _nms(boxes, scores, nms_thresh)
25 | return keep
26 |
27 |
28 | def batched_nms(boxes, scores, idxs, iou_threshold):
29 | """
30 | Performs non-maximum suppression in a batched fashion.
31 |
32 | Each index value corresponds to a category, and NMS
33 | will not be applied between elements of different categories.
34 |
35 | Parameters
36 | ----------
37 | boxes : Tensor[N, 4]
38 | boxes where NMS will be performed. They
39 | are expected to be in (x1, y1, x2, y2) format
40 | scores : Tensor[N]
41 | scores for each one of the boxes
42 | idxs : Tensor[N]
43 | indices of the categories for each one of the boxes.
/ssd/utils/nms.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import warnings
3 | 
4 | import torch
5 | import torchvision
6 | 
7 | # feature check instead of a string comparison on __version__: lexicographic
8 | # ordering breaks for versions such as '0.10.0', which sorts before '0.3.0'
9 | if hasattr(torchvision, 'ops') and hasattr(torchvision.ops, 'nms'):
10 |     _nms = torchvision.ops.nms
11 | else:
12 |     warnings.warn('No NMS is available. Please upgrade torchvision to 0.3.0+')
13 |     sys.exit(-1)
14 | 
15 | 
16 | def nms(boxes, scores, nms_thresh):
17 |     """Performs non-maximum suppression, run on GPU or CPU according to
18 |     the device of ``boxes``.
19 |     Args:
20 |         boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format, in absolute
21 |             or relative coordinates
22 |         scores (Tensor[N]): scores for each box
23 |         nms_thresh (float): IoU threshold
24 |     Returns:
25 |         indices of the boxes that were kept
26 |     """
27 |     keep = _nms(boxes, scores, nms_thresh)
28 |     return keep
29 | 
30 | 
31 | def batched_nms(boxes, scores, idxs, iou_threshold):
32 |     """
33 |     Performs non-maximum suppression in a batched fashion.
34 | 
35 |     Each index value corresponds to a category, and NMS
36 |     will not be applied between elements of different categories.
37 | 
38 |     Parameters
39 |     ----------
40 |     boxes : Tensor[N, 4]
41 |         boxes where NMS will be performed. They
42 |         are expected to be in (x1, y1, x2, y2) format
43 |     scores : Tensor[N]
44 |         scores for each one of the boxes
45 |     idxs : Tensor[N]
46 |         indices of the categories for each one of the boxes
47 |     iou_threshold : float
48 |         discards all overlapping boxes
49 |         with IoU > iou_threshold
50 | 
51 |     Returns
52 |     -------
53 |     keep : Tensor
54 |         int64 tensor with the indices of
55 |         the elements that have been kept by NMS, sorted
56 |         in decreasing order of scores
57 |     """
58 |     if boxes.numel() == 0:
59 |         return torch.empty((0,), dtype=torch.int64, device=boxes.device)
60 |     # strategy: in order to perform NMS independently per class,
61 |     # we add an offset to all the boxes. The offset depends
62 |     # only on the class idx, and is large enough so that boxes
63 |     # from different classes do not overlap
64 |     max_coordinate = boxes.max()
65 |     offsets = idxs.to(boxes) * (max_coordinate + 1)
66 |     boxes_for_nms = boxes + offsets[:, None]
67 |     keep = nms(boxes_for_nms, scores, iou_threshold)
68 |     return keep
69 | 
--------------------------------------------------------------------------------
/ssd/utils/registry.py:
--------------------------------------------------------------------------------
1 | def _register_generic(module_dict, module_name, module):
2 |     assert module_name not in module_dict
3 |     module_dict[module_name] = module
4 | 
5 | 
6 | class Registry(dict):
7 |     """
8 |     A helper class for managing module registration; it extends a dictionary
9 |     and provides a register function.
10 |     Eg. creating a registry:
11 |         some_registry = Registry({"default": default_module})
12 |     There are two ways of registering new modules:
13 |     1): the normal way is just calling the register function:
14 |         def foo():
15 |             ...
16 |         some_registry.register("foo_module", foo)
17 |     2): used as a decorator when declaring the module:
18 |         @some_registry.register("foo_module")
19 |         @some_registry.register("foo_module_nickname")
20 |         def foo():
21 |             ...
22 |     Accessing a module is just like using a dictionary, eg:
23 |         f = some_registry["foo_module"]
24 |     """
25 | 
26 |     def __init__(self, *args, **kwargs):
27 |         super(Registry, self).__init__(*args, **kwargs)
28 | 
29 |     def register(self, module_name, module=None):
30 |         # used as a function call
31 |         if module is not None:
32 |             _register_generic(self, module_name, module)
33 |             return
34 | 
35 |         # used as a decorator
36 |         def register_fn(fn):
37 |             _register_generic(self, module_name, fn)
38 |             return fn
39 | 
40 |         return register_fn
41 | 
--------------------------------------------------------------------------------
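# Usage sketch for ssd/utils/registry.py above (hypothetical names; presumably
# the same mechanism ssd/modeling/registry.py builds on, where backbones
# register themselves and are later looked up by the name given in the config).
from ssd.utils.registry import Registry

BACKBONES = Registry()

@BACKBONES.register('tiny')
def build_tiny_backbone(cfg=None):
    return 'tiny backbone'

BACKBONES.register('tiny_alias', build_tiny_backbone)  # plain-call form
assert BACKBONES['tiny']() == 'tiny backbone'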
parser.add_argument("--local_rank", type=int, default=0) 38 | parser.add_argument( 39 | "--ckpt", 40 | help="The path to the checkpoint for test, default is the latest checkpoint.", 41 | default=None, 42 | type=str, 43 | ) 44 | 45 | parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.") 46 | 47 | parser.add_argument( 48 | "opts", 49 | help="Modify config options using the command-line", 50 | default=None, 51 | nargs=argparse.REMAINDER, 52 | ) 53 | args = parser.parse_args() 54 | 55 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 56 | distributed = num_gpus > 1 57 | 58 | if torch.cuda.is_available(): 59 | # This flag allows you to enable the inbuilt cudnn auto-tuner to 60 | # find the best algorithm to use for your hardware. 61 | torch.backends.cudnn.benchmark = True 62 | if distributed: 63 | torch.cuda.set_device(args.local_rank) 64 | torch.distributed.init_process_group(backend="nccl", init_method="env://") 65 | synchronize() 66 | 67 | cfg.merge_from_file(args.config_file) 68 | cfg.merge_from_list(args.opts) 69 | cfg.freeze() 70 | 71 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) 72 | logger.info("Using {} GPUs".format(num_gpus)) 73 | logger.info(args) 74 | 75 | logger.info("Loaded configuration file {}".format(args.config_file)) 76 | with open(args.config_file, "r") as cf: 77 | config_str = "\n" + cf.read() 78 | logger.info(config_str) 79 | logger.info("Running with config:\n{}".format(cfg)) 80 | evaluation(cfg, ckpt=args.ckpt, distributed=distributed) 81 | 82 | 83 | if __name__ == '__main__': 84 | main() 85 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | from ssd.engine.inference import do_evaluation 9 | from ssd.config import cfg 10 | from ssd.data.build import make_data_loader 11 | from ssd.engine.trainer import do_train 12 | from ssd.modeling.detector import build_detection_model 13 | from ssd.solver.build import make_optimizer, make_lr_scheduler 14 | from ssd.utils import dist_util, mkdir 15 | from ssd.utils.checkpoint import CheckPointer 16 | from ssd.utils.dist_util import synchronize 17 | from ssd.utils.logger import setup_logger 18 | from ssd.utils.misc import str2bool 19 | 20 | 21 | def train(cfg, args): 22 | logger = logging.getLogger('SSD.trainer') 23 | model = build_detection_model(cfg) 24 | device = torch.device(cfg.MODEL.DEVICE) 25 | model.to(device) 26 | if args.distributed: 27 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank) 28 | 29 | lr = cfg.SOLVER.LR * args.num_gpus # scale by num gpus 30 | optimizer = make_optimizer(cfg, model, lr) 31 | 32 | milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS] 33 | scheduler = make_lr_scheduler(cfg, optimizer, milestones) 34 | 35 | arguments = {"iteration": 0} 36 | save_to_disk = dist_util.get_rank() == 0 37 | checkpointer = CheckPointer(model, optimizer, scheduler, cfg.OUTPUT_DIR, save_to_disk, logger) 38 | extra_checkpoint_data = checkpointer.load() 39 | arguments.update(extra_checkpoint_data) 40 | 41 | max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus 42 | train_loader = make_data_loader(cfg, is_train=True, distributed=args.distributed, max_iter=max_iter, 
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 | 
5 | import torch
6 | import torch.distributed as dist
7 | 
8 | from ssd.engine.inference import do_evaluation
9 | from ssd.config import cfg
10 | from ssd.data.build import make_data_loader
11 | from ssd.engine.trainer import do_train
12 | from ssd.modeling.detector import build_detection_model
13 | from ssd.solver.build import make_optimizer, make_lr_scheduler
14 | from ssd.utils import dist_util, mkdir
15 | from ssd.utils.checkpoint import CheckPointer
16 | from ssd.utils.dist_util import synchronize
17 | from ssd.utils.logger import setup_logger
18 | from ssd.utils.misc import str2bool
19 | 
20 | 
21 | def train(cfg, args):
22 |     logger = logging.getLogger('SSD.trainer')
23 |     model = build_detection_model(cfg)
24 |     device = torch.device(cfg.MODEL.DEVICE)
25 |     model.to(device)
26 |     if args.distributed:
27 |         model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank)
28 | 
29 |     lr = cfg.SOLVER.LR * args.num_gpus  # scale the learning rate by the number of GPUs
30 |     optimizer = make_optimizer(cfg, model, lr)
31 | 
32 |     milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS]
33 |     scheduler = make_lr_scheduler(cfg, optimizer, milestones)
34 | 
35 |     arguments = {"iteration": 0}
36 |     save_to_disk = dist_util.get_rank() == 0
37 |     checkpointer = CheckPointer(model, optimizer, scheduler, cfg.OUTPUT_DIR, save_to_disk, logger)
38 |     extra_checkpoint_data = checkpointer.load()
39 |     arguments.update(extra_checkpoint_data)
40 | 
41 |     max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus
42 |     train_loader = make_data_loader(cfg, is_train=True, distributed=args.distributed, max_iter=max_iter,
43 |                                     start_iter=arguments['iteration'])
44 | 
45 |     model = do_train(cfg, model, train_loader, optimizer, scheduler, checkpointer, device, arguments, args)
46 |     return model
47 | 
48 | 
49 | def main():
50 |     parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')
51 |     parser.add_argument(
52 |         "--config-file",
53 |         default="",
54 |         metavar="FILE",
55 |         help="path to config file",
56 |         type=str,
57 |     )
58 |     parser.add_argument("--local_rank", type=int, default=0)
59 |     parser.add_argument('--log_step', default=10, type=int, help='Print logs every log_step iterations')
60 |     parser.add_argument('--save_step', default=2500, type=int, help='Save a checkpoint every save_step iterations')
61 |     parser.add_argument('--eval_step', default=2500, type=int, help='Evaluate every eval_step iterations; disabled when eval_step < 0')
62 |     parser.add_argument('--use_tensorboard', default=True, type=str2bool)
63 |     parser.add_argument(
64 |         "--skip-test",
65 |         dest="skip_test",
66 |         help="Do not test the final model",
67 |         action="store_true",
68 |     )
69 |     parser.add_argument(
70 |         "opts",
71 |         help="Modify config options using the command-line",
72 |         default=None,
73 |         nargs=argparse.REMAINDER,
74 |     )
75 |     args = parser.parse_args()
76 |     num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
77 |     args.distributed = num_gpus > 1
78 |     args.num_gpus = num_gpus
79 | 
80 |     if torch.cuda.is_available():
81 |         # This flag allows you to enable the inbuilt cudnn auto-tuner to
82 |         # find the best algorithm to use for your hardware.
83 |         torch.backends.cudnn.benchmark = True
84 |     if args.distributed:
85 |         torch.cuda.set_device(args.local_rank)
86 |         torch.distributed.init_process_group(backend="nccl", init_method="env://")
87 |         synchronize()
88 | 
89 |     cfg.merge_from_file(args.config_file)
90 |     cfg.merge_from_list(args.opts)
91 |     cfg.freeze()
92 | 
93 |     if cfg.OUTPUT_DIR:
94 |         mkdir(cfg.OUTPUT_DIR)
95 | 
96 |     logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
97 |     logger.info("Using {} GPUs".format(num_gpus))
98 |     logger.info(args)
99 | 
100 |     logger.info("Loaded configuration file {}".format(args.config_file))
101 |     with open(args.config_file, "r") as cf:
102 |         config_str = "\n" + cf.read()
103 |         logger.info(config_str)
104 |     logger.info("Running with config:\n{}".format(cfg))
105 | 
106 |     model = train(cfg, args)
107 | 
108 |     if not args.skip_test:
109 |         logger.info('Start evaluating...')
110 |         torch.cuda.empty_cache()  # free cached GPU memory before evaluating the trained model
111 |         do_evaluation(cfg, model, distributed=args.distributed)
112 | 
113 | 
114 | if __name__ == '__main__':
115 |     main()
--------------------------------------------------------------------------------
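# Worked sketch of the multi-GPU scaling in train() above (hypothetical solver
# values): the learning rate is multiplied by the GPU count while the
# iteration-based schedule is divided by it, so the total number of images
# processed stays roughly constant.
num_gpus = 4
base_lr, max_iter, lr_steps = 1e-3, 120000, [80000, 100000]
lr = base_lr * num_gpus                         # 0.004
milestones = [s // num_gpus for s in lr_steps]  # [20000, 25000]
max_iter = max_iter // num_gpus                 # 30000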