├── .gitignore
├── DEVELOP_GUIDE.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── TROUBLESHOOTING.md
├── configs
│   ├── efficient_net_b3_ssd300_voc0712.yaml
│   ├── mobilenet_v2_ssd320_voc0712.yaml
│   ├── mobilenet_v3_ssd320_voc0712.yaml
│   ├── vgg_ssd300_coco_trainval35k.yaml
│   ├── vgg_ssd300_voc0712.yaml
│   ├── vgg_ssd512_coco_trainval35k.yaml
│   └── vgg_ssd512_voc0712.yaml
├── demo.py
├── demo
│   ├── 000342.jpg
│   ├── 000542.jpg
│   ├── 003123.jpg
│   ├── 004101.jpg
│   └── 008591.jpg
├── figures
│   ├── 004545.jpg
│   ├── losses.png
│   ├── lr.png
│   └── metrics.png
├── outputs
│   └── .gitignore
├── requirements.txt
├── setup.py
├── ssd
│   ├── __init__.py
│   ├── config
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   └── path_catlog.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── coco.py
│   │   │   ├── evaluation
│   │   │   │   ├── __init__.py
│   │   │   │   ├── coco
│   │   │   │   │   └── __init__.py
│   │   │   │   └── voc
│   │   │   │       ├── __init__.py
│   │   │   │       └── eval_detection_voc.py
│   │   │   └── voc.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── distributed.py
│   │   │   └── iteration_based_batch_sampler.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── target_transform.py
│   │       └── transforms.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   └── separable_conv.py
│   ├── modeling
│   │   ├── __init__.py
│   │   ├── anchors
│   │   │   ├── __init__.py
│   │   │   └── prior_box.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── efficient_net
│   │   │   │   ├── __init__.py
│   │   │   │   ├── efficient_net.py
│   │   │   │   └── utils.py
│   │   │   ├── mobilenet.py
│   │   │   ├── mobilenetv3.py
│   │   │   └── vgg.py
│   │   ├── box_head
│   │   │   ├── __init__.py
│   │   │   ├── box_head.py
│   │   │   ├── box_predictor.py
│   │   │   ├── inference.py
│   │   │   └── loss.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   └── ssd_detector.py
│   │   └── registry.py
│   ├── solver
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── lr_scheduler.py
│   ├── structures
│   │   ├── __init__.py
│   │   └── container.py
│   └── utils
│       ├── __init__.py
│       ├── box_utils.py
│       ├── checkpoint.py
│       ├── dist_util.py
│       ├── logger.py
│       ├── metric_logger.py
│       ├── misc.py
│       ├── model_zoo.py
│       ├── nms.py
│       └── registry.py
├── test.py
└── train.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # compilation and distribution
2 | __pycache__
3 | *.pyc
4 | *.so
5 | ext/build/
6 | ext/torch_extension.egg-info/
7 | dist/
8 | *.egg-info
9 |
10 | # pytorch/python/numpy formats
11 | *.pth
12 | *.pkl
13 | *.npy
14 |
15 | # ipython/jupyter notebooks
16 | *.ipynb
17 | **/.ipynb_checkpoints/
18 |
19 | # Editor temporaries
20 | *.swn
21 | *.swo
22 | *.swp
23 | *~
24 |
25 | # PyCharm editor settings
26 | .idea
27 |
28 | # macOS Finder metadata
29 | .DS_Store
30 |
--------------------------------------------------------------------------------
/DEVELOP_GUIDE.md:
--------------------------------------------------------------------------------
1 | # Develop Guide
2 |
3 | ## Custom Dataset
4 | Adding your custom dataset is simple and flexible.
5 | For example, create `ssd/data/datasets/my_dataset.py`:
6 | ```python
7 | import torch.utils.data
8 |
9 | from ssd.structures.container import Container
10 |
11 | class MyDataset(torch.utils.data.Dataset):
12 | def __init__(self, ..., transform=None, target_transform=None):
13 | # as you would do normally
14 | ...
15 | self.transform = transform
16 | self.target_transform = target_transform
17 |
18 | def __getitem__(self, index):
19 | # load the image as a PIL Image
20 | image = ...
21 |
22 |         # load the bounding boxes in x1, y1, x2, y2 order
23 |         boxes = ...  # np.float32 array of shape (N, 4)
24 |         # and the labels
25 |         labels = ...  # np.int64 array of shape (N,)
26 |
27 | if self.transform:
28 | image, boxes, labels = self.transform(image, boxes, labels)
29 | if self.target_transform:
30 | boxes, labels = self.target_transform(boxes, labels)
31 | targets = Container(
32 | boxes=boxes,
33 | labels=labels,
34 | )
35 | # return the image, the targets and the index in your dataset
36 | return image, targets, index
37 | ```
38 |
39 | in `ssd/data/datasets/__init__.py`:
40 | ```python
41 | from .my_dataset import MyDataset
42 |
43 | _DATASETS = {
44 | 'VOCDataset': VOCDataset,
45 | 'COCODataset': COCODataset,
46 | 'MyDataset': MyDataset,
47 | }
48 | ```
49 |
50 | in `ssd/config/path_catlog.py`:
51 | ```python
52 | DATASETS = {
53 | ...
54 | 'my_custom_dataset': {
55 | "arg1": "your/arg",
56 | "arg2": "your/arg",
57 | },
58 | ...
59 | }
60 |
61 | @staticmethod
62 | def get(name):
63 | ...
64 | if name == 'my_custom_dataset':
65 | attrs = DatasetCatalog.DATASETS[name]
66 | return dict(factory="MyDataset", args=attrs)
67 | ...
68 | ```
69 |
70 | in your `config.yaml`:
71 | ```yaml
72 | DATASETS:
73 | TRAIN: ("my_custom_dataset", )
74 | TEST: ("my_custom_test_dataset", )
75 | ```
76 |
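With those pieces in place, training works through the normal entry point; for example (the config file name here is hypothetical):
```bash
python train.py --config-file configs/my_dataset_config.yaml
```
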
77 | ### Test
78 | While the example above is enough for training, it's also easy to add your custom evaluation code
79 | in `ssd/data/datasets/evaluation/__init__.py`:
80 | ```python
81 | if isinstance(dataset, MyDataset):
82 | return my_own_evaluation(**args)
83 | ```
84 |
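For reference, here is a minimal sketch of what `my_own_evaluation` could look like. The name and the metric computation are placeholders; the contract, mirroring the built-in VOC/COCO evaluators, is to take the dataset, predictions, and output dir, and return a `dict(metrics=...)` (this also assumes your dataset exposes `get_img_info` like the built-in ones):
```python
import logging


def my_own_evaluation(dataset, predictions, output_dir, iteration=None):
    logger = logging.getLogger("SSD.inference")
    metrics = {}
    for i in range(len(dataset)):
        img_info = dataset.get_img_info(i)
        # detections come in network coordinates; map back to the original image size
        prediction = predictions[i].resize((img_info['width'], img_info['height'])).numpy()
        boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores']
        # ... compare (boxes, labels, scores) against your ground truth here ...
    logger.info('Evaluated {} images'.format(len(dataset)))
    return dict(metrics=metrics)
```
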
85 | ## Custom Backbone
86 |
87 | It is very simple to add your own backbone for SSD.
88 | For example, create `ssd/modeling/backbone/my_backbone.py`:
89 | ```python
90 | import torch.nn as nn
91 |
92 | from ssd.modeling import registry
93 | from ssd.utils.model_zoo import load_state_dict_from_url
94 |
95 |
96 | class MyBackbone(nn.Module):
97 | def __init__(self, cfg):
98 | super().__init__()
99 | ...
100 |
101 | def forward(self, x):
102 | features = []
103 |
104 | # forward your network
105 |
106 |         # append any feature maps you want to predict on.
107 |
108 | features.append(feature1)
109 | features.append(feature2)
110 | features.append(feature3)
111 | features.append(feature4)
112 |
113 | # return them as a tuple
114 | return tuple(features)
115 |
116 | @registry.BACKBONES.register('my_backbone')
117 | def my_backbone(cfg, pretrained=True):
118 | model = MyBackbone(cfg)
119 |     model_url = 'your_model_url'
120 | if pretrained:
121 | model.init_from_pretrain(load_state_dict_from_url(model_url))
122 | return model
123 | ```
124 | in `ssd/modeling/backbone/__init__.py`:
125 | ```python
126 | from .my_backbone import MyBackbone
127 | ```
128 |
129 | in your `config.yaml`:
130 | ```yaml
131 | MODEL:
132 | BACKBONE:
133 | NAME: 'my_backbone'
134 | OUT_CHANNELS: (-, -, -, -) # should match feature1 - feature4's out_channels in MyBackbone
135 | PRIORS:
136 | FEATURE_MAPS: [-, -, -, -] # feature1 - feature4's size
137 | STRIDES: [-, -, -, -] # feature1 - feature4's output stride
138 | MIN_SIZES: [21, 45, 99, 153] # your custom anchor settings
139 | MAX_SIZES: [45, 99, 153, 207]
140 | ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3]]
141 | BOXES_PER_LOCATION: [6, 6, 6, 6]
142 | ```
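
A quick way to fill in the `-` placeholders is a dummy forward pass through your backbone; a sketch, assuming a 300×300 input and that `MyBackbone` builds from `cfg` as above:
```python
import torch

model = MyBackbone(cfg)
with torch.no_grad():
    features = model(torch.zeros(1, 3, 300, 300))
for f in features:
    # each shape is (1, C, H, W): C goes into OUT_CHANNELS, H (= W) into
    # FEATURE_MAPS, and the matching stride is roughly IMAGE_SIZE / H
    print(tuple(f.shape))
```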
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 lufficc
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include configs *.yaml
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # High quality, fast, modular reference implementation of SSD in PyTorch 1.0
2 |
3 |
4 | This repository implements [SSD (Single Shot MultiBox Detector)](https://arxiv.org/abs/1512.02325). The implementation is heavily influenced by the projects [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch), [pytorch-ssd](https://github.com/qfgaohao/pytorch-ssd) and [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark). This repository aims to be the code base for research based on SSD.
5 |
6 |
7 |
8 | ![Example SSD output (vgg_ssd300_voc0712)](figures/004545.jpg)
9 |
10 |
11 | | Losses | Learning rate | Metrics |
12 | | :-----------: |:-------------:| :------:|
13 | | ![losses](figures/losses.png) | ![lr](figures/lr.png) | ![metrics](figures/metrics.png) |
14 |
15 | ## Highlights
16 |
17 | - **PyTorch 1.0**: supports PyTorch 1.0 or higher.
18 | - **Multi-GPU training and inference**: we use `DistributedDataParallel`, so you can train or test with an arbitrary number of GPUs, and the training schedule adapts accordingly.
19 | - **Modular**: add your own modules without pain. We abstract `backbone`, `Detector`, `BoxHead`, `BoxPredictor`, etc., so you can replace any component with your own code without changing the code base. For example, to use [EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch) as the backbone, just add `efficient_net.py` (already added), register it, and specify it in the config file. Done!
20 | - **CPU support for inference**: inference can run on the CPU.
21 | - **Smooth and enjoyable training procedure**: we save the state of the model, optimizer, scheduler, and training iteration, so you can stop training and resume exactly from the saved point without changing your training command (see the example after this list).
22 | - **Batched inference**: can perform inference using multiple images per batch per GPU.
23 | - **Evaluating during training**: evaluate your model every `eval_step` to check whether performance is improving.
24 | - **Metrics visualization**: visualize metric details in TensorBoard, such as AP, APl, APm, and APs for COCO, or mAP and per-category AP for the 20 VOC classes.
25 | - **Auto download**: load pre-trained weights from a URL and cache them.
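
For example, resuming after an interruption is just re-running the same command (a sketch, assuming the default checkpointing behavior that picks up the latest checkpoint in `OUTPUT_DIR`):
```bash
python train.py --config-file configs/vgg_ssd300_voc0712.yaml
# ... interrupted ...
python train.py --config-file configs/vgg_ssd300_voc0712.yaml  # resumes from the last saved iteration
```
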
26 | ## Installation
27 | ### Requirements
28 |
29 | 1. Python3
30 | 1. PyTorch 1.0 or higher
31 | 1. yacs
32 | 1. [Vizer](https://github.com/lufficc/Vizer)
33 | 1. GCC >= 4.9
34 | 1. OpenCV
35 |
36 |
37 | ### Step-by-step installation
38 |
39 | ```bash
40 | git clone https://github.com/lufficc/SSD.git
41 | cd SSD
42 | # Required packages: torch torchvision yacs tqdm opencv-python vizer
43 | pip install -r requirements.txt
44 |
45 | # Done! That's ALL! No BUILD! No bothering SETUP!
46 |
47 | # It's recommended to install the latest release of torch and torchvision.
48 | ```
49 |
50 |
51 | ## Train
52 |
53 | ### Setting Up Datasets
54 | #### Pascal VOC
55 |
56 | For the Pascal VOC dataset, make the folder structure look like this:
57 | ```
58 | VOC_ROOT
59 | |__ VOC2007
60 | |_ JPEGImages
61 | |_ Annotations
62 | |_ ImageSets
63 | |_ SegmentationClass
64 | |__ VOC2012
65 | |_ JPEGImages
66 | |_ Annotations
67 | |_ ImageSets
68 | |_ SegmentationClass
69 | |__ ...
70 | ```
71 | `VOC_ROOT` defaults to the `datasets` folder in the current project; you can create symlinks under `datasets`, or `export VOC_ROOT="/path/to/voc_root"`.
72 |
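For example (paths are illustrative):
```bash
# either symlink your data into the project's datasets folder:
ln -s /path/to/VOCdevkit/VOC2007 datasets/VOC2007
ln -s /path/to/VOCdevkit/VOC2012 datasets/VOC2012
# or point VOC_ROOT at it directly:
export VOC_ROOT="/path/to/VOCdevkit"
```
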
73 | #### COCO
74 |
75 | For the COCO dataset, make the folder structure look like this:
76 | ```
77 | COCO_ROOT
78 | |__ annotations
79 | |_ instances_valminusminival2014.json
80 | |_ instances_minival2014.json
81 | |_ instances_train2014.json
82 | |_ instances_val2014.json
83 | |_ ...
84 | |__ train2014
85 | |_ .jpg
86 | |_ ...
87 | |_ .jpg
88 | |__ val2014
89 | |_ .jpg
90 | |_ ...
91 | |_ .jpg
92 | |__ ...
93 | ```
94 | `COCO_ROOT` defaults to the `datasets` folder in the current project; you can create symlinks under `datasets`, or `export COCO_ROOT="/path/to/coco_root"`.
95 |
96 | ### Single GPU training
97 |
98 | ```bash
99 | # for example, train SSD300:
100 | python train.py --config-file configs/vgg_ssd300_voc0712.yaml
101 | ```
102 | ### Multi-GPU training
103 |
104 | ```bash
105 | # for example, train SSD300 with 4 GPUs:
106 | export NGPUS=4
107 | python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py --config-file configs/vgg_ssd300_voc0712.yaml SOLVER.WARMUP_FACTOR 0.03333 SOLVER.WARMUP_ITERS 1000
108 | ```
109 | The provided configuration files assume a single GPU. When you change the number of GPUs, hyper-parameters (lr, max_iter, ...) should be adjusted accordingly, following this paper: [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).
110 |
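For example, a sketch of applying the linear scaling rule from that paper by hand with 4 GPUs (all numbers illustrative, assuming the per-GPU batch size stays at 32 so the effective batch size grows 4×): 4× the learning rate, 1/4 the iterations, plus warmup:
```bash
export NGPUS=4
python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py \
    --config-file configs/vgg_ssd300_voc0712.yaml \
    SOLVER.LR 4e-3 SOLVER.MAX_ITER 30000 SOLVER.LR_STEPS "[20000, 25000]" \
    SOLVER.WARMUP_FACTOR 0.03333 SOLVER.WARMUP_ITERS 1000
```
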
111 | ## Evaluate
112 |
113 | ### Single GPU evaluating
114 |
115 | ```bash
116 | # for example, evaluate SSD300:
117 | python test.py --config-file configs/vgg_ssd300_voc0712.yaml
118 | ```
119 |
120 | ### Multi-GPU evaluating
121 |
122 | ```bash
123 | # for example, evaluate SSD300 with 4 GPUs:
124 | export NGPUS=4
125 | python -m torch.distributed.launch --nproc_per_node=$NGPUS test.py --config-file configs/vgg_ssd300_voc0712.yaml
126 | ```
127 |
128 | ## Demo
129 |
130 | Predicting images in a folder is simple:
131 | ```bash
132 | python demo.py --config-file configs/vgg_ssd300_voc0712.yaml --images_dir demo --ckpt https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_voc0712.pth
133 | ```
134 | Then it will download and cache `vgg_ssd300_voc0712.pth` automatically, and the predicted images with boxes, scores, and label names will be saved to the `demo/result` folder by default.
135 |
136 | You will see output similar to this:
137 | ```text
138 | (0001/0005) 004101.jpg: objects 01 | load 010ms | inference 033ms | FPS 31
139 | (0002/0005) 003123.jpg: objects 05 | load 009ms | inference 019ms | FPS 53
140 | (0003/0005) 000342.jpg: objects 02 | load 009ms | inference 019ms | FPS 51
141 | (0004/0005) 008591.jpg: objects 02 | load 008ms | inference 020ms | FPS 50
142 | (0005/0005) 000542.jpg: objects 01 | load 011ms | inference 019ms | FPS 53
143 | ```
144 |
145 | ## MODEL ZOO
146 | ### Original paper:
147 |
148 | | Model | VOC2007 test mAP | COCO test-dev2015 mAP |
149 | | :-----: | :----------: | :----------: |
150 | | SSD300* | 77.2 | 25.1 |
151 | | SSD512* | 79.8 | 28.8 |
152 |
153 | ### COCO:
154 |
155 | | Backbone | Input Size | box AP | Model Size | Download |
156 | | :------------: | :----------:| :--------------------------: | :--------: | :-------: |
157 | | VGG16 | 300 | 25.2 | 262MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_coco_trainval35k.pth) |
158 | | VGG16 | 512 | 29.0 | 275MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd512_coco_trainval35k.pth) |
159 |
160 | ### PASCAL VOC:
161 |
162 | | Backbone | Input Size | mAP | Model Size | Download |
163 | | :--------------: | :----------:| :--------------------------: | :--------: | :-------: |
164 | | VGG16 | 300 | 77.7 | 201MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_voc0712.pth) |
165 | | VGG16 | 512 | 80.7 | 207MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd512_voc0712.pth) |
166 | | Mobilenet V2 | 320 | 68.9 | 25.5MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/mobilenet_v2_ssd320_voc0712_v2.pth) |
167 | | Mobilenet V3 | 320 | 69.5 | 29.9MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/mobilenet_v3_ssd320_voc0712.pth) |
168 | | EfficientNet-B3 | 300 | 73.9 | 97.1MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/efficient_net_b3_ssd300_voc0712.pth) |
169 |
170 | ## Develop Guide
171 |
172 | If you want to add your custom components, please see [DEVELOP_GUIDE.md](DEVELOP_GUIDE.md) for more details.
173 |
174 |
175 | ## Troubleshooting
176 | If you have issues running or compiling this code, we have compiled a list of common issues in [TROUBLESHOOTING.md](TROUBLESHOOTING.md). If your issue is not present there, please feel free to open a new issue.
177 |
178 | ## Citations
179 | If you use this project in your research, please cite it:
180 | ```text
181 | @misc{lufficc2018ssd,
182 | author = {Congcong Li},
183 | title = {{High quality, fast, modular reference implementation of SSD in PyTorch}},
184 | year = {2018},
185 | howpublished = {\url{https://github.com/lufficc/SSD}}
186 | }
187 | ```
--------------------------------------------------------------------------------
/TROUBLESHOOTING.md:
--------------------------------------------------------------------------------
1 | # Troubleshooting
2 |
3 | ## RuntimeError: merge_sort: failed to synchronize: an illegal memory access was encountered
4 |
5 | This occurs in the multi-box loss: the sort fails because of NaN values. It may be a bug in `log_softmax`: https://github.com/pytorch/pytorch/issues/14335. Three ways to work around it (a combined example follows the list):
6 | 1. Use a smaller warmup factor, like 0.1 (append `SOLVER.WARMUP_FACTOR 0.1` to the end of your train command).
7 | 1. Use more warmup iterations, like 1000 (append `SOLVER.WARMUP_ITERS 1000` to the end of your train command).
8 | 1. [Use the workaround described in the forums by Jinserk Baik](https://discuss.pytorch.org/t/ctcloss-performance-of-pytorch-1-0-0/27524/29).
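
For example, the first two workarounds combined:
```bash
python train.py --config-file configs/vgg_ssd300_voc0712.yaml SOLVER.WARMUP_FACTOR 0.1 SOLVER.WARMUP_ITERS 1000
```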
--------------------------------------------------------------------------------
/configs/efficient_net_b3_ssd300_voc0712.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | NUM_CLASSES: 21
3 | BACKBONE:
4 | NAME: 'efficient_net-b3'
5 | OUT_CHANNELS: (48, 136, 384, 256, 256, 256)
6 | INPUT:
7 | IMAGE_SIZE: 300
8 | DATASETS:
9 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
10 | TEST: ("voc_2007_test", )
11 | SOLVER:
12 | MAX_ITER: 160000
13 | LR_STEPS: [105000, 135000]
14 | GAMMA: 0.1
15 | BATCH_SIZE: 24
16 | LR: 1e-3
17 |
18 | OUTPUT_DIR: 'outputs/efficient_net_b3_ssd300_voc0712'
--------------------------------------------------------------------------------
/configs/mobilenet_v2_ssd320_voc0712.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | NUM_CLASSES: 21
3 | BOX_HEAD:
4 | PREDICTOR: 'SSDLiteBoxPredictor'
5 | BACKBONE:
6 | NAME: 'mobilenet_v2'
7 | OUT_CHANNELS: (96, 1280, 512, 256, 256, 64)
8 | PRIORS:
9 | FEATURE_MAPS: [20, 10, 5, 3, 2, 1]
10 | STRIDES: [16, 32, 64, 107, 160, 320]
11 | MIN_SIZES: [60, 105, 150, 195, 240, 285]
12 | MAX_SIZES: [105, 150, 195, 240, 285, 330]
13 | ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
14 | BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6]
15 | INPUT:
16 | IMAGE_SIZE: 320
17 | DATASETS:
18 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
19 | TEST: ("voc_2007_test", )
20 | SOLVER:
21 | MAX_ITER: 120000
22 | LR_STEPS: [80000, 100000]
23 | GAMMA: 0.1
24 | BATCH_SIZE: 32
25 | LR: 1e-3
26 |
27 | OUTPUT_DIR: 'outputs/mobilenet_v2_ssd320_voc0712'
--------------------------------------------------------------------------------
/configs/mobilenet_v3_ssd320_voc0712.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | NUM_CLASSES: 21
3 | BOX_HEAD:
4 | PREDICTOR: 'SSDLiteBoxPredictor'
5 | BACKBONE:
6 | NAME: 'mobilenet_v3'
7 | OUT_CHANNELS: (112, 960, 512, 256, 256, 64)
8 | PRIORS:
9 | FEATURE_MAPS: [20, 10, 5, 3, 2, 1]
10 | STRIDES: [16, 32, 64, 107, 160, 320]
11 | MIN_SIZES: [60, 105, 150, 195, 240, 285]
12 | MAX_SIZES: [105, 150, 195, 240, 285, 330]
13 | ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
14 | BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6]
15 | INPUT:
16 | IMAGE_SIZE: 320
17 | DATASETS:
18 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
19 | TEST: ("voc_2007_test", )
20 | SOLVER:
21 | MAX_ITER: 120000
22 | LR_STEPS: [80000, 100000]
23 | GAMMA: 0.1
24 | BATCH_SIZE: 32
25 | LR: 1e-3
26 |
27 | OUTPUT_DIR: 'outputs/mobilenet_v3_ssd320_voc0712'
28 |
--------------------------------------------------------------------------------
/configs/vgg_ssd300_coco_trainval35k.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | NUM_CLASSES: 81
3 | PRIORS:
4 | FEATURE_MAPS: [38, 19, 10, 5, 3, 1]
5 | STRIDES: [8, 16, 32, 64, 100, 300]
6 | MIN_SIZES: [21, 45, 99, 153, 207, 261]
7 | MAX_SIZES: [45, 99, 153, 207, 261, 315]
8 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
9 | BOXES_PER_LOCATION: [4, 6, 6, 6, 4, 4]
10 | INPUT:
11 | IMAGE_SIZE: 300
12 | DATASETS:
13 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
14 | TEST: ("coco_2014_minival", )
15 | SOLVER:
16 | MAX_ITER: 400000
17 | LR_STEPS: [280000, 360000]
18 | GAMMA: 0.1
19 | BATCH_SIZE: 32
20 | LR: 1e-3
21 |
22 | OUTPUT_DIR: 'outputs/vgg_ssd300_coco_trainval35k'
--------------------------------------------------------------------------------
/configs/vgg_ssd300_voc0712.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | NUM_CLASSES: 21
3 | INPUT:
4 | IMAGE_SIZE: 300
5 | DATASETS:
6 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
7 | TEST: ("voc_2007_test", )
8 | SOLVER:
9 | MAX_ITER: 120000
10 | LR_STEPS: [80000, 100000]
11 | GAMMA: 0.1
12 | BATCH_SIZE: 32
13 | LR: 1e-3
14 |
15 | OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712'
--------------------------------------------------------------------------------
/configs/vgg_ssd512_coco_trainval35k.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | NUM_CLASSES: 81
3 | BACKBONE:
4 | OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256)
5 | PRIORS:
6 | FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1]
7 | STRIDES: [8, 16, 32, 64, 128, 256, 512]
8 | MIN_SIZES: [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8]
9 | MAX_SIZES: [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72]
10 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]
11 | BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4]
12 | INPUT:
13 | IMAGE_SIZE: 512
14 | DATASETS:
15 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
16 | TEST: ("coco_2014_minival", )
17 | SOLVER:
18 | MAX_ITER: 520000
19 | LR_STEPS: [360000, 480000]
20 | GAMMA: 0.1
21 | BATCH_SIZE: 24
22 | LR: 1e-3
23 |
24 | OUTPUT_DIR: 'outputs/vgg_ssd512_coco_trainval35k'
--------------------------------------------------------------------------------
/configs/vgg_ssd512_voc0712.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | NUM_CLASSES: 21
3 | BACKBONE:
4 | OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256)
5 | PRIORS:
6 | FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1]
7 | STRIDES: [8, 16, 32, 64, 128, 256, 512]
8 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
9 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.65]
10 | ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]
11 | BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4]
12 | INPUT:
13 | IMAGE_SIZE: 512
14 | DATASETS:
15 | TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
16 | TEST: ("voc_2007_test", )
17 | SOLVER:
18 | MAX_ITER: 120000
19 | LR_STEPS: [80000, 100000]
20 | GAMMA: 0.1
21 | BATCH_SIZE: 24
22 | LR: 1e-3
23 |
24 | OUTPUT_DIR: 'outputs/vgg_ssd512_voc0712'
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import time
4 |
5 | import torch
6 | from PIL import Image
7 | from vizer.draw import draw_boxes
8 |
9 | from ssd.config import cfg
10 | from ssd.data.datasets import COCODataset, VOCDataset
11 | import argparse
12 | import numpy as np
13 |
14 | from ssd.data.transforms import build_transforms
15 | from ssd.modeling.detector import build_detection_model
16 | from ssd.utils import mkdir
17 | from ssd.utils.checkpoint import CheckPointer
18 |
19 |
20 | @torch.no_grad()
21 | def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type):
22 | if dataset_type == "voc":
23 | class_names = VOCDataset.class_names
24 | elif dataset_type == 'coco':
25 | class_names = COCODataset.class_names
26 | else:
27 | raise NotImplementedError('Not implemented now.')
28 | device = torch.device(cfg.MODEL.DEVICE)
29 |
30 | model = build_detection_model(cfg)
31 | model = model.to(device)
32 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
33 | checkpointer.load(ckpt, use_latest=ckpt is None)
34 | weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
35 | print('Loaded weights from {}'.format(weight_file))
36 |
37 | image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
38 | mkdir(output_dir)
39 |
40 | cpu_device = torch.device("cpu")
41 | transforms = build_transforms(cfg, is_train=False)
42 | model.eval()
43 | for i, image_path in enumerate(image_paths):
44 | start = time.time()
45 | image_name = os.path.basename(image_path)
46 |
47 | image = np.array(Image.open(image_path).convert("RGB"))
48 | height, width = image.shape[:2]
49 | images = transforms(image)[0].unsqueeze(0)
50 | load_time = time.time() - start
51 |
52 | start = time.time()
53 | result = model(images.to(device))[0]
54 | inference_time = time.time() - start
55 |
56 | result = result.resize((width, height)).to(cpu_device).numpy()
57 | boxes, labels, scores = result['boxes'], result['labels'], result['scores']
58 |
59 | indices = scores > score_threshold
60 | boxes = boxes[indices]
61 | labels = labels[indices]
62 | scores = scores[indices]
63 | meters = ' | '.join(
64 | [
65 | 'objects {:02d}'.format(len(boxes)),
66 | 'load {:03d}ms'.format(round(load_time * 1000)),
67 | 'inference {:03d}ms'.format(round(inference_time * 1000)),
68 | 'FPS {}'.format(round(1.0 / inference_time))
69 | ]
70 | )
71 | print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters))
72 |
73 | drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8)
74 | Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name))
75 |
76 |
77 | def main():
78 | parser = argparse.ArgumentParser(description="SSD Demo.")
79 | parser.add_argument(
80 | "--config-file",
81 | default="",
82 | metavar="FILE",
83 | help="path to config file",
84 | type=str,
85 | )
86 | parser.add_argument("--ckpt", type=str, default=None, help="Trained weights.")
87 | parser.add_argument("--score_threshold", type=float, default=0.7)
88 |     parser.add_argument("--images_dir", default='demo', type=str, help='Specify an image dir to run prediction on.')
89 |     parser.add_argument("--output_dir", default='demo/result', type=str, help='Specify a dir to save predicted images in.')
90 |     parser.add_argument("--dataset_type", default="voc", type=str, help='Specify dataset type. Currently supports voc and coco.')
91 |
92 | parser.add_argument(
93 | "opts",
94 | help="Modify config options using the command-line",
95 | default=None,
96 | nargs=argparse.REMAINDER,
97 | )
98 | args = parser.parse_args()
99 | print(args)
100 |
101 | cfg.merge_from_file(args.config_file)
102 | cfg.merge_from_list(args.opts)
103 | cfg.freeze()
104 |
105 | print("Loaded configuration file {}".format(args.config_file))
106 | with open(args.config_file, "r") as cf:
107 | config_str = "\n" + cf.read()
108 | print(config_str)
109 | print("Running with config:\n{}".format(cfg))
110 |
111 | run_demo(cfg=cfg,
112 | ckpt=args.ckpt,
113 | score_threshold=args.score_threshold,
114 | images_dir=args.images_dir,
115 | output_dir=args.output_dir,
116 | dataset_type=args.dataset_type)
117 |
118 |
119 | if __name__ == '__main__':
120 | main()
121 |
--------------------------------------------------------------------------------
/demo/000342.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/000342.jpg
--------------------------------------------------------------------------------
/demo/000542.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/000542.jpg
--------------------------------------------------------------------------------
/demo/003123.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/003123.jpg
--------------------------------------------------------------------------------
/demo/004101.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/004101.jpg
--------------------------------------------------------------------------------
/demo/008591.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/demo/008591.jpg
--------------------------------------------------------------------------------
/figures/004545.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/004545.jpg
--------------------------------------------------------------------------------
/figures/losses.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/losses.png
--------------------------------------------------------------------------------
/figures/lr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/lr.png
--------------------------------------------------------------------------------
/figures/metrics.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/figures/metrics.png
--------------------------------------------------------------------------------
/outputs/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/outputs/.gitignore
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.3
2 | torchvision>=0.3
3 | yacs
4 | tqdm
5 | opencv-python
6 | vizer
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | with open("README.md", "r") as fh:
4 | long_description = fh.read()
5 |
6 | setup(
7 | name="torch-ssd",
8 | version="1.2.0",
9 | packages=find_packages(exclude=['ext']),
10 | install_requires=[
11 | "torch>=1.3",
12 | "torchvision>=0.3",
13 | "opencv-python~=4.0",
14 | "yacs==0.1.6",
15 | "Vizer~=0.1.4",
16 | ],
17 | author="Congcong Li",
18 | author_email="luffy.lcc@gmail.com",
19 | description="High quality, fast, modular reference implementation of SSD in PyTorch",
20 | long_description=long_description,
21 | long_description_content_type="text/markdown",
22 | url="https://github.com/lufficc/SSD",
23 | classifiers=[
24 | "Programming Language :: Python :: 3",
25 | "License :: OSI Approved :: MIT License",
26 | "Operating System :: OS Independent",
27 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
28 | ],
29 | license="MIT",
30 | python_requires=">=3.6",
31 | include_package_data=True,
32 | )
33 |
--------------------------------------------------------------------------------
/ssd/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/__init__.py
--------------------------------------------------------------------------------
/ssd/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .defaults import _C as cfg
2 |
--------------------------------------------------------------------------------
/ssd/config/defaults.py:
--------------------------------------------------------------------------------
1 | from yacs.config import CfgNode as CN
2 |
3 | _C = CN()
4 |
5 | _C.MODEL = CN()
6 | _C.MODEL.META_ARCHITECTURE = 'SSDDetector'
7 | _C.MODEL.DEVICE = "cuda"
8 | # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5)
9 | _C.MODEL.THRESHOLD = 0.5
10 | _C.MODEL.NUM_CLASSES = 21
11 | # Hard negative mining
12 | _C.MODEL.NEG_POS_RATIO = 3
13 | _C.MODEL.CENTER_VARIANCE = 0.1
14 | _C.MODEL.SIZE_VARIANCE = 0.2
15 |
16 | # ---------------------------------------------------------------------------- #
17 | # Backbone
18 | # ---------------------------------------------------------------------------- #
19 | _C.MODEL.BACKBONE = CN()
20 | _C.MODEL.BACKBONE.NAME = 'vgg'
21 | _C.MODEL.BACKBONE.OUT_CHANNELS = (512, 1024, 512, 256, 256, 256)
22 | _C.MODEL.BACKBONE.PRETRAINED = True
23 |
24 | # -----------------------------------------------------------------------------
25 | # PRIORS
26 | # -----------------------------------------------------------------------------
27 | _C.MODEL.PRIORS = CN()
28 | _C.MODEL.PRIORS.FEATURE_MAPS = [38, 19, 10, 5, 3, 1]
29 | _C.MODEL.PRIORS.STRIDES = [8, 16, 32, 64, 100, 300]
30 | _C.MODEL.PRIORS.MIN_SIZES = [30, 60, 111, 162, 213, 264]
31 | _C.MODEL.PRIORS.MAX_SIZES = [60, 111, 162, 213, 264, 315]
32 | _C.MODEL.PRIORS.ASPECT_RATIOS = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
33 | # A location with 1 extra aspect ratio gets 4 boxes; with 2 extra ratios, 6 boxes.
34 | # #boxes = 2 + #ratios * 2
35 | _C.MODEL.PRIORS.BOXES_PER_LOCATION = [4, 6, 6, 6, 4, 4] # number of boxes per feature map location
36 | _C.MODEL.PRIORS.CLIP = True
37 |
38 | # -----------------------------------------------------------------------------
39 | # Box Head
40 | # -----------------------------------------------------------------------------
41 | _C.MODEL.BOX_HEAD = CN()
42 | _C.MODEL.BOX_HEAD.NAME = 'SSDBoxHead'
43 | _C.MODEL.BOX_HEAD.PREDICTOR = 'SSDBoxPredictor'
44 |
45 | # -----------------------------------------------------------------------------
46 | # INPUT
47 | # -----------------------------------------------------------------------------
48 | _C.INPUT = CN()
49 | # Image size
50 | _C.INPUT.IMAGE_SIZE = 300
51 | # Values to be used for image normalization, RGB layout
52 | _C.INPUT.PIXEL_MEAN = [123, 117, 104]
53 |
54 | # -----------------------------------------------------------------------------
55 | # Dataset
56 | # -----------------------------------------------------------------------------
57 | _C.DATASETS = CN()
58 | # List of the dataset names for training, as present in paths_catalog.py
59 | _C.DATASETS.TRAIN = ()
60 | # List of the dataset names for testing, as present in paths_catalog.py
61 | _C.DATASETS.TEST = ()
62 |
63 | # -----------------------------------------------------------------------------
64 | # DataLoader
65 | # -----------------------------------------------------------------------------
66 | _C.DATA_LOADER = CN()
67 | # Number of data loading threads
68 | _C.DATA_LOADER.NUM_WORKERS = 8
69 | _C.DATA_LOADER.PIN_MEMORY = True
70 |
71 | # ---------------------------------------------------------------------------- #
72 | # Solver
73 | # ---------------------------------------------------------------------------- #
74 | _C.SOLVER = CN()
75 | # train configs
76 | _C.SOLVER.MAX_ITER = 120000
77 | _C.SOLVER.LR_STEPS = [80000, 100000]
78 | _C.SOLVER.GAMMA = 0.1
79 | _C.SOLVER.BATCH_SIZE = 32
80 | _C.SOLVER.LR = 1e-3
81 | _C.SOLVER.MOMENTUM = 0.9
82 | _C.SOLVER.WEIGHT_DECAY = 5e-4
83 | _C.SOLVER.WARMUP_FACTOR = 1.0 / 3
84 | _C.SOLVER.WARMUP_ITERS = 500
85 |
86 | # ---------------------------------------------------------------------------- #
87 | # Specific test options
88 | # ---------------------------------------------------------------------------- #
89 | _C.TEST = CN()
90 | _C.TEST.NMS_THRESHOLD = 0.45
91 | _C.TEST.CONFIDENCE_THRESHOLD = 0.01
92 | _C.TEST.MAX_PER_CLASS = -1
93 | _C.TEST.MAX_PER_IMAGE = 100
94 | _C.TEST.BATCH_SIZE = 10
95 |
96 | _C.OUTPUT_DIR = 'outputs'
97 |
--------------------------------------------------------------------------------
/ssd/config/path_catlog.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 |
4 | class DatasetCatalog:
5 | DATA_DIR = 'datasets'
6 | DATASETS = {
7 | 'voc_2007_train': {
8 | "data_dir": "VOC2007",
9 | "split": "train"
10 | },
11 | 'voc_2007_val': {
12 | "data_dir": "VOC2007",
13 | "split": "val"
14 | },
15 | 'voc_2007_trainval': {
16 | "data_dir": "VOC2007",
17 | "split": "trainval"
18 | },
19 | 'voc_2007_test': {
20 | "data_dir": "VOC2007",
21 | "split": "test"
22 | },
23 | 'voc_2012_train': {
24 | "data_dir": "VOC2012",
25 | "split": "train"
26 | },
27 | 'voc_2012_val': {
28 | "data_dir": "VOC2012",
29 | "split": "val"
30 | },
31 | 'voc_2012_trainval': {
32 | "data_dir": "VOC2012",
33 | "split": "trainval"
34 | },
35 | 'voc_2012_test': {
36 | "data_dir": "VOC2012",
37 | "split": "test"
38 | },
39 | 'coco_2014_valminusminival': {
40 | "data_dir": "val2014",
41 | "ann_file": "annotations/instances_valminusminival2014.json"
42 | },
43 | 'coco_2014_minival': {
44 | "data_dir": "val2014",
45 | "ann_file": "annotations/instances_minival2014.json"
46 | },
47 | 'coco_2014_train': {
48 | "data_dir": "train2014",
49 | "ann_file": "annotations/instances_train2014.json"
50 | },
51 | 'coco_2014_val': {
52 | "data_dir": "val2014",
53 | "ann_file": "annotations/instances_val2014.json"
54 | },
55 | }
56 |
57 | @staticmethod
58 | def get(name):
59 | if "voc" in name:
60 | voc_root = DatasetCatalog.DATA_DIR
61 | if 'VOC_ROOT' in os.environ:
62 | voc_root = os.environ['VOC_ROOT']
63 |
64 | attrs = DatasetCatalog.DATASETS[name]
65 | args = dict(
66 | data_dir=os.path.join(voc_root, attrs["data_dir"]),
67 | split=attrs["split"],
68 | )
69 | return dict(factory="VOCDataset", args=args)
70 | elif "coco" in name:
71 | coco_root = DatasetCatalog.DATA_DIR
72 | if 'COCO_ROOT' in os.environ:
73 | coco_root = os.environ['COCO_ROOT']
74 |
75 | attrs = DatasetCatalog.DATASETS[name]
76 | args = dict(
77 | data_dir=os.path.join(coco_root, attrs["data_dir"]),
78 | ann_file=os.path.join(coco_root, attrs["ann_file"]),
79 | )
80 | return dict(factory="COCODataset", args=args)
81 |
82 | raise RuntimeError("Dataset not available: {}".format(name))
83 |
--------------------------------------------------------------------------------
/ssd/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/data/__init__.py
--------------------------------------------------------------------------------
/ssd/data/build.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.utils.data import DataLoader
3 | from torch.utils.data.dataloader import default_collate
4 |
5 | from ssd.data import samplers
6 | from ssd.data.datasets import build_dataset
7 | from ssd.data.transforms import build_transforms, build_target_transform
8 | from ssd.structures.container import Container
9 |
10 |
11 | class BatchCollator:
12 | def __init__(self, is_train=True):
13 | self.is_train = is_train
14 |
15 | def __call__(self, batch):
16 | transposed_batch = list(zip(*batch))
17 | images = default_collate(transposed_batch[0])
18 | img_ids = default_collate(transposed_batch[2])
19 |
20 | if self.is_train:
21 | list_targets = transposed_batch[1]
22 | targets = Container(
23 | {key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]}
24 | )
25 | else:
26 | targets = None
27 | return images, targets, img_ids
28 |
29 |
30 | def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0):
31 | train_transform = build_transforms(cfg, is_train=is_train)
32 | target_transform = build_target_transform(cfg) if is_train else None
33 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST
34 | datasets = build_dataset(dataset_list, transform=train_transform, target_transform=target_transform, is_train=is_train)
35 |
36 | shuffle = is_train
37 |
38 | data_loaders = []
39 |
40 | for dataset in datasets:
41 | if distributed:
42 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle)
43 | elif shuffle:
44 | sampler = torch.utils.data.RandomSampler(dataset)
45 | else:
46 | sampler = torch.utils.data.sampler.SequentialSampler(dataset)
47 |
48 | batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE
49 | batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False)
50 | if max_iter is not None:
51 | batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=max_iter, start_iter=start_iter)
52 |
53 | data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS, batch_sampler=batch_sampler,
54 | pin_memory=cfg.DATA_LOADER.PIN_MEMORY, collate_fn=BatchCollator(is_train))
55 | data_loaders.append(data_loader)
56 |
57 | if is_train:
58 | # during training, a single (possibly concatenated) data_loader is returned
59 | assert len(data_loaders) == 1
60 | return data_loaders[0]
61 | return data_loaders
62 |
--------------------------------------------------------------------------------
/ssd/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import ConcatDataset
2 |
3 | from ssd.config.path_catlog import DatasetCatalog
4 | from .voc import VOCDataset
5 | from .coco import COCODataset
6 |
7 | _DATASETS = {
8 | 'VOCDataset': VOCDataset,
9 | 'COCODataset': COCODataset,
10 | }
11 |
12 |
13 | def build_dataset(dataset_list, transform=None, target_transform=None, is_train=True):
14 | assert len(dataset_list) > 0
15 | datasets = []
16 | for dataset_name in dataset_list:
17 | data = DatasetCatalog.get(dataset_name)
18 | args = data['args']
19 | factory = _DATASETS[data['factory']]
20 | args['transform'] = transform
21 | args['target_transform'] = target_transform
22 | if factory == VOCDataset:
23 | args['keep_difficult'] = not is_train
24 | elif factory == COCODataset:
25 | args['remove_empty'] = is_train
26 | dataset = factory(**args)
27 | datasets.append(dataset)
28 | # for testing, return a list of datasets
29 | if not is_train:
30 | return datasets
31 | dataset = datasets[0]
32 | if len(datasets) > 1:
33 | dataset = ConcatDataset(datasets)
34 |
35 | return [dataset]
36 |
--------------------------------------------------------------------------------
/ssd/data/datasets/coco.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch.utils.data
3 | import numpy as np
4 | from PIL import Image
5 |
6 | from ssd.structures.container import Container
7 |
8 |
9 | class COCODataset(torch.utils.data.Dataset):
10 | class_names = ('__background__',
11 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
12 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
13 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
14 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
15 | 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
16 | 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
17 | 'kite', 'baseball bat', 'baseball glove', 'skateboard',
18 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
19 | 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
20 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
21 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
22 | 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
23 | 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
24 | 'refrigerator', 'book', 'clock', 'vase', 'scissors',
25 | 'teddy bear', 'hair drier', 'toothbrush')
26 |
27 | def __init__(self, data_dir, ann_file, transform=None, target_transform=None, remove_empty=False):
28 | from pycocotools.coco import COCO
29 | self.coco = COCO(ann_file)
30 | self.data_dir = data_dir
31 | self.transform = transform
32 | self.target_transform = target_transform
33 | self.remove_empty = remove_empty
34 | if self.remove_empty:
35 | # when training, images without annotations are removed.
36 | self.ids = list(self.coco.imgToAnns.keys())
37 | else:
38 | # when testing, all images used.
39 | self.ids = list(self.coco.imgs.keys())
40 | coco_categories = sorted(self.coco.getCatIds())
41 | self.coco_id_to_contiguous_id = {coco_id: i + 1 for i, coco_id in enumerate(coco_categories)}
42 | self.contiguous_id_to_coco_id = {v: k for k, v in self.coco_id_to_contiguous_id.items()}
43 |
44 | def __getitem__(self, index):
45 | image_id = self.ids[index]
46 | boxes, labels = self._get_annotation(image_id)
47 | image = self._read_image(image_id)
48 | if self.transform:
49 | image, boxes, labels = self.transform(image, boxes, labels)
50 | if self.target_transform:
51 | boxes, labels = self.target_transform(boxes, labels)
52 | targets = Container(
53 | boxes=boxes,
54 | labels=labels,
55 | )
56 | return image, targets, index
57 |
58 | def get_annotation(self, index):
59 | image_id = self.ids[index]
60 | return image_id, self._get_annotation(image_id)
61 |
62 | def __len__(self):
63 | return len(self.ids)
64 |
65 | def _get_annotation(self, image_id):
66 | ann_ids = self.coco.getAnnIds(imgIds=image_id)
67 | ann = self.coco.loadAnns(ann_ids)
68 | # filter crowd annotations
69 | ann = [obj for obj in ann if obj["iscrowd"] == 0]
70 | boxes = np.array([self._xywh2xyxy(obj["bbox"]) for obj in ann], np.float32).reshape((-1, 4))
71 | labels = np.array([self.coco_id_to_contiguous_id[obj["category_id"]] for obj in ann], np.int64).reshape((-1,))
72 | # remove invalid boxes
73 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
74 | boxes = boxes[keep]
75 | labels = labels[keep]
76 | return boxes, labels
77 |
78 | def _xywh2xyxy(self, box):
79 | x1, y1, w, h = box
80 | return [x1, y1, x1 + w, y1 + h]
81 |
82 | def get_img_info(self, index):
83 | image_id = self.ids[index]
84 | img_data = self.coco.imgs[image_id]
85 | return img_data
86 |
87 | def _read_image(self, image_id):
88 | file_name = self.coco.loadImgs(image_id)[0]['file_name']
89 | image_file = os.path.join(self.data_dir, file_name)
90 | image = Image.open(image_file).convert("RGB")
91 | image = np.array(image)
92 | return image
93 |
--------------------------------------------------------------------------------
/ssd/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from ssd.data.datasets import VOCDataset, COCODataset
2 | from .coco import coco_evaluation
3 | from .voc import voc_evaluation
4 |
5 |
6 | def evaluate(dataset, predictions, output_dir, **kwargs):
7 | """evaluate dataset using different methods based on dataset type.
8 | Args:
9 | dataset: Dataset object
10 | predictions(list[(boxes, labels, scores)]): Each item in the list represents the
11 | prediction results for one image. And the index should match the dataset index.
12 | output_dir: output folder, to save evaluation files or results.
13 | Returns:
14 | evaluation result
15 | """
16 | args = dict(
17 | dataset=dataset, predictions=predictions, output_dir=output_dir, **kwargs,
18 | )
19 | if isinstance(dataset, VOCDataset):
20 | return voc_evaluation(**args)
21 | elif isinstance(dataset, COCODataset):
22 | return coco_evaluation(**args)
23 | else:
24 | raise NotImplementedError
25 |
--------------------------------------------------------------------------------
/ssd/data/datasets/evaluation/coco/__init__.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import os
4 | from datetime import datetime
5 |
6 |
7 | def coco_evaluation(dataset, predictions, output_dir, iteration=None):
8 | coco_results = []
9 | for i, prediction in enumerate(predictions):
10 | img_info = dataset.get_img_info(i)
11 | prediction = prediction.resize((img_info['width'], img_info['height'])).numpy()
12 | boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores']
13 |
14 | image_id, annotation = dataset.get_annotation(i)
15 | class_mapper = dataset.contiguous_id_to_coco_id
16 | if labels.shape[0] == 0:
17 | continue
18 |
19 | boxes = boxes.tolist()
20 | labels = labels.tolist()
21 | scores = scores.tolist()
22 | coco_results.extend(
23 | [
24 | {
25 | "image_id": image_id,
26 | "category_id": class_mapper[labels[k]],
27 | "bbox": [box[0], box[1], box[2] - box[0], box[3] - box[1]], # to xywh format
28 | "score": scores[k],
29 | }
30 | for k, box in enumerate(boxes)
31 | ]
32 | )
33 | iou_type = 'bbox'
34 | json_result_file = os.path.join(output_dir, iou_type + ".json")
35 | logger = logging.getLogger("SSD.inference")
36 | logger.info('Writing results to {}...'.format(json_result_file))
37 | with open(json_result_file, "w") as f:
38 | json.dump(coco_results, f)
39 | from pycocotools.cocoeval import COCOeval
40 | coco_gt = dataset.coco
41 | coco_dt = coco_gt.loadRes(json_result_file)
42 | coco_eval = COCOeval(coco_gt, coco_dt, iou_type)
43 | coco_eval.evaluate()
44 | coco_eval.accumulate()
45 | coco_eval.summarize()
46 |
47 | result_strings = []
48 | keys = ["AP", "AP50", "AP75", "APs", "APm", "APl"]
49 | metrics = {}
50 | for i, key in enumerate(keys):
51 | metrics[key] = coco_eval.stats[i]
52 | logger.info('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3)))
53 | result_strings.append('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3)))
54 |
55 | if iteration is not None:
56 | result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration))
57 | else:
58 | result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
59 | with open(result_path, "w") as f:
60 | f.write('\n'.join(result_strings))
61 |
62 | return dict(metrics=metrics)
63 |
--------------------------------------------------------------------------------
/ssd/data/datasets/evaluation/voc/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | from datetime import datetime
4 |
5 | import numpy as np
6 |
7 | from .eval_detection_voc import eval_detection_voc
8 |
9 |
10 | def voc_evaluation(dataset, predictions, output_dir, iteration=None):
11 | class_names = dataset.class_names
12 |
13 | pred_boxes_list = []
14 | pred_labels_list = []
15 | pred_scores_list = []
16 | gt_boxes_list = []
17 | gt_labels_list = []
18 | gt_difficults = []
19 |
20 | for i in range(len(dataset)):
21 | image_id, annotation = dataset.get_annotation(i)
22 | gt_boxes, gt_labels, is_difficult = annotation
23 | gt_boxes_list.append(gt_boxes)
24 | gt_labels_list.append(gt_labels)
25 |         gt_difficults.append(is_difficult.astype(bool))  # np.bool is deprecated
26 |
27 | img_info = dataset.get_img_info(i)
28 | prediction = predictions[i]
29 | prediction = prediction.resize((img_info['width'], img_info['height'])).numpy()
30 | boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores']
31 |
32 | pred_boxes_list.append(boxes)
33 | pred_labels_list.append(labels)
34 | pred_scores_list.append(scores)
35 | result = eval_detection_voc(pred_bboxes=pred_boxes_list,
36 | pred_labels=pred_labels_list,
37 | pred_scores=pred_scores_list,
38 | gt_bboxes=gt_boxes_list,
39 | gt_labels=gt_labels_list,
40 | gt_difficults=gt_difficults,
41 | iou_thresh=0.5,
42 | use_07_metric=True)
43 | logger = logging.getLogger("SSD.inference")
44 | result_str = "mAP: {:.4f}\n".format(result["map"])
45 | metrics = {'mAP': result["map"]}
46 | for i, ap in enumerate(result["ap"]):
47 | if i == 0: # skip background
48 | continue
49 | metrics[class_names[i]] = ap
50 | result_str += "{:<16}: {:.4f}\n".format(class_names[i], ap)
51 | logger.info(result_str)
52 |
53 | if iteration is not None:
54 | result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration))
55 | else:
56 | result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S')))
57 | with open(result_path, "w") as f:
58 | f.write(result_str)
59 |
60 | return dict(metrics=metrics)
61 |
--------------------------------------------------------------------------------
/ssd/data/datasets/evaluation/voc/eval_detection_voc.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | from collections import defaultdict
4 | import itertools
5 | import numpy as np
6 | import six
7 |
8 |
9 | def bbox_iou(bbox_a, bbox_b):
10 | """Calculate the Intersection of Unions (IoUs) between bounding boxes.
11 | IoU is calculated as a ratio of area of the intersection
12 | and area of the union.
13 | This function accepts both :obj:`numpy.ndarray` and :obj:`cupy.ndarray` as
14 | inputs. Please note that both :obj:`bbox_a` and :obj:`bbox_b` need to be
15 | same type.
16 | The output is same type as the type of the inputs.
17 | Args:
18 | bbox_a (array): An array whose shape is :math:`(N, 4)`.
19 | :math:`N` is the number of bounding boxes.
20 | The dtype should be :obj:`numpy.float32`.
21 | bbox_b (array): An array similar to :obj:`bbox_a`,
22 | whose shape is :math:`(K, 4)`.
23 | The dtype should be :obj:`numpy.float32`.
24 | Returns:
25 | array:
26 | An array whose shape is :math:`(N, K)`. \
27 | An element at index :math:`(n, k)` contains IoUs between \
28 | :math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \
29 | box in :obj:`bbox_b`.
30 | """
31 | if bbox_a.shape[1] != 4 or bbox_b.shape[1] != 4:
32 | raise IndexError
33 |
34 | # top left
35 | tl = np.maximum(bbox_a[:, None, :2], bbox_b[:, :2])
36 | # bottom right
37 | br = np.minimum(bbox_a[:, None, 2:], bbox_b[:, 2:])
38 |
39 | area_i = np.prod(br - tl, axis=2) * (tl < br).all(axis=2)
40 | area_a = np.prod(bbox_a[:, 2:] - bbox_a[:, :2], axis=1)
41 | area_b = np.prod(bbox_b[:, 2:] - bbox_b[:, :2], axis=1)
42 | return area_i / (area_a[:, None] + area_b - area_i)
43 |
44 |
45 | def eval_detection_voc(
46 | pred_bboxes,
47 | pred_labels,
48 | pred_scores,
49 | gt_bboxes,
50 | gt_labels,
51 | gt_difficults=None,
52 | iou_thresh=0.5,
53 | use_07_metric=False):
54 | """Calculate average precisions based on evaluation code of PASCAL VOC.
55 |
56 | This function evaluates predicted bounding boxes obtained from a dataset
57 | which has :math:`N` images by using average precision for each class.
58 | The code is based on the evaluation code used in PASCAL VOC Challenge.
59 |
60 | Args:
61 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
62 | sets of bounding boxes.
63 | Its index corresponds to an index for the base dataset.
64 | Each element of :obj:`pred_bboxes` is a set of coordinates
65 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
66 | where :math:`R` corresponds
67 | to the number of bounding boxes, which may vary among boxes.
68 | The second axis corresponds to
69 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
70 | pred_labels (iterable of numpy.ndarray): An iterable of labels.
71 | Similar to :obj:`pred_bboxes`, its index corresponds to an
72 | index for the base dataset. Its length is :math:`N`.
73 | pred_scores (iterable of numpy.ndarray): An iterable of confidence
74 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
75 | its index corresponds to an index for the base dataset.
76 | Its length is :math:`N`.
77 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
78 | bounding boxes
79 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
80 | bounding box whose shape is :math:`(R, 4)`. Note that the number of
81 | bounding boxes in each image does not need to be same as the number
82 | of corresponding predicted boxes.
83 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth
84 | labels which are organized similarly to :obj:`gt_bboxes`.
85 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean
86 | arrays which is organized similarly to :obj:`gt_bboxes`.
87 | This tells whether the
88 | corresponding ground truth bounding box is difficult or not.
89 | By default, this is :obj:`None`. In that case, this function
90 | considers all bounding boxes to be not difficult.
91 | iou_thresh (float): A prediction is correct if its Intersection over
92 | Union with the ground truth is above this value.
93 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
94 | for calculating average precision. The default value is
95 | :obj:`False`.
96 |
97 | Returns:
98 | dict:
99 |
100 | The keys, value-types and the description of the values are listed
101 | below.
102 |
103 | * **ap** (*numpy.ndarray*): An array of average precisions. \
104 | The :math:`l`-th value corresponds to the average precision \
105 | for class :math:`l`. If class :math:`l` does not exist in \
106 | either :obj:`pred_labels` or :obj:`gt_labels`, the corresponding \
107 | value is set to :obj:`numpy.nan`.
108 | * **map** (*float*): The average of Average Precisions over classes.
109 |
110 | """
111 |
112 | prec, rec = calc_detection_voc_prec_rec(pred_bboxes,
113 | pred_labels,
114 | pred_scores,
115 | gt_bboxes,
116 | gt_labels,
117 | gt_difficults,
118 | iou_thresh=iou_thresh)
119 |
120 | ap = calc_detection_voc_ap(prec, rec, use_07_metric=use_07_metric)
121 |
122 | return {'ap': ap, 'map': np.nanmean(ap)}
123 |
124 |
125 | def calc_detection_voc_prec_rec(
126 | pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels,
127 | gt_difficults=None,
128 | iou_thresh=0.5):
129 | """Calculate precision and recall based on evaluation code of PASCAL VOC.
130 |
131 | This function calculates precision and recall of
132 | predicted bounding boxes obtained from a dataset which has :math:`N`
133 | images.
134 | The code is based on the evaluation code used in PASCAL VOC Challenge.
135 |
136 | Args:
137 | pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N`
138 | sets of bounding boxes.
139 | Its index corresponds to an index for the base dataset.
140 | Each element of :obj:`pred_bboxes` is a set of coordinates
141 | of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
142 | where :math:`R` corresponds
143 | to the number of bounding boxes, which may vary among images.
144 | The second axis corresponds to
145 | :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box.
146 | pred_labels (iterable of numpy.ndarray): An iterable of labels.
147 | Similar to :obj:`pred_bboxes`, its index corresponds to an
148 | index for the base dataset. Its length is :math:`N`.
149 | pred_scores (iterable of numpy.ndarray): An iterable of confidence
150 | scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`,
151 | its index corresponds to an index for the base dataset.
152 | Its length is :math:`N`.
153 | gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth
154 | bounding boxes
155 | whose length is :math:`N`. An element of :obj:`gt_bboxes` is a
156 | bounding box whose shape is :math:`(R, 4)`. Note that the number of
157 | bounding boxes in each image does not need to be the same as the number
158 | of corresponding predicted boxes.
159 | gt_labels (iterable of numpy.ndarray): An iterable of ground truth
160 | labels which are organized similarly to :obj:`gt_bboxes`.
161 | gt_difficults (iterable of numpy.ndarray): An iterable of boolean
162 | arrays which is organized similarly to :obj:`gt_bboxes`.
163 | This tells whether the
164 | corresponding ground truth bounding box is difficult or not.
165 | By default, this is :obj:`None`. In that case, this function
166 | considers all bounding boxes to be not difficult.
167 | iou_thresh (float): A prediction is correct if its Intersection over
168 | Union with the ground truth is above this value.
169 |
170 | Returns:
171 | tuple of two lists:
172 | This function returns two lists: :obj:`prec` and :obj:`rec`.
173 |
174 | * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \
175 | for class :math:`l`. If class :math:`l` does not exist in \
176 | either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \
177 | set to :obj:`None`.
178 | * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \
179 | for class :math:`l`. If class :math:`l` that is not marked as \
180 | difficult does not exist in \
181 | :obj:`gt_labels`, :obj:`rec[l]` is \
182 | set to :obj:`None`.
183 |
184 | """
185 |
186 | pred_bboxes = iter(pred_bboxes)
187 | pred_labels = iter(pred_labels)
188 | pred_scores = iter(pred_scores)
189 | gt_bboxes = iter(gt_bboxes)
190 | gt_labels = iter(gt_labels)
191 | if gt_difficults is None:
192 | gt_difficults = itertools.repeat(None)
193 | else:
194 | gt_difficults = iter(gt_difficults)
195 |
196 | n_pos = defaultdict(int)
197 | score = defaultdict(list)
198 | match = defaultdict(list)
199 |
200 | for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \
201 | six.moves.zip(
202 | pred_bboxes, pred_labels, pred_scores,
203 | gt_bboxes, gt_labels, gt_difficults):
204 |
205 | if gt_difficult is None:
206 | gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool)
207 |
208 | for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
209 | pred_mask_l = pred_label == l
210 | pred_bbox_l = pred_bbox[pred_mask_l]
211 | pred_score_l = pred_score[pred_mask_l]
212 | # sort by score
213 | order = pred_score_l.argsort()[::-1]
214 | pred_bbox_l = pred_bbox_l[order]
215 | pred_score_l = pred_score_l[order]
216 |
217 | gt_mask_l = gt_label == l
218 | gt_bbox_l = gt_bbox[gt_mask_l]
219 | gt_difficult_l = gt_difficult[gt_mask_l]
220 |
221 | n_pos[l] += np.logical_not(gt_difficult_l).sum()
222 | score[l].extend(pred_score_l)
223 |
224 | if len(pred_bbox_l) == 0:
225 | continue
226 | if len(gt_bbox_l) == 0:
227 | match[l].extend((0,) * pred_bbox_l.shape[0])
228 | continue
229 |
230 | # VOC evaluation follows integer typed bounding boxes.
231 | pred_bbox_l = pred_bbox_l.copy()
232 | pred_bbox_l[:, 2:] += 1
233 | gt_bbox_l = gt_bbox_l.copy()
234 | gt_bbox_l[:, 2:] += 1
235 |
236 | iou = bbox_iou(pred_bbox_l, gt_bbox_l)
237 | gt_index = iou.argmax(axis=1)
238 | # set -1 if there is no matching ground truth
239 | gt_index[iou.max(axis=1) < iou_thresh] = -1
240 | del iou
241 |
242 | selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
243 | for gt_idx in gt_index:
244 | if gt_idx >= 0:
245 | if gt_difficult_l[gt_idx]:
246 | match[l].append(-1)
247 | else:
248 | if not selec[gt_idx]:
249 | match[l].append(1)
250 | else:
251 | match[l].append(0)
252 | selec[gt_idx] = True
253 | else:
254 | match[l].append(0)
255 |
256 | for iter_ in (
257 | pred_bboxes, pred_labels, pred_scores,
258 | gt_bboxes, gt_labels, gt_difficults):
259 | if next(iter_, None) is not None:
260 | raise ValueError('Lengths of the input iterables must be the same.')
261 |
262 | n_fg_class = max(n_pos.keys()) + 1
263 | prec = [None] * n_fg_class
264 | rec = [None] * n_fg_class
265 |
266 | for l in n_pos.keys():
267 | score_l = np.array(score[l])
268 | match_l = np.array(match[l], dtype=np.int8)
269 |
270 | order = score_l.argsort()[::-1]
271 | match_l = match_l[order]
272 |
273 | tp = np.cumsum(match_l == 1)
274 | fp = np.cumsum(match_l == 0)
275 |
276 | # If an element of fp + tp is 0,
277 | # the corresponding element of prec[l] is nan.
278 | prec[l] = tp / (fp + tp)
279 | # If n_pos[l] is 0, rec[l] is None.
280 | if n_pos[l] > 0:
281 | rec[l] = tp / n_pos[l]
282 |
283 | return prec, rec
284 |
285 |
286 | def calc_detection_voc_ap(prec, rec, use_07_metric=False):
287 | """Calculate average precisions based on evaluation code of PASCAL VOC.
288 |
289 | This function calculates average precisions
290 | from given precisions and recalls.
291 | The code is based on the evaluation code used in PASCAL VOC Challenge.
292 |
293 | Args:
294 | prec (list of numpy.array): A list of arrays.
295 | :obj:`prec[l]` indicates precision for class :math:`l`.
296 | If :obj:`prec[l]` is :obj:`None`, this function returns
297 | :obj:`numpy.nan` for class :math:`l`.
298 | rec (list of numpy.array): A list of arrays.
299 | :obj:`rec[l]` indicates recall for class :math:`l`.
300 | If :obj:`rec[l]` is :obj:`None`, this function returns
301 | :obj:`numpy.nan` for class :math:`l`.
302 | use_07_metric (bool): Whether to use PASCAL VOC 2007 evaluation metric
303 | for calculating average precision. The default value is
304 | :obj:`False`.
305 |
306 | Returns:
307 | ~numpy.ndarray:
308 | This function returns an array of average precisions.
309 | The :math:`l`-th value corresponds to the average precision
310 | for class :math:`l`. If :obj:`prec[l]` or :obj:`rec[l]` is
311 | :obj:`None`, the corresponding value is set to :obj:`numpy.nan`.
312 |
313 | """
314 |
315 | n_fg_class = len(prec)
316 | ap = np.empty(n_fg_class)
317 | for l in six.moves.range(n_fg_class):
318 | if prec[l] is None or rec[l] is None:
319 | ap[l] = np.nan
320 | continue
321 |
322 | if use_07_metric:
323 | # 11 point metric
324 | ap[l] = 0
325 | for t in np.arange(0., 1.1, 0.1):
326 | if np.sum(rec[l] >= t) == 0:
327 | p = 0
328 | else:
329 | p = np.max(np.nan_to_num(prec[l])[rec[l] >= t])
330 | ap[l] += p / 11
331 | else:
332 | # correct AP calculation
333 | # first append sentinel values at the end
334 | mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
335 | mrec = np.concatenate(([0], rec[l], [1]))
336 |
337 | mpre = np.maximum.accumulate(mpre[::-1])[::-1]
338 |
339 | # to calculate area under PR curve, look for points
340 | # where X axis (recall) changes value
341 | i = np.where(mrec[1:] != mrec[:-1])[0]
342 |
343 | # and sum (\Delta recall) * prec
344 | ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
345 |
346 | return ap
347 |
--------------------------------------------------------------------------------
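
A minimal sketch of driving `eval_detection_voc` on toy data, assuming the repository root is on `PYTHONPATH`; the boxes, labels and scores below are illustrative, not taken from any dataset:

```python
import numpy as np

from ssd.data.datasets.evaluation.voc.eval_detection_voc import eval_detection_voc

# One image with two class-1 predictions and one class-1 ground-truth box.
pred_bboxes = [np.array([[10, 10, 50, 50], [60, 60, 90, 90]], dtype=np.float32)]
pred_labels = [np.array([1, 1])]
pred_scores = [np.array([0.9, 0.4])]
gt_bboxes = [np.array([[12, 12, 48, 48]], dtype=np.float32)]
gt_labels = [np.array([1])]

result = eval_detection_voc(pred_bboxes, pred_labels, pred_scores,
                            gt_bboxes, gt_labels, iou_thresh=0.5)
print(result['ap'])   # per-class APs; class 0 (background) is NaN here
print(result['map'])  # mean over the non-NaN APs -> 1.0 for this toy case
```
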
/ssd/data/datasets/voc.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch.utils.data
3 | import numpy as np
4 | import xml.etree.ElementTree as ET
5 | from PIL import Image
6 |
7 | from ssd.structures.container import Container
8 |
9 |
10 | class VOCDataset(torch.utils.data.Dataset):
11 | class_names = ('__background__',
12 | 'aeroplane', 'bicycle', 'bird', 'boat',
13 | 'bottle', 'bus', 'car', 'cat', 'chair',
14 | 'cow', 'diningtable', 'dog', 'horse',
15 | 'motorbike', 'person', 'pottedplant',
16 | 'sheep', 'sofa', 'train', 'tvmonitor')
17 |
18 | def __init__(self, data_dir, split, transform=None, target_transform=None, keep_difficult=False):
19 | """Dataset for VOC data.
20 | Args:
21 | data_dir: the root of the VOC2007 or VOC2012 dataset, which contains the following sub-directories:
22 | Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
23 | """
24 | self.data_dir = data_dir
25 | self.split = split
26 | self.transform = transform
27 | self.target_transform = target_transform
28 | image_sets_file = os.path.join(self.data_dir, "ImageSets", "Main", "%s.txt" % self.split)
29 | self.ids = VOCDataset._read_image_ids(image_sets_file)
30 | self.keep_difficult = keep_difficult
31 |
32 | self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}
33 |
34 | def __getitem__(self, index):
35 | image_id = self.ids[index]
36 | boxes, labels, is_difficult = self._get_annotation(image_id)
37 | if not self.keep_difficult:
38 | boxes = boxes[is_difficult == 0]
39 | labels = labels[is_difficult == 0]
40 | image = self._read_image(image_id)
41 | if self.transform:
42 | image, boxes, labels = self.transform(image, boxes, labels)
43 | if self.target_transform:
44 | boxes, labels = self.target_transform(boxes, labels)
45 | targets = Container(
46 | boxes=boxes,
47 | labels=labels,
48 | )
49 | return image, targets, index
50 |
51 | def get_annotation(self, index):
52 | image_id = self.ids[index]
53 | return image_id, self._get_annotation(image_id)
54 |
55 | def __len__(self):
56 | return len(self.ids)
57 |
58 | @staticmethod
59 | def _read_image_ids(image_sets_file):
60 | ids = []
61 | with open(image_sets_file) as f:
62 | for line in f:
63 | ids.append(line.rstrip())
64 | return ids
65 |
66 | def _get_annotation(self, image_id):
67 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % image_id)
68 | objects = ET.parse(annotation_file).findall("object")
69 | boxes = []
70 | labels = []
71 | is_difficult = []
72 | for obj in objects:
73 | class_name = obj.find('name').text.lower().strip()
74 | bbox = obj.find('bndbox')
75 | # VOC annotations follow the Matlab convention, in which indexes start from 1, so subtract 1 to make them 0-based
76 | x1 = float(bbox.find('xmin').text) - 1
77 | y1 = float(bbox.find('ymin').text) - 1
78 | x2 = float(bbox.find('xmax').text) - 1
79 | y2 = float(bbox.find('ymax').text) - 1
80 | boxes.append([x1, y1, x2, y2])
81 | labels.append(self.class_dict[class_name])
82 | is_difficult_str = obj.find('difficult').text
83 | is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)
84 |
85 | return (np.array(boxes, dtype=np.float32),
86 | np.array(labels, dtype=np.int64),
87 | np.array(is_difficult, dtype=np.uint8))
88 |
89 | def get_img_info(self, index):
90 | img_id = self.ids[index]
91 | annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % img_id)
92 | anno = ET.parse(annotation_file).getroot()
93 | size = anno.find("size")
94 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text)))
95 | return {"height": im_info[0], "width": im_info[1]}
96 |
97 | def _read_image(self, image_id):
98 | image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id)
99 | image = Image.open(image_file).convert("RGB")
100 | image = np.array(image)
101 | return image
102 |
--------------------------------------------------------------------------------
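
A short usage sketch for `VOCDataset`; the `datasets/VOC2007` path is a placeholder and must contain the `Annotations`, `ImageSets/Main` and `JPEGImages` sub-directories described in the docstring:

```python
from ssd.data.datasets.voc import VOCDataset

dataset = VOCDataset(data_dir='datasets/VOC2007', split='train')
print(len(dataset), 'images')

# get_annotation returns the raw, untransformed annotation for one image.
image_id, (boxes, labels, is_difficult) = dataset.get_annotation(0)
print(image_id, boxes.shape, labels, is_difficult)
```
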
/ssd/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
2 | from .distributed import DistributedSampler
3 |
4 | __all__ = ['IterationBasedBatchSampler', 'DistributedSampler']
5 |
--------------------------------------------------------------------------------
/ssd/data/samplers/distributed.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # Code is copy-pasted exactly as in torch.utils.data.distributed.
3 | # FIXME remove this once c10d fixes the bug it has
4 | import math
5 | import torch
6 | import torch.distributed as dist
7 | from torch.utils.data.sampler import Sampler
8 |
9 |
10 | class DistributedSampler(Sampler):
11 | """Sampler that restricts data loading to a subset of the dataset.
12 | It is especially useful in conjunction with
13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
14 | process can pass a DistributedSampler instance as a DataLoader sampler,
15 | and load a subset of the original dataset that is exclusive to it.
16 | .. note::
17 | Dataset is assumed to be of constant size.
18 | Arguments:
19 | dataset: Dataset used for sampling.
20 | num_replicas (optional): Number of processes participating in
21 | distributed training.
22 | rank (optional): Rank of the current process within num_replicas.
23 | """
24 |
25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
26 | if num_replicas is None:
27 | if not dist.is_available():
28 | raise RuntimeError("Requires distributed package to be available")
29 | num_replicas = dist.get_world_size()
30 | if rank is None:
31 | if not dist.is_available():
32 | raise RuntimeError("Requires distributed package to be available")
33 | rank = dist.get_rank()
34 | self.dataset = dataset
35 | self.num_replicas = num_replicas
36 | self.rank = rank
37 | self.epoch = 0
38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
39 | self.total_size = self.num_samples * self.num_replicas
40 | self.shuffle = shuffle
41 |
42 | def __iter__(self):
43 | if self.shuffle:
44 | # deterministically shuffle based on epoch
45 | g = torch.Generator()
46 | g.manual_seed(self.epoch)
47 | indices = torch.randperm(len(self.dataset), generator=g).tolist()
48 | else:
49 | indices = torch.arange(len(self.dataset)).tolist()
50 |
51 | # add extra samples to make it evenly divisible
52 | indices += indices[: (self.total_size - len(indices))]
53 | assert len(indices) == self.total_size
54 |
55 | # subsample
56 | offset = self.num_samples * self.rank
57 | indices = indices[offset: offset + self.num_samples]
58 | assert len(indices) == self.num_samples
59 |
60 | return iter(indices)
61 |
62 | def __len__(self):
63 | return self.num_samples
64 |
65 | def set_epoch(self, epoch):
66 | self.epoch = epoch
67 |
--------------------------------------------------------------------------------
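
Passing `num_replicas` and `rank` explicitly lets the sampler run without an initialised process group, which makes its partitioning easy to inspect; a small sketch of how 10 samples are padded and split across 3 simulated ranks:

```python
from ssd.data.samplers.distributed import DistributedSampler

dataset = list(range(10))  # only len(dataset) is used by the sampler

# Each rank gets ceil(10 / 3) = 4 indices; the index list is padded by
# repeating its head so that 3 * 4 = 12 indices are covered in total.
for rank in range(3):
    sampler = DistributedSampler(dataset, num_replicas=3, rank=rank, shuffle=False)
    print(rank, list(sampler))
# 0 [0, 1, 2, 3]
# 1 [4, 5, 6, 7]
# 2 [8, 9, 0, 1]
```
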
/ssd/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data.sampler import BatchSampler
2 |
3 |
4 | class IterationBasedBatchSampler(BatchSampler):
5 | """
6 | Wraps a BatchSampler, re-sampling from it until
7 | a specified number of iterations have been sampled
8 | """
9 |
10 | def __init__(self, batch_sampler, num_iterations, start_iter=0):
11 | self.batch_sampler = batch_sampler
12 | self.num_iterations = num_iterations
13 | self.start_iter = start_iter
14 |
15 | def __iter__(self):
16 | iteration = self.start_iter
17 | while iteration <= self.num_iterations:
18 | # if the underlying sampler has a set_epoch method, like
19 | # DistributedSampler, used for making each process see
20 | # a different split of the dataset, then set it
21 | if hasattr(self.batch_sampler.sampler, "set_epoch"):
22 | self.batch_sampler.sampler.set_epoch(iteration)
23 | for batch in self.batch_sampler:
24 | iteration += 1
25 | if iteration > self.num_iterations:
26 | break
27 | yield batch
28 |
29 | def __len__(self):
30 | return self.num_iterations
31 |
--------------------------------------------------------------------------------
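
A quick sketch of the wrapper in action: a 3-batch sampler is cycled until exactly `num_iterations` batches have been produced:

```python
from torch.utils.data.sampler import BatchSampler, SequentialSampler

from ssd.data.samplers.iteration_based_batch_sampler import IterationBasedBatchSampler

base = BatchSampler(SequentialSampler(range(6)), batch_size=2, drop_last=False)
sampler = IterationBasedBatchSampler(base, num_iterations=5)
for batch in sampler:
    print(batch)
# [0, 1], [2, 3], [4, 5], [0, 1], [2, 3] -- 5 batches, wrapping around once
```
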
/ssd/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from ssd.modeling.anchors.prior_box import PriorBox
2 | from .target_transform import SSDTargetTransform
3 | from .transforms import *
4 |
5 |
6 | def build_transforms(cfg, is_train=True):
7 | if is_train:
8 | transform = [
9 | ConvertFromInts(),
10 | PhotometricDistort(),
11 | Expand(cfg.INPUT.PIXEL_MEAN),
12 | RandomSampleCrop(),
13 | RandomMirror(),
14 | ToPercentCoords(),
15 | Resize(cfg.INPUT.IMAGE_SIZE),
16 | SubtractMeans(cfg.INPUT.PIXEL_MEAN),
17 | ToTensor(),
18 | ]
19 | else:
20 | transform = [
21 | Resize(cfg.INPUT.IMAGE_SIZE),
22 | SubtractMeans(cfg.INPUT.PIXEL_MEAN),
23 | ToTensor()
24 | ]
25 | transform = Compose(transform)
26 | return transform
27 |
28 |
29 | def build_target_transform(cfg):
30 | transform = SSDTargetTransform(PriorBox(cfg)(),
31 | cfg.MODEL.CENTER_VARIANCE,
32 | cfg.MODEL.SIZE_VARIANCE,
33 | cfg.MODEL.THRESHOLD)
34 | return transform
35 |
--------------------------------------------------------------------------------
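
A minimal sketch of `build_transforms` with a stand-in config; `SimpleNamespace` stands in for the yacs config node here, and the pixel-mean values are the usual VGG ones, used purely for illustration:

```python
import numpy as np
from types import SimpleNamespace

from ssd.data.transforms import build_transforms

cfg = SimpleNamespace(INPUT=SimpleNamespace(IMAGE_SIZE=300, PIXEL_MEAN=[123, 117, 104]))

transform = build_transforms(cfg, is_train=False)  # Resize -> SubtractMeans -> ToTensor
image = np.random.randint(0, 255, (480, 640, 3)).astype(np.float32)
tensor, _, _ = transform(image, None, None)
print(tensor.shape)  # torch.Size([3, 300, 300])
```
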
/ssd/data/transforms/target_transform.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from ssd.utils import box_utils
5 |
6 |
7 | class SSDTargetTransform:
8 | def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold):
9 | self.center_form_priors = center_form_priors
10 | self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors)
11 | self.center_variance = center_variance
12 | self.size_variance = size_variance
13 | self.iou_threshold = iou_threshold
14 |
15 | def __call__(self, gt_boxes, gt_labels):
16 | if type(gt_boxes) is np.ndarray:
17 | gt_boxes = torch.from_numpy(gt_boxes)
18 | if type(gt_labels) is np.ndarray:
19 | gt_labels = torch.from_numpy(gt_labels)
20 | boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
21 | self.corner_form_priors, self.iou_threshold)
22 | boxes = box_utils.corner_form_to_center_form(boxes)
23 | locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
24 |
25 | return locations, labels
26 |
--------------------------------------------------------------------------------
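
For reference, the encoding that `SSDTargetTransform` delegates to `box_utils.convert_boxes_to_locations` is the standard SSD offset parameterisation: center offsets are scaled by the prior size and `center_variance`, log size ratios by `size_variance`. A self-contained numeric sketch of that formula (not the repo's own implementation, which lives in `ssd/utils/box_utils.py`):

```python
import numpy as np

def encode(box, prior, center_variance=0.1, size_variance=0.2):
    """Standard SSD encoding of a center-form (cx, cy, w, h) box against a prior."""
    cx, cy, w, h = box
    pcx, pcy, pw, ph = prior
    return np.array([
        (cx - pcx) / pw / center_variance,
        (cy - pcy) / ph / center_variance,
        np.log(w / pw) / size_variance,
        np.log(h / ph) / size_variance,
    ])

# A box slightly right of, and 10% wider than, its prior:
print(encode(box=(0.52, 0.50, 0.22, 0.20), prior=(0.50, 0.50, 0.20, 0.20)))
# -> [1.0, 0.0, 0.4766, 0.0]
```
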
/ssd/data/transforms/transforms.py:
--------------------------------------------------------------------------------
1 | # from https://github.com/amdegroot/ssd.pytorch
2 |
3 |
4 | import torch
5 | from torchvision import transforms
6 | import cv2
7 | import numpy as np
8 | import types
9 | from numpy import random
10 |
11 |
12 | def intersect(box_a, box_b):
13 | max_xy = np.minimum(box_a[:, 2:], box_b[2:])
14 | min_xy = np.maximum(box_a[:, :2], box_b[:2])
15 | inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
16 | return inter[:, 0] * inter[:, 1]
17 |
18 |
19 | def jaccard_numpy(box_a, box_b):
20 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
21 | is simply the intersection over union of two boxes.
22 | E.g.:
23 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
24 | Args:
25 | box_a: Multiple bounding boxes, Shape: [num_boxes,4]
26 | box_b: Single bounding box, Shape: [4]
27 | Return:
28 | jaccard overlap: Shape: [box_a.shape[0]]
29 | """
30 | inter = intersect(box_a, box_b)
31 | area_a = ((box_a[:, 2] - box_a[:, 0]) *
32 | (box_a[:, 3] - box_a[:, 1])) # shape [A]
33 | area_b = ((box_b[2] - box_b[0]) *
34 | (box_b[3] - box_b[1])) # scalar
35 | union = area_a + area_b - inter
36 | return inter / union # [A,B]
37 |
38 |
39 | def remove_empty_boxes(boxes, labels):
40 | """Removes bounding boxes of W or H equal to 0 and its labels
41 |
42 | Args:
43 | boxes (ndarray): NP Array with bounding boxes as lines
44 | * BBOX[x1, y1, x2, y2]
45 | labels (ndarray): Labels corresponding to the boxes
46 |
47 | Returns:
48 | ndarray: Valid bounding boxes
49 | ndarray: Corresponding labels
50 | """
51 | del_boxes = []
52 | for idx, box in enumerate(boxes):
53 | if box[0] == box[2] or box[1] == box[3]:
54 | del_boxes.append(idx)
55 |
56 | return np.delete(boxes, del_boxes, 0), np.delete(labels, del_boxes)
57 |
58 |
59 | class Compose(object):
60 | """Composes several augmentations together.
61 | Args:
62 | transforms (List[Transform]): list of transforms to compose.
63 | Example:
64 | >>> Compose([
65 | >>> ConvertFromInts(),
66 | >>> ToTensor(),
67 | >>> ])
68 | """
69 |
70 | def __init__(self, transforms):
71 | self.transforms = transforms
72 |
73 | def __call__(self, img, boxes=None, labels=None):
74 | for t in self.transforms:
75 | img, boxes, labels = t(img, boxes, labels)
76 | if boxes is not None:
77 | boxes, labels = remove_empty_boxes(boxes, labels)
78 | return img, boxes, labels
79 |
80 |
81 | class Lambda(object):
82 | """Applies a lambda as a transform."""
83 |
84 | def __init__(self, lambd):
85 | assert isinstance(lambd, types.LambdaType)
86 | self.lambd = lambd
87 |
88 | def __call__(self, img, boxes=None, labels=None):
89 | return self.lambd(img, boxes, labels)
90 |
91 |
92 | class ConvertFromInts(object):
93 | def __call__(self, image, boxes=None, labels=None):
94 | return image.astype(np.float32), boxes, labels
95 |
96 |
97 | class SubtractMeans(object):
98 | def __init__(self, mean):
99 | self.mean = np.array(mean, dtype=np.float32)
100 |
101 | def __call__(self, image, boxes=None, labels=None):
102 | image = image.astype(np.float32)
103 | image -= self.mean
104 | return image.astype(np.float32), boxes, labels
105 |
106 |
107 | class ToAbsoluteCoords(object):
108 | def __call__(self, image, boxes=None, labels=None):
109 | height, width, channels = image.shape
110 | boxes[:, 0] *= width
111 | boxes[:, 2] *= width
112 | boxes[:, 1] *= height
113 | boxes[:, 3] *= height
114 |
115 | return image, boxes, labels
116 |
117 |
118 | class ToPercentCoords(object):
119 | def __call__(self, image, boxes=None, labels=None):
120 | height, width, channels = image.shape
121 | boxes[:, 0] /= width
122 | boxes[:, 2] /= width
123 | boxes[:, 1] /= height
124 | boxes[:, 3] /= height
125 |
126 | return image, boxes, labels
127 |
128 |
129 | class Resize(object):
130 | def __init__(self, size=300):
131 | self.size = size
132 |
133 | def __call__(self, image, boxes=None, labels=None):
134 | image = cv2.resize(image, (self.size,
135 | self.size))
136 | return image, boxes, labels
137 |
138 |
139 | class RandomSaturation(object):
140 | def __init__(self, lower=0.5, upper=1.5):
141 | self.lower = lower
142 | self.upper = upper
143 | assert self.upper >= self.lower, "saturation upper must be >= lower."
144 | assert self.lower >= 0, "saturation lower must be non-negative."
145 |
146 | def __call__(self, image, boxes=None, labels=None):
147 | if random.randint(2):
148 | image[:, :, 1] *= random.uniform(self.lower, self.upper)
149 |
150 | return image, boxes, labels
151 |
152 |
153 | class RandomHue(object):
154 | def __init__(self, delta=18.0):
155 | assert delta >= 0.0 and delta <= 360.0
156 | self.delta = delta
157 |
158 | def __call__(self, image, boxes=None, labels=None):
159 | if random.randint(2):
160 | image[:, :, 0] += random.uniform(-self.delta, self.delta)
161 | image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
162 | image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
163 | return image, boxes, labels
164 |
165 |
166 | class RandomLightingNoise(object):
167 | def __init__(self):
168 | self.perms = ((0, 1, 2), (0, 2, 1),
169 | (1, 0, 2), (1, 2, 0),
170 | (2, 0, 1), (2, 1, 0))
171 |
172 | def __call__(self, image, boxes=None, labels=None):
173 | if random.randint(2):
174 | swap = self.perms[random.randint(len(self.perms))]
175 | shuffle = SwapChannels(swap) # shuffle channels
176 | image = shuffle(image)
177 | return image, boxes, labels
178 |
179 |
180 | class ConvertColor(object):
181 | def __init__(self, current, transform):
182 | self.transform = transform
183 | self.current = current
184 |
185 | def __call__(self, image, boxes=None, labels=None):
186 | if self.current == 'BGR' and self.transform == 'HSV':
187 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
188 | elif self.current == 'RGB' and self.transform == 'HSV':
189 | image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
190 | elif self.current == 'BGR' and self.transform == 'RGB':
191 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
192 | elif self.current == 'HSV' and self.transform == 'BGR':
193 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
194 | elif self.current == 'HSV' and self.transform == "RGB":
195 | image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
196 | else:
197 | raise NotImplementedError
198 | return image, boxes, labels
199 |
200 |
201 | class RandomContrast(object):
202 | def __init__(self, lower=0.5, upper=1.5):
203 | self.lower = lower
204 | self.upper = upper
205 | assert self.upper >= self.lower, "contrast upper must be >= lower."
206 | assert self.lower >= 0, "contrast lower must be non-negative."
207 |
208 | # expects float image
209 | def __call__(self, image, boxes=None, labels=None):
210 | if random.randint(2):
211 | alpha = random.uniform(self.lower, self.upper)
212 | image *= alpha
213 | return image, boxes, labels
214 |
215 |
216 | class RandomBrightness(object):
217 | def __init__(self, delta=32):
218 | assert delta >= 0.0
219 | assert delta <= 255.0
220 | self.delta = delta
221 |
222 | def __call__(self, image, boxes=None, labels=None):
223 | if random.randint(2):
224 | delta = random.uniform(-self.delta, self.delta)
225 | image += delta
226 | return image, boxes, labels
227 |
228 |
229 | class ToCV2Image(object):
230 | def __call__(self, tensor, boxes=None, labels=None):
231 | return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels
232 |
233 |
234 | class ToTensor(object):
235 | def __call__(self, cvimage, boxes=None, labels=None):
236 | return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels
237 |
238 |
239 | class RandomSampleCrop(object):
240 | """Crop
241 | Arguments:
242 | img (Image): the image being input during training
243 | boxes (Tensor): the original bounding boxes in pt form
244 | labels (Tensor): the class labels for each bbox
245 | mode (float tuple): the min and max jaccard overlaps
246 | Return:
247 | (img, boxes, classes)
248 | img (Image): the cropped image
249 | boxes (Tensor): the adjusted bounding boxes in pt form
250 | labels (Tensor): the class labels for each bbox
251 | """
252 |
253 | def __init__(self):
254 | self.sample_options = (
255 | # using entire original input image
256 | None,
257 | # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.7,.9
258 | (0.1, None),
259 | (0.3, None),
260 | (0.7, None),
261 | (0.9, None),
262 | # randomly sample a patch
263 | (None, None),
264 | )
265 |
266 | def __call__(self, image, boxes=None, labels=None):
267 | # guard against no boxes
268 | if boxes is not None and boxes.shape[0] == 0:
269 | return image, boxes, labels
270 | height, width, _ = image.shape
271 | while True:
272 | # randomly choose a mode
273 | mode = self.sample_options[random.randint(0, len(self.sample_options))]
274 | if mode is None:
275 | return image, boxes, labels
276 |
277 | min_iou, max_iou = mode
278 | if min_iou is None:
279 | min_iou = float('-inf')
280 | if max_iou is None:
281 | max_iou = float('inf')
282 |
283 | # max trials (50)
284 | for _ in range(50):
285 | current_image = image
286 |
287 | w = random.uniform(0.3 * width, width)
288 | h = random.uniform(0.3 * height, height)
289 |
290 | # aspect ratio constraint b/t .5 & 2
291 | if h / w < 0.5 or h / w > 2:
292 | continue
293 |
294 | left = random.uniform(0, width - w)
295 | top = random.uniform(0, height - h)
296 |
297 | # convert to integer rect x1,y1,x2,y2
298 | rect = np.array([int(left), int(top), int(left + w), int(top + h)])
299 |
300 | # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
301 | overlap = jaccard_numpy(boxes, rect)
302 |
303 | # is min and max overlap constraint satisfied? if not try again
304 | if overlap.max() < min_iou or overlap.min() > max_iou:
305 | continue
306 |
307 | # cut the crop from the image
308 | current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
309 | :]
310 |
311 | # keep overlap with gt box IF center in sampled patch
312 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
313 |
314 | # mask in gt boxes whose centers lie to the right of and below the crop's top-left corner
315 | m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
316 |
317 | # mask in gt boxes whose centers lie to the left of and above the crop's bottom-right corner
318 | m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
319 |
320 | # keep boxes where both m1 and m2 hold, i.e. the center lies inside the crop
321 | mask = m1 * m2
322 |
323 | # have any valid boxes? try again if not
324 | if not mask.any():
325 | continue
326 |
327 | # take only matching gt boxes
328 | current_boxes = boxes[mask, :].copy()
329 |
330 | # take only matching gt labels
331 | current_labels = labels[mask]
332 |
333 | # should we use the box left and top corner or the crop's
334 | current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
335 | rect[:2])
336 | # adjust to crop (by subtracting crop's left,top)
337 | current_boxes[:, :2] -= rect[:2]
338 |
339 | current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
340 | rect[2:])
341 | # adjust to crop (by subtracting crop's left,top)
342 | current_boxes[:, 2:] -= rect[:2]
343 |
344 | return current_image, current_boxes, current_labels
345 |
346 |
347 | class Expand(object):
348 | def __init__(self, mean):
349 | self.mean = mean
350 |
351 | def __call__(self, image, boxes, labels):
352 | if random.randint(2):
353 | return image, boxes, labels
354 |
355 | height, width, depth = image.shape
356 | ratio = random.uniform(1, 4)
357 | left = random.uniform(0, width * ratio - width)
358 | top = random.uniform(0, height * ratio - height)
359 |
360 | expand_image = np.zeros(
361 | (int(height * ratio), int(width * ratio), depth),
362 | dtype=image.dtype)
363 | expand_image[:, :, :] = self.mean
364 | expand_image[int(top):int(top + height),
365 | int(left):int(left + width)] = image
366 | image = expand_image
367 |
368 | boxes = boxes.copy()
369 | boxes[:, :2] += (int(left), int(top))
370 | boxes[:, 2:] += (int(left), int(top))
371 |
372 | return image, boxes, labels
373 |
374 |
375 | class RandomMirror(object):
376 | def __call__(self, image, boxes, classes):
377 | _, width, _ = image.shape
378 | if random.randint(2):
379 | image = image[:, ::-1]
380 | boxes = boxes.copy()
381 | boxes[:, 0::2] = width - boxes[:, 2::-2]
382 | return image, boxes, classes
383 |
384 |
385 | class SwapChannels(object):
386 | """Transforms a tensorized image by swapping the channels in the order
387 | specified in the swap tuple.
388 | Args:
389 | swaps (int triple): final order of channels
390 | eg: (2, 1, 0)
391 | """
392 |
393 | def __init__(self, swaps):
394 | self.swaps = swaps
395 |
396 | def __call__(self, image):
397 | """
398 | Args:
399 | image (Tensor): image tensor to be transformed
400 | Return:
401 | a tensor with channels swapped according to swap
402 | """
403 | # if torch.is_tensor(image):
404 | # image = image.data.cpu().numpy()
405 | # else:
406 | # image = np.array(image)
407 | image = image[:, :, self.swaps]
408 | return image
409 |
410 |
411 | class PhotometricDistort(object):
412 | def __init__(self):
413 | self.pd = [
414 | RandomContrast(), # RGB
415 | ConvertColor(current="RGB", transform='HSV'), # HSV
416 | RandomSaturation(), # HSV
417 | RandomHue(), # HSV
418 | ConvertColor(current='HSV', transform='RGB'), # RGB
419 | RandomContrast() # RGB
420 | ]
421 | self.rand_brightness = RandomBrightness()
422 | self.rand_light_noise = RandomLightingNoise()
423 |
424 | def __call__(self, image, boxes, labels):
425 | im = image.copy()
426 | im, boxes, labels = self.rand_brightness(im, boxes, labels)
427 | if random.randint(2):
428 | distort = Compose(self.pd[:-1])
429 | else:
430 | distort = Compose(self.pd[1:])
431 | im, boxes, labels = distort(im, boxes, labels)
432 | return self.rand_light_noise(im, boxes, labels)
433 |
--------------------------------------------------------------------------------
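
A small sketch composing a few of the transforms above on dummy data; the pixel-mean passed to `Expand` is illustrative:

```python
import numpy as np

from ssd.data.transforms.transforms import (Compose, ConvertFromInts, Expand,
                                            ToPercentCoords)

image = np.random.randint(0, 255, (300, 300, 3), dtype=np.uint8)
boxes = np.array([[30., 40., 120., 160.]], dtype=np.float32)  # x1, y1, x2, y2 in pixels
labels = np.array([1])

augment = Compose([
    ConvertFromInts(),        # uint8 -> float32
    Expand([123, 117, 104]),  # maybe paste the image onto a larger mean-filled canvas
    ToPercentCoords(),        # boxes to [0, 1] coordinates relative to the image
])
image, boxes, labels = augment(image, boxes, labels)
print(image.shape, boxes)
```
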
/ssd/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/engine/__init__.py
--------------------------------------------------------------------------------
/ssd/engine/inference.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import torch
5 | import torch.utils.data
6 | from tqdm import tqdm
7 |
8 | from ssd.data.build import make_data_loader
9 | from ssd.data.datasets.evaluation import evaluate
10 |
11 | from ssd.utils import dist_util, mkdir
12 | from ssd.utils.dist_util import synchronize, is_main_process
13 |
14 |
15 | def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
16 | all_predictions = dist_util.all_gather(predictions_per_gpu)
17 | if not dist_util.is_main_process():
18 | return
19 | # merge the list of dicts
20 | predictions = {}
21 | for p in all_predictions:
22 | predictions.update(p)
23 | # convert a dict where the key is the index in a list
24 | image_ids = list(sorted(predictions.keys()))
25 | if len(image_ids) != image_ids[-1] + 1:
26 | logger = logging.getLogger("SSD.inference")
27 | logger.warning(
28 | "Number of images that were gathered from multiple processes is not "
29 | "a contiguous set. Some images might be missing from the evaluation"
30 | )
31 |
32 | # convert to a list
33 | predictions = [predictions[i] for i in image_ids]
34 | return predictions
35 |
36 |
37 | def compute_on_dataset(model, data_loader, device):
38 | results_dict = {}
39 | for batch in tqdm(data_loader):
40 | images, targets, image_ids = batch
41 | cpu_device = torch.device("cpu")
42 | with torch.no_grad():
43 | outputs = model(images.to(device))
44 |
45 | outputs = [o.to(cpu_device) for o in outputs]
46 | results_dict.update(
47 | {int(img_id): result for img_id, result in zip(image_ids, outputs)}
48 | )
49 | return results_dict
50 |
51 |
52 | def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False, **kwargs):
53 | dataset = data_loader.dataset
54 | logger = logging.getLogger("SSD.inference")
55 | logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(dataset)))
56 | predictions_path = os.path.join(output_folder, 'predictions.pth')
57 | if use_cached and os.path.exists(predictions_path):
58 | predictions = torch.load(predictions_path, map_location='cpu')
59 | else:
60 | predictions = compute_on_dataset(model, data_loader, device)
61 | synchronize()
62 | predictions = _accumulate_predictions_from_multiple_gpus(predictions)
63 | if not is_main_process():
64 | return
65 | if output_folder:
66 | torch.save(predictions, predictions_path)
67 | return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder, **kwargs)
68 |
69 |
70 | @torch.no_grad()
71 | def do_evaluation(cfg, model, distributed, **kwargs):
72 | if isinstance(model, torch.nn.parallel.DistributedDataParallel):
73 | model = model.module
74 | model.eval()
75 | device = torch.device(cfg.MODEL.DEVICE)
76 | data_loaders_val = make_data_loader(cfg, is_train=False, distributed=distributed)
77 | eval_results = []
78 | for dataset_name, data_loader in zip(cfg.DATASETS.TEST, data_loaders_val):
79 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
80 | if not os.path.exists(output_folder):
81 | mkdir(output_folder)
82 | eval_result = inference(model, data_loader, dataset_name, device, output_folder, **kwargs)
83 | eval_results.append(eval_result)
84 | return eval_results
85 |
--------------------------------------------------------------------------------
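
The merge step in `_accumulate_predictions_from_multiple_gpus` boils down to combining per-rank dicts keyed by dataset index and flattening them into an index-ordered list; a dist-free sketch of just that logic, with toy stand-ins for the detection results:

```python
rank0 = {0: 'det_a', 2: 'det_c'}
rank1 = {1: 'det_b', 3: 'det_d'}

predictions = {}
for p in (rank0, rank1):
    predictions.update(p)

image_ids = sorted(predictions.keys())
# The contiguity check above warns when this does not hold:
assert len(image_ids) == image_ids[-1] + 1
print([predictions[i] for i in image_ids])  # ['det_a', 'det_b', 'det_c', 'det_d']
```
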
/ssd/engine/trainer.py:
--------------------------------------------------------------------------------
1 | import collections.abc
2 | import datetime
3 | import logging
4 | import os
5 | import time
6 | import torch
7 | import torch.distributed as dist
8 |
9 | from ssd.engine.inference import do_evaluation
10 | from ssd.utils import dist_util
11 | from ssd.utils.metric_logger import MetricLogger
12 |
13 |
14 | def write_metric(eval_result, prefix, summary_writer, global_step):
15 | for key in eval_result:
16 | value = eval_result[key]
17 | tag = '{}/{}'.format(prefix, key)
18 | if isinstance(value, collections.abc.Mapping):
19 | write_metric(value, tag, summary_writer, global_step)
20 | else:
21 | summary_writer.add_scalar(tag, value, global_step=global_step)
22 |
23 |
24 | def reduce_loss_dict(loss_dict):
25 | """
26 | Reduce the loss dictionary from all processes so that process with rank
27 | 0 has the averaged results. Returns a dict with the same fields as
28 | loss_dict, after reduction.
29 | """
30 | world_size = dist_util.get_world_size()
31 | if world_size < 2:
32 | return loss_dict
33 | with torch.no_grad():
34 | loss_names = []
35 | all_losses = []
36 | for k in sorted(loss_dict.keys()):
37 | loss_names.append(k)
38 | all_losses.append(loss_dict[k])
39 | all_losses = torch.stack(all_losses, dim=0)
40 | dist.reduce(all_losses, dst=0)
41 | if dist.get_rank() == 0:
42 | # only main process gets accumulated, so only divide by
43 | # world_size in this case
44 | all_losses /= world_size
45 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)}
46 | return reduced_losses
47 |
48 |
49 | def do_train(cfg, model,
50 | data_loader,
51 | optimizer,
52 | scheduler,
53 | checkpointer,
54 | device,
55 | arguments,
56 | args):
57 | logger = logging.getLogger("SSD.trainer")
58 | logger.info("Start training ...")
59 | meters = MetricLogger()
60 |
61 | model.train()
62 | save_to_disk = dist_util.get_rank() == 0
63 | if args.use_tensorboard and save_to_disk:
64 | try:
65 | from torch.utils.tensorboard import SummaryWriter
66 | except ImportError:
67 | from tensorboardX import SummaryWriter
68 | summary_writer = SummaryWriter(log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs'))
69 | else:
70 | summary_writer = None
71 |
72 | max_iter = len(data_loader)
73 | start_iter = arguments["iteration"]
74 | start_training_time = time.time()
75 | end = time.time()
76 | for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
77 | iteration = iteration + 1
78 | arguments["iteration"] = iteration
79 |
80 | images = images.to(device)
81 | targets = targets.to(device)
82 | loss_dict = model(images, targets=targets)
83 | loss = sum(loss for loss in loss_dict.values())
84 |
85 | # reduce losses over all GPUs for logging purposes
86 | loss_dict_reduced = reduce_loss_dict(loss_dict)
87 | losses_reduced = sum(loss for loss in loss_dict_reduced.values())
88 | meters.update(total_loss=losses_reduced, **loss_dict_reduced)
89 |
90 | optimizer.zero_grad()
91 | loss.backward()
92 | optimizer.step()
93 | scheduler.step()
94 |
95 | batch_time = time.time() - end
96 | end = time.time()
97 | meters.update(time=batch_time)
98 | if iteration % args.log_step == 0:
99 | eta_seconds = meters.time.global_avg * (max_iter - iteration)
100 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
101 | if device == "cuda":
102 | logger.info(
103 | meters.delimiter.join([
104 | "iter: {iter:06d}",
105 | "lr: {lr:.5f}",
106 | '{meters}',
107 | "eta: {eta}",
108 | 'mem: {mem}M',
109 | ]).format(
110 | iter=iteration,
111 | lr=optimizer.param_groups[0]['lr'],
112 | meters=str(meters),
113 | eta=eta_string,
114 | mem=round(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0),
115 | )
116 | )
117 | else:
118 | logger.info(
119 | meters.delimiter.join([
120 | "iter: {iter:06d}",
121 | "lr: {lr:.5f}",
122 | '{meters}',
123 | "eta: {eta}",
124 | ]).format(
125 | iter=iteration,
126 | lr=optimizer.param_groups[0]['lr'],
127 | meters=str(meters),
128 | eta=eta_string,
129 | )
130 | )
131 | if summary_writer:
132 | global_step = iteration
133 | summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step)
134 | for loss_name, loss_item in loss_dict_reduced.items():
135 | summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step)
136 | summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step)
137 |
138 | if iteration % args.save_step == 0:
139 | checkpointer.save("model_{:06d}".format(iteration), **arguments)
140 |
141 | if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter:
142 | eval_results = do_evaluation(cfg, model, distributed=args.distributed, iteration=iteration)
143 | if dist_util.get_rank() == 0 and summary_writer:
144 | for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST):
145 | write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration)
146 | model.train() # *IMPORTANT*: change to train mode after eval.
147 |
148 | checkpointer.save("model_final", **arguments)
149 | # compute training time
150 | total_training_time = int(time.time() - start_training_time)
151 | total_time_str = str(datetime.timedelta(seconds=total_training_time))
152 | logger.info("Total training time: {} ({:.4f} s / it)".format(total_time_str, total_training_time / max_iter))
153 | return model
154 |
--------------------------------------------------------------------------------
/ssd/layers/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.init as init
4 | from .separable_conv import SeparableConv2d
5 |
6 | __all__ = ['L2Norm', 'SeparableConv2d']
7 |
8 |
9 | class L2Norm(nn.Module):
10 | def __init__(self, n_channels, scale):
11 | super(L2Norm, self).__init__()
12 | self.n_channels = n_channels
13 | self.gamma = scale or None
14 | self.eps = 1e-10
15 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
16 | self.reset_parameters()
17 |
18 | def reset_parameters(self):
19 | init.constant_(self.weight, self.gamma)
20 |
21 | def forward(self, x):
22 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
23 | x = torch.div(x, norm)
24 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
25 | return out
26 |
--------------------------------------------------------------------------------
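
`L2Norm` rescales the channel vector at every spatial position to unit L2 norm and multiplies by a learned per-channel scale (initialised to `gamma`); a quick numeric check:

```python
import torch

from ssd.layers import L2Norm

layer = L2Norm(n_channels=4, scale=20)
x = torch.randn(2, 4, 8, 8)
out = layer(x)

# Right after initialisation every weight is 20, so the channel vector at
# each spatial location comes out with norm ~20.
norms = out.pow(2).sum(dim=1).sqrt()
print(norms.min().item(), norms.max().item())  # both ~20.0
```
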
/ssd/layers/separable_conv.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | class SeparableConv2d(nn.Module):
5 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False):
6 | super().__init__()
7 | ReLU = nn.ReLU if onnx_compatible else nn.ReLU6
8 | self.conv = nn.Sequential(
9 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
10 | groups=in_channels, stride=stride, padding=padding),
11 | nn.BatchNorm2d(in_channels),
12 | ReLU(),
13 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
14 | )
15 |
16 | def forward(self, x):
17 | return self.conv(x)
18 |
--------------------------------------------------------------------------------
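
The depthwise-separable factorisation replaces one k x k convolution with a k x k depthwise plus 1 x 1 pointwise pair, cutting parameters by roughly a factor of k^2 for wide layers; a quick comparison against a plain `nn.Conv2d`:

```python
import torch
from torch import nn

from ssd.layers import SeparableConv2d

def n_params(m):
    return sum(p.numel() for p in m.parameters())

std = nn.Conv2d(256, 256, kernel_size=3, padding=1)
sep = SeparableConv2d(256, 256, kernel_size=3, padding=1)
print(n_params(std), n_params(sep))  # ~590k vs ~69k parameters

x = torch.randn(1, 256, 10, 10)
print(std(x).shape, sep(x).shape)    # identical output shapes
```
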
/ssd/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/modeling/__init__.py
--------------------------------------------------------------------------------
/ssd/modeling/anchors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/modeling/anchors/__init__.py
--------------------------------------------------------------------------------
/ssd/modeling/anchors/prior_box.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 |
3 | import torch
4 | from math import sqrt
5 |
6 |
7 | class PriorBox:
8 | def __init__(self, cfg):
9 | self.image_size = cfg.INPUT.IMAGE_SIZE
10 | prior_config = cfg.MODEL.PRIORS
11 | self.feature_maps = prior_config.FEATURE_MAPS
12 | self.min_sizes = prior_config.MIN_SIZES
13 | self.max_sizes = prior_config.MAX_SIZES
14 | self.strides = prior_config.STRIDES
15 | self.aspect_ratios = prior_config.ASPECT_RATIOS
16 | self.clip = prior_config.CLIP
17 |
18 | def __call__(self):
19 | """Generate SSD Prior Boxes.
20 | It returns the center, height and width of the priors. The values are relative to the image size
21 | Returns:
22 | priors (num_priors, 4): The prior boxes represented as [[center_x, center_y, w, h]]. All the values
23 | are relative to the image size.
24 | """
25 | priors = []
26 | for k, f in enumerate(self.feature_maps):
27 | scale = self.image_size / self.strides[k]
28 | for i, j in product(range(f), repeat=2):
29 | # unit center x,y
30 | cx = (j + 0.5) / scale
31 | cy = (i + 0.5) / scale
32 |
33 | # small sized square box
34 | size = self.min_sizes[k]
35 | h = w = size / self.image_size
36 | priors.append([cx, cy, w, h])
37 |
38 | # big sized square box
39 | size = sqrt(self.min_sizes[k] * self.max_sizes[k])
40 | h = w = size / self.image_size
41 | priors.append([cx, cy, w, h])
42 |
43 | # change h/w ratio of the small sized box
44 | size = self.min_sizes[k]
45 | h = w = size / self.image_size
46 | for ratio in self.aspect_ratios[k]:
47 | ratio = sqrt(ratio)
48 | priors.append([cx, cy, w * ratio, h / ratio])
49 | priors.append([cx, cy, w / ratio, h * ratio])
50 |
51 | priors = torch.tensor(priors)
52 | if self.clip:
53 | priors.clamp_(max=1, min=0)
54 | return priors
55 |
--------------------------------------------------------------------------------
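
Each feature-map cell above contributes 2 + 2 * len(aspect_ratios[k]) priors: the small and big squares plus a (w*r, h/r) / (w/r, h*r) pair per extra aspect ratio. With the stock SSD300 settings from the original paper (feature maps 38, 19, 10, 5, 3, 1 and aspect ratios [2], [2, 3], [2, 3], [2, 3], [2], [2]) the count works out to the familiar 8732; a quick check of the arithmetic:

```python
feature_maps = [38, 19, 10, 5, 3, 1]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

total = 0
for f, ars in zip(feature_maps, aspect_ratios):
    boxes_per_cell = 2 + 2 * len(ars)  # small square, big square, 2 per ratio
    total += f * f * boxes_per_cell
print(total)  # 8732
```
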
/ssd/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from ssd.modeling import registry
2 | from .vgg import VGG
3 | from .mobilenet import MobileNetV2
4 | from .efficient_net import EfficientNet
5 | from .mobilenetv3 import MobileNetV3
6 |
7 | __all__ = ['build_backbone', 'VGG', 'MobileNetV2', 'EfficientNet', 'MobileNetV3']
8 |
9 |
10 | def build_backbone(cfg):
11 | return registry.BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg, cfg.MODEL.BACKBONE.PRETRAINED)
12 |
--------------------------------------------------------------------------------
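
`build_backbone` simply looks `cfg.MODEL.BACKBONE.NAME` up in the `BACKBONES` registry, so adding a backbone means registering a factory under a new key. A hypothetical sketch (`my_net` and `MyNet` are made-up names, and the single-feature-map body is a placeholder, not a working detector backbone):

```python
from torch import nn

from ssd.modeling import registry

@registry.BACKBONES.register('my_net')  # key to reference from cfg.MODEL.BACKBONE.NAME
def my_net(cfg, pretrained=True):
    class MyNet(nn.Module):
        def forward(self, x):
            # Real backbones return several feature maps for the box head.
            return (x,)
    return MyNet()
```
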
/ssd/modeling/backbone/efficient_net/__init__.py:
--------------------------------------------------------------------------------
1 | from ssd.modeling import registry
2 | from .efficient_net import EfficientNet
3 |
4 | __all__ = ['efficient_net_b3', 'EfficientNet']
5 |
6 |
7 | @registry.BACKBONES.register('efficient_net-b3')
8 | def efficient_net_b3(cfg, pretrained=True):
9 | if pretrained:
10 | model = EfficientNet.from_pretrained('efficientnet-b3')
11 | else:
12 | model = EfficientNet.from_name('efficientnet-b3')
13 |
14 | return model
15 |
--------------------------------------------------------------------------------
/ssd/modeling/backbone/efficient_net/efficient_net.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 | from .utils import (
5 | relu_fn,
6 | round_filters,
7 | round_repeats,
8 | drop_connect,
9 | Conv2dSamePadding,
10 | get_model_params,
11 | efficientnet_params,
12 | load_pretrained_weights,
13 | )
14 |
15 | INDICES = {
16 | 'efficientnet-b3': [7, 17, 25]
17 | }
18 |
19 | EXTRAS = {
20 | 'efficientnet-b3': [
21 | # in, out, k, s, p
22 | [(384, 128, 1, 1, 0), (128, 256, 3, 2, 1)], # 5 x 5
23 | [(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 3 x 3
24 | [(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 1 x 1
25 |
26 | ]
27 | }
28 |
29 |
30 | def add_extras(cfgs):
31 | extras = nn.ModuleList()
32 | for cfg in cfgs:
33 | extra = []
34 | for params in cfg:
35 | in_channels, out_channels, kernel_size, stride, padding = params
36 | extra.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding))
37 | extra.append(nn.ReLU())
38 | extras.append(nn.Sequential(*extra))
39 | return extras
40 |
41 |
42 | class MBConvBlock(nn.Module):
43 | """
44 | Mobile Inverted Residual Bottleneck Block
45 |
46 | Args:
47 | block_args (namedtuple): BlockArgs, see above
48 | global_params (namedtuple): GlobalParam, see above
49 |
50 | Attributes:
51 | has_se (bool): Whether the block contains a Squeeze and Excitation layer.
52 | """
53 |
54 | def __init__(self, block_args, global_params):
55 | super().__init__()
56 | self._block_args = block_args
57 | self._bn_mom = 1 - global_params.batch_norm_momentum
58 | self._bn_eps = global_params.batch_norm_epsilon
59 | self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1)
60 | self.id_skip = block_args.id_skip # skip connection and drop connect
61 |
62 | # Expansion phase
63 | inp = self._block_args.input_filters # number of input channels
64 | oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels
65 | if self._block_args.expand_ratio != 1:
66 | self._expand_conv = Conv2dSamePadding(in_channels=inp, out_channels=oup, kernel_size=1, bias=False)
67 | self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
68 |
69 | # Depthwise convolution phase
70 | k = self._block_args.kernel_size
71 | s = self._block_args.stride
72 | self._depthwise_conv = Conv2dSamePadding(
73 | in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise
74 | kernel_size=k, stride=s, bias=False)
75 | self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps)
76 |
77 | # Squeeze and Excitation layer, if desired
78 | if self.has_se:
79 | num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio))
80 | self._se_reduce = Conv2dSamePadding(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1)
81 | self._se_expand = Conv2dSamePadding(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1)
82 |
83 | # Output phase
84 | final_oup = self._block_args.output_filters
85 | self._project_conv = Conv2dSamePadding(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False)
86 | self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps)
87 |
88 | def forward(self, inputs, drop_connect_rate=None):
89 | """
90 | :param inputs: input tensor
91 | :param drop_connect_rate: drop connect rate (float, between 0 and 1)
92 | :return: output of block
93 | """
94 |
95 | # Expansion and Depthwise Convolution
96 | x = inputs
97 | if self._block_args.expand_ratio != 1:
98 | x = relu_fn(self._bn0(self._expand_conv(inputs)))
99 | x = relu_fn(self._bn1(self._depthwise_conv(x)))
100 |
101 | # Squeeze and Excitation
102 | if self.has_se:
103 | x_squeezed = F.adaptive_avg_pool2d(x, 1)
104 | x_squeezed = self._se_expand(relu_fn(self._se_reduce(x_squeezed)))
105 | x = torch.sigmoid(x_squeezed) * x
106 |
107 | x = self._bn2(self._project_conv(x))
108 |
109 | # Skip connection and drop connect
110 | input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters
111 | if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters:
112 | if drop_connect_rate:
113 | x = drop_connect(x, p=drop_connect_rate, training=self.training)
114 | x = x + inputs # skip connection
115 | return x
116 |
117 |
118 | class EfficientNet(nn.Module):
119 | """
120 | An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods
121 |
122 | Args:
123 | blocks_args (list): A list of BlockArgs to construct blocks
124 | global_params (namedtuple): A set of GlobalParams shared between blocks
125 |
126 | Example:
127 | model = EfficientNet.from_pretrained('efficientnet-b0')
128 |
129 | """
130 |
131 | def __init__(self, model_name, blocks_args=None, global_params=None):
132 | super().__init__()
133 | self.indices = INDICES[model_name]
134 | self.extras = add_extras(EXTRAS[model_name])
135 | assert isinstance(blocks_args, list), 'blocks_args should be a list'
136 | assert len(blocks_args) > 0, 'blocks_args must not be empty'
137 | self._global_params = global_params
138 | self._blocks_args = blocks_args
139 |
140 | # Batch norm parameters
141 | bn_mom = 1 - self._global_params.batch_norm_momentum
142 | bn_eps = self._global_params.batch_norm_epsilon
143 |
144 | # Stem
145 | in_channels = 3 # rgb
146 | out_channels = round_filters(32, self._global_params) # number of output channels
147 | self._conv_stem = Conv2dSamePadding(in_channels, out_channels, kernel_size=3, stride=2, bias=False)
148 | self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps)
149 |
150 | # Build blocks
151 | self._blocks = nn.ModuleList([])
152 | for block_args in self._blocks_args:
153 |
154 | # Update block input and output filters based on depth multiplier.
155 | block_args = block_args._replace(
156 | input_filters=round_filters(block_args.input_filters, self._global_params),
157 | output_filters=round_filters(block_args.output_filters, self._global_params),
158 | num_repeat=round_repeats(block_args.num_repeat, self._global_params)
159 | )
160 |
161 | # The first block needs to take care of stride and filter size increase.
162 | self._blocks.append(MBConvBlock(block_args, self._global_params))
163 | if block_args.num_repeat > 1:
164 | block_args = block_args._replace(input_filters=block_args.output_filters, stride=1)
165 | for _ in range(block_args.num_repeat - 1):
166 | self._blocks.append(MBConvBlock(block_args, self._global_params))
167 | self.reset_parameters()
168 |
169 | def reset_parameters(self):
170 | for m in self.extras.modules():
171 | if isinstance(m, nn.Conv2d):
172 | nn.init.xavier_uniform_(m.weight)
173 | nn.init.zeros_(m.bias)
174 |
175 | def extract_features(self, inputs):
176 | """ Returns output of the final convolution layer """
177 |
178 | # Stem
179 | x = relu_fn(self._bn0(self._conv_stem(inputs)))
180 |
181 | features = []
182 |
183 | # Blocks
184 | for idx, block in enumerate(self._blocks):
185 | drop_connect_rate = self._global_params.drop_connect_rate
186 | if drop_connect_rate:
187 | drop_connect_rate *= float(idx) / len(self._blocks)
188 | x = block(x, drop_connect_rate)
189 | if idx in self.indices:
190 | features.append(x)
191 |
192 | return x, features
193 |
194 | def forward(self, inputs):
195 | """ Calls extract_features to extract features, applies final linear layer, and returns logits. """
196 |
197 | # Convolution layers
198 | x, features = self.extract_features(inputs)
199 |
200 | for layer in self.extras:
201 | x = layer(x)
202 | features.append(x)
203 |
204 | return tuple(features)
205 |
206 | @classmethod
207 | def from_name(cls, model_name, override_params=None):
208 | cls._check_model_name_is_valid(model_name)
209 | blocks_args, global_params = get_model_params(model_name, override_params)
210 | return EfficientNet(model_name, blocks_args, global_params)
211 |
212 | @classmethod
213 | def from_pretrained(cls, model_name):
214 | model = EfficientNet.from_name(model_name)
215 | load_pretrained_weights(model, model_name)
216 | return model
217 |
218 | @classmethod
219 | def get_image_size(cls, model_name):
220 | cls._check_model_name_is_valid(model_name)
221 | _, _, res, _ = efficientnet_params(model_name)
222 | return res
223 |
224 | @classmethod
225 | def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False):
226 | """ Validates model name. None that pretrained weights are only available for
227 | the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """
228 | num_models = 4 if also_need_pretrained_weights else 8
229 | valid_models = ['efficientnet_b' + str(i) for i in range(num_models)]
230 | if model_name.replace('-', '_') not in valid_models:
231 | raise ValueError('model_name should be one of: ' + ', '.join(valid_models))
232 |
--------------------------------------------------------------------------------
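The stem and blocks above rely on `Conv2dSamePadding` (defined in `utils.py` below), which reproduces TensorFlow's `'SAME'` padding: the output spatial size is always `ceil(input / stride)`, with the asymmetric padding computed per call. A minimal sketch, assuming the `ssd` package is on `PYTHONPATH`:

```python
import torch
from ssd.modeling.backbone.efficient_net.utils import Conv2dSamePadding

# TF-style 'SAME' padding: output size is ceil(15 / 2) = 8 regardless of kernel size
conv = Conv2dSamePadding(3, 8, kernel_size=3, stride=2)
x = torch.randn(1, 3, 15, 15)
print(conv(x).shape)  # torch.Size([1, 8, 8, 8])
```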
/ssd/modeling/backbone/efficient_net/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | This file contains helper functions for building the model and for loading model parameters.
3 | These helper functions are built to mirror those in the official TensorFlow implementation.
4 | """
5 |
6 | import re
7 | import math
8 | import collections
9 | import torch
10 | from torch import nn
11 | from torch.nn import functional as F
12 | from ssd.utils.model_zoo import load_state_dict_from_url
13 |
14 | ########################################################################
15 | ############### HELPER FUNCTIONS FOR MODEL ARCHITECTURE ################
16 | ########################################################################
17 |
18 |
19 | # Parameters for the entire model (stem, all blocks, and head)
20 |
21 | GlobalParams = collections.namedtuple('GlobalParams', [
22 | 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate',
23 | 'num_classes', 'width_coefficient', 'depth_coefficient',
24 | 'depth_divisor', 'min_depth', 'drop_connect_rate', ])
25 |
26 | # Parameters for an individual model block
27 | BlockArgs = collections.namedtuple('BlockArgs', [
28 | 'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
29 | 'expand_ratio', 'id_skip', 'stride', 'se_ratio'])
30 |
31 | # Change namedtuple defaults
32 | GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
33 | BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
34 |
35 |
36 | def relu_fn(x):
37 | """ Swish activation function """
38 | return x * torch.sigmoid(x)
39 |
40 |
41 | def round_filters(filters, global_params):
42 | """ Calculate and round number of filters based on depth multiplier. """
43 | multiplier = global_params.width_coefficient
44 | if not multiplier:
45 | return filters
46 | divisor = global_params.depth_divisor
47 | min_depth = global_params.min_depth
48 | filters *= multiplier
49 | min_depth = min_depth or divisor
50 | new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
51 | if new_filters < 0.9 * filters: # prevent rounding by more than 10%
52 | new_filters += divisor
53 | return int(new_filters)
54 |
55 |
56 | def round_repeats(repeats, global_params):
57 | """ Round number of filters based on depth multiplier. """
58 | multiplier = global_params.depth_coefficient
59 | if not multiplier:
60 | return repeats
61 | return int(math.ceil(multiplier * repeats))
62 |
63 |
64 | def drop_connect(inputs, p, training):
65 | """ Drop connect. """
66 | if not training: return inputs
67 | batch_size = inputs.shape[0]
68 | keep_prob = 1 - p
69 | random_tensor = keep_prob
70 | random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
71 | binary_tensor = torch.floor(random_tensor)
72 | output = inputs / keep_prob * binary_tensor
73 | return output
74 |
75 |
76 | class Conv2dSamePadding(nn.Conv2d):
77 | """ 2D Convolutions like TensorFlow """
78 |
79 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
80 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
81 | self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
82 |
83 | def forward(self, x):
84 | ih, iw = x.size()[-2:]
85 | kh, kw = self.weight.size()[-2:]
86 | sh, sw = self.stride
87 | oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
88 | pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
89 | pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
90 | if pad_h > 0 or pad_w > 0:
91 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
92 | return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
93 |
94 |
95 | ########################################################################
96 | ############### HELPER FUNCTIONS FOR LOADING MODEL PARAMS ##############
97 | ########################################################################
98 |
99 |
100 | def efficientnet_params(model_name):
101 | """ Map EfficientNet model name to parameter coefficients. """
102 | params_dict = {
103 | # Coefficients: width,depth,res,dropout
104 | 'efficientnet-b0': (1.0, 1.0, 224, 0.2),
105 | 'efficientnet-b1': (1.0, 1.1, 240, 0.2),
106 | 'efficientnet-b2': (1.1, 1.2, 260, 0.3),
107 | 'efficientnet-b3': (1.2, 1.4, 300, 0.3),
108 | 'efficientnet-b4': (1.4, 1.8, 380, 0.4),
109 | 'efficientnet-b5': (1.6, 2.2, 456, 0.4),
110 | 'efficientnet-b6': (1.8, 2.6, 528, 0.5),
111 | 'efficientnet-b7': (2.0, 3.1, 600, 0.5),
112 | }
113 | return params_dict[model_name]
114 |
115 |
116 | class BlockDecoder(object):
117 | """ Block Decoder for readability, straight from the official TensorFlow repository """
118 |
119 | @staticmethod
120 | def _decode_block_string(block_string):
121 | """ Gets a block through a string notation of arguments. """
122 | assert isinstance(block_string, str)
123 |
124 | ops = block_string.split('_')
125 | options = {}
126 | for op in ops:
127 | splits = re.split(r'(\d.*)', op)
128 | if len(splits) >= 2:
129 | key, value = splits[:2]
130 | options[key] = value
131 |
132 | # Check stride
133 | assert (('s' in options and len(options['s']) == 1) or
134 | (len(options['s']) == 2 and options['s'][0] == options['s'][1]))
135 |
136 | return BlockArgs(
137 | kernel_size=int(options['k']),
138 | num_repeat=int(options['r']),
139 | input_filters=int(options['i']),
140 | output_filters=int(options['o']),
141 | expand_ratio=int(options['e']),
142 | id_skip=('noskip' not in block_string),
143 | se_ratio=float(options['se']) if 'se' in options else None,
144 | stride=[int(options['s'][0])])
145 |
146 | @staticmethod
147 | def _encode_block_string(block):
148 | """Encodes a block to a string."""
149 | args = [
150 | 'r%d' % block.num_repeat,
151 | 'k%d' % block.kernel_size,
152 | 's%d%d' % (block.stride[0], block.stride[0]),  # stride is stored as a one-element list
153 | 'e%s' % block.expand_ratio,
154 | 'i%d' % block.input_filters,
155 | 'o%d' % block.output_filters
156 | ]
157 | if block.se_ratio is not None and 0 < block.se_ratio <= 1:  # se_ratio may be None
158 | args.append('se%s' % block.se_ratio)
159 | if block.id_skip is False:
160 | args.append('noskip')
161 | return '_'.join(args)
162 |
163 | @staticmethod
164 | def decode(string_list):
165 | """
166 | Decodes a list of string notations to specify blocks inside the network.
167 |
168 | :param string_list: a list of strings, each string is a notation of block
169 | :return: a list of BlockArgs namedtuples of block args
170 | """
171 | assert isinstance(string_list, list)
172 | blocks_args = []
173 | for block_string in string_list:
174 | blocks_args.append(BlockDecoder._decode_block_string(block_string))
175 | return blocks_args
176 |
177 | @staticmethod
178 | def encode(blocks_args):
179 | """
180 | Encodes a list of BlockArgs to a list of strings.
181 |
182 | :param blocks_args: a list of BlockArgs namedtuples of block args
183 | :return: a list of strings, each string is a notation of block
184 | """
185 | block_strings = []
186 | for block in blocks_args:
187 | block_strings.append(BlockDecoder._encode_block_string(block))
188 | return block_strings
189 |
190 |
191 | def efficientnet(width_coefficient=None, depth_coefficient=None,
192 | dropout_rate=0.2, drop_connect_rate=0.2):
193 | """ Creates a efficientnet model. """
194 |
195 | blocks_args = [
196 | 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25',
197 | 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
198 | 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25',
199 | 'r1_k3_s11_e6_i192_o320_se0.25',
200 | ]
201 | blocks_args = BlockDecoder.decode(blocks_args)
202 |
203 | global_params = GlobalParams(
204 | batch_norm_momentum=0.99,
205 | batch_norm_epsilon=1e-3,
206 | dropout_rate=dropout_rate,
207 | drop_connect_rate=drop_connect_rate,
208 | # data_format='channels_last', # removed, this is always true in PyTorch
209 | num_classes=1000,
210 | width_coefficient=width_coefficient,
211 | depth_coefficient=depth_coefficient,
212 | depth_divisor=8,
213 | min_depth=None
214 | )
215 |
216 | return blocks_args, global_params
217 |
218 |
219 | def get_model_params(model_name, override_params):
220 | """ Get the block args and global params for a given model """
221 | if model_name.startswith('efficientnet'):
222 | w, d, _, p = efficientnet_params(model_name)
223 | # note: all models have drop connect rate = 0.2
224 | blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p)
225 | else:
226 | raise NotImplementedError('model name is not pre-defined: %s' % model_name)
227 | if override_params:
228 | # ValueError will be raised here if override_params has fields not included in global_params.
229 | global_params = global_params._replace(**override_params)
230 | return blocks_args, global_params
231 |
232 |
233 | url_map = {
234 | 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet-b0-08094119.pth',
235 | 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet-b1-dbc7070a.pth',
236 | 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet-b2-27687264.pth',
237 | 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet-b3-c8376fa2.pth',
238 | 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet-b4-e116e8b3.pth',
239 | 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet-b5-586e6cc6.pth',
240 | }
241 |
242 |
243 | def load_pretrained_weights(model, model_name):
244 | """ Loads pretrained weights, and downloads if loading for the first time. """
245 | state_dict = load_state_dict_from_url(url_map[model_name])
246 | model.load_state_dict(state_dict, strict=False)
247 | print('Loaded pretrained weights for {}'.format(model_name))
248 |
--------------------------------------------------------------------------------
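The block-string notation above is compact: for example, `'r1_k3_s11_e1_i32_o16_se0.25'` means 1 repeat, 3x3 kernel, stride 1, expand ratio 1, 32 input filters, 16 output filters, and a squeeze-excite ratio of 0.25. Note that `_decode_block_string` stores the stride as a one-element list, which `_encode_block_string` repeats for both dimensions. A minimal round-trip sketch:

```python
from ssd.modeling.backbone.efficient_net.utils import BlockDecoder

blocks = BlockDecoder.decode(['r1_k3_s11_e1_i32_o16_se0.25'])
b = blocks[0]
print(b.num_repeat, b.kernel_size, b.stride, b.expand_ratio)  # 1 3 [1] 1
print(BlockDecoder.encode(blocks))  # ['r1_k3_s11_e1_i32_o16_se0.25']
```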
/ssd/modeling/backbone/mobilenet.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from ssd.modeling import registry
4 | from ssd.utils.model_zoo import load_state_dict_from_url
5 |
6 | model_urls = {
7 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
8 | }
9 |
10 |
11 | class ConvBNReLU(nn.Sequential):
12 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
13 | padding = (kernel_size - 1) // 2
14 | super(ConvBNReLU, self).__init__(
15 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
16 | nn.BatchNorm2d(out_planes),
17 | nn.ReLU6(inplace=True)
18 | )
19 |
20 |
21 | class InvertedResidual(nn.Module):
22 | def __init__(self, inp, oup, stride, expand_ratio):
23 | super(InvertedResidual, self).__init__()
24 | self.stride = stride
25 | assert stride in [1, 2]
26 |
27 | hidden_dim = int(round(inp * expand_ratio))
28 | self.use_res_connect = self.stride == 1 and inp == oup
29 |
30 | layers = []
31 | if expand_ratio != 1:
32 | # pw
33 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
34 | layers.extend([
35 | # dw
36 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
37 | # pw-linear
38 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
39 | nn.BatchNorm2d(oup),
40 | ])
41 | self.conv = nn.Sequential(*layers)
42 |
43 | def forward(self, x):
44 | if self.use_res_connect:
45 | return x + self.conv(x)
46 | else:
47 | return self.conv(x)
48 |
49 |
50 | class MobileNetV2(nn.Module):
51 | def __init__(self, width_mult=1.0, inverted_residual_setting=None):
52 | super(MobileNetV2, self).__init__()
53 | block = InvertedResidual
54 | input_channel = 32
55 | last_channel = 1280
56 |
57 | if inverted_residual_setting is None:
58 | inverted_residual_setting = [
59 | # t, c, n, s
60 | [1, 16, 1, 1],
61 | [6, 24, 2, 2],
62 | [6, 32, 3, 2],
63 | [6, 64, 4, 2],
64 | [6, 96, 3, 1],
65 | [6, 160, 3, 2],
66 | [6, 320, 1, 1],
67 | ]
68 |
69 | # only check the first element, assuming user knows t,c,n,s are required
70 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
71 | raise ValueError("inverted_residual_setting should be non-empty "
72 | "or a 4-element list, got {}".format(inverted_residual_setting))
73 |
74 | # building first layer
75 | input_channel = int(input_channel * width_mult)
76 | self.last_channel = int(last_channel * max(1.0, width_mult))
77 | features = [ConvBNReLU(3, input_channel, stride=2)]
78 | # building inverted residual blocks
79 | for t, c, n, s in inverted_residual_setting:
80 | output_channel = int(c * width_mult)
81 | for i in range(n):
82 | stride = s if i == 0 else 1
83 | features.append(block(input_channel, output_channel, stride, expand_ratio=t))
84 | input_channel = output_channel
85 | # building last several layers
86 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
87 | # make it nn.Sequential
88 | self.features = nn.Sequential(*features)
89 | self.extras = nn.ModuleList([
90 | InvertedResidual(1280, 512, 2, 0.2),
91 | InvertedResidual(512, 256, 2, 0.25),
92 | InvertedResidual(256, 256, 2, 0.5),
93 | InvertedResidual(256, 64, 2, 0.25)
94 | ])
95 |
96 | self.reset_parameters()
97 |
98 | def reset_parameters(self):
99 | # weight initialization
100 | for m in self.modules():
101 | if isinstance(m, nn.Conv2d):
102 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
103 | if m.bias is not None:
104 | nn.init.zeros_(m.bias)
105 | elif isinstance(m, nn.BatchNorm2d):
106 | nn.init.ones_(m.weight)
107 | nn.init.zeros_(m.bias)
108 | elif isinstance(m, nn.Linear):
109 | nn.init.normal_(m.weight, 0, 0.01)
110 | nn.init.zeros_(m.bias)
111 |
112 | def forward(self, x):
113 | features = []
114 | for i in range(14):
115 | x = self.features[i](x)
116 | features.append(x)
117 |
118 | for i in range(14, len(self.features)):
119 | x = self.features[i](x)
120 | features.append(x)
121 |
122 | for i in range(len(self.extras)):
123 | x = self.extras[i](x)
124 | features.append(x)
125 |
126 | return tuple(features)
127 |
128 |
129 | @registry.BACKBONES.register('mobilenet_v2')
130 | def mobilenet_v2(cfg, pretrained=True):
131 | model = MobileNetV2()
132 | if pretrained:
133 | model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v2']), strict=False)
134 | return model
135 |
--------------------------------------------------------------------------------
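`InvertedResidual` only adds the skip connection when the block is shape-preserving (`stride == 1` and `inp == oup`); the stride-2 variants used in `self.extras` downsample without a residual. A minimal sketch:

```python
import torch
from ssd.modeling.backbone.mobilenet import InvertedResidual

x = torch.randn(1, 32, 40, 40)
same = InvertedResidual(inp=32, oup=32, stride=1, expand_ratio=6)  # residual branch active
down = InvertedResidual(inp=32, oup=64, stride=2, expand_ratio=6)  # plain downsampling block
print(same(x).shape)  # torch.Size([1, 32, 40, 40])
print(down(x).shape)  # torch.Size([1, 64, 20, 20])
```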
/ssd/modeling/backbone/mobilenetv3.py:
--------------------------------------------------------------------------------
1 | """
2 | Creates a MobileNetV3 Model as defined in:
3 | Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam. (2019).
4 | Searching for MobileNetV3
5 | arXiv preprint arXiv:1905.02244.
6 |
7 |
8 | @ Credit from https://github.com/d-li14/mobilenetv3.pytorch
9 | @ Modified by Chakkrit Termritthikun (https://github.com/chakkritte)
10 |
11 | """
12 |
13 | import torch.nn as nn
14 | import math
15 |
16 | from ssd.modeling import registry
17 | from ssd.utils.model_zoo import load_state_dict_from_url
18 |
19 | model_urls = {
20 | 'mobilenet_v3': 'https://github.com/d-li14/mobilenetv3.pytorch/raw/master/pretrained/mobilenetv3-large-1cd25616.pth',
21 | }
22 |
23 |
24 | def _make_divisible(v, divisor, min_value=None):
25 | """
26 | This function is taken from the original tf repo.
27 | It ensures that all layers have a channel number that is divisible by 8
28 | It can be seen here:
29 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
30 | :param v:
31 | :param divisor:
32 | :param min_value:
33 | :return:
34 | """
35 | if min_value is None:
36 | min_value = divisor
37 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
38 | # Make sure that round down does not go down by more than 10%.
39 | if new_v < 0.9 * v:
40 | new_v += divisor
41 | return new_v
42 |
43 |
44 | class h_sigmoid(nn.Module):
45 | def __init__(self, inplace=True):
46 | super(h_sigmoid, self).__init__()
47 | self.relu = nn.ReLU6(inplace=inplace)
48 |
49 | def forward(self, x):
50 | return self.relu(x + 3) / 6
51 |
52 |
53 | class h_swish(nn.Module):
54 | def __init__(self, inplace=True):
55 | super(h_swish, self).__init__()
56 | self.sigmoid = h_sigmoid(inplace=inplace)
57 |
58 | def forward(self, x):
59 | return x * self.sigmoid(x)
60 |
61 |
62 | class SELayer(nn.Module):
63 | def __init__(self, channel, reduction=4):
64 | super(SELayer, self).__init__()
65 | self.avg_pool = nn.AdaptiveAvgPool2d(1)
66 | self.fc = nn.Sequential(
67 | nn.Linear(channel, _make_divisible(channel // reduction, 8)),
68 | nn.ReLU(inplace=True),
69 | nn.Linear(_make_divisible(channel // reduction, 8), channel),
70 | h_sigmoid()
71 | )
72 |
73 | def forward(self, x):
74 | b, c, _, _ = x.size()
75 | y = self.avg_pool(x).view(b, c)
76 | y = self.fc(y).view(b, c, 1, 1)
77 | return x * y
78 |
79 |
80 | def conv_3x3_bn(inp, oup, stride):
81 | return nn.Sequential(
82 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
83 | nn.BatchNorm2d(oup),
84 | h_swish()
85 | )
86 |
87 |
88 | def conv_1x1_bn(inp, oup):
89 | return nn.Sequential(
90 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
91 | nn.BatchNorm2d(oup),
92 | h_swish()
93 | )
94 |
95 |
96 | class InvertedResidual(nn.Module):
97 | def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs):
98 | super(InvertedResidual, self).__init__()
99 | assert stride in [1, 2]
100 |
101 | self.identity = stride == 1 and inp == oup
102 |
103 | if inp == hidden_dim:
104 | self.conv = nn.Sequential(
105 | # dw
106 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
107 | nn.BatchNorm2d(hidden_dim),
108 | h_swish() if use_hs else nn.ReLU(inplace=True),
109 | # Squeeze-and-Excite
110 | SELayer(hidden_dim) if use_se else nn.Identity(),
111 | # pw-linear
112 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
113 | nn.BatchNorm2d(oup),
114 | )
115 | else:
116 | self.conv = nn.Sequential(
117 | # pw
118 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
119 | nn.BatchNorm2d(hidden_dim),
120 | h_swish() if use_hs else nn.ReLU(inplace=True),
121 | # dw
122 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False),
123 | nn.BatchNorm2d(hidden_dim),
124 | # Squeeze-and-Excite
125 | SELayer(hidden_dim) if use_se else nn.Identity(),
126 | h_swish() if use_hs else nn.ReLU(inplace=True),
127 | # pw-linear
128 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
129 | nn.BatchNorm2d(oup),
130 | )
131 |
132 | def forward(self, x):
133 | if self.identity:
134 | return x + self.conv(x)
135 | else:
136 | return self.conv(x)
137 |
138 |
139 | class MobileNetV3(nn.Module):
140 | def __init__(self, mode='large', num_classes=1000, width_mult=1.):
141 | super(MobileNetV3, self).__init__()
142 | # setting of inverted residual blocks
143 | self.cfgs = [
144 | # k, t, c, SE, HS, s
145 | [3, 1, 16, 0, 0, 1],
146 | [3, 4, 24, 0, 0, 2],
147 | [3, 3, 24, 0, 0, 1],
148 | [5, 3, 40, 1, 0, 2],
149 | [5, 3, 40, 1, 0, 1],
150 | [5, 3, 40, 1, 0, 1],
151 | [3, 6, 80, 0, 1, 2],
152 | [3, 2.5, 80, 0, 1, 1],
153 | [3, 2.3, 80, 0, 1, 1],
154 | [3, 2.3, 80, 0, 1, 1],
155 | [3, 6, 112, 1, 1, 1],
156 | [3, 6, 112, 1, 1, 1],
157 | [5, 6, 160, 1, 1, 2],
158 | [5, 6, 160, 1, 1, 1],
159 | [5, 6, 160, 1, 1, 1]]
160 |
161 | assert mode in ['large', 'small']
162 |
163 | # building first layer
164 | input_channel = _make_divisible(16 * width_mult, 8)
165 |
166 | layers = [conv_3x3_bn(3, input_channel, 2)]
167 | # building inverted residual blocks
168 | block = InvertedResidual
169 | for k, t, c, use_se, use_hs, s in self.cfgs:
170 | output_channel = _make_divisible(c * width_mult, 8)
171 | exp_size = _make_divisible(input_channel * t, 8)
172 | layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs))
173 | input_channel = output_channel
174 | # building last several layers
175 | layers.append(conv_1x1_bn(input_channel, exp_size))
176 | self.features = nn.Sequential(*layers)
177 | self.extras = nn.ModuleList([
178 | InvertedResidual(960, _make_divisible(960 * 0.2, 8), 512, 3, 2, True, True),
179 | InvertedResidual(512, _make_divisible(512 * 0.25, 8), 256, 3, 2, True, True),
180 | InvertedResidual(256, _make_divisible(256 * 0.5, 8), 256, 3, 2, True, True),
181 | InvertedResidual(256, _make_divisible(256 * 0.25, 8), 64, 3, 2, True, True),
182 | ])
183 |
184 | self.reset_parameters()
185 |
186 | def forward(self, x):
187 | features = []
188 | for i in range(13):
189 | x = self.features[i](x)
190 | features.append(x)
191 |
192 | for i in range(13, len(self.features)):
193 | x = self.features[i](x)
194 | features.append(x)
195 |
196 | for i in range(len(self.extras)):
197 | x = self.extras[i](x)
198 | features.append(x)
199 |
200 | return tuple(features)
201 |
202 | def reset_parameters(self):
203 | for m in self.modules():
204 | if isinstance(m, nn.Conv2d):
205 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
206 | m.weight.data.normal_(0, math.sqrt(2. / n))
207 | if m.bias is not None:
208 | m.bias.data.zero_()
209 | elif isinstance(m, nn.BatchNorm2d):
210 | m.weight.data.fill_(1)
211 | m.bias.data.zero_()
212 | elif isinstance(m, nn.Linear):
213 | n = m.weight.size(1)
214 | m.weight.data.normal_(0, 0.01)
215 | m.bias.data.zero_()
216 |
217 |
218 | @registry.BACKBONES.register('mobilenet_v3')
219 | def mobilenet_v3(cfg, pretrained=True):
220 | model = MobileNetV3()
221 | if pretrained:
222 | model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v3']), strict=False)
223 | return model
224 |
--------------------------------------------------------------------------------
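`h_sigmoid` and `h_swish` are piecewise-linear stand-ins for sigmoid and swish, built from `ReLU6` so they are cheap and quantization-friendly on mobile hardware: `h_sigmoid(x) = ReLU6(x + 3) / 6` and `h_swish(x) = x * h_sigmoid(x)`. A quick numeric check:

```python
import torch
from ssd.modeling.backbone.mobilenetv3 import h_sigmoid, h_swish

x = torch.tensor([-4.0, -3.0, 0.0, 1.0, 3.0])
print(h_sigmoid()(x))  # ~ [0.0000, 0.0000, 0.5000, 0.6667, 1.0000]
print(h_swish()(x))    # ~ [0.0000, 0.0000, 0.0000, 0.6667, 3.0000]
```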
/ssd/modeling/backbone/vgg.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 | from ssd.layers import L2Norm
5 | from ssd.modeling import registry
6 | from ssd.utils.model_zoo import load_state_dict_from_url
7 |
8 | model_urls = {
9 | 'vgg': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth',
10 | }
11 |
12 |
13 | # borrowed from https://github.com/amdegroot/ssd.pytorch/blob/master/ssd.py
14 | def add_vgg(cfg, batch_norm=False):
15 | layers = []
16 | in_channels = 3
17 | for v in cfg:
18 | if v == 'M':
19 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
20 | elif v == 'C':
21 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
22 | else:
23 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
24 | if batch_norm:
25 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
26 | else:
27 | layers += [conv2d, nn.ReLU(inplace=True)]
28 | in_channels = v
29 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
30 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
31 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
32 | layers += [pool5, conv6,
33 | nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
34 | return layers
35 |
36 |
37 | def add_extras(cfg, i, size=300):
38 | # Extra layers added to VGG for feature scaling
39 | layers = []
40 | in_channels = i
41 | flag = False
42 | for k, v in enumerate(cfg):
43 | if in_channels != 'S':
44 | if v == 'S':
45 | layers += [nn.Conv2d(in_channels, cfg[k + 1], kernel_size=(1, 3)[flag], stride=2, padding=1)]
46 | else:
47 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
48 | flag = not flag
49 | in_channels = v
50 | if size == 512:
51 | layers.append(nn.Conv2d(in_channels, 128, kernel_size=1, stride=1))
52 | layers.append(nn.Conv2d(128, 256, kernel_size=4, stride=1, padding=1))
53 | return layers
54 |
55 |
56 | vgg_base = {
57 | '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
58 | 512, 512, 512],
59 | '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
60 | 512, 512, 512],
61 | }
62 | extras_base = {
63 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
64 | '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256],
65 | }
66 |
67 |
68 | class VGG(nn.Module):
69 | def __init__(self, cfg):
70 | super().__init__()
71 | size = cfg.INPUT.IMAGE_SIZE
72 | vgg_config = vgg_base[str(size)]
73 | extras_config = extras_base[str(size)]
74 |
75 | self.vgg = nn.ModuleList(add_vgg(vgg_config))
76 | self.extras = nn.ModuleList(add_extras(extras_config, i=1024, size=size))
77 | self.l2_norm = L2Norm(512, scale=20)
78 | self.reset_parameters()
79 |
80 | def reset_parameters(self):
81 | for m in self.extras.modules():
82 | if isinstance(m, nn.Conv2d):
83 | nn.init.xavier_uniform_(m.weight)
84 | nn.init.zeros_(m.bias)
85 |
86 | def init_from_pretrain(self, state_dict):
87 | self.vgg.load_state_dict(state_dict)
88 |
89 | def forward(self, x):
90 | features = []
91 | for i in range(23):
92 | x = self.vgg[i](x)
93 | s = self.l2_norm(x) # Conv4_3 L2 normalization
94 | features.append(s)
95 |
96 | # apply vgg up to fc7
97 | for i in range(23, len(self.vgg)):
98 | x = self.vgg[i](x)
99 | features.append(x)
100 |
101 | for k, v in enumerate(self.extras):
102 | x = F.relu(v(x), inplace=True)
103 | if k % 2 == 1:
104 | features.append(x)
105 |
106 | return tuple(features)
107 |
108 |
109 | @registry.BACKBONES.register('vgg')
110 | def vgg(cfg, pretrained=True):
111 | model = VGG(cfg)
112 | if pretrained:
113 | model.init_from_pretrain(load_state_dict_from_url(model_urls['vgg']))
114 | return model
115 |
--------------------------------------------------------------------------------
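In `add_extras`, `flag` toggles the kernel size between 1x1 and 3x3 via `(1, 3)[flag]`, and an `'S'` entry marks the following conv as stride-2; `VGG.forward` then collects a feature map after every second extra layer (`k % 2 == 1`). Printing the layers built for the 300 configuration makes the alternation visible (a sketch, assuming the package is importable):

```python
from ssd.modeling.backbone.vgg import add_extras, extras_base

for layer in add_extras(extras_base['300'], i=1024, size=300):
    print(layer)
# Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
# Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
# Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
# Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
# Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
# Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
# Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
# Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
```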
/ssd/modeling/box_head/__init__.py:
--------------------------------------------------------------------------------
1 | from ssd.modeling import registry
2 | from .box_head import SSDBoxHead
3 |
4 | __all__ = ['build_box_head', 'SSDBoxHead']
5 |
6 |
7 | def build_box_head(cfg):
8 | return registry.BOX_HEADS[cfg.MODEL.BOX_HEAD.NAME](cfg)
9 |
--------------------------------------------------------------------------------
/ssd/modeling/box_head/box_head.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | import torch.nn.functional as F
3 |
4 | from ssd.modeling import registry
5 | from ssd.modeling.anchors.prior_box import PriorBox
6 | from ssd.modeling.box_head.box_predictor import make_box_predictor
7 | from ssd.utils import box_utils
8 | from .inference import PostProcessor
9 | from .loss import MultiBoxLoss
10 |
11 |
12 | @registry.BOX_HEADS.register('SSDBoxHead')
13 | class SSDBoxHead(nn.Module):
14 | def __init__(self, cfg):
15 | super().__init__()
16 | self.cfg = cfg
17 | self.predictor = make_box_predictor(cfg)
18 | self.loss_evaluator = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO)
19 | self.post_processor = PostProcessor(cfg)
20 | self.priors = None
21 |
22 | def forward(self, features, targets=None):
23 | cls_logits, bbox_pred = self.predictor(features)
24 | if self.training:
25 | return self._forward_train(cls_logits, bbox_pred, targets)
26 | else:
27 | return self._forward_test(cls_logits, bbox_pred)
28 |
29 | def _forward_train(self, cls_logits, bbox_pred, targets):
30 | gt_boxes, gt_labels = targets['boxes'], targets['labels']
31 | reg_loss, cls_loss = self.loss_evaluator(cls_logits, bbox_pred, gt_labels, gt_boxes)
32 | loss_dict = dict(
33 | reg_loss=reg_loss,
34 | cls_loss=cls_loss,
35 | )
36 | detections = (cls_logits, bbox_pred)
37 | return detections, loss_dict
38 |
39 | def _forward_test(self, cls_logits, bbox_pred):
40 | if self.priors is None:
41 | self.priors = PriorBox(self.cfg)().to(bbox_pred.device)
42 | scores = F.softmax(cls_logits, dim=2)
43 | boxes = box_utils.convert_locations_to_boxes(
44 | bbox_pred, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE
45 | )
46 | boxes = box_utils.center_form_to_corner_form(boxes)
47 | detections = (scores, boxes)
48 | detections = self.post_processor(detections)
49 | return detections, {}
50 |
--------------------------------------------------------------------------------
/ssd/modeling/box_head/box_predictor.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from ssd.layers import SeparableConv2d
5 | from ssd.modeling import registry
6 |
7 |
8 | class BoxPredictor(nn.Module):
9 | def __init__(self, cfg):
10 | super().__init__()
11 | self.cfg = cfg
12 | self.cls_headers = nn.ModuleList()
13 | self.reg_headers = nn.ModuleList()
14 | for level, (boxes_per_location, out_channels) in enumerate(zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS)):
15 | self.cls_headers.append(self.cls_block(level, out_channels, boxes_per_location))
16 | self.reg_headers.append(self.reg_block(level, out_channels, boxes_per_location))
17 | self.reset_parameters()
18 |
19 | def cls_block(self, level, out_channels, boxes_per_location):
20 | raise NotImplementedError
21 |
22 | def reg_block(self, level, out_channels, boxes_per_location):
23 | raise NotImplementedError
24 |
25 | def reset_parameters(self):
26 | for m in self.modules():
27 | if isinstance(m, nn.Conv2d):
28 | nn.init.xavier_uniform_(m.weight)
29 | nn.init.zeros_(m.bias)
30 |
31 | def forward(self, features):
32 | cls_logits = []
33 | bbox_pred = []
34 | for feature, cls_header, reg_header in zip(features, self.cls_headers, self.reg_headers):
35 | cls_logits.append(cls_header(feature).permute(0, 2, 3, 1).contiguous())
36 | bbox_pred.append(reg_header(feature).permute(0, 2, 3, 1).contiguous())
37 |
38 | batch_size = features[0].shape[0]
39 | cls_logits = torch.cat([c.view(c.shape[0], -1) for c in cls_logits], dim=1).view(batch_size, -1, self.cfg.MODEL.NUM_CLASSES)
40 | bbox_pred = torch.cat([l.view(l.shape[0], -1) for l in bbox_pred], dim=1).view(batch_size, -1, 4)
41 |
42 | return cls_logits, bbox_pred
43 |
44 |
45 | @registry.BOX_PREDICTORS.register('SSDBoxPredictor')
46 | class SSDBoxPredictor(BoxPredictor):
47 | def cls_block(self, level, out_channels, boxes_per_location):
48 | return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1)
49 |
50 | def reg_block(self, level, out_channels, boxes_per_location):
51 | return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1)
52 |
53 |
54 | @registry.BOX_PREDICTORS.register('SSDLiteBoxPredictor')
55 | class SSDLiteBoxPredictor(BoxPredictor):
56 | def cls_block(self, level, out_channels, boxes_per_location):
57 | num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS)
58 | if level == num_levels - 1:
59 | return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=1)
60 | return SeparableConv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1)
61 |
62 | def reg_block(self, level, out_channels, boxes_per_location):
63 | num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS)
64 | if level == num_levels - 1:
65 | return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=1)
66 | return SeparableConv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1)
67 |
68 |
69 | def make_box_predictor(cfg):
70 | return registry.BOX_PREDICTORS[cfg.MODEL.BOX_HEAD.PREDICTOR](cfg)
71 |
--------------------------------------------------------------------------------
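The final `view(batch_size, -1, ...)` works because each level contributes `H * W * boxes_per_location` predictions after the `permute`. For the standard SSD300 settings (feature maps of 38, 19, 10, 5, 3, 1 with 4, 6, 6, 6, 4, 4 boxes per location), this yields the canonical 8732 priors, so `cls_logits` is `(N, 8732, num_classes)` and `bbox_pred` is `(N, 8732, 4)`:

```python
# prior count for SSD300: sum over levels of H * W * boxes_per_location
feature_maps = [38, 19, 10, 5, 3, 1]
boxes_per_location = [4, 6, 6, 6, 4, 4]
print(sum(f * f * b for f, b in zip(feature_maps, boxes_per_location)))  # 8732
```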
/ssd/modeling/box_head/inference.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from ssd.structures.container import Container
4 | from ssd.utils.nms import batched_nms
5 |
6 |
7 | class PostProcessor:
8 | def __init__(self, cfg):
9 | super().__init__()
10 | self.cfg = cfg
11 | self.width = cfg.INPUT.IMAGE_SIZE
12 | self.height = cfg.INPUT.IMAGE_SIZE
13 |
14 | def __call__(self, detections):
15 | batches_scores, batches_boxes = detections
16 | device = batches_scores.device
17 | batch_size = batches_scores.size(0)
18 | results = []
19 | for batch_id in range(batch_size):
20 | scores, boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4)
21 | num_boxes = scores.shape[0]
22 | num_classes = scores.shape[1]
23 |
24 | boxes = boxes.view(num_boxes, 1, 4).expand(num_boxes, num_classes, 4)
25 | labels = torch.arange(num_classes, device=device)
26 | labels = labels.view(1, num_classes).expand_as(scores)
27 |
28 | # remove predictions with the background label
29 | boxes = boxes[:, 1:]
30 | scores = scores[:, 1:]
31 | labels = labels[:, 1:]
32 |
33 | # batch everything, by making every class prediction be a separate instance
34 | boxes = boxes.reshape(-1, 4)
35 | scores = scores.reshape(-1)
36 | labels = labels.reshape(-1)
37 |
38 | # remove low scoring boxes
39 | indices = torch.nonzero(scores > self.cfg.TEST.CONFIDENCE_THRESHOLD).squeeze(1)
40 | boxes, scores, labels = boxes[indices], scores[indices], labels[indices]
41 |
42 | boxes[:, 0::2] *= self.width
43 | boxes[:, 1::2] *= self.height
44 |
45 | keep = batched_nms(boxes, scores, labels, self.cfg.TEST.NMS_THRESHOLD)
46 | # keep only topk scoring predictions
47 | keep = keep[:self.cfg.TEST.MAX_PER_IMAGE]
48 | boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
49 |
50 | container = Container(boxes=boxes, labels=labels, scores=scores)
51 | container.img_width = self.width
52 | container.img_height = self.height
53 | results.append(container)
54 | return results
55 |
--------------------------------------------------------------------------------
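`batched_nms` (imported from `ssd.utils.nms`, not shown here) runs NMS per class. A common way to implement it, and a useful mental model, is the class-offset trick: shift each class's boxes by a distinct offset larger than any coordinate, so boxes with different labels can never overlap, then run a single plain NMS. A sketch of that idea using `torchvision.ops.nms` (illustrative; not necessarily this repo's implementation):

```python
import torch
from torchvision.ops import nms

def batched_nms_sketch(boxes, scores, labels, iou_threshold):
    """Per-class NMS via the class-offset trick (illustrative)."""
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    max_coordinate = boxes.max()
    offsets = labels.to(boxes) * (max_coordinate + 1)  # one disjoint region per class
    return nms(boxes + offsets[:, None], scores, iou_threshold)
```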
/ssd/modeling/box_head/loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | import torch
4 |
5 | from ssd.utils import box_utils
6 |
7 |
8 | class MultiBoxLoss(nn.Module):
9 | def __init__(self, neg_pos_ratio):
10 | """Implement SSD MultiBox Loss.
11 |
12 | Basically, MultiBox loss combines classification loss
13 | and Smooth L1 regression loss.
14 | """
15 | super(MultiBoxLoss, self).__init__()
16 | self.neg_pos_ratio = neg_pos_ratio
17 |
18 | def forward(self, confidence, predicted_locations, labels, gt_locations):
19 | """Compute classification loss and smooth l1 loss.
20 |
21 | Args:
22 | confidence (batch_size, num_priors, num_classes): class predictions.
23 | predicted_locations (batch_size, num_priors, 4): predicted locations.
24 | labels (batch_size, num_priors): real labels of all the priors.
25 | gt_locations (batch_size, num_priors, 4): real boxes corresponding all the priors.
26 | """
27 | num_classes = confidence.size(2)
28 | with torch.no_grad():
29 | # derived from cross_entropy=sum(log(p))
30 | loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
31 | mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)
32 |
33 | confidence = confidence[mask, :]
34 | classification_loss = F.cross_entropy(confidence.view(-1, num_classes), labels[mask], reduction='sum')
35 |
36 | pos_mask = labels > 0
37 | predicted_locations = predicted_locations[pos_mask, :].view(-1, 4)
38 | gt_locations = gt_locations[pos_mask, :].view(-1, 4)
39 | smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum')
40 | num_pos = gt_locations.size(0)
41 | return smooth_l1_loss / num_pos, classification_loss / num_pos
42 |
--------------------------------------------------------------------------------
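A shape-level sketch of calling the loss; the random tensors here are only to show the expected inputs and scalar outputs, since in training `labels` and `gt_locations` come from the target transform that matches priors to ground truth:

```python
import torch
from ssd.modeling.box_head.loss import MultiBoxLoss

criterion = MultiBoxLoss(neg_pos_ratio=3)
N, P, C = 2, 8732, 21  # batch size, priors, classes (VOC: 20 + background)
confidence = torch.randn(N, P, C)
predicted_locations = torch.randn(N, P, 4)
labels = torch.randint(0, C, (N, P))  # 0 is the background class
gt_locations = torch.randn(N, P, 4)
reg_loss, cls_loss = criterion(confidence, predicted_locations, labels, gt_locations)
# both are scalar tensors, normalized by the number of positive priors
```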
/ssd/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | from .ssd_detector import SSDDetector
2 |
3 | _DETECTION_META_ARCHITECTURES = {
4 | "SSDDetector": SSDDetector
5 | }
6 |
7 |
8 | def build_detection_model(cfg):
9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
10 | return meta_arch(cfg)
11 |
--------------------------------------------------------------------------------
/ssd/modeling/detector/ssd_detector.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 | from ssd.modeling.backbone import build_backbone
4 | from ssd.modeling.box_head import build_box_head
5 |
6 |
7 | class SSDDetector(nn.Module):
8 | def __init__(self, cfg):
9 | super().__init__()
10 | self.cfg = cfg
11 | self.backbone = build_backbone(cfg)
12 | self.box_head = build_box_head(cfg)
13 |
14 | def forward(self, images, targets=None):
15 | features = self.backbone(images)
16 | detections, detector_losses = self.box_head(features, targets)
17 | if self.training:
18 | return detector_losses
19 | return detections
20 |
--------------------------------------------------------------------------------
/ssd/modeling/registry.py:
--------------------------------------------------------------------------------
1 | from ssd.utils.registry import Registry
2 |
3 | BACKBONES = Registry()
4 | BOX_HEADS = Registry()
5 | BOX_PREDICTORS = Registry()
6 |
--------------------------------------------------------------------------------
/ssd/solver/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/solver/__init__.py
--------------------------------------------------------------------------------
/ssd/solver/build.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .lr_scheduler import WarmupMultiStepLR
4 |
5 |
6 | def make_optimizer(cfg, model, lr=None):
7 | lr = cfg.SOLVER.BASE_LR if lr is None else lr
8 | return torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.SOLVER.MOMENTUM, weight_decay=cfg.SOLVER.WEIGHT_DECAY)
9 |
10 |
11 | def make_lr_scheduler(cfg, optimizer, milestones=None):
12 | return WarmupMultiStepLR(optimizer=optimizer,
13 | milestones=cfg.SOLVER.LR_STEPS if milestones is None else milestones,
14 | gamma=cfg.SOLVER.GAMMA,
15 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR,
16 | warmup_iters=cfg.SOLVER.WARMUP_ITERS)
17 |
--------------------------------------------------------------------------------
/ssd/solver/lr_scheduler.py:
--------------------------------------------------------------------------------
1 | from bisect import bisect_right
2 |
3 | from torch.optim.lr_scheduler import _LRScheduler
4 |
5 |
6 | class WarmupMultiStepLR(_LRScheduler):
7 | def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3,
8 | warmup_iters=500, last_epoch=-1):
9 | if list(milestones) != sorted(milestones):
10 | raise ValueError(
11 | "Milestones should be a list of "
12 | "increasing integers. Got {}".format(milestones)
13 | )
14 |
15 | self.milestones = milestones
16 | self.gamma = gamma
17 | self.warmup_factor = warmup_factor
18 | self.warmup_iters = warmup_iters
19 | super().__init__(optimizer, last_epoch)
20 |
21 | def get_lr(self):
22 | warmup_factor = 1
23 | if self.last_epoch < self.warmup_iters:
24 | alpha = float(self.last_epoch) / self.warmup_iters
25 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha
26 | return [
27 | base_lr
28 | * warmup_factor
29 | * self.gamma ** bisect_right(self.milestones, self.last_epoch)
30 | for base_lr in self.base_lrs
31 | ]
32 |
--------------------------------------------------------------------------------
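During the first `warmup_iters` iterations the learning rate is linearly interpolated from `base_lr * warmup_factor` up to `base_lr`; afterwards it follows the usual multi-step decay by `gamma` at each milestone. A small numeric sketch (the milestone values here are illustrative):

```python
import torch
from ssd.solver.lr_scheduler import WarmupMultiStepLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=1e-3)
scheduler = WarmupMultiStepLR(optimizer, milestones=[80000, 100000],
                              gamma=0.1, warmup_factor=1.0 / 3, warmup_iters=500)
# iteration 0:    lr = 1e-3 * 1/3                ~ 3.33e-4
# iteration 250:  lr = 1e-3 * (1/3 * 0.5 + 0.5)  ~ 6.67e-4
# iteration 500+: lr = 1e-3, then 1e-4 after 80k and 1e-5 after 100k
```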
/ssd/structures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lufficc/SSD/68dc0a20efaf3997e58b616afaaaa21bf8ca3c05/ssd/structures/__init__.py
--------------------------------------------------------------------------------
/ssd/structures/container.py:
--------------------------------------------------------------------------------
1 | class Container:
2 | """
3 | Helper class for managing boxes, labels, etc.
4 | It does not inherit from dict because `default_collate` converts dict subclasses back to plain dicts.
5 | """
6 |
7 | def __init__(self, *args, **kwargs):
8 | self._data_dict = dict(*args, **kwargs)
9 |
10 | def __setattr__(self, key, value):
11 | object.__setattr__(self, key, value)
12 |
13 | def __getitem__(self, key):
14 | return self._data_dict[key]
15 |
16 | def __iter__(self):
17 | return self._data_dict.__iter__()
18 |
19 | def __setitem__(self, key, value):
20 | self._data_dict[key] = value
21 |
22 | def _call(self, name, *args, **kwargs):
23 | keys = list(self._data_dict.keys())
24 | for key in keys:
25 | value = self._data_dict[key]
26 | if hasattr(value, name):
27 | self._data_dict[key] = getattr(value, name)(*args, **kwargs)
28 | return self
29 |
30 | def to(self, *args, **kwargs):
31 | return self._call('to', *args, **kwargs)
32 |
33 | def numpy(self):
34 | return self._call('numpy')
35 |
36 | def resize(self, size):
37 | """resize boxes
38 | Args:
39 | size: (width, height)
40 | Returns:
41 | self
42 | """
43 | img_width = getattr(self, 'img_width', -1)
44 | img_height = getattr(self, 'img_height', -1)
45 | assert img_width > 0 and img_height > 0
46 | assert 'boxes' in self._data_dict
47 | boxes = self._data_dict['boxes']
48 | new_width, new_height = size
49 | boxes[:, 0::2] *= (new_width / img_width)
50 | boxes[:, 1::2] *= (new_height / img_height)
51 | return self
52 |
53 | def __repr__(self):
54 | return self._data_dict.__repr__()
55 |
--------------------------------------------------------------------------------
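A minimal sketch of how `Container` is used by the post-processor: tensor values live in `_data_dict` and are batch-converted by `_call`, while scalars such as `img_width` are stored as plain attributes:

```python
import torch
from ssd.structures.container import Container

c = Container(boxes=torch.tensor([[10.0, 20.0, 110.0, 220.0]]),
              labels=torch.tensor([15]),
              scores=torch.tensor([0.9]))
c.img_width, c.img_height = 300, 300  # plain attributes, not dict entries
c.resize((600, 600))                  # scales boxes in place to the new size
print(c['boxes'])                     # tensor([[ 20.,  40., 220., 440.]])
c.numpy()                             # _call('numpy') converts every tensor value
```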
/ssd/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .misc import *
2 |
--------------------------------------------------------------------------------
/ssd/utils/box_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import math
3 |
4 |
5 | def convert_locations_to_boxes(locations, priors, center_variance,
6 | size_variance):
7 | """Convert regressional location results of SSD into boxes in the form of (center_x, center_y, h, w).
8 |
9 | The conversion:
10 | $$predicted\_center \cdot center\_variance = \frac{real\_center - prior\_center}{prior\_hw}$$
11 | $$\exp(predicted\_hw \cdot size\_variance) = \frac{real\_hw}{prior\_hw}$$
12 | We do it in the inverse direction here.
13 | Args:
14 | locations (batch_size, num_priors, 4): the regression output of SSD.
15 | priors (num_priors, 4) or (batch_size/1, num_priors, 4): prior boxes.
16 | center_variance: a float used to change the scale of center.
17 | size_variance: a float used to change the scale of size.
18 | Returns:
19 | boxes: boxes in center form (center_x, center_y, w, h). All the values
20 | are relative to the image size.
21 | """
22 | # priors can have one dimension less.
23 | if priors.dim() + 1 == locations.dim():
24 | priors = priors.unsqueeze(0)
25 | return torch.cat([
26 | locations[..., :2] * center_variance * priors[..., 2:] + priors[..., :2],
27 | torch.exp(locations[..., 2:] * size_variance) * priors[..., 2:]
28 | ], dim=locations.dim() - 1)
29 |
30 |
31 | def convert_boxes_to_locations(center_form_boxes, center_form_priors, center_variance, size_variance):
32 | # priors can have one dimension less
33 | if center_form_priors.dim() + 1 == center_form_boxes.dim():
34 | center_form_priors = center_form_priors.unsqueeze(0)
35 | return torch.cat([
36 | (center_form_boxes[..., :2] - center_form_priors[..., :2]) / center_form_priors[..., 2:] / center_variance,
37 | torch.log(center_form_boxes[..., 2:] / center_form_priors[..., 2:]) / size_variance
38 | ], dim=center_form_boxes.dim() - 1)
39 |
40 |
41 | def area_of(left_top, right_bottom) -> torch.Tensor:
42 | """Compute the areas of rectangles given two corners.
43 |
44 | Args:
45 | left_top (N, 2): left top corner.
46 | right_bottom (N, 2): right bottom corner.
47 |
48 | Returns:
49 | area (N): return the area.
50 | """
51 | hw = torch.clamp(right_bottom - left_top, min=0.0)
52 | return hw[..., 0] * hw[..., 1]
53 |
54 |
55 | def iou_of(boxes0, boxes1, eps=1e-5):
56 | """Return intersection-over-union (Jaccard index) of boxes.
57 |
58 | Args:
59 | boxes0 (N, 4): ground truth boxes.
60 | boxes1 (N or 1, 4): predicted boxes.
61 | eps: a small number to avoid 0 as denominator.
62 | Returns:
63 | iou (N): IoU values.
64 | """
65 | overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2])
66 | overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:])
67 |
68 | overlap_area = area_of(overlap_left_top, overlap_right_bottom)
69 | area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
70 | area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
71 | return overlap_area / (area0 + area1 - overlap_area + eps)
72 |
73 |
74 | def assign_priors(gt_boxes, gt_labels, corner_form_priors,
75 | iou_threshold):
76 | """Assign ground truth boxes and targets to priors.
77 |
78 | Args:
79 | gt_boxes (num_targets, 4): ground truth boxes.
80 | gt_labels (num_targets): labels of targets.
81 | corner_form_priors (num_priors, 4): corner form priors.
82 | Returns:
83 | boxes (num_priors, 4): real values for priors.
84 | labels (num_priors): labels for priors.
85 | """
86 | # size: num_priors x num_targets
87 | ious = iou_of(gt_boxes.unsqueeze(0), corner_form_priors.unsqueeze(1))
88 | # size: num_priors
89 | best_target_per_prior, best_target_per_prior_index = ious.max(1)
90 | # size: num_targets
91 | best_prior_per_target, best_prior_per_target_index = ious.max(0)
92 |
93 | for target_index, prior_index in enumerate(best_prior_per_target_index):
94 | best_target_per_prior_index[prior_index] = target_index
95 | # 2.0 is used to make sure every target has a prior assigned
96 | best_target_per_prior.index_fill_(0, best_prior_per_target_index, 2)
97 | # size: num_priors
98 | labels = gt_labels[best_target_per_prior_index]
99 | labels[best_target_per_prior < iou_threshold] = 0  # the background id
100 | boxes = gt_boxes[best_target_per_prior_index]
101 | return boxes, labels
102 |
103 |
104 | def hard_negative_mining(loss, labels, neg_pos_ratio):
105 | """
106 | Suppresses the presence of a large number of negative predictions.
107 | It works on the image level, not the batch level.
108 | For any example/image, it keeps all the positive predictions and
109 | cuts the number of negative predictions so that the ratio
110 | between negative and positive examples is at most
111 | the given ratio for an image.
112 |
113 | Args:
114 | loss (N, num_priors): the loss for each example.
115 | labels (N, num_priors): the labels.
116 | neg_pos_ratio: the ratio between the negative examples and positive examples.
117 | """
118 | pos_mask = labels > 0
119 | num_pos = pos_mask.long().sum(dim=1, keepdim=True)
120 | num_neg = num_pos * neg_pos_ratio
121 |
122 | loss[pos_mask] = -math.inf
123 | _, indexes = loss.sort(dim=1, descending=True)
124 | _, orders = indexes.sort(dim=1)
125 | neg_mask = orders < num_neg
126 | return pos_mask | neg_mask
127 |
128 |
129 | def center_form_to_corner_form(locations):
130 | return torch.cat([locations[..., :2] - locations[..., 2:] / 2,
131 | locations[..., :2] + locations[..., 2:] / 2], locations.dim() - 1)
132 |
133 |
134 | def corner_form_to_center_form(boxes):
135 | return torch.cat([
136 | (boxes[..., :2] + boxes[..., 2:]) / 2,
137 | boxes[..., 2:] - boxes[..., :2]
138 | ], boxes.dim() - 1)
139 |
--------------------------------------------------------------------------------
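Two quick numeric checks: `iou_of` on a pair of unit-offset squares, and `hard_negative_mining` keeping the single positive plus the three highest-loss negatives (note that it overwrites `loss` at positive positions in place):

```python
import torch
from ssd.utils import box_utils

a = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
b = torch.tensor([[1.0, 1.0, 3.0, 3.0]])
print(box_utils.iou_of(a, b))  # intersection 1, union 7 -> ~0.1429

loss = torch.tensor([[0.9, 0.1, 0.8, 0.3, 0.7]])
labels = torch.tensor([[1, 0, 0, 0, 0]])  # one positive prior
mask = box_utils.hard_negative_mining(loss, labels, neg_pos_ratio=3)
print(mask)  # [[True, False, True, True, True]]: positive + 3 hardest negatives
```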
/ssd/utils/checkpoint.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | import torch
5 | from torch.nn.parallel import DistributedDataParallel
6 |
7 | from ssd.utils.model_zoo import cache_url
8 |
9 |
10 | class CheckPointer:
11 | _last_checkpoint_name = 'last_checkpoint.txt'
12 |
13 | def __init__(self,
14 | model,
15 | optimizer=None,
16 | scheduler=None,
17 | save_dir="",
18 | save_to_disk=None,
19 | logger=None):
20 | self.model = model
21 | self.optimizer = optimizer
22 | self.scheduler = scheduler
23 | self.save_dir = save_dir
24 | self.save_to_disk = save_to_disk
25 | if logger is None:
26 | logger = logging.getLogger(__name__)
27 | self.logger = logger
28 |
29 | def save(self, name, **kwargs):
30 | if not self.save_dir:
31 | return
32 |
33 | if not self.save_to_disk:
34 | return
35 |
36 | data = {}
37 | if isinstance(self.model, DistributedDataParallel):
38 | data['model'] = self.model.module.state_dict()
39 | else:
40 | data['model'] = self.model.state_dict()
41 | if self.optimizer is not None:
42 | data["optimizer"] = self.optimizer.state_dict()
43 | if self.scheduler is not None:
44 | data["scheduler"] = self.scheduler.state_dict()
45 | data.update(kwargs)
46 |
47 | save_file = os.path.join(self.save_dir, "{}.pth".format(name))
48 | self.logger.info("Saving checkpoint to {}".format(save_file))
49 | torch.save(data, save_file)
50 |
51 | self.tag_last_checkpoint(save_file)
52 |
53 | def load(self, f=None, use_latest=True):
54 | if self.has_checkpoint() and use_latest:
55 | # override argument with existing checkpoint
56 | f = self.get_checkpoint_file()
57 | if not f:
58 | # no checkpoint could be found
59 | self.logger.info("No checkpoint found.")
60 | return {}
61 |
62 | self.logger.info("Loading checkpoint from {}".format(f))
63 | checkpoint = self._load_file(f)
64 | model = self.model
65 | if isinstance(model, DistributedDataParallel):
66 | model = self.model.module
67 |
68 | model.load_state_dict(checkpoint.pop("model"))
69 | if "optimizer" in checkpoint and self.optimizer:
70 | self.logger.info("Loading optimizer from {}".format(f))
71 | self.optimizer.load_state_dict(checkpoint.pop("optimizer"))
72 | if "scheduler" in checkpoint and self.scheduler:
73 | self.logger.info("Loading scheduler from {}".format(f))
74 | self.scheduler.load_state_dict(checkpoint.pop("scheduler"))
75 |
76 | # return any further checkpoint data
77 | return checkpoint
78 |
79 | def get_checkpoint_file(self):
80 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name)
81 | try:
82 | with open(save_file, "r") as f:
83 | last_saved = f.read()
84 | last_saved = last_saved.strip()
85 | except IOError:
86 | # if file doesn't exist, maybe because it has just been
87 | # deleted by a separate process
88 | last_saved = ""
89 | return last_saved
90 |
91 | def has_checkpoint(self):
92 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name)
93 | return os.path.exists(save_file)
94 |
95 | def tag_last_checkpoint(self, last_filename):
96 | save_file = os.path.join(self.save_dir, self._last_checkpoint_name)
97 | with open(save_file, "w") as f:
98 | f.write(last_filename)
99 |
100 | def _load_file(self, f):
101 | # download url files
102 | if f.startswith("http"):
103 | # if the file is a url path, download it and cache it
104 | cached_f = cache_url(f)
105 | self.logger.info("url {} cached in {}".format(f, cached_f))
106 | f = cached_f
107 | return torch.load(f, map_location=torch.device("cpu"))
108 |
--------------------------------------------------------------------------------
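A minimal save/resume sketch (the path and the extra `iteration` key are illustrative): extra keyword arguments passed to `save` are merged into the checkpoint dict and handed back by `load` after the model/optimizer/scheduler states are consumed:

```python
import logging
import torch
from ssd.utils.checkpoint import CheckPointer
from ssd.utils.misc import mkdir

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

mkdir('outputs/demo')
checkpointer = CheckPointer(model, optimizer, save_dir='outputs/demo',
                            save_to_disk=True, logger=logging.getLogger('demo'))
checkpointer.save('model_001000', iteration=1000)  # extra kwargs land in the .pth dict
extra = checkpointer.load()                        # resumes via last_checkpoint.txt
print(extra.get('iteration', 0))                   # 1000
```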
/ssd/utils/dist_util.py:
--------------------------------------------------------------------------------
1 | import pickle
2 |
3 | import torch
4 | import torch.distributed as dist
5 |
6 |
7 | def get_world_size():
8 | if not dist.is_available():
9 | return 1
10 | if not dist.is_initialized():
11 | return 1
12 | return dist.get_world_size()
13 |
14 |
15 | def get_rank():
16 | if not dist.is_available():
17 | return 0
18 | if not dist.is_initialized():
19 | return 0
20 | return dist.get_rank()
21 |
22 |
23 | def is_main_process():
24 | return get_rank() == 0
25 |
26 |
27 | def synchronize():
28 | """
29 | Helper function to synchronize (barrier) among all processes when
30 | using distributed training
31 | """
32 | if not dist.is_available():
33 | return
34 | if not dist.is_initialized():
35 | return
36 | world_size = dist.get_world_size()
37 | if world_size == 1:
38 | return
39 | dist.barrier()
40 |
41 |
42 | def _encode(encoded_data, data):
43 | # gets a byte representation for the data
44 | encoded_bytes = pickle.dumps(data)
45 | # convert this byte string into a byte tensor
46 | storage = torch.ByteStorage.from_buffer(encoded_bytes)
47 | tensor = torch.ByteTensor(storage).to("cuda")
48 | # encoding: first byte is the size and then rest is the data
49 | s = tensor.numel()
50 | assert s <= 255, "Can't encode data greater than 255 bytes"
51 | # put the encoded data in encoded_data
52 | encoded_data[0] = s
53 | encoded_data[1: (s + 1)] = tensor
54 |
55 |
56 | def all_gather(data):
57 | """
58 | Run all_gather on arbitrary picklable data (not necessarily tensors)
59 | Args:
60 | data: any picklable object
61 | Returns:
62 | list[data]: list of data gathered from each rank
63 | """
64 | world_size = get_world_size()
65 | if world_size == 1:
66 | return [data]
67 |
68 | # serialized to a Tensor
69 | buffer = pickle.dumps(data)
70 | storage = torch.ByteStorage.from_buffer(buffer)
71 | tensor = torch.ByteTensor(storage).to("cuda")
72 |
73 | # obtain Tensor size of each rank
74 | local_size = torch.LongTensor([tensor.numel()]).to("cuda")
75 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)]
76 | dist.all_gather(size_list, local_size)
77 | size_list = [int(size.item()) for size in size_list]
78 | max_size = max(size_list)
79 |
80 | # receiving Tensor from all ranks
81 | # we pad the tensor because torch all_gather does not support
82 | # gathering tensors of different shapes
83 | tensor_list = []
84 | for _ in size_list:
85 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda"))
86 | if local_size != max_size:
87 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda")
88 | tensor = torch.cat((tensor, padding), dim=0)
89 | dist.all_gather(tensor_list, tensor)
90 |
91 | data_list = []
92 | for size, tensor in zip(size_list, tensor_list):
93 | buffer = tensor.cpu().numpy().tobytes()[:size]
94 | data_list.append(pickle.loads(buffer))
95 |
96 | return data_list
97 |
--------------------------------------------------------------------------------
/ssd/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import sys
4 |
5 |
6 | def setup_logger(name, distributed_rank, save_dir=None):
7 | logger = logging.getLogger(name)
8 | logger.setLevel(logging.DEBUG)
9 | # don't log results for the non-master process
10 | if distributed_rank > 0:
11 | return logger
12 | stream_handler = logging.StreamHandler(stream=sys.stdout)
13 | stream_handler.setLevel(logging.DEBUG)
14 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")
15 | stream_handler.setFormatter(formatter)
16 | logger.addHandler(stream_handler)
17 | if save_dir:
18 | fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
19 | fh.setLevel(logging.DEBUG)
20 | fh.setFormatter(formatter)
21 | logger.addHandler(fh)
22 | return logger
23 |
--------------------------------------------------------------------------------
/ssd/utils/metric_logger.py:
--------------------------------------------------------------------------------
1 | from collections import deque, defaultdict
2 | import numpy as np
3 | import torch
4 |
5 |
6 | class SmoothedValue:
7 | """Track a series of values and provide access to smoothed values over a
8 | window or the global series average.
9 | """
10 |
11 | def __init__(self, window_size=10):
12 | self.deque = deque(maxlen=window_size)
13 | self.value = np.nan
14 | self.series = []
15 | self.total = 0.0
16 | self.count = 0
17 |
18 | def update(self, value):
19 | self.deque.append(value)
20 | self.series.append(value)
21 | self.count += 1
22 | self.total += value
23 | self.value = value
24 |
25 | @property
26 | def median(self):
27 | values = np.array(self.deque)
28 | return np.median(values)
29 |
30 | @property
31 | def avg(self):
32 | values = np.array(self.deque)
33 | return np.mean(values)
34 |
35 | @property
36 | def global_avg(self):
37 | return self.total / self.count
38 |
39 |
40 | class MetricLogger:
41 | def __init__(self, delimiter=", "):
42 | self.meters = defaultdict(SmoothedValue)
43 | self.delimiter = delimiter
44 |
45 | def update(self, **kwargs):
46 | for k, v in kwargs.items():
47 | if isinstance(v, torch.Tensor):
48 | v = v.item()
49 | assert isinstance(v, (float, int))
50 | self.meters[k].update(v)
51 |
52 | def __getattr__(self, attr):
53 | if attr in self.meters:
54 | return self.meters[attr]
55 | if attr in self.__dict__:
56 | return self.__dict__[attr]
57 | raise AttributeError("'{}' object has no attribute '{}'".format(
58 | type(self).__name__, attr))
59 |
60 | def __str__(self):
61 | loss_str = []
62 | for name, meter in self.meters.items():
63 | loss_str.append(
64 | "{}: {:.3f} ({:.3f})".format(name, meter.avg, meter.global_avg)
65 | )
66 | return self.delimiter.join(loss_str)
67 |
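68 |
69 | if __name__ == '__main__':
70 |     # Small self-contained sketch of the intended training-loop usage:
71 |     # update with per-step losses, then read windowed and global statistics.
72 |     meters = MetricLogger()
73 |     for loss in (0.9, 0.7, 0.6):
74 |         meters.update(total_loss=loss, reg_loss=loss / 2)
75 |     print(meters)                        # e.g. total_loss: 0.733 (0.733), reg_loss: 0.367 (0.367)
76 |     print(meters.total_loss.median)      # median over the smoothing window
77 |     print(meters.total_loss.global_avg)  # average over all updates so far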
--------------------------------------------------------------------------------
/ssd/utils/misc.py:
--------------------------------------------------------------------------------
1 | import errno
2 | import os
3 |
4 |
5 | def str2bool(s):
6 | return s.lower() in ('true', '1')
7 |
8 |
9 | def mkdir(path):
10 | try:
11 | os.makedirs(path)
12 | except OSError as e:
13 | if e.errno != errno.EEXIST:
14 | raise
15 |
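16 |
17 | if __name__ == '__main__':
18 |     # Quick sketch of the two helpers ('demo_dir' is just a scratch name).
19 |     # Note str2bool only recognizes 'true' and '1' (case-insensitive);
20 |     # any other string, including 'yes', maps to False.
21 |     print(str2bool('True'), str2bool('1'), str2bool('yes'))  # True True False
22 |     mkdir('demo_dir')  # creates the directory
23 |     mkdir('demo_dir')  # no error if it already exists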
--------------------------------------------------------------------------------
/ssd/utils/model_zoo.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import os
3 | import sys
4 |
5 | import torch
6 |
7 | from ssd.utils.dist_util import is_main_process, synchronize
8 |
9 | from torch.hub import download_url_to_file
10 | from torch.hub import urlparse
11 | from torch.hub import HASH_REGEX
12 |
13 |
14 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py
15 | # but with a few improvements and modifications
16 | def cache_url(url, model_dir=None, progress=True):
17 | r"""Loads the Torch serialized object at the given URL.
18 | If the object is already present in `model_dir`, it's deserialized and
19 | returned. The filename part of the URL should follow the naming convention
20 |     ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
21 | digits of the SHA256 hash of the contents of the file. The hash is used to
22 | ensure unique names and to verify the contents of the file.
23 | The default value of `model_dir` is ``$TORCH_HOME/models`` where
24 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
25 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
26 | Args:
27 | url (string): URL of the object to download
28 | model_dir (string, optional): directory in which to save the object
29 | progress (bool, optional): whether or not to display a progress bar to stderr
30 | Example:
31 |         >>> cached_file = ssd.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
32 | """
33 | if model_dir is None:
34 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch"))
35 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models"))
36 | if not os.path.exists(model_dir):
37 | os.makedirs(model_dir)
38 | parts = urlparse(url)
39 | filename = os.path.basename(parts.path)
40 | if filename == "model_final.pkl":
41 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename
42 | # so make the full path the filename by replacing / with _
43 | filename = parts.path.replace("/", "_")
44 | cached_file = os.path.join(model_dir, filename)
45 | if not os.path.exists(cached_file) and is_main_process():
46 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
47 | hash_prefix = HASH_REGEX.search(filename)
48 | if hash_prefix is not None:
49 | hash_prefix = hash_prefix.group(1)
50 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
51 | # which matches the hash PyTorch uses. So we skip the hash matching
52 | # if the hash_prefix is less than 6 characters
53 | if len(hash_prefix) < 6:
54 | hash_prefix = None
55 | download_url_to_file(url, cached_file, hash_prefix, progress=progress)
56 | synchronize()
57 | return cached_file
58 |
59 |
60 | def load_state_dict_from_url(url, map_location='cpu'):
61 | cached_file = cache_url(url)
62 | return torch.load(cached_file, map_location=map_location)
63 |
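64 | # Usage sketch: backbones fetch their pre-trained weights through this helper.
65 | # The URL below is the torchvision ResNet-18 checkpoint from the docstring
66 | # above; `model` is a placeholder. Under distributed training only the main
67 | # process downloads, while the other ranks wait at synchronize() and then
68 | # read the cached copy.
69 | #
70 | #     state_dict = load_state_dict_from_url(
71 | #         'https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
72 | #     model.load_state_dict(state_dict)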
--------------------------------------------------------------------------------
/ssd/utils/nms.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import warnings
3 | from distutils.version import LooseVersion
4 |
5 | import torch
6 | import torchvision
7 |
8 | # compare parsed versions, not raw strings ('0.10.0' < '0.3.0' as strings)
9 | if LooseVersion(torchvision.__version__) >= LooseVersion('0.3.0'):
10 |     _nms = torchvision.ops.nms
11 | else:
12 |     warnings.warn('No NMS is available. Please upgrade torchvision to 0.3.0+')
13 |     sys.exit(-1)
14 | def nms(boxes, scores, nms_thresh):
15 | """ Performs non-maximum suppression, run on GPU or CPU according to
16 | boxes's device.
17 | Args:
18 | boxes(Tensor[N, 4]): boxes in (x1, y1, x2, y2) format, use absolute coordinates(or relative coordinates)
19 | scores(Tensor[N]): scores
20 | nms_thresh(float): thresh
21 | Returns:
22 | indices kept.
23 | """
24 | keep = _nms(boxes, scores, nms_thresh)
25 | return keep
26 |
27 |
28 | def batched_nms(boxes, scores, idxs, iou_threshold):
29 | """
30 | Performs non-maximum suppression in a batched fashion.
31 |
32 |     Each index value corresponds to a category, and NMS
33 | will not be applied between elements of different categories.
34 |
35 | Parameters
36 | ----------
37 | boxes : Tensor[N, 4]
38 | boxes where NMS will be performed. They
39 | are expected to be in (x1, y1, x2, y2) format
40 | scores : Tensor[N]
41 | scores for each one of the boxes
42 | idxs : Tensor[N]
43 | indices of the categories for each one of the boxes.
44 |     iou_threshold : float
45 |         discards all overlapping boxes
46 |         with IoU > iou_threshold
47 |
48 | Returns
49 | -------
50 | keep : Tensor
51 | int64 tensor with the indices of
52 | the elements that have been kept by NMS, sorted
53 | in decreasing order of scores
54 | """
55 | if boxes.numel() == 0:
56 | return torch.empty((0,), dtype=torch.int64, device=boxes.device)
57 |     # strategy: in order to perform NMS independently per class,
58 | # we add an offset to all the boxes. The offset is dependent
59 | # only on the class idx, and is large enough so that boxes
60 | # from different classes do not overlap
61 | max_coordinate = boxes.max()
62 | offsets = idxs.to(boxes) * (max_coordinate + 1)
63 | boxes_for_nms = boxes + offsets[:, None]
64 | keep = nms(boxes_for_nms, scores, iou_threshold)
65 | return keep
66 |
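67 |
68 | if __name__ == '__main__':
69 |     # Minimal self-check sketch: boxes 0 and 1 overlap heavily within class 0,
70 |     # so the lower-scoring one is suppressed; box 2 has identical coordinates
71 |     # but a different class index, so the per-class offset keeps it.
72 |     boxes = torch.tensor([[0., 0., 10., 10.],
73 |                           [1., 1., 11., 11.],
74 |                           [0., 0., 10., 10.]])
75 |     scores = torch.tensor([0.9, 0.8, 0.7])
76 |     idxs = torch.tensor([0, 0, 1])
77 |     print(batched_nms(boxes, scores, idxs, iou_threshold=0.5))  # tensor([0, 2])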
--------------------------------------------------------------------------------
/ssd/utils/registry.py:
--------------------------------------------------------------------------------
1 | def _register_generic(module_dict, module_name, module):
2 | assert module_name not in module_dict
3 | module_dict[module_name] = module
4 |
5 |
6 | class Registry(dict):
7 | """
8 |     A helper class for registering and managing modules. It extends a
9 |     dictionary and provides a register function.
10 |     E.g., creating a registry:
11 |         some_registry = Registry({"default": default_module})
12 |     There are two ways of registering new modules:
13 |     1) calling the register function directly:
14 | def foo():
15 | ...
16 | some_registry.register("foo_module", foo)
17 |     2) using it as a decorator when declaring the module:
18 | @some_registry.register("foo_module")
19 | @some_registry.register("foo_module_nickname")
20 | def foo():
21 | ...
22 |     Modules are then accessed like dictionary entries, e.g.:
23 | f = some_registry["foo_module"]
24 | """
25 |
26 | def __init__(self, *args, **kwargs):
27 | super(Registry, self).__init__(*args, **kwargs)
28 |
29 | def register(self, module_name, module=None):
30 | # used as function call
31 | if module is not None:
32 | _register_generic(self, module_name, module)
33 | return
34 |
35 | # used as decorator
36 | def register_fn(fn):
37 | _register_generic(self, module_name, fn)
38 | return fn
39 |
40 | return register_fn
41 |
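42 |
43 | if __name__ == '__main__':
44 |     # Sketch of both registration styles from the docstring (names made up):
45 |     DEMO_REGISTRY = Registry()
46 |
47 |     @DEMO_REGISTRY.register('square')
48 |     def square(x):
49 |         return x * x
50 |
51 |     DEMO_REGISTRY.register('cube', lambda x: x ** 3)
52 |     print(DEMO_REGISTRY['square'](3), DEMO_REGISTRY['cube'](3))  # 9 27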
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 |
5 | import torch
6 | import torch.utils.data
7 |
8 | from ssd.config import cfg
9 | from ssd.engine.inference import do_evaluation
10 | from ssd.modeling.detector import build_detection_model
11 | from ssd.utils import dist_util
12 | from ssd.utils.checkpoint import CheckPointer
13 | from ssd.utils.dist_util import synchronize
14 | from ssd.utils.logger import setup_logger
15 |
16 |
17 | def evaluation(cfg, ckpt, distributed):
18 | logger = logging.getLogger("SSD.inference")
19 |
20 | model = build_detection_model(cfg)
21 | checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger)
22 | device = torch.device(cfg.MODEL.DEVICE)
23 | model.to(device)
24 | checkpointer.load(ckpt, use_latest=ckpt is None)
25 | do_evaluation(cfg, model, distributed)
26 |
27 |
28 | def main():
29 | parser = argparse.ArgumentParser(description='SSD Evaluation on VOC and COCO dataset.')
30 | parser.add_argument(
31 | "--config-file",
32 | default="",
33 | metavar="FILE",
34 | help="path to config file",
35 | type=str,
36 | )
37 | parser.add_argument("--local_rank", type=int, default=0)
38 | parser.add_argument(
39 | "--ckpt",
40 | help="The path to the checkpoint for test, default is the latest checkpoint.",
41 | default=None,
42 | type=str,
43 | )
44 |
45 | parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.")
46 |
47 | parser.add_argument(
48 | "opts",
49 | help="Modify config options using the command-line",
50 | default=None,
51 | nargs=argparse.REMAINDER,
52 | )
53 | args = parser.parse_args()
54 |
55 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
56 | distributed = num_gpus > 1
57 |
58 | if torch.cuda.is_available():
59 | # This flag allows you to enable the inbuilt cudnn auto-tuner to
60 | # find the best algorithm to use for your hardware.
61 | torch.backends.cudnn.benchmark = True
62 | if distributed:
63 | torch.cuda.set_device(args.local_rank)
64 | torch.distributed.init_process_group(backend="nccl", init_method="env://")
65 | synchronize()
66 |
67 | cfg.merge_from_file(args.config_file)
68 | cfg.merge_from_list(args.opts)
69 | cfg.freeze()
70 |
71 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
72 | logger.info("Using {} GPUs".format(num_gpus))
73 | logger.info(args)
74 |
75 | logger.info("Loaded configuration file {}".format(args.config_file))
76 | with open(args.config_file, "r") as cf:
77 | config_str = "\n" + cf.read()
78 | logger.info(config_str)
79 | logger.info("Running with config:\n{}".format(cfg))
80 | evaluation(cfg, ckpt=args.ckpt, distributed=distributed)
81 |
82 |
83 | if __name__ == '__main__':
84 | main()
85 |
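86 | # Launch sketch (config paths come from this repo's configs/ directory):
87 | #
88 | #   single GPU:
89 | #     python test.py --config-file configs/vgg_ssd300_voc0712.yaml
90 | #   multiple GPUs (e.g. 4) -- torch.distributed.launch sets WORLD_SIZE and
91 | #   passes --local_rank, which main() reads above:
92 | #     python -m torch.distributed.launch --nproc_per_node=4 test.py \
93 | #         --config-file configs/vgg_ssd300_voc0712.yaml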
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import os
4 |
5 | import torch
6 | import torch.distributed as dist
7 |
8 | from ssd.engine.inference import do_evaluation
9 | from ssd.config import cfg
10 | from ssd.data.build import make_data_loader
11 | from ssd.engine.trainer import do_train
12 | from ssd.modeling.detector import build_detection_model
13 | from ssd.solver.build import make_optimizer, make_lr_scheduler
14 | from ssd.utils import dist_util, mkdir
15 | from ssd.utils.checkpoint import CheckPointer
16 | from ssd.utils.dist_util import synchronize
17 | from ssd.utils.logger import setup_logger
18 | from ssd.utils.misc import str2bool
19 |
20 |
21 | def train(cfg, args):
22 | logger = logging.getLogger('SSD.trainer')
23 | model = build_detection_model(cfg)
24 | device = torch.device(cfg.MODEL.DEVICE)
25 | model.to(device)
26 | if args.distributed:
27 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank)
28 |
29 |     lr = cfg.SOLVER.LR * args.num_gpus  # scale the learning rate linearly with the number of GPUs
30 | optimizer = make_optimizer(cfg, model, lr)
31 |
32 | milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS]
33 | scheduler = make_lr_scheduler(cfg, optimizer, milestones)
34 |
35 | arguments = {"iteration": 0}
36 | save_to_disk = dist_util.get_rank() == 0
37 | checkpointer = CheckPointer(model, optimizer, scheduler, cfg.OUTPUT_DIR, save_to_disk, logger)
38 | extra_checkpoint_data = checkpointer.load()
39 | arguments.update(extra_checkpoint_data)
40 |
41 | max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus
42 | train_loader = make_data_loader(cfg, is_train=True, distributed=args.distributed, max_iter=max_iter, start_iter=arguments['iteration'])
43 |
44 | model = do_train(cfg, model, train_loader, optimizer, scheduler, checkpointer, device, arguments, args)
45 | return model
46 |
47 |
48 | def main():
49 | parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')
50 | parser.add_argument(
51 | "--config-file",
52 | default="",
53 | metavar="FILE",
54 | help="path to config file",
55 | type=str,
56 | )
57 | parser.add_argument("--local_rank", type=int, default=0)
58 | parser.add_argument('--log_step', default=10, type=int, help='Print logs every log_step')
59 | parser.add_argument('--save_step', default=2500, type=int, help='Save checkpoint every save_step')
60 | parser.add_argument('--eval_step', default=2500, type=int, help='Evaluate dataset every eval_step, disabled when eval_step < 0')
61 | parser.add_argument('--use_tensorboard', default=True, type=str2bool)
62 | parser.add_argument(
63 | "--skip-test",
64 | dest="skip_test",
65 | help="Do not test the final model",
66 | action="store_true",
67 | )
68 | parser.add_argument(
69 | "opts",
70 | help="Modify config options using the command-line",
71 | default=None,
72 | nargs=argparse.REMAINDER,
73 | )
74 | args = parser.parse_args()
75 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
76 | args.distributed = num_gpus > 1
77 | args.num_gpus = num_gpus
78 |
79 | if torch.cuda.is_available():
80 | # This flag allows you to enable the inbuilt cudnn auto-tuner to
81 | # find the best algorithm to use for your hardware.
82 | torch.backends.cudnn.benchmark = True
83 | if args.distributed:
84 | torch.cuda.set_device(args.local_rank)
85 | torch.distributed.init_process_group(backend="nccl", init_method="env://")
86 | synchronize()
87 |
88 | cfg.merge_from_file(args.config_file)
89 | cfg.merge_from_list(args.opts)
90 | cfg.freeze()
91 |
92 | if cfg.OUTPUT_DIR:
93 | mkdir(cfg.OUTPUT_DIR)
94 |
95 | logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
96 | logger.info("Using {} GPUs".format(num_gpus))
97 | logger.info(args)
98 |
99 | logger.info("Loaded configuration file {}".format(args.config_file))
100 | with open(args.config_file, "r") as cf:
101 | config_str = "\n" + cf.read()
102 | logger.info(config_str)
103 | logger.info("Running with config:\n{}".format(cfg))
104 |
105 | model = train(cfg, args)
106 |
107 | if not args.skip_test:
108 | logger.info('Start evaluating...')
109 |         torch.cuda.empty_cache()  # release cached GPU memory so post-training evaluation runs faster
110 | do_evaluation(cfg, model, distributed=args.distributed)
111 |
112 |
113 | if __name__ == '__main__':
114 | main()
115 |
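116 | # Launch sketch (mirrors test.py; note that train() scales the base LR up by
117 | # num_gpus and divides MAX_ITER and the LR milestones by the same factor):
118 | #
119 | #   single GPU:
120 | #     python train.py --config-file configs/vgg_ssd300_voc0712.yaml
121 | #   multiple GPUs (e.g. 4):
122 | #     python -m torch.distributed.launch --nproc_per_node=4 train.py \
123 | #         --config-file configs/vgg_ssd300_voc0712.yaml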
--------------------------------------------------------------------------------