├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── demo.py ├── doc ├── Makefile ├── imgs │ ├── archor_box.jpg │ ├── feature_map_visualize.png │ ├── graph.jpg │ ├── pr_curve.jpg │ ├── preprocess.jpg │ └── train_process.jpg └── source │ ├── _static │ ├── css │ │ └── ssds.css │ └── img │ │ └── ssds.pytorch.png │ ├── api │ ├── ssds.core.rst │ ├── ssds.dataset.rst │ ├── ssds.modeling.layers.rst │ ├── ssds.modeling.nets.rst │ ├── ssds.modeling.rst │ ├── ssds.modeling.ssds.rst │ ├── ssds.pipeline.rst │ └── ssds.rst │ ├── conf.py │ ├── config.md │ ├── index.rst │ ├── install.md │ └── usage.md ├── experiments ├── cfgs │ └── tests │ │ └── test.yml └── person.jpg ├── requirements.txt ├── setup.py └── ssds ├── __init__.py ├── core ├── __init__.py ├── checkpoint.py ├── config.py ├── criterion.py ├── data_parallel.py ├── evaluation_metrics.py ├── optimizer.py ├── tools.py └── visualize_funcs.py ├── dataset ├── __init__.py ├── coco.py ├── dali_coco.py ├── dali_dataiterator.py ├── dali_tfrecord.py ├── dataset_factory.py ├── detection_dataset.py └── transforms.py ├── modeling ├── __init__.py ├── layers │ ├── __init__.py │ ├── basic_layers.py │ ├── box.py │ ├── decoder.py │ ├── layers_parser.py │ └── rfb_layers.py ├── model_builder.py ├── nets │ ├── __init__.py │ ├── darknet.py │ ├── densenet.py │ ├── efficientnet.py │ ├── effnet.py │ ├── inception_v2.py │ ├── mobilenet.py │ ├── regnet.py │ ├── resnet.py │ ├── rutils.py │ └── shufflenet.py └── ssds │ ├── __init__.py │ ├── bifpn.py │ ├── fcos.py │ ├── fpn.py │ ├── fssd.py │ ├── shelf.py │ ├── ssd.py │ ├── ssdsbase.py │ └── yolo.py ├── pipeline ├── __init__.py ├── pipeline_anchor_apex.py └── pipeline_anchor_basic.py ├── ssds.py └── utils ├── __init__.py ├── export.py ├── train.py ├── train_ddp.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | weights/ 3 | data/ 4 | data 5 | experiments/models/ 6 | run.sh 7 | __pycache__ 8 | *.pyc 9 | log* 10 | ssds.egg-info 11 | doc/build 12 | doc/node_modules 13 | doc/package-lock.json 14 | dist 15 | build/lib/ssds -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:20.06-py3 2 | 3 | RUN pip install opencv-python \ 4 | pynvml \ 5 | git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI 6 | 7 | COPY . ssds.pytorch/ 8 | RUN pip install --no-cache-dir -e ssds.pytorch/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Irene 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 
5 | # ssds.pytorch
6 | Repository for Single Shot MultiBox Detector and its variants, implemented with PyTorch and Python 3. This repo is easy to set up and ships with plenty of visualization methods. We hope it helps people gain a better understanding of SSD-like models and makes training and deploying ssds models easy.
7 | 
8 | Currently, it contains these features:
9 | - **Multiple SSD Variants**: ssd, fpn, bifpn, yolo, etc.
10 | - **Multiple Base Networks**: resnet, regnet, mobilenet, etc.
11 | - **Visualization** of the features of the ssd-like models, to help the user understand the model design and performance.
12 | - **Fast Training and Inference**: utilizes NVIDIA Apex and DALI to speed up training, and supports converting the model to ONNX or TensorRT for deployment.
13 | 
14 | This repo builds on the work of [ODTK](https://github.com/NVIDIA/retinanet-examples), [Detectron](https://github.com/facebookresearch/Detectron) and the [Tensorflow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection). Thanks for their work.
15 | 
16 | **Notice** The pretrained models for the current version are not finished yet; please check the [previous version](https://github.com/ShuangXieIrene/ssds.pytorch/tree/v0.3.1) for a richer set of pretrained models.
17 | 
18 | ### Table of Contents
19 | - Installation
20 | - Usage
21 | - Performance and Model Zoo
22 | - Visualization
23 | - [Documentation](https://foreveryounggithub.github.io/ssds.doc)
24 | 
25 | ## Installation
26 | ### requirements
27 | * python>=3.7
28 | * CUDA>=10.0
29 | * pytorch>=1.4
30 | ### basic installation:
31 | ```bash
32 | conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
33 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
34 | cd ssds.pytorch
35 | python setup.py clean -a install
36 | ```
37 | #### extra python libs for parallel training
38 | Currently, NVIDIA DALI and Apex are not included in requirements.txt and need to be installed manually.
39 | 
40 | * [DALI](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/installation.html)
41 | ```bash
42 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 nvidia-dali
43 | ```
44 | * [apex](https://github.com/NVIDIA/apex#linux)
45 | ```bash
46 | git clone https://github.com/NVIDIA/apex
47 | cd apex
48 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
49 | ```
50 | 
51 | ## Docker
52 | ```bash
53 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
54 | docker build -t ssds:local ./ssds.pytorch/
55 | docker run --gpus all -it --rm -v /data:/data ssds:local
56 | ```
57 | 
58 | ## Usage
59 | ### 0. Check the config file by visualization
60 | Define the network in a [config file](experiments/cfgs/tests/test.yml) and tweak the config file based on the visualized anchor boxes:
61 | ```bash
62 | python -m ssds.utils.visualize -cfg experiments/cfgs/tests/test.yml
63 | ```
64 | 
65 | ### 1. Training
66 | ```bash
67 | # basic training
68 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml
69 | # parallel training
70 | python -m torch.distributed.launch --nproc_per_node={num_gpus} -m ssds.utils.train_ddp -cfg experiments/cfgs/tests/test.yml
71 | ```
72 | 
73 | ### 2. Evaluation
74 | ```bash
75 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml -e
76 | ```
77 | 
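For scripted inference, a minimal sketch based on `demo.py` (the constructor arguments mirror `demo.py`; the config file is assumed to point at a trained checkpoint):
```python
import cv2
from ssds.ssds import SSDDetector

# build the detector; the second argument is the `shift` flag used by demo.py
model = SSDDetector("experiments/cfgs/tests/test.yml", False)

# the detector expects images resized to the model input size
image = cv2.imread("experiments/person.jpg")
image = cv2.resize(image, model.image_size)

# per-detection scores, boxes (x1, y1, x2, y2) and class ids
scores, boxes, classes = model(image)
```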
78 | ### 3. Export to ONNX or TRT model
79 | ```bash
80 | python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c best_mAP.pth -h
81 | ```
82 | 
83 | ## Performance
84 | 
85 | 
86 | ## Visualization
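To visualize detections from a trained model on a single image or video, the bundled demo script can be used (flags per `demo.py`'s argparse; the config is assumed to point at a trained checkpoint):
```bash
# image demo; add -d to display the result instead of writing <name>_result.jpg
python demo.py -cfg experiments/cfgs/tests/test.yml -i experiments/person.jpg -t image
```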
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | import random
5 | import cv2
6 | from tqdm import tqdm
7 | from ssds.ssds import SSDDetector
8 | 
9 | COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
10 | FONT = cv2.FONT_HERSHEY_SIMPLEX
11 | 
12 | def plot_one_box(img, x, color=None, label=None, line_thickness=None):
13 |     # Plots one bounding box on image img
14 |     tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
15 |     color = color or [random.randint(0, 255) for _ in range(3)]
16 |     c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
17 |     cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
18 |     if label:
19 |         tf = max(tl - 1, 1)  # font thickness
20 |         t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
21 |         c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
22 |         cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
23 |         cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
24 | 
25 | def demo_image(model, image_path, display):
26 |     # 1. prepare image
27 |     image = cv2.imread(image_path)
28 |     image = cv2.resize(image, model.image_size)
29 | 
30 |     # 2. model infer
31 |     scores, boxes, classes = model(image)
32 | 
33 |     # 3. draw bounding boxes on the image
34 |     for score, box, labels in zip(scores, boxes, classes):
35 |         plot_one_box(image, box, COLORS[labels % 3], '{label}: {score:.3f}'.format(label=labels, score=score))
36 | 
37 |     # 4. visualize result
38 |     if display:
39 |         cv2.imshow('result', image)
40 |         cv2.waitKey(0)
41 |     else:
42 |         path, _ = os.path.splitext(image_path)
43 |         cv2.imwrite(path + '_result.jpg', image)
44 |         print("output file saved at '{}'".format(path + '_result.jpg'))
45 | 
46 | def demo_video(model, video_path, display):
47 |     # 0. prepare video
48 |     cap = cv2.VideoCapture(video_path)
49 |     frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
50 |     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
51 |     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
52 |     if cap.isOpened() and (not display):
53 |         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
54 |         fps = int(cap.get(cv2.CAP_PROP_FPS))
55 |         writer = cv2.VideoWriter(video_path+"_output.mp4", fourcc, fps, (width,height))
56 | 
57 |     for fid in tqdm(range(frames)):
58 |         # 1. prepare image
59 |         flag, image = cap.read()
60 |         image = cv2.resize(image, model.image_size)
61 | 
62 |         # 2. model infer
63 |         scores, boxes, classes = model(image)
64 | 
65 |         # 3. draw bounding boxes on the image
66 |         for score, box, labels in zip(scores, boxes, classes):
67 |             plot_one_box(image, box, COLORS[labels % 3], '{label}: {score:.3f}'.format(label=labels, score=score))
68 | 
69 |         image = cv2.resize(image, (width,height))
70 | 
71 |         # 4. visualize result
72 |         if display:
73 |             cv2.imshow("Image", image)
74 |             if cv2.waitKey(1) & 0xFF == ord('q'):
75 |                 break
76 |         else:
77 |             writer.write(image)
78 | 
79 |     # 5. release the video resources
80 |     cap.release()
81 |     if display:
82 |         cv2.destroyAllWindows()
83 |     else:
84 |         writer.release()
85 | 
86 | 
87 | if __name__ == '__main__':
88 |     parser = argparse.ArgumentParser(description='Demo a ssds.pytorch network')
89 |     parser.add_argument('-cfg', '--config-file',
90 |                         help='path to the config file', default=None, type=str, required=True)
91 |     parser.add_argument('-i', '--demo-file',
92 |                         help='path to the demo file', default=None, type=str, required=True)
93 |     parser.add_argument('-t', '--type',
94 |                         default='image', choices=['image', 'video'])
95 |     parser.add_argument('-d', '--display',
96 |                         help='whether to display the detection result', action="store_true")
97 |     parser.add_argument('-s', '--shift', action="store_true")
98 | 
99 |     if len(sys.argv) == 1:
100 |         parser.print_help()
101 |         sys.exit(1)
102 |     args = parser.parse_args()
103 | 
104 |     model = SSDDetector(args.config_file, args.shift)
105 |     getattr(sys.modules[__name__], "demo_"+args.type)(model, args.demo_file, args.display)
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS ?= -j auto
6 | SPHINXBUILD ?= sphinx-build
7 | SPHINXPROJ ?= ssds.pytorch
8 | SOURCEDIR ?= source
9 | BUILDDIR ?= build
10 | PYCMD ?= python
11 | 
12 | # Put it first so that "make" without argument is like "make help".
13 | help:
14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
15 | 
16 | # figures:
17 | # 	@$(PYCMD) source/scripts/build_activation_images.py
18 | 
19 | docset: html
20 | 	doc2dash --name $(SPHINXPROJ) --enable-js --force $(BUILDDIR)/html/
21 | 
22 | 	# Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution.
23 | 	cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png
24 | 	convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png
25 | 
26 | html-stable:
27 | 	# stable differs from `make html` in two ways:
28 | 	# 1) The stable logo is used instead of the unstable logo
29 | 	# 2) There will not be a link to the stable docs.
30 | 	# See conf.py for more details.
31 | 	RELEASE=1 make html
32 | 
33 | .PHONY: help Makefile docset
34 | 
35 | # Catch-all target: route all unknown targets to Sphinx using the new
36 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
37 | %: Makefile
38 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
39 | 
40 | clean:
41 | 	@echo "Removing everything under 'build'.."
42 | @rm -rf $(BUILDDIR)/html/ $(BUILDDIR)/doctrees -------------------------------------------------------------------------------- /doc/imgs/archor_box.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/archor_box.jpg -------------------------------------------------------------------------------- /doc/imgs/feature_map_visualize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/feature_map_visualize.png -------------------------------------------------------------------------------- /doc/imgs/graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/graph.jpg -------------------------------------------------------------------------------- /doc/imgs/pr_curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/pr_curve.jpg -------------------------------------------------------------------------------- /doc/imgs/preprocess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/preprocess.jpg -------------------------------------------------------------------------------- /doc/imgs/train_process.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/train_process.jpg -------------------------------------------------------------------------------- /doc/source/_static/css/ssds.css: -------------------------------------------------------------------------------- 1 | .rst-content .hidden-section { 2 | display: none; 3 | } 4 | 5 | .rst-toc .hidden-section { 6 | display: none; 7 | } 8 | 9 | nav .hidden-section { 10 | display: inherit; 11 | } 12 | 13 | .wy-side-nav-search { 14 | background-color: #fff; 15 | color: #333; 16 | } 17 | 18 | .version{ 19 | color: #404040 !important; 20 | } -------------------------------------------------------------------------------- /doc/source/_static/img/ssds.pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/source/_static/img/ssds.pytorch.png -------------------------------------------------------------------------------- /doc/source/api/ssds.core.rst: -------------------------------------------------------------------------------- 1 | ssds.core 2 | ================= 3 | 4 | ssds.core.checkpoint 5 | --------------------------- 6 | 7 | .. automodule:: ssds.core.checkpoint 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.core.config 13 | ----------------------- 14 | 15 | .. automodule:: ssds.core.config 16 | :members: cfg_from_file 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ssds.core.criterion 21 | -------------------------- 22 | 23 | .. 
automodule:: ssds.core.criterion 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | :member-order: bysource 28 | 29 | ssds.core.data\_parallel 30 | ------------------------------- 31 | 32 | .. autoclass:: ssds.core.data_parallel.BalancedDataParallel 33 | 34 | ssds.core.evaluation\_metrics 35 | ------------------------------------ 36 | 37 | .. autoclass:: ssds.core.evaluation_metrics.MeanAveragePrecision 38 | :members: __call__, get_results 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | ssds.core.optimizer 43 | -------------------------- 44 | 45 | .. automodule:: ssds.core.optimizer 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | ssds.core.visualize\_funcs 51 | --------------------------------- 52 | 53 | .. automodule:: ssds.core.visualize_funcs 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /doc/source/api/ssds.dataset.rst: -------------------------------------------------------------------------------- 1 | ssds.dataset 2 | ==================== 3 | 4 | ssds.dataset.dataset\_factory 5 | ------------------------------------ 6 | 7 | .. automodule:: ssds.dataset.dataset_factory 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.dataset.detection\_dataset 13 | -------------------------------------- 14 | 15 | .. autoclass:: ssds.dataset.detection_dataset.DetectionDataset 16 | :members: __getitem__ 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ssds.dataset.coco 21 | ------------------------ 22 | 23 | .. automodule:: ssds.dataset.coco 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | ssds.dataset.dali\_dataiterator 29 | -------------------------------------- 30 | 31 | .. automodule:: ssds.dataset.dali_dataiterator 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | ssds.dataset.dali\_coco 37 | ------------------------------ 38 | 39 | .. automodule:: ssds.dataset.dali_coco 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | ssds.dataset.dali\_tfrecord 45 | ---------------------------------- 46 | 47 | .. automodule:: ssds.dataset.dali_tfrecord 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | ssds.dataset.transforms 53 | ------------------------------ 54 | 55 | .. automodule:: ssds.dataset.transforms 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.layers.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling.layers 2 | ============================ 3 | 4 | ssds.modeling.layers.box 5 | ------------------------------- 6 | 7 | .. automodule:: ssds.modeling.layers.box 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.modeling.layers.decoder 13 | ----------------------------------- 14 | 15 | .. autoclass:: ssds.modeling.layers.decoder.Decoder 16 | :members: __call__ 17 | :undoc-members: 18 | :show-inheritance: -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.nets.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling.nets 2 | ========================== 3 | 4 | ssds.modeling.nets.darknet 5 | --------------------------------- 6 | 7 | .. 
automodule:: ssds.modeling.nets.darknet 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | :member-order: bysource 12 | 13 | ssds.modeling.nets.densenet 14 | ---------------------------------- 15 | 16 | .. automodule:: ssds.modeling.nets.densenet 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | :member-order: bysource 21 | 22 | ssds.modeling.nets.efficientnet 23 | -------------------------------------- 24 | 25 | .. automodule:: ssds.modeling.nets.efficientnet 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | :member-order: bysource 30 | 31 | ssds.modeling.nets.effnet 32 | -------------------------------- 33 | 34 | .. automodule:: ssds.modeling.nets.effnet 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | :member-order: bysource 39 | 40 | ssds.modeling.nets.inception\_v2 41 | --------------------------------------- 42 | 43 | .. autoclass:: ssds.modeling.nets.inception_v2.InceptionV2 44 | 45 | ssds.modeling.nets.mobilenet 46 | ----------------------------------- 47 | 48 | .. automodule:: ssds.modeling.nets.mobilenet 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | :member-order: bysource 53 | 54 | ssds.modeling.nets.regnet 55 | -------------------------------- 56 | 57 | .. automodule:: ssds.modeling.nets.regnet 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | :member-order: bysource 62 | 63 | ssds.modeling.nets.resnet 64 | -------------------------------- 65 | 66 | .. automodule:: ssds.modeling.nets.resnet 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | :member-order: bysource 71 | 72 | ssds.modeling.nets.shufflenet 73 | ------------------------------------ 74 | 75 | .. automodule:: ssds.modeling.nets.shufflenet 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | :member-order: bysource -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling 2 | ===================== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | ssds.modeling.layers 8 | ssds.modeling.nets 9 | ssds.modeling.ssds 10 | 11 | ssds.modeling.model\_builder 12 | ----------------------------------- 13 | 14 | .. automodule:: ssds.modeling.model_builder 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.ssds.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling.ssds 2 | ========================== 3 | 4 | ssds.modeling.ssds.ssdsbase 5 | ---------------------------------- 6 | 7 | .. autoclass:: ssds.modeling.ssds.ssdsbase.SSDSBase 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.modeling.ssds.ssd 13 | ----------------------------- 14 | 15 | .. autoclass:: ssds.modeling.ssds.SSD 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ssds.modeling.ssds.yolo 21 | ------------------------------ 22 | 23 | .. autoclass:: ssds.modeling.ssds.YOLOV3 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | .. autoclass:: ssds.modeling.ssds.YOLOV4 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | ssds.modeling.ssds.fpn 34 | ----------------------------- 35 | 36 | .. 
autoclass:: ssds.modeling.ssds.SSDFPN 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | ssds.modeling.ssds.bifpn 42 | ------------------------------- 43 | 44 | .. autoclass:: ssds.modeling.ssds.SSDBiFPN 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: -------------------------------------------------------------------------------- /doc/source/api/ssds.pipeline.rst: -------------------------------------------------------------------------------- 1 | ssds.pipeline 2 | ===================== 3 | 4 | ssds.pipeline.pipeline\_anchor\_apex 5 | ------------------------------------------- 6 | 7 | .. automodule:: ssds.pipeline.pipeline_anchor_apex 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.pipeline.pipeline\_anchor\_basic 13 | -------------------------------------------- 14 | 15 | .. automodule:: ssds.pipeline.pipeline_anchor_basic 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /doc/source/api/ssds.rst: -------------------------------------------------------------------------------- 1 | ssds.pytorch API 2 | ==================== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | ssds.core 8 | ssds.dataset 9 | ssds.modeling 10 | ssds.pipeline 11 | 12 | ssds.ssds 13 | --------------------------- 14 | 15 | .. autoclass:: ssds.ssds.SSDDetector 16 | :members: __call__ -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | # source code directory, relative to this file, for sphinx-autobuild 17 | sys.path.insert(0, os.path.abspath('../..')) 18 | 19 | import ssds 20 | 21 | import sphinx_rtd_theme 22 | 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = 'ssds.pytorch' 27 | copyright = '2020, Shuang Xie, Yang Liu' 28 | author = 'Shuang Xie, Yang Liu' 29 | 30 | # The full version, including alpha/beta/rc tags 31 | release = 'v1.5' 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 
39 | extensions = [
40 |     'sphinx.ext.autodoc',
41 |     'sphinx.ext.autosummary',
42 |     'sphinx.ext.doctest',
43 |     'sphinx.ext.intersphinx',
44 |     'sphinx.ext.todo',
45 |     'sphinx.ext.coverage',
46 |     'sphinx.ext.napoleon',
47 |     'sphinx.ext.viewcode',
48 |     'sphinxcontrib.katex',
49 |     'sphinx.ext.githubpages',
50 |     'sphinx.ext.autosectionlabel',
51 |     'sphinx_markdown_tables',
52 |     'recommonmark',
53 | ]
54 | 
55 | # build the templated autosummary files
56 | autosummary_generate = True
57 | numpydoc_show_class_members = False
58 | 
59 | # autosectionlabel throws warnings if section names are duplicated.
60 | # The following tells autosectionlabel to not throw a warning for
61 | # duplicated section names that are in different documents.
62 | autosectionlabel_prefix_document = True
63 | 
64 | # katex options
65 | katex_prerender = True
66 | 
67 | napoleon_use_ivar = True
68 | 
69 | # Add any paths that contain templates here, relative to this directory.
70 | templates_path = ['_templates']
71 | 
72 | # List of patterns, relative to source directory, that match files and
73 | # directories to ignore when looking for source files.
74 | # This pattern also affects html_static_path and html_extra_path.
75 | exclude_patterns = []
76 | 
77 | # The suffix(es) of source filenames.
78 | # You can specify multiple suffixes as a list of strings:
79 | #
80 | source_suffix = {
81 |     '.rst': 'restructuredtext',
82 |     '.md': 'markdown',
83 | }
84 | 
85 | # The master toctree document.
86 | master_doc = 'index'
87 | 
88 | # -- Options for HTML output -------------------------------------------------
89 | 
90 | # The theme to use for HTML and HTML Help pages. See the documentation for
91 | # a list of builtin themes.
92 | #
93 | html_theme = 'sphinx_rtd_theme'
94 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
95 | 
96 | # Theme options are theme-specific and customize the look and feel of a theme
97 | # further. For a list of options available for each theme, see the
98 | # documentation.
99 | 
100 | html_theme_options = {
101 |     'collapse_navigation': False,
102 |     'display_version': True,
103 |     'logo_only': True,
104 | }
105 | 
106 | html_logo = '_static/img/ssds.pytorch.png'
107 | 
108 | # Add any paths that contain custom static files (such as style sheets) here,
109 | # relative to this directory. They are copied after the builtin static files,
110 | # so a file named "default.css" will overwrite the builtin "default.css".
111 | html_static_path = ['_static']
112 | 
113 | html_css_files = [
114 |     'css/ssds.css',
115 | ]
--------------------------------------------------------------------------------
/doc/source/config.md:
--------------------------------------------------------------------------------
1 | ## Config System
2 | 
3 | The config system for ssds.pytorch.
4 | 
5 | ### MODEL
6 | 
7 | | MODEL parameters | description |
8 | |---|---|
9 | | MODEL.NETS | type of the backbone used to extract the features |
10 | | MODEL.SSDS | type of the ssds model used to detect bounding boxes |
11 | | MODEL.IMAGE_SIZE | image size for ssd |
12 | | MODEL.NUM_CLASSES | number of classes for the model |
13 | | MODEL.FEATURE_LAYER | the feature layers used to extract the proposed bounding boxes; the first dimension is the feature layer/type, while the second dimension is the feature map channel |
14 | | MODEL.SIZES | sizes for the proposed anchor boxes (a size of 1 is included by default) |
15 | | MODEL.ASPECT_RATIOS | aspect ratios for the proposed anchor boxes (a ratio of 1 is included by default) |
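For example, the bundled test config (`experiments/cfgs/tests/test.yml`) defines its MODEL section as:
```yaml
MODEL:
  SSDS: YOLOV3
  NETS: ResNet18
  IMAGE_SIZE: [320, 320]
  NUM_CLASSES: 80
  FEATURE_LAYER: [[3, 4, 5], [128, 256, 512]]
  SIZES: [[2.0, 2.828], [2.0, 2.828], [2.0, 4.0, 8.0]]
  ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
```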
16 | 
17 | ### TRAIN
18 | 
19 | | TRAIN parameters | description |
20 | |---|---|
21 | | TRAIN.BATCH_SIZE | batch size for training |
22 | | TRAIN.TRAINABLE_SCOPE | trainable scope |
23 | | TRAIN.RESUME_SCOPE | resuming scope |
24 | | TRAIN.MAX_EPOCHS | the maximum number of epochs |
25 | | TRAIN.CHECKPOINTS_EPOCHS | the epoch interval for checkpoint saving |
26 | | TRAIN.CHECKPOINTS_KEPT | the number of checkpoints kept; older ones are deleted to save space |
27 | 
28 | #### TRAIN.OPTIMIZER
29 | 
30 | | TRAIN.OPTIMIZER parameters | description |
31 | |---|---|
32 | | TRAIN.OPTIMIZER.OPTIMIZER | type of the optimizer |
33 | | TRAIN.OPTIMIZER.LEARNING_RATE | initial learning rate |
34 | | TRAIN.OPTIMIZER.DIFFERENTIAL_LEARNING_RATE | initial differential learning rates for different layers |
35 | | TRAIN.OPTIMIZER.MOMENTUM | momentum |
36 | | TRAIN.OPTIMIZER.MOMENTUM_2 | second momentum term (beta2 for adam, alpha for rmsprop) |
37 | | TRAIN.OPTIMIZER.EPS | epsilon |
38 | | TRAIN.OPTIMIZER.WEIGHT_DECAY | weight decay, for regularization |
39 | 
40 | #### TRAIN.LR_SCHEDULER
41 | 
42 | | TRAIN.LR_SCHEDULER parameters | description |
43 | |---|---|
44 | | TRAIN.LR_SCHEDULER.SCHEDULER | type of the lr scheduler |
45 | | TRAIN.LR_SCHEDULER.STEPS | step size for reducing the learning rate |
46 | | TRAIN.LR_SCHEDULER.GAMMA | factor for reducing the learning rate |
47 | | TRAIN.LR_SCHEDULER.LR_MIN | minimum learning rate |
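A matching TRAIN section, again taken from the bundled test config:
```yaml
TRAIN:
  MAX_EPOCHS: 50
  CHECKPOINTS_EPOCHS: 1
  BATCH_SIZE: 1
  TRAINABLE_SCOPE: 'transforms,extras,loc,conf'
  OPTIMIZER:
    OPTIMIZER: adam
    LEARNING_RATE: 0.0001
    MOMENTUM: 0.9
    WEIGHT_DECAY: 0.0001
  LR_SCHEDULER:
    SCHEDULER: exponential
    GAMMA: 0.97
```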
48 | 
49 | ### TEST
50 | 
51 | | TEST parameters | description |
52 | |---|---|
53 | | TEST.BATCH_SIZE | batch size for testing |
54 | | TEST.TEST_SCOPE | the epoch range for testing |
55 | 
56 | ### POST_PROCESS
57 | 
58 | POST_PROCESS controls the parameters for ssds.modeling.layers.decoder.Decoder, which is used to decode the loc and conf feature maps
59 | into predicted boxes.
60 | 
61 | | POST_PROCESS parameters | description |
62 | |---|---|
63 | | POST_PROCESS.SCORE_THRESHOLD | the score threshold for filtering the predicted boxes; set it to 0.01 for evaluation |
64 | | POST_PROCESS.IOU_THRESHOLD | the iou threshold for filtering the predicted boxes |
65 | | POST_PROCESS.MAX_DETECTIONS | the maximum number of detection boxes in the final output of the ssds model |
66 | | POST_PROCESS.MAX_DETECTIONS_PER_LEVEL | the maximum number of detection boxes for each level of the ssds detection heads |
67 | | POST_PROCESS.USE_DIOU | whether to replace iou with diou in the nms step |
68 | | POST_PROCESS.RESCORE_CENTER | whether to rescore the boxes based on their anchor center locations |
69 | 
70 | ### DATASET
71 | 
72 | | DATASET parameters | description |
73 | |---|---|
74 | | DATASET.DATASET | type of the dataset |
75 | | DATASET.DATASET_DIR | path to the dataset folder |
76 | | DATASET.TRAIN_SETS | train set scope |
77 | | DATASET.TEST_SETS | test set scope |
78 | | DATASET.PICKLE | whether to use pickle to cache the images and annotations (only works for non-DALI datasets) |
79 | | DATASET.NUM_WORKERS | the number of data loading workers, e.g. 8 (only works for non-DALI datasets) |
80 | | DATASET.DEVICE_ID | the list of devices used to distribute the data loading (only works for apex parallel training) |
81 | | DATASET.MULTISCALE | list of image sizes used for multiscale training |
82 | 
83 | 
84 | ### DATASET.PREPROC
85 | 
86 | | DATASET.PREPROC parameters | description |
87 | |---|---|
88 | | DATASET.PREPROC.MEAN | float, the mean for normalization |
89 | | DATASET.PREPROC.STD | float, the std for normalization |
90 | | DATASET.PREPROC.CROP_SCALE | list, the lower and upper size bounds for the ssd random crop |
91 | | DATASET.PREPROC.CROP_ASPECT_RATIO | list, the lower and upper aspect-ratio bounds for the ssd random crop |
92 | | DATASET.PREPROC.CROP_ATTEMPTS | int, the number of attempts for the ssd random crop |
93 | | DATASET.PREPROC.HUE_DELTA | float, hue delta |
94 | | DATASET.PREPROC.BRI_DELTA | float, brightness delta |
95 | | DATASET.PREPROC.CONTRAST_RANGE | list, the lower and upper bounds for contrast |
96 | | DATASET.PREPROC.SATURATION_RANGE | list, the lower and upper bounds for saturation |
97 | | DATASET.PREPROC.MAX_EXPAND_RATIO | float, the max expand ratio for padding |
98 | 
99 | ### Others
100 | 
101 | | Others parameters | description |
102 | |---|---|
103 | | EXP_DIR | the directory where checkpoints and experiment outputs are saved |
104 | | LOG_DIR | the log directory |
105 | | RESUME_CHECKPOINT | the checkpoint used to resume |
106 | | PHASE | the phases to run, e.g. ['train'] |
107 | | DEVICE_ID | the list of devices used to distribute the model training |
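### Loading a config
A minimal sketch of reading one of these config files from Python, assuming `cfg_from_file` (documented in `ssds.core.config`) takes the yml path and returns the merged config:
```python
from ssds.core.config import cfg_from_file

cfg = cfg_from_file("experiments/cfgs/tests/test.yml")
print(cfg.MODEL.SSDS, cfg.MODEL.NETS, cfg.MODEL.IMAGE_SIZE)
```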
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | .. ssds.pytorch documentation master file, created by
2 |    sphinx-quickstart on Sun Jul 19 17:03:00 2020.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | ssds.pytorch documentation
7 | ========================================
8 | 
9 | .. toctree::
10 |    :maxdepth: 2
11 | 
12 |    install.md
13 |    usage.md
14 |    config.md
15 | 
16 | 
17 | API
18 | ===
19 | .. toctree::
20 |    :maxdepth: 2
21 | 
22 |    api/ssds
23 | 
24 | Indices and tables
25 | ==================
26 | 
27 | * :ref:`genindex`
28 | * :ref:`search`
--------------------------------------------------------------------------------
/doc/source/install.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | 
3 | ### Python Installation
4 | 
5 | #### requirements
6 | * python>=3.7
7 | * CUDA>=10.0
8 | * pytorch>=1.4
9 | #### basic installation:
10 | ```bash
11 | conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
12 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
13 | cd ssds.pytorch
14 | python setup.py clean -a install
15 | ```
16 | #### extra python libs for parallel training
17 | Currently, NVIDIA DALI and Apex are not included in requirements.txt and need to be installed manually.
18 | 
19 | * [DALI](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/installation.html)
20 | ```bash
21 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 nvidia-dali
22 | ```
23 | * [apex](https://github.com/NVIDIA/apex#linux)
24 | ```bash
25 | git clone https://github.com/NVIDIA/apex
26 | cd apex
27 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
28 | ```
29 | 
30 | ### Docker
31 | ```bash
32 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
33 | docker build -t ssds:local ./ssds.pytorch/
34 | docker run --gpus all -it --rm -v /data:/data ssds:local
35 | ```
--------------------------------------------------------------------------------
/doc/source/usage.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 | ### 0. Check the config file by visualization
3 | Define the network in a [config file](../experiments/cfgs/tests/test.yml) and tweak the config file based on the visualized anchor boxes:
4 | ```bash
5 | python -m ssds.utils.visualize -cfg experiments/cfgs/tests/test.yml
6 | ```
7 | 
8 | ### 1. Training
9 | ```bash
10 | # basic training
11 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml
12 | # parallel training
13 | python -m torch.distributed.launch --nproc_per_node={num_gpus} -m ssds.utils.train_ddp -cfg experiments/cfgs/tests/test.yml
14 | ```
15 | 
16 | ### 2. Evaluation
17 | ```bash
18 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml -e
19 | ```
20 | 
21 | ### 3.
Export to ONNX or TRT model 22 | ```bash 23 | python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c best_mAP.pth -h 24 | ``` 25 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/test.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: YOLOV3 3 | NETS: ResNet18 4 | IMAGE_SIZE: [320, 320] 5 | NUM_CLASSES: 80 6 | FEATURE_LAYER: [[3, 4, 5], [128, 256, 512]] 7 | SIZES: [[2.0, 2.828], [2.0, 2.828], [2.0, 4.0, 8.0]] 8 | ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 50 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 1 14 | TRAINABLE_SCOPE: 'transforms,extras,loc,conf' 15 | RESUME_SCOPE: '' 16 | OPTIMIZER: 17 | OPTIMIZER: adam 18 | LEARNING_RATE: 0.0001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: exponential 23 | GAMMA: 0.97 24 | WARM_UP_EPOCHS: 0 25 | 26 | TEST: 27 | BATCH_SIZE: 1 28 | TEST_SCOPE: [49, 50] 29 | 30 | MATCHER: 31 | LOCATE_LOSS: "CIOULoss" 32 | 33 | POST_PROCESS: 34 | SCORE_THRESHOLD: 0.01 35 | IOU_THRESHOLD: 0.6 36 | MAX_DETECTIONS: 100 37 | 38 | DATASET: 39 | DATASET: 'DaliCOCO' 40 | DATASET_DIR: '/data1/dataset/tiny_coco' 41 | TRAIN_SETS: ["train2017"] 42 | TEST_SETS: ["val2017"] 43 | DEVICE_ID: [0] 44 | 45 | EXP_DIR: './experiments/models/pretrain/yolo_resnet18_coco_test2' 46 | LOG_DIR: './experiments/models/pretrain/yolo_resnet18_coco_test2' 47 | # RESUME_CHECKPOINT: './experiments/weights/pretrain/SSD_ResNet18_300x300_COCO_20.4.pth' 48 | PHASE: ['train'] 49 | DEVICE_ID: [0] -------------------------------------------------------------------------------- /experiments/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/experiments/person.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy 3 | pyyaml 4 | pynvml 5 | cython 6 | opencv-python 7 | matplotlib 8 | tensorboard 9 | git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | with open('./requirements.txt') as f: 5 | required_packages = f.read().splitlines() 6 | dependency_links = [required_packages.pop()[4:]] 7 | 8 | setup(name='ssds', 9 | version='1.5', 10 | description='Single Shot Detector and its variants', 11 | install_requires=required_packages, 12 | dependency_links=dependency_links, 13 | python_requires='>=3.6', 14 | packages=find_packages() 15 | ) -------------------------------------------------------------------------------- /ssds/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/__init__.py -------------------------------------------------------------------------------- /ssds/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/core/__init__.py 
--------------------------------------------------------------------------------
/ssds/core/checkpoint.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | import os
4 | from collections import OrderedDict
5 | 
6 | 
7 | def model_to_cpu(model_state):
8 |     r"""Make sure the model state is loaded into CPU memory, so that the loaded model does not occupy GPU memory.
9 | 
10 |     :meta private:
11 |     """
12 |     new_state = OrderedDict()
13 |     for k, v in model_state.items():
14 |         new_state[k] = v.cpu()
15 |     return new_state
16 | 
17 | 
18 | def save_checkpoints(model, output_dir, checkpoint_prefix, epochs):
19 |     r"""Save the model parameters to a pth file.
20 | 
21 |     Args:
22 |         model: the ssds model
23 |         output_dir (str): the folder for model saving, usually defined by cfg.EXP_DIR
24 |         checkpoint_prefix (str): the prefix for the checkpoint, usually the combination of the ssds model and the dataset
25 |         epochs (int): the epoch of the current training
26 |     """
27 |     if not os.path.exists(output_dir):
28 |         os.makedirs(output_dir)
29 | 
30 |     filename = checkpoint_prefix + "_epoch_{:d}".format(epochs) + ".pth"
31 |     filename = os.path.join(output_dir, filename)
32 |     torch.save(model_to_cpu(model.state_dict()), filename)
33 |     with open(os.path.join(output_dir, "checkpoint_list.txt"), "a") as f:
34 |         f.write("epoch {epoch:d}: {filename}\n".format(epoch=epochs, filename=filename))
35 |     print("Wrote snapshot to: {:s}".format(filename))
36 | 
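# Usage sketch (illustrative names, not from the repo):
#     save_checkpoints(net, cfg.EXP_DIR, "yolo_resnet18_coco", epoch)
# writes <EXP_DIR>/yolo_resnet18_coco_epoch_<epoch>.pth and appends the entry to
# checkpoint_list.txt, which find_previous_checkpoint() below reads to resume the latest run.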
37 | 
38 | def find_previous_checkpoint(output_dir):
39 |     r"""Return the most recent checkpoint in the checkpoint_list.txt
40 | 
41 |     checkpoint_list.txt is usually saved at cfg.EXP_DIR
42 | 
43 |     Args:
44 |         output_dir (str): the folder that contains the previous checkpoints and checkpoint_list.txt
45 |     """
46 |     if not os.path.exists(os.path.join(output_dir, "checkpoint_list.txt")):
47 |         return False
48 |     with open(os.path.join(output_dir, "checkpoint_list.txt"), "r") as f:
49 |         lineList = f.readlines()
50 |     epoches, resume_checkpoints = [list() for _ in range(2)]
51 |     for line in lineList:
52 |         epoch = int(line[line.find("epoch ") + len("epoch ") : line.find(":")])
53 |         checkpoint = line[line.find(":") + 2 : -1]
54 |         epoches.append(epoch)
55 |         resume_checkpoints.append(checkpoint)
56 |     return epoches, resume_checkpoints
57 | 
58 | 
59 | def resume_checkpoint(model, resume_checkpoint, resume_scope=""):
60 |     r"""Resume the checkpoint parameters to the given ssds model based on the resume_scope.
61 | 
62 |     The resume_scope is defined by cfg.TRAIN.RESUME_SCOPE.
63 | 
64 |     When:
65 | 
66 |     * cfg.TRAIN.RESUME_SCOPE = ""
67 |         All the parameters in the resume_checkpoint are resumed to the model
68 |     * cfg.TRAIN.RESUME_SCOPE = "a,b,c"
69 |         Only the parameters in a, b and c are resumed to the model
70 | 
71 |     Args:
72 |         model: the ssds model
73 |         resume_checkpoint (str): the file address of the checkpoint which contains the resumed parameters
74 |         resume_scope: the scope of the resumed parameters, defined at cfg.TRAIN.RESUME_SCOPE
75 |     """
76 |     if resume_checkpoint == "" or not os.path.isfile(resume_checkpoint):
77 |         print(("=> no checkpoint found at '{}'".format(resume_checkpoint)))
78 |         return False
79 |     print(("=> loading checkpoint '{:s}'".format(resume_checkpoint)))
80 |     checkpoint = torch.load(resume_checkpoint, map_location=torch.device("cpu"))
81 |     if "state_dict" in checkpoint:
82 |         checkpoint = checkpoint["state_dict"]
83 | 
84 |     # print("=> Weights in the checkpoint:")
85 |     # print([k for k, v in list(checkpoint.items())])
86 | 
87 |     # remove the "module." prefix added by a parallel model wrapper
88 |     if "module." in list(checkpoint.items())[0][0]:
89 |         pretrained_dict = {
90 |             ".".join(k.split(".")[1:]): v for k, v in list(checkpoint.items())
91 |         }
92 |         checkpoint = pretrained_dict
93 | 
94 |     # rename weights that exist under a different name in another model
95 |     # change_dict = {
96 |     # }
97 |     # for k, v in list(checkpoint.items()):
98 |     #     for _k, _v in list(change_dict.items()):
99 |     #         if _k in k:
100 |     #             new_key = k.replace(_k, _v)
101 |     #             checkpoint[new_key] = checkpoint.pop(k)
102 | 
103 |     # remove the output layers from the checkpoint
104 |     # remove_list = {
105 |     # }
106 |     # for k in remove_list:
107 |     #     checkpoint.pop(k+'.weight', None)
108 |     #     checkpoint.pop(k+'.bias', None)
109 | 
110 |     # extract the weights based on the resume scope
111 |     if resume_scope != "":
112 |         pretrained_dict = {}
113 |         for k, v in list(checkpoint.items()):
114 |             for resume_key in resume_scope.split(","):
115 |                 if resume_key in k:
116 |                     pretrained_dict[k] = v
117 |                     break
118 |         checkpoint = pretrained_dict
119 | 
120 |     pretrained_dict = {k: v for k, v in checkpoint.items() if k in model.state_dict()}
121 |     # print("=> Resumed weights:")
122 |     # print([k for k, v in list(pretrained_dict.items())])
123 | 
124 |     checkpoint = model.state_dict()
125 |     unresume_dict = set(checkpoint) - set(pretrained_dict)
126 |     if len(unresume_dict) != 0:
127 |         print("=> Unresumed weights:")
128 |         print(unresume_dict)
129 | 
130 |     checkpoint.update(pretrained_dict)
131 | 
132 |     model.load_state_dict(checkpoint)
133 |     return model
--------------------------------------------------------------------------------
/ssds/core/data_parallel.py:
--------------------------------------------------------------------------------
1 | from torch.nn.parallel import DataParallel
2 | import torch
3 | from torch.nn.parallel._functions import Scatter
4 | from torch.nn.parallel.parallel_apply import parallel_apply
5 | 
6 | 
7 | def scatter(inputs, target_gpus, chunk_sizes, dim=0):
8 |     r"""
9 |     Slices tensors into approximately equal chunks and
10 |     distributes them across given GPUs. Duplicates
11 |     references to objects that are not tensors.
12 | """ 13 | 14 | def scatter_map(obj): 15 | if isinstance(obj, torch.Tensor): 16 | try: 17 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 18 | except: 19 | print("obj", obj.size()) 20 | print("dim", dim) 21 | print("chunk_sizes", chunk_sizes) 22 | quit() 23 | if isinstance(obj, tuple) and len(obj) > 0: 24 | return list(zip(*map(scatter_map, obj))) 25 | if isinstance(obj, list) and len(obj) > 0: 26 | return list(map(list, zip(*map(scatter_map, obj)))) 27 | if isinstance(obj, dict) and len(obj) > 0: 28 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 29 | return [obj for targets in target_gpus] 30 | 31 | # After scatter_map is called, a scatter_map cell will exist. This cell 32 | # has a reference to the actual function scatter_map, which has references 33 | # to a closure that has a reference to the scatter_map cell (because the 34 | # fn is recursive). To avoid this reference cycle, we set the function to 35 | # None, clearing the cell 36 | try: 37 | return scatter_map(inputs) 38 | finally: 39 | scatter_map = None 40 | 41 | 42 | def scatter_kwargs(inputs, kwargs, target_gpus, chunk_sizes, dim=0): 43 | r"""Scatter with support for kwargs dictionary""" 44 | inputs = scatter(inputs, target_gpus, chunk_sizes, dim) if inputs else [] 45 | kwargs = scatter(kwargs, target_gpus, chunk_sizes, dim) if kwargs else [] 46 | if len(inputs) < len(kwargs): 47 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 48 | elif len(kwargs) < len(inputs): 49 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 50 | inputs = tuple(inputs) 51 | kwargs = tuple(kwargs) 52 | return inputs, kwargs 53 | 54 | 55 | class BalancedDataParallel(DataParallel): 56 | """ This class is used to replace the original pytorch DataParallel and balance the first GPU memory usage. 
57 | 58 | The original script is from: https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/utils/data_parallel.py 59 | """ 60 | def __init__(self, gpu0_bsz, *args, **kwargs): 61 | self.gpu0_bsz = gpu0_bsz 62 | super().__init__(*args, **kwargs) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | if self.gpu0_bsz == 0: 68 | device_ids = self.device_ids[1:] 69 | else: 70 | device_ids = self.device_ids 71 | inputs, kwargs = self.scatter(inputs, kwargs, device_ids) 72 | # print('len(inputs)1: ', str(len(inputs))) 73 | # print('self.device_ids[:len(inputs)]', str(self.device_ids[:len(inputs)])) 74 | if len(self.device_ids) == 1: 75 | return self.module(*inputs[0], **kwargs[0]) 76 | replicas = self.replicate(self.module, self.device_ids[: len(inputs)]) 77 | if self.gpu0_bsz == 0: 78 | replicas = replicas[1:] 79 | outputs = self.parallel_apply(replicas, device_ids, inputs, kwargs) 80 | return self.gather(outputs, self.output_device) 81 | 82 | def parallel_apply(self, replicas, device_ids, inputs, kwargs): 83 | return parallel_apply(replicas, inputs, kwargs, device_ids[: len(inputs)]) 84 | 85 | def scatter(self, inputs, kwargs, device_ids): 86 | bsz = inputs[0].size(self.dim) 87 | num_dev = len(self.device_ids) 88 | gpu0_bsz = self.gpu0_bsz 89 | bsz_unit = (bsz - gpu0_bsz) // (num_dev - 1) 90 | if gpu0_bsz < bsz_unit: 91 | chunk_sizes = [gpu0_bsz] + [bsz_unit] * (num_dev - 1) 92 | delta = bsz - sum(chunk_sizes) 93 | for i in range(delta): 94 | chunk_sizes[i + 1] += 1 95 | if gpu0_bsz == 0: 96 | chunk_sizes = chunk_sizes[1:] 97 | else: 98 | return super().scatter(inputs, kwargs, device_ids) 99 | 100 | # print('bsz: ', bsz) 101 | # print('num_dev: ', num_dev) 102 | # print('gpu0_bsz: ', gpu0_bsz) 103 | # print('bsz_unit: ', bsz_unit) 104 | # print('chunk_sizes: ', chunk_sizes) 105 | return scatter_kwargs(inputs, kwargs, device_ids, chunk_sizes, dim=self.dim) 106 | -------------------------------------------------------------------------------- /ssds/core/evaluation_metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class MeanAveragePrecision(object): 6 | def __init__(self, num_classes, conf_threshold, iou_threshold): 7 | self.num_classes = num_classes 8 | self.conf_threshold = conf_threshold 9 | self.iou_threshold = iou_threshold 10 | self.score, self.detect_ismatched, self.target_ismatched = [ 11 | [[] for _ in range(self.num_classes)] for _ in range(3) 12 | ] 13 | self.npos = [0 for _ in range(self.num_classes)] 14 | 15 | def __call__(self, detections, targets): 16 | def matrix_iou(a, b): 17 | """ 18 | return iou of a and b, numpy version for data augenmentation 19 | """ 20 | lt = torch.max(a[:, None, :2], b[:, :2]) 21 | rb = torch.min(a[:, None, 2:], b[:, 2:]) 22 | 23 | area_i = torch.prod(rb - lt, dim=2) * (lt < rb).all(dim=2) 24 | area_a = torch.prod(a[:, 2:] - a[:, :2], dim=1) 25 | area_b = torch.prod(b[:, 2:] - b[:, :2], dim=1) 26 | return area_i / (area_a[:, None] + area_b - area_i) 27 | 28 | for out_score, out_box, out_class, target in zip(*detections, targets): 29 | out_class = out_class[out_score > self.conf_threshold] 30 | out_box = out_box[out_score > self.conf_threshold] 31 | out_score = out_score[out_score > self.conf_threshold] 32 | for c in range(self.num_classes): 33 | target_c = target[target[:, 4] == c] 34 | out_score_c = out_score[out_class == c] 35 | out_box_c = out_box[out_class == c] 36 | if 
len(out_score_c) == 0: 37 | self.npos[c] += len(target_c) 38 | self.target_ismatched[c] += np.zeros( 39 | len(target_c), dtype=bool 40 | ).tolist() 41 | continue 42 | if len(target_c) == 0: 43 | self.score[c] += out_score_c.cpu().tolist() 44 | self.detect_ismatched[c] += np.zeros( 45 | len(out_score_c), dtype=bool 46 | ).tolist() 47 | continue 48 | iou_c = matrix_iou(out_box_c, target_c[:, :4]) 49 | max_overlap_tids = torch.argmax(iou_c, dim=1) 50 | is_box_detected = np.zeros(len(target_c), dtype=bool) 51 | lable_c = np.zeros(len(out_score_c), dtype=bool) 52 | for i in range(len(max_overlap_tids)): 53 | tid = max_overlap_tids[i] 54 | if iou_c[i][tid] >= self.iou_threshold and not is_box_detected[tid]: 55 | is_box_detected[tid] = True 56 | lable_c[i] = True 57 | self.npos[c] += len(target_c) 58 | self.detect_ismatched[c] += lable_c.tolist() 59 | self.score[c] += out_score_c.cpu().tolist() 60 | self.target_ismatched[c] += is_box_detected.tolist() 61 | return 62 | 63 | def get_results(self): 64 | def compute_average_precision(precision, recall): 65 | """Compute Average Precision according to the definition in VOCdevkit. 66 | 67 | Precision is modified to ensure that it does not decrease as recall 68 | decrease. 69 | 70 | Args: 71 | precision: A float [N, 1] numpy array of precisions 72 | recall: A float [N, 1] numpy array of recalls 73 | 74 | Raises: 75 | ValueError: if the input is not of the correct format 76 | 77 | Returns: 78 | average_precison: The area under the precision recall curve. NaN if 79 | precision and recall are None. 80 | 81 | """ 82 | if precision is None: 83 | if recall is not None: 84 | raise ValueError("If precision is None, recall must also be None") 85 | return np.NAN 86 | 87 | if not isinstance(precision, np.ndarray) or not isinstance( 88 | recall, np.ndarray 89 | ): 90 | raise ValueError("precision and recall must be numpy array") 91 | if precision.dtype != np.float or recall.dtype != np.float: 92 | raise ValueError("input must be float numpy array.") 93 | if len(precision) != len(recall): 94 | raise ValueError("precision and recall must be of the same size.") 95 | if not precision.size: 96 | return 0.0 97 | if np.amin(precision) < 0 or np.amax(precision) > 1: 98 | raise ValueError("Precision must be in the range of [0, 1].") 99 | if np.amin(recall) < 0 or np.amax(recall) > 1: 100 | raise ValueError("recall must be in the range of [0, 1].") 101 | if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): 102 | raise ValueError("recall must be a non-decreasing array") 103 | 104 | recall = np.concatenate([[0], recall, [1]]) 105 | precision = np.concatenate([[0], precision, [0]]) 106 | 107 | # Preprocess precision to be a non-decreasing array 108 | for i in range(len(precision) - 2, -1, -1): 109 | precision[i] = np.maximum(precision[i], precision[i + 1]) 110 | 111 | indices = np.where(recall[1:] != recall[:-1])[0] + 1 112 | average_precision = np.sum( 113 | (recall[indices] - recall[indices - 1]) * precision[indices] 114 | ) 115 | return average_precision 116 | 117 | recall, precision, ap = [], [], [] 118 | for labels_c, scores_c, npos_c in zip( 119 | self.detect_ismatched, self.score, self.npos 120 | ): 121 | # to avoid missing ground truth in that class 122 | if npos_c == 0: 123 | ap += [np.NAN] 124 | recall += [[0], [1]] 125 | precision += [[0], [0]] 126 | continue 127 | 128 | sorted_indices = np.argsort(scores_c) 129 | sorted_indices = sorted_indices[::-1] 130 | labels_c = np.array(labels_c).astype(int) 131 | true_positive_labels = 
labels_c[sorted_indices]
132 |             false_positive_labels = 1 - true_positive_labels
133 |             tp = np.cumsum(true_positive_labels)
134 |             fp = np.cumsum(false_positive_labels)
135 | 
136 |             rec = tp.astype(float) / float(npos_c)
137 |             prec = tp.astype(float) / np.maximum(tp + fp, np.finfo(np.float64).eps)
138 |             ap += [compute_average_precision(prec, rec)]
139 |             recall += [rec]
140 |             precision += [prec]
141 |         mAP = np.nanmean(ap)
142 |         return mAP, (precision, recall, ap)
143 | 
--------------------------------------------------------------------------------
/ssds/core/optimizer.py:
--------------------------------------------------------------------------------
1 | import torch.optim as optim
2 | from torch.optim import lr_scheduler
3 | 
4 | 
5 | class InvertedExponentialLR(lr_scheduler._LRScheduler):
6 |     """Exponentially anneals the learning rate from the base learning rate to ``end_lr`` over a number of
7 |     iterations.
8 |     Arguments:
9 |         optimizer (torch.optim.Optimizer): wrapped optimizer.
10 |         end_lr (float): the final learning rate.
11 |         num_iter (int): the number of iterations over which the annealing occurs.
12 |         last_epoch (int, optional): the index of last epoch. Default: -1.
13 | 
14 |     :meta private:
15 |     """
16 | 
17 |     def __init__(self, optimizer, end_lr, num_iter=100, last_epoch=-1):
18 |         self.end_lr = end_lr
19 |         self.num_iter = num_iter
20 |         super(InvertedExponentialLR, self).__init__(optimizer, last_epoch)
21 | 
22 |     def get_lr(self):
23 |         curr_iter = self.last_epoch + 1
24 |         r = curr_iter / self.num_iter
25 |         return [base_lr * (self.end_lr / base_lr) ** r for base_lr in self.base_lrs]
26 | 
27 | 
28 | def trainable_param(model, trainable_scope):
29 |     r"""Return the trainable parameters for the optimizers by :attr:`cfg.TRAIN.TRAINABLE_SCOPE`
30 | 
31 |     If a module is in the trainable scope, its parameters are trained.
32 | 
33 |     When:
34 | 
35 |     * cfg.TRAIN.TRAINABLE_SCOPE = ""
36 |         All the parameters in the model are trained
37 |     * cfg.TRAIN.TRAINABLE_SCOPE = "a,b,c.d"
38 |         Only the parameters in a, b and c.d are trained
39 |     * cfg.TRAIN.TRAINABLE_SCOPE = "a;b,c.d"
40 |         Only the parameters in a, b and c.d are trained. Module a and modules b & c.d are placed in separate groups, so they can be assigned different learning rates (differential learning rate)
41 | 
42 |     Args:
43 |         model: the ssds model for training
44 |         trainable_scope (str): the scope of the trainable parameters in the given ssds model, defined by cfg.TRAIN.TRAINABLE_SCOPE
45 |     """
46 |     trainable_param = []
47 | 
48 |     if trainable_scope == "":
49 |         for param in model.parameters():
50 |             param.requires_grad = True
51 |         trainable_param.append(model.parameters())
52 |     else:
53 |         for param in model.parameters():
54 |             param.requires_grad = False
55 | 
56 |         for train_scope in trainable_scope.split(";"):
57 |             param_temp = []
58 |             for module in train_scope.split(","):
59 |                 submodule = module.split(".")
60 |                 tmp_model = model
61 |                 for subm in submodule:
62 |                     if hasattr(tmp_model, subm):
63 |                         tmp_model = getattr(tmp_model, subm)
64 |                     else:
65 |                         raise ValueError(module + " is not in the model")
66 |                 for param in tmp_model.parameters():
67 |                     param.requires_grad = True
68 |                 param_temp.extend(tmp_model.parameters())
69 |             trainable_param.append(param_temp)
70 |     return trainable_param
71 | 
72 | 
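# Usage sketch (illustrative): with cfg.TRAIN.TRAINABLE_SCOPE = "extras;loc,conf",
#     params = trainable_param(model, "extras;loc,conf")
# returns two parameter groups -- one for `extras`, one for `loc` + `conf` -- so each
# group can be given its own entry in cfg.TRAIN.OPTIMIZER.DIFFERENTIAL_LEARNING_RATE.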
79 | Currently, there are 4 popular optimizers supported: sgd, rmsprop, adam and amsgrad. 80 | 81 | TODO: directly fetch the optimizer by getattr(optim, cfg.OPTIMIZER) and pass the relevant parameters as a dict. 82 | 83 | Args: 84 | trainable_param: the trainable parameters in the given ssds model, check :meth:`trainable_param` for more details. 85 | cfg: the config dict, which is defined in :attr:`cfg.TRAIN.OPTIMIZER`. 86 | """ 87 | 88 | 89 | if len(cfg.DIFFERENTIAL_LEARNING_RATE) == 0 or len(trainable_param) == 1: 90 | trainable_param = trainable_param[0] 91 | else: 92 | assert len(cfg.DIFFERENTIAL_LEARNING_RATE) == len(trainable_param) 93 | trainable_param = [ 94 | {"params": _param, "lr": _lr} 95 | for _param, _lr in zip(trainable_param, cfg.DIFFERENTIAL_LEARNING_RATE) 96 | ] 97 | 98 | if cfg.OPTIMIZER == "sgd": 99 | optimizer = optim.SGD( 100 | trainable_param, 101 | lr=cfg.LEARNING_RATE, 102 | momentum=cfg.MOMENTUM, 103 | weight_decay=cfg.WEIGHT_DECAY, 104 | ) 105 | elif cfg.OPTIMIZER == "rmsprop": 106 | optimizer = optim.RMSprop( 107 | trainable_param, 108 | lr=cfg.LEARNING_RATE, 109 | momentum=cfg.MOMENTUM, 110 | alpha=cfg.MOMENTUM_2, 111 | eps=cfg.EPS, 112 | weight_decay=cfg.WEIGHT_DECAY, 113 | ) 114 | elif cfg.OPTIMIZER == "adam": 115 | optimizer = optim.Adam( 116 | trainable_param, 117 | lr=cfg.LEARNING_RATE, 118 | betas=(cfg.MOMENTUM, cfg.MOMENTUM_2), 119 | weight_decay=cfg.WEIGHT_DECAY, 120 | ) 121 | elif cfg.OPTIMIZER == "amsgrad": 122 | optimizer = optim.Adam( 123 | trainable_param, 124 | lr=cfg.LEARNING_RATE, 125 | betas=(cfg.MOMENTUM, cfg.MOMENTUM_2), 126 | weight_decay=cfg.WEIGHT_DECAY, 127 | amsgrad=True, 128 | ) 129 | else: 130 | raise AssertionError("optimizer can not be recognized") 131 | return optimizer 132 | 133 | 134 | def configure_lr_scheduler(optimizer, cfg): 135 | r""" Return the learning rate scheduler for the trainable parameters 136 | 137 | Basically, it returns the learning rate scheduler defined by :attr:`cfg.TRAIN.LR_SCHEDULER.SCHEDULER`. 138 | Some parameters for the learning rate scheduler are also defined in :attr:`cfg.TRAIN.LR_SCHEDULER`. 139 | 140 | Currently, there are 5 learning rate schedulers supported: step, multi_step, exponential, inverted_exponential and sgdr. 141 | 142 | TODO: directly fetch the scheduler by getattr(lr_scheduler, cfg.SCHEDULER) and pass the relevant parameters as a dict. 143 | 144 | Args: 145 | optimizer: the optimizer in the given ssds model, check :meth:`configure_optimizer` for more details. 146 | cfg: the config dict, which is defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.
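        A hedged usage sketch (the scheduler names come from this function; the milestone values and the per-epoch training call are illustrative assumptions)::

            # cfg.TRAIN.LR_SCHEDULER with SCHEDULER: multi_step, STEPS: [90, 120], GAMMA: 0.1
            scheduler = configure_lr_scheduler(optimizer, cfg.TRAIN.LR_SCHEDULER)
            for epoch in range(max_epoch):
                train_one_epoch(...)  # hypothetical per-epoch training step
                scheduler.step()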
147 | """ 148 | if cfg.SCHEDULER == "step": 149 | scheduler = lr_scheduler.StepLR( 150 | optimizer, step_size=cfg.STEPS[0], gamma=cfg.GAMMA 151 | ) 152 | elif cfg.SCHEDULER == "multi_step": 153 | scheduler = lr_scheduler.MultiStepLR( 154 | optimizer, milestones=cfg.STEPS, gamma=cfg.GAMMA 155 | ) 156 | elif cfg.SCHEDULER == "exponential": 157 | scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.GAMMA) 158 | elif cfg.SCHEDULER == "inverted_exponential": 159 | scheduler = InvertedExponentialLR(optimizer, end_lr=cfg.LR_MIN) 160 | elif cfg.SCHEDULER == "sgdr": 161 | scheduler = lr_scheduler.CosineAnnealingWarmRestarts( 162 | optimizer, T_0=2, T_mult=2, eta_min=cfg.LR_MIN 163 | ) 164 | else: 165 | raise AssertionError("scheduler can not be recognized.") 166 | return scheduler 167 | -------------------------------------------------------------------------------- /ssds/core/tools.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class IteratorTimer: 5 | def __init__(self, iterable): 6 | self.iterable = iterable 7 | self.iterator = self.iterable.__iter__() 8 | 9 | def __iter__(self): 10 | return self 11 | 12 | def __len__(self): 13 | return len(self.iterable) 14 | 15 | def __next__(self): 16 | start = time.time() 17 | n = next(self.iterator) 18 | self.last_duration = time.time() - start 19 | return n 20 | 21 | next = __next__ 22 | 23 | 24 | class AverageMeter(object): 25 | """ Computes and stores the average and current value""" 26 | 27 | def __init__(self): 28 | self.reset() 29 | 30 | def reset(self): 31 | self.val = 0.0 32 | self.avg = 0.0 33 | self.sum = 0.0 34 | self.count = 0 35 | 36 | def update(self, val, n=1): 37 | self.val = val 38 | self.sum += val * n 39 | self.count += n 40 | self.avg = self.sum / self.count 41 | 42 | def __call__(self): 43 | return self.val 44 | 45 | 46 | def format_dict_of_loss(dict_loss): 47 | try: 48 | string = ", ".join( 49 | [ 50 | ("{}: {:" + (".3f" if value >= 0.001 else ".1e") + "}").format( 51 | name, value 52 | ) 53 | for name, value in dict_loss.items() 54 | ] 55 | ) 56 | except (TypeError, ValueError) as e: 57 | print(dict_loss) 58 | string = "[Log Error] " + str(e) 59 | 60 | return string 61 | -------------------------------------------------------------------------------- /ssds/core/visualize_funcs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | from scipy.optimize import linear_sum_assignment 5 | 6 | 7 | def matrix_iou(a, b): 8 | """ 9 | return iou of a and b, numpy version for data augmentation 10 | 11 | :meta private: 12 | """ 13 | lt = np.maximum(a[:, None, :2], b[:, :2]) 14 | rb = np.minimum(a[:, None, 2:], b[:, 2:]) 15 | 16 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 17 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 18 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 19 | with np.errstate(divide="ignore", invalid="ignore"): 20 | return np.nan_to_num(area_i / (area_a[:, None] + area_b - area_i)) 21 | 22 | 23 | def add_prCurve(writer, precision, recall, class_names=[], epoch=0): 24 | def add_pr_curve_raw(writer, tag, precision, recall, epoch=0): 25 | """ the raw data that pr_curve_raw_data_pb() needs 26 | Args: 27 | precision: ascending array 28 | recall: descending array 29 | """ 30 | num_thresholds = len(precision) 31 | writer.add_pr_curve_raw( 32 | tag=tag, 33 | true_positive_counts=-np.ones(num_thresholds), 34 | false_positive_counts=-np.ones(num_thresholds), 35 | 
true_negative_counts=-np.ones(num_thresholds), 36 | false_negative_counts=-np.ones(num_thresholds), 37 | precision=precision, 38 | recall=recall, 39 | global_step=epoch, 40 | num_thresholds=num_thresholds, 41 | ) 42 | 43 | for i, (_prec, _rec) in enumerate(zip(precision, recall)): 44 | num_thresholds = min(500, len(_prec)) 45 | if num_thresholds != len(_prec): 46 | gap = int(len(_prec) / num_thresholds) 47 | _prec = np.append(_prec[::gap], _prec[-1]) 48 | _rec = np.append(_rec[::gap], _rec[-1]) 49 | num_thresholds = len(_prec) 50 | _prec.sort() 51 | _rec[::-1].sort() 52 | tag = class_names[i] if class_names else "pr_curve/{}".format(i + 1) 53 | add_pr_curve_raw( 54 | writer=writer, tag=tag, precision=_prec, recall=_rec, epoch=epoch 55 | ) 56 | 57 | 58 | def add_defaultAnchors(writer, image, anchors, epoch=0): 59 | if isinstance(image, torch.Tensor): 60 | image = (image * 255).int().permute(1, 2, 0).cpu().numpy().astype(np.uint8) 61 | 62 | size = np.array(image.shape[1::-1]) 63 | for stride, anchor in anchors.items(): 64 | image_show = image.copy() 65 | 66 | anchor_wh = anchor[:, 2:] - anchor[:, :2] + 1 67 | anchor_ctr = anchor[:, :2] + 0.5 * anchor_wh 68 | size_anchor = (size // stride) * stride 69 | x, y = torch.meshgrid( 70 | [torch.arange(0, size_anchor[i], stride, device="cpu") for i in range(2)] 71 | ) 72 | xyxy = torch.stack((x, y, x, y), 2).view(-1, 4) 73 | 74 | xy = (xyxy[:, :2] + anchor_ctr[0]).int() 75 | for _xy in xy: 76 | cv2.circle(image_show, tuple(_xy.tolist()), 2, (255, 0, 0), -1) 77 | shift_anchor = (anchor + xyxy[xyxy.shape[0] // 2]).int().tolist() 78 | for an in shift_anchor: 79 | cv2.rectangle(image_show, tuple(an[:2]), tuple(an[2:]), (0, 255, 0), 1) 80 | writer.add_image( 81 | "anchors/stride_{}".format(stride), image_show, epoch, dataformats="HWC" 82 | ) 83 | 84 | 85 | def add_matchedAnchorsWithBox(writer, image, anchor, stride, depth, epoch=0): 86 | if isinstance(image, torch.Tensor): 87 | image = (image * 255).int().permute(1, 2, 0).cpu().numpy().astype(np.uint8) 88 | if isinstance(depth, torch.Tensor): 89 | depth = depth.clamp(-1, 1).cpu().numpy().astype(np.int8) 90 | 91 | size = np.array(image.shape[1::-1]) 92 | anchor_wh = anchor[:, 2:] - anchor[:, :2] + 1 93 | anchor_ctr = anchor[:, :2] + 0.5 * anchor_wh 94 | size_anchor = (size // stride) * stride 95 | x, y = torch.meshgrid( 96 | [torch.arange(0, size_anchor[i], stride, device="cpu") for i in range(2)] 97 | ) 98 | xyxy = torch.stack((x, y, x, y), 2).view(-1, 4) 99 | 100 | xy = (xyxy[:, :2] + anchor_ctr[0]).int() 101 | depth_xy = (xyxy[:, :2] // stride).int() 102 | color = [(255, 0, 0), (0, 255, 0), (0, 0, 255)] # neg, pos, ignore 103 | 104 | for an_idx, an in enumerate(anchor): 105 | image_show = image.copy() 106 | for _xy, _xyxy, _depth_xy in zip(xy, xyxy, depth_xy): 107 | _depth = depth[an_idx, 0, _depth_xy[1], _depth_xy[0]] 108 | cv2.circle(image_show, tuple(_xy.tolist()), 2, color[_depth], -1) 109 | writer.add_image( 110 | "matched_anchors/stride_{}_anchor_{}".format(stride, an_idx), 111 | image_show, 112 | epoch, 113 | dataformats="HWC", 114 | ) 115 | 116 | 117 | def add_imagesWithBoxes(writer, tag, images, boxes, class_names=[], epoch=0): 118 | if isinstance(images, torch.Tensor): 119 | images = (images * 255).int().permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8) 120 | boxes = boxes.int().cpu().numpy() 121 | for i, (image, box) in enumerate(zip(images, boxes)): 122 | image = np.ascontiguousarray(image) 123 | for b in box: 124 | if b[4] == -1: 125 | continue 126 | cv2.rectangle(image, tuple(b[:2]), 
tuple(b[2:4]), (0, 255, 0), 1) 127 | c = class_names[b[4]] if class_names else b[4] 128 | cv2.putText( 129 | image, 130 | str(c), 131 | tuple(b[:2]), 132 | cv2.FONT_HERSHEY_SIMPLEX, 133 | 1, 134 | (0, 255, 0), 135 | 1, 136 | cv2.LINE_AA, 137 | ) 138 | writer.add_image(tag + "/{}".format(i), image, epoch, dataformats="HWC") 139 | 140 | 141 | def add_imagesWithMatchedBoxes( 142 | writer, tag, images, boxes, targets, class_names=[], epoch=0 143 | ): 144 | if isinstance(images, torch.Tensor): 145 | images = (images * 255).int().permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8) 146 | boxes = boxes.cpu().detach().numpy() 147 | targets = targets.int().cpu().numpy() 148 | for i, (image, box, target) in enumerate(zip(images, boxes, targets)): 149 | image = np.ascontiguousarray(image) 150 | box = box[box[:, 4] > 0.5] 151 | iou_c = matrix_iou(box[:, :4], target[:, :4]) 152 | matched = np.any(iou_c > 0.6, axis=1) 153 | for b in box[matched].astype(int): 154 | cv2.rectangle(image, tuple(b[:2]), tuple(b[2:4]), (255, 0, 255), 1) 155 | for b in target: 156 | if b[4] == -1: 157 | continue 158 | cv2.rectangle(image, tuple(b[:2]), tuple(b[2:4]), (0, 255, 0), 1) 159 | c = class_names[b[4]] if class_names else b[4] 160 | cv2.putText( 161 | image, 162 | str(c), 163 | tuple(b[:2]), 164 | cv2.FONT_HERSHEY_SIMPLEX, 165 | 1, 166 | (0, 255, 0), 167 | 1, 168 | cv2.LINE_AA, 169 | ) 170 | writer.add_image(tag + "/{}".format(i), image, epoch, dataformats="HWC") 171 | 172 | 173 | def add_anchorStrategy(writer, targets, num_thresholds=100): 174 | scale = torch.sqrt(targets[:, 2] * targets[:, 3]).cpu().numpy() 175 | ratio = (targets[:, 3] / targets[:, 2]).cpu().numpy() # h/w 176 | scale[scale > 1000] = -1 177 | ratio[np.isinf(ratio)] = -1 178 | 179 | scale.sort(); ratio.sort() 180 | 181 | import matplotlib.pyplot as plt 182 | 183 | plt.switch_backend("agg") 184 | plt.style.use("ggplot") 185 | 186 | fig = plt.figure() 187 | plt.hist(scale, bins=num_thresholds) 188 | plt.xlabel("scale") 189 | plt.ylabel("frequency") 190 | # plt.xticks((np.arange(num_thresholds+1)[::-1]/num_thresholds+1) * scale.max()) 191 | writer.add_figure("archor_strategy/scale_distribute", fig) 192 | fig.clf() 193 | 194 | fig = plt.figure() 195 | plt.hist(ratio, bins=num_thresholds) 196 | plt.xlabel("ratio") 197 | plt.ylabel("frequency") 198 | # plt.xticks([0.2,0.25,0.333,0.5,1,2,3,4,5]) 199 | writer.add_figure("archor_strategy/ratio_distribute", fig) 200 | fig.clf() 201 | 202 | 203 | def add_matchedAnchor(writer): 204 | raise NotImplementedError 205 | -------------------------------------------------------------------------------- /ssds/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dali_tfrecord import DaliTFRecord 2 | from .dali_coco import DaliCOCO 3 | from .coco import COCODetection -------------------------------------------------------------------------------- /ssds/dataset/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import pickle 5 | from pycocotools.coco import COCO 6 | 7 | from .detection_dataset import DetectionDataset 8 | 9 | class COCODataset(object): 10 | r"""COCO Dataset, used to extract the data from the annotation file only. 11 | 12 | For the dataset defined in the cfg.DATASET.DATASET, please refer to :class:`.COCODetection`.
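    The directory layout this class expects (inferred from the loading code below; the image index in the file name is zero-padded to 12 digits)::

        dataset_dir/
            annotations/instances_<image_set>.json
            images/<image_set>/000000119993.jpg
            cache/   # created automatically to cache image paths and annotations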
13 | 14 | Saves the image paths and the related annotations to self.img_paths and self.anno 15 | 16 | Arguments: 17 | dataset_dir (str): the directory of coco dataset 18 | image_sets (list): list of the image set names (e.g. 'train2017') whose annotation files are loaded 19 | """ 20 | def __init__(self, dataset_dir, image_sets): 21 | self.dataset_dir = dataset_dir 22 | self.cache_path = os.path.join(dataset_dir, 'cache') 23 | self.image_sets = image_sets 24 | self.img_paths = [] 25 | self.anno = [] 26 | self.classes_names = [] 27 | 28 | if not os.path.exists(self.cache_path): 29 | os.makedirs(self.cache_path) 30 | self._load_anno_files(dataset_dir, image_sets) 31 | 32 | def _load_anno_files(self, dataset_dir, image_sets): 33 | for coco_name in image_sets: 34 | annofile = os.path.join(dataset_dir, 'annotations', 'instances_' + coco_name + '.json') 35 | _COCO = COCO(annofile) 36 | cats = _COCO.loadCats(_COCO.getCatIds()) 37 | indexes = _COCO.getImgIds() 38 | 39 | self.classes_names = tuple(c['name'] for c in cats) 40 | self.num_classes = len(self.classes_names) 41 | self._class_to_ind = dict(zip(self.classes_names, range(self.num_classes))) 42 | self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats], 43 | _COCO.getCatIds())) 44 | self.img_paths.extend(self._load_coco_img_path(coco_name, indexes)) 45 | self.anno.extend(self._load_coco_annotations(coco_name, indexes, _COCO)) 46 | 47 | def _load_coco_img_path(self, coco_name, indexes): 48 | cache_file = os.path.join(self.cache_path, coco_name+'_img_path.pkl') 49 | if os.path.exists(cache_file): 50 | with open(cache_file, 'rb') as fid: 51 | img_path = pickle.load(fid) 52 | print('{} img path loaded from {}'.format(coco_name, cache_file)) 53 | return img_path 54 | 55 | print('parsing img path for {}'.format(coco_name)) 56 | img_path = [self.image_path_from_index(coco_name, index) 57 | for index in indexes] 58 | with open(cache_file, 'wb') as fid: 59 | pickle.dump(img_path, fid, pickle.HIGHEST_PROTOCOL) 60 | print('wrote img path to {}'.format(cache_file)) 61 | return img_path 62 | 63 | def _load_coco_annotations(self, coco_name, indexes, _COCO): 64 | cache_file = os.path.join(self.cache_path, coco_name+'_gt_db.pkl') 65 | if os.path.exists(cache_file): 66 | with open(cache_file, 'rb') as fid: 67 | roidb = pickle.load(fid) 68 | print('{} gt loaded from {}'.format(coco_name, cache_file)) 69 | return roidb 70 | 71 | print('parsing gt for {}'.format(coco_name)) 72 | gt_roidb = [self.annotation_from_index(index, _COCO) 73 | for index in indexes] 74 | with open(cache_file, 'wb') as fid: 75 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 76 | print('wrote gt to {}'.format(cache_file)) 77 | return gt_roidb 78 | 79 | def image_path_from_index(self, name, index): 80 | """ 81 | Construct an image path from the image's "index" identifier. 82 | Example image path for index=119993: 83 | images/train2017/000000119993.jpg 84 | """ 85 | file_name = (str(index).zfill(12) + '.jpg') 86 | image_path = os.path.join(self.dataset_dir, 'images', 87 | name, file_name) 88 | assert os.path.exists(image_path), \ 89 | 'Path does not exist: {}'.format(image_path) 90 | return image_path 91 | 92 | def annotation_from_index(self, index, _COCO, toPercent=True): 93 | """ 94 | Loads COCO bounding-box instance annotations. Crowd instances are 95 | handled by marking their overlaps (with all categories) to -1. This 96 | overlap value means that crowd "instances" are excluded from training.
97 | Returns the result in percent coordinates 98 | """ 99 | im_ann = _COCO.loadImgs(index)[0] 100 | width = im_ann['width'] 101 | height = im_ann['height'] 102 | 103 | annIds = _COCO.getAnnIds(imgIds=index, iscrowd=None) 104 | objs = _COCO.loadAnns(annIds) 105 | # Sanitize bboxes -- some are invalid 106 | valid_objs = [] 107 | for obj in objs: 108 | x1 = np.max((0, obj['bbox'][0])) 109 | y1 = np.max((0, obj['bbox'][1])) 110 | x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1)))) 111 | y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1)))) 112 | if obj['area'] > 0 and x2 >= x1 and y2 >= y1: 113 | obj['clean_bbox'] = [x1, y1, x2, y2] 114 | valid_objs.append(obj) 115 | 116 | # Lookup table to map from COCO category ids to our internal class 117 | # indices 118 | coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[name], 119 | self._class_to_ind[name]) 120 | for name in self.classes_names]) 121 | 122 | res = np.zeros((len(valid_objs), 5), dtype=np.float32) 123 | for ix, obj in enumerate(valid_objs): 124 | clss = coco_cat_id_to_class_ind[obj['category_id']] 125 | res[ix, 0:4] = obj['clean_bbox'] 126 | res[ix, 4] = clss 127 | 128 | if toPercent: 129 | res[:,:4:2] /= width 130 | res[:,1:4:2] /= height 131 | return res 132 | 133 | class COCODetection(COCODataset, DetectionDataset): 134 | r"""COCO Object Detection Dataset 135 | 136 | The derivative class for COCODataset and DetectionDataset. 137 | 138 | Loads the image paths and the related annotations from :class:`.COCODataset` and saves them to the annotation database. 139 | Then fetches the data by the data pipeline in :class:`ssds.dataset.detection_dataset.DetectionDataset`. 140 | 141 | Arguments: 142 | dataset_dir (str): the directory of coco dataset 143 | image_sets (list): list of the image set names (e.g. 'train2017') to load 144 | """ 145 | def __init__(self, cfg, dataset_dir, image_sets, training=False, transform=None): 146 | DetectionDataset.__init__(self, cfg, training, transform) 147 | COCODataset.__init__(self, dataset_dir, image_sets) 148 | 149 | self.db = self._get_db() 150 | # self.db = self.reorder_data(self.db, self.cfg_joints_name, self.ds_joints_name) 151 | 152 | # loading img db to boost up the speed 153 | if self.using_pickle: 154 | pickle_path = os.path.join(dataset_dir, 'pickle', 'img_db_' + '_'.join(image_sets) + '.pickle') 155 | if not os.path.exists(os.path.dirname(pickle_path)): 156 | os.makedirs(os.path.dirname(pickle_path)) 157 | if not os.path.exists(pickle_path): 158 | self.saving_pickle(pickle_path) 159 | self.img_db = self.loading_pickle(pickle_path) 160 | 161 | def _get_db(self): 162 | gt_db = [{ 163 | 'image': img_path, 164 | 'boxes': anno[:,:4], 165 | 'labels': anno[:,4] 166 | } for img_path, anno in zip(self.img_paths, self.anno)] 167 | return gt_db -------------------------------------------------------------------------------- /ssds/dataset/dali_coco.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import math 4 | import ctypes 5 | from contextlib import redirect_stdout 6 | from pycocotools.coco import COCO 7 | 8 | import nvidia.dali.ops as ops 9 | import nvidia.dali.types as types 10 | from nvidia.dali.pipeline import Pipeline 11 | 12 | from .dali_dataiterator import DaliDataset, DaliPipeline 13 | 14 | class COCOPipeline(Pipeline, DaliPipeline): 15 | 'Dali pipeline for COCO' 16 | 17 | def __init__(self, image_dir, annotations_file, cache_path, batch_size, target_size, preproc_param, num_threads, num_shards, device_ids, training=False): 18 | 
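        # Descriptive note (added): the two parent __init__ calls split the work.
        # DALI's Pipeline owns batching, threading and the device, while
        # DaliPipeline (see dali_dataiterator.py) predefines the shared
        # augmentation graph that define_graph() below plugs the reader into.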
Pipeline.__init__(self, batch_size=batch_size, num_threads=num_threads, device_id=device_ids, prefetch_queue_depth=num_threads, seed=42) 19 | DaliPipeline.__init__(self, target_size=target_size, preproc_param=preproc_param, training=training) 20 | 21 | self.reader = ops.COCOReader(annotations_file=annotations_file, file_root=image_dir, num_shards=num_shards, shard_id=0, 22 | ltrb=True, ratio=True, shuffle_after_epoch=training, save_img_ids=True, 23 | dump_meta_files=True, dump_meta_files_path=cache_path) 24 | 25 | def define_graph(self): 26 | images, bboxes, labels, img_ids = self.reader() 27 | return self.predefined_graph(images, bboxes, labels) 28 | 29 | class DaliCOCO(DaliDataset): 30 | 'Data loader for data parallel using Dali for COCO-format datasets' 31 | def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False): 32 | super(DaliCOCO, self).__init__(cfg, dataset_dir, image_sets, batch_size, training) 33 | 34 | if len(image_sets) != 1: 35 | raise ValueError("For the DaliCOCO dataset, the number of image_sets has to be 1, currently it is {}".format(image_sets)) 36 | 37 | self.image_dir = os.path.join(dataset_dir, "images", image_sets[0]) 38 | self.annotations_file = os.path.join(dataset_dir, "annotations", "instances_{}.json".format(image_sets[0])) 39 | self.cache_path = os.path.join(dataset_dir, "cache") 40 | if not os.path.exists(self.cache_path): 41 | os.makedirs(self.cache_path) 42 | 43 | self.pipe = COCOPipeline(image_dir=self.image_dir, annotations_file=self.annotations_file, cache_path=self.cache_path, **self.pipeline_args) 44 | self.pipe.build() 45 | 46 | with redirect_stdout(None): 47 | self.coco = COCO(self.annotations_file) 48 | self.ids = list(self.coco.imgs.keys()) 49 | 50 | def __len__(self): 51 | return math.ceil(len(self.ids) // self.num_shards / self.batch_size) 52 | 53 | def reset_size(self, batch_size, target_size): 54 | self.batch_size = batch_size 55 | self.target_size = target_size 56 | self.pipeline_args["batch_size"] = batch_size 57 | self.pipeline_args["target_size"] = target_size 58 | 59 | del self.pipe 60 | 61 | self.pipe = COCOPipeline(image_dir=self.image_dir, annotations_file=self.annotations_file, cache_path=self.cache_path, **self.pipeline_args) 62 | self.pipe.build() -------------------------------------------------------------------------------- /ssds/dataset/dali_dataiterator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import ctypes 4 | 5 | import nvidia.dali.ops as ops 6 | import nvidia.dali.types as types 7 | 8 | 9 | class DaliPipeline(object): 10 | r""" The data pipeline for the Dali dataset 11 | """ 12 | 13 | def __init__(self, target_size, preproc_param, training=False): 14 | self.training = training 15 | mean = preproc_param.MEAN 16 | std = preproc_param.STD 17 | bri_delta = preproc_param.BRI_DELTA 18 | hue_delta = preproc_param.HUE_DELTA 19 | max_expand_ratio = preproc_param.MAX_EXPAND_RATIO 20 | contrast_range = preproc_param.CONTRAST_RANGE 21 | saturation_range = preproc_param.SATURATION_RANGE 22 | crop_aspect_ratio = preproc_param.CROP_ASPECT_RATIO 23 | crop_scale = preproc_param.CROP_SCALE 24 | crop_attempts = preproc_param.CROP_ATTEMPTS 25 | 26 | # decoder 27 | self.decode_train = ops.ImageDecoderSlice(device="mixed", output_type=types.RGB) 28 | self.decode_infer = ops.ImageDecoder(device="mixed", output_type=types.RGB) 29 | 30 | # ssd crop 31 | self.bbox_crop = ops.RandomBBoxCrop( 32 | device="cpu", 33 | bbox_layout="xyXY", 34 | scaling=crop_scale, 35 | 
aspect_ratio=crop_aspect_ratio, 36 | allow_no_crop=True, 37 | thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9], 38 | num_attempts=crop_attempts, 39 | ) 40 | 41 | # color twist 42 | self.uniform_con = ops.Uniform(range=contrast_range) 43 | self.uniform_bri = ops.Uniform( 44 | range=(1.0 - bri_delta / 256.0, 1.0 + bri_delta / 256.0) 45 | ) 46 | self.uniform_sat = ops.Uniform(range=saturation_range) 47 | self.uniform_hue = ops.Uniform(range=(-hue_delta, hue_delta)) 48 | self.hsv = ops.Hsv(device="gpu") 49 | self.contrast = ops.BrightnessContrast(device="gpu") 50 | 51 | # hflip 52 | self.bbox_flip = ops.BbFlip(device="cpu", ltrb=True) 53 | self.img_flip = ops.Flip(device="gpu") 54 | self.coin_flip = ops.CoinFlip(probability=0.5) 55 | 56 | # paste 57 | self.paste_pos = ops.Uniform(range=(0, 1)) 58 | self.paste_ratio = ops.Uniform(range=(1, max_expand_ratio)) 59 | self.paste = ops.Paste(device="gpu", fill_value=mean) 60 | self.bbox_paste = ops.BBoxPaste(device="cpu", ltrb=True) 61 | 62 | # resize and normalize 63 | self.resize = ops.Resize( 64 | device="gpu", 65 | interp_type=types.DALIInterpType.INTERP_CUBIC, 66 | resize_x=target_size[0], 67 | resize_y=target_size[1], 68 | save_attrs=True, 69 | ) 70 | self.normalize = ops.CropMirrorNormalize(device="gpu", mean=mean, std=std) 71 | 72 | def predefined_graph(self, images, bboxes, labels): 73 | if self.training: 74 | # crop 75 | crop_begin, crop_size, bboxes, labels = self.bbox_crop(bboxes, labels) 76 | images = self.decode_train(images, crop_begin, crop_size) 77 | 78 | # color twist 79 | images = self.hsv( 80 | images, hue=self.uniform_hue(), saturation=self.uniform_sat() 81 | ) 82 | images = self.contrast( 83 | images, brightness=self.uniform_bri(), contrast=self.uniform_con() 84 | ) 85 | 86 | # hflip 87 | flip = self.coin_flip() 88 | bboxes = self.bbox_flip(bboxes, horizontal=flip) 89 | images = self.img_flip(images, horizontal=flip) 90 | 91 | # paste 92 | ratio = self.paste_ratio() 93 | px = self.paste_pos() 94 | py = self.paste_pos() 95 | images = self.paste(images.gpu(), paste_x=px, paste_y=py, ratio=ratio) 96 | bboxes = self.bbox_paste(bboxes, paste_x=px, paste_y=py, ratio=ratio) 97 | else: 98 | images = self.decode_infer(images) 99 | 100 | images, attrs = self.resize(images) 101 | images = self.normalize(images) 102 | 103 | return images, bboxes, labels 104 | 105 | 106 | class DaliDataset(object): 107 | r""" Data loader for data parallel using Dali 108 | """ 109 | 110 | def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False): 111 | 112 | self.training = training 113 | self.batch_size = batch_size 114 | self.target_size = cfg.IMAGE_SIZE 115 | self.preproc_param = cfg.PREPROC 116 | 117 | self.device_ids = ( 118 | torch.cuda.current_device() if len(cfg.DEVICE_ID) != 1 else cfg.DEVICE_ID[0] 119 | ) # ",".join([str(d) for d in device_ids]) 120 | self.num_shards = max(len(cfg.DEVICE_ID), 1) 121 | self.num_threads = cfg.NUM_WORKERS 122 | 123 | self.pipeline_args = { 124 | "target_size": self.target_size, 125 | "num_threads": self.num_threads, 126 | "num_shards": self.num_shards, 127 | "batch_size": self.batch_size, 128 | "training": self.training, 129 | "device_ids": self.device_ids, 130 | "preproc_param": self.preproc_param, 131 | } 132 | 133 | def __repr__(self): 134 | return "\n".join( 135 | [ 136 | " loader: dali", 137 | " length: {}".format(self.__len__()), 138 | " target_size: {}".format(self.target_size), 139 | ] 140 | ) 141 | 142 | def __len__(self): 143 | return math.ceil(len(self.pipe) // self.num_shards / self.batch_size) 144 | 145 | 
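    # A hedged usage sketch for the derived loaders (the constructor signature is
    # from this file; the cfg attribute paths and the image set name are assumptions):
    #
    #     loader = DaliCOCO(cfg.DATASET, cfg.DATASET.DATASET_DIR, ["train2017"],
    #                       batch_size=32, training=True)
    #     for images, targets in loader:
    #         # images:  (N, 3, H, W) cuda float tensor
    #         # targets: (N, max_dets, 5) padded with -1; columns are x, y, w, h, label
    #         ...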
def __iter__(self): 146 | for _ in range(self.__len__()): 147 | data, num_detections = [], [] 148 | dali_data, dali_boxes, dali_labels = self.pipe.run() 149 | 150 | for l in range(len(dali_boxes)): 151 | num_detections.append(dali_boxes.at(l).shape[0]) 152 | 153 | torch_targets = -1 * torch.ones( 154 | [len(dali_boxes), max(max(num_detections), 1), 5] 155 | ) 156 | 157 | for batch in range(self.batch_size): 158 | # Convert dali tensor to pytorch 159 | dali_tensor = dali_data[batch] 160 | tensor_shape = dali_tensor.shape() 161 | 162 | datum = torch.zeros( 163 | dali_tensor.shape(), dtype=torch.float, device=torch.device("cuda") 164 | ) 165 | c_type_pointer = ctypes.c_void_p(datum.data_ptr()) 166 | dali_tensor.copy_to_external(c_type_pointer) 167 | 168 | # Rescale boxes 169 | b_arr = dali_boxes.at(batch) 170 | num_dets = b_arr.shape[0] 171 | if num_dets != 0: 172 | torch_bbox = torch.from_numpy(b_arr).float() 173 | 174 | torch_bbox[:, ::2] *= self.target_size[0] 175 | torch_bbox[:, 1::2] *= self.target_size[1] 176 | # (l,t,r,b) -> (x,y,w,h) == (l,t, r-l, b-t) 177 | torch_bbox[:, 2] -= torch_bbox[:, 0] 178 | torch_bbox[:, 3] -= torch_bbox[:, 1] 179 | torch_targets[batch, :num_dets, :4] = torch_bbox # * ratio 180 | 181 | # Arrange labels in target tensor 182 | l_arr = dali_labels.at(batch) 183 | if num_dets != 0: 184 | torch_label = torch.from_numpy(l_arr).float() 185 | torch_label -= 1 # Rescale labels to [0,n-1] instead of [1,n] 186 | torch_targets[batch, :num_dets, 4] = torch_label.squeeze() 187 | 188 | data.append(datum.unsqueeze(0)) 189 | 190 | data = torch.cat(data, dim=0) 191 | torch_targets = torch_targets.cuda(non_blocking=True) 192 | yield data, torch_targets 193 | 194 | def reset_size(self, batch_size, target_size): 195 | r""" 196 | :meta private: 197 | """ 198 | raise NotImplementedError() 199 | -------------------------------------------------------------------------------- /ssds/dataset/dali_tfrecord.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import math 4 | import ctypes 5 | from subprocess import call 6 | from glob import glob 7 | 8 | import nvidia.dali.ops as ops 9 | import nvidia.dali.types as types 10 | import nvidia.dali.tfrecord as tfrec 11 | from nvidia.dali.pipeline import Pipeline 12 | import nvidia.dali.plugin.pytorch as dalitorch 13 | 14 | from .dali_dataiterator import DaliDataset, DaliPipeline 15 | 16 | class TFRecordPipeline(Pipeline, DaliPipeline): 17 | """ Currently the easiest way to process a dataset with DALI is to use TFRecord files 18 | """ 19 | def __init__(self, tfrecords, batch_size, target_size, preproc_param, num_threads, num_shards, device_ids, training=False): 20 | Pipeline.__init__(self, batch_size=batch_size, num_threads=num_threads, 21 | device_id=device_ids, prefetch_queue_depth=num_threads, seed=42, 22 | exec_async=False, exec_pipelined=False) 23 | DaliPipeline.__init__(self, target_size=target_size, preproc_param=preproc_param, training=training) 24 | 25 | tfrecords_idx = [tfrecord+"_idx" for tfrecord in tfrecords] 26 | for tfrecord, tfrecord_idx in zip(tfrecords, tfrecords_idx): 27 | if os.path.exists(tfrecord_idx): 28 | continue 29 | call(["tfrecord2idx", tfrecord, tfrecord+"_idx"]) 30 | self.length = sum([len(open(f).readlines()) for f in tfrecords_idx]) 31 | 32 | self.input = ops.TFRecordReader(path = tfrecords, 33 | index_path = tfrecords_idx, 34 | features = { 35 | 'image/height' : tfrec.FixedLenFeature([1], tfrec.int64, -1), 36 | 'image/width' : 
tfrec.FixedLenFeature([1], tfrec.int64, -1), 37 | 'image/encoded' : tfrec.FixedLenFeature((), tfrec.string, ""), 38 | 'image/format' : tfrec.FixedLenFeature((), tfrec.string, ""), 39 | 'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0), 40 | 'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0), 41 | 'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0), 42 | 'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0), 43 | 'image/object/class/text': tfrec.FixedLenFeature([ ], tfrec.string, ''), 44 | 'image/object/class/label': tfrec.VarLenFeature(tfrec.int64, -1) 45 | }, 46 | num_shards = num_shards, 47 | random_shuffle = training) 48 | self.training = training 49 | self.cat = dalitorch.TorchPythonFunction(function=lambda l,t,r,b: torch.cat([l,t,r,b]).view(4,-1).permute(1,0)) #[l*w,t*h,r*w,b*h], [l,t,r,b] 50 | self.cast = ops.Cast(dtype=types.DALIDataType.INT32) 51 | 52 | def define_graph(self): 53 | inputs = self.input() 54 | images = inputs["image/encoded"] 55 | bboxes = self.cat(inputs["image/object/bbox/xmin"], inputs["image/object/bbox/ymin"], 56 | inputs["image/object/bbox/xmax"], inputs["image/object/bbox/ymax"]) 57 | labels = self.cast(inputs["image/object/class/label"]) 58 | return self.predefined_graph(images, bboxes, labels) 59 | 60 | def __len__(self): 61 | return self.length 62 | 63 | 64 | class DaliTFRecord(DaliDataset): 65 | 'Data loader for data parallel using Dali for TFRecord files' 66 | def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False): 67 | super(DaliTFRecord, self).__init__(cfg, dataset_dir, image_sets, batch_size, training) 68 | 69 | self.tfrecords = [path for sets in image_sets for path in glob(os.path.join(dataset_dir, sets))] 70 | self.pipe = TFRecordPipeline(tfrecords=self.tfrecords, **self.pipeline_args) 71 | self.pipe.build() 72 | 73 | 74 | def reset_size(self, batch_size, target_size): 75 | self.batch_size = batch_size 76 | self.target_size = target_size 77 | self.pipeline_args["batch_size"] = batch_size 78 | self.pipeline_args["target_size"] = target_size 79 | 80 | del self.pipe 81 | 82 | self.pipe = TFRecordPipeline(tfrecords=self.tfrecords, **self.pipeline_args) 83 | self.pipe.build() -------------------------------------------------------------------------------- /ssds/dataset/dataset_factory.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import numpy as np 4 | import os 5 | from glob import glob 6 | 7 | from ssds import dataset 8 | 9 | 10 | def detection_collate(batch): 11 | """Custom collate fn for dealing with batches of images that have a different 12 | number of associated object annotations (bounding boxes). 
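    For example, with a batch of two images having 3 and 1 annotations
    respectively, the returned target tensor has shape (2, 3, 5) and the
    two unused rows of the second image are filled with -1 (shapes illustrative).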
13 | Arguments: 14 | batch: (tuple) A tuple of tensor images and lists of annotations 15 | Return: 16 | A tuple containing: 17 | 1) (tensor) batch of images stacked on their 0 dim 18 | 2) (tensors) annotations for a given image are stacked on 0 dim 19 | """ 20 | targets = [] 21 | imgs = [] 22 | num_detections = [] 23 | for img, target in batch: 24 | # for tup in sample: 25 | imgs.append(img) 26 | targets.append(target) 27 | num_detections.append(target.shape[0]) 28 | 29 | torch_targets = -1 * torch.ones( 30 | [len(targets), max(max(num_detections), 1), 5], dtype=torch.float, device="cpu" 31 | ) 32 | for i, target in enumerate(targets): 33 | num_dets = target.shape[0] 34 | torch_targets[i, :num_dets] = torch.from_numpy(target).float() 35 | return torch.stack(imgs, 0), torch_targets 36 | 37 | 38 | def load_data(cfg, phase): 39 | r""" create the dataloader based on the config file. 40 | 41 | * If the phase == "train", 42 | it returns the dataloader for cfg.DATASET.TRAIN_SETS and fetches the data randomly; 43 | * If the phase == "test", 44 | it returns the dataloader for cfg.DATASET.TEST_SETS and fetches the data sequentially; 45 | 46 | Args: 47 | cfg: the configs defined by cfg.DATASET 48 | phase (str): "train" or "test" 49 | 50 | Returns: 51 | dataloader 52 | """ 53 | training = phase == "train" 54 | image_sets = cfg.TRAIN_SETS if training else cfg.TEST_SETS 55 | batch_size = cfg.TRAIN_BATCH_SIZE if training else cfg.TEST_BATCH_SIZE 56 | 57 | if "Dali" in cfg.DATASET: 58 | data_loader = getattr(dataset, cfg.DATASET)( 59 | cfg=cfg, 60 | dataset_dir=cfg.DATASET_DIR, 61 | image_sets=image_sets, 62 | batch_size=batch_size, 63 | training=training, 64 | ) 65 | else: 66 | _dataset = getattr(dataset, cfg.DATASET)( 67 | cfg=cfg, 68 | dataset_dir=cfg.DATASET_DIR, 69 | image_sets=image_sets, 70 | training=training, 71 | ) 72 | data_loader = data.DataLoader( 73 | _dataset, 74 | batch_size, 75 | num_workers=cfg.NUM_WORKERS, 76 | shuffle=training, 77 | collate_fn=detection_collate, 78 | pin_memory=True, 79 | ) 80 | return data_loader 81 | -------------------------------------------------------------------------------- /ssds/dataset/detection_dataset.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import sys 3 | import pickle 4 | import glob 5 | 6 | import cv2 7 | import numpy as np 8 | from PIL import Image 9 | import io 10 | 11 | import torch 12 | import torch.utils.data as data 13 | 14 | from . import transforms as preprocess 15 | 16 | class DetectionDataset(data.Dataset): 17 | '''The base class for the detection 2d dataset. 18 | 19 | It contains the data pipeline which is defined by :meth:`_init_transform`. 20 | 21 | DetectionDataset is the base class and does not contain the actual data; derived classes 22 | need to fill the annotations into self.db.
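    A db record is expected to look like the following (field names from
    :meth:`__getitem__` below; the path and shapes are illustrative)::

        {'image': '/abs/path/to/img.jpg',
         'boxes': np.ndarray of shape (n, 4),   # percent coords
         'labels': np.ndarray of shape (n,)}    # class indices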
23 | ''' 24 | def __init__(self, cfg, is_train, transform=None): 25 | # super(DetectionDataset, self).__init__() 26 | self.is_train = is_train 27 | 28 | self.image_size = cfg.IMAGE_SIZE 29 | # self.num_classes = cfg.NUM_CLASSES 30 | # self.classes_names = cfg.CLASSES_NAME 31 | self.preproc_param = cfg.PREPROC 32 | self.using_pickle = cfg.PICKLE 33 | self.transform = transform 34 | 35 | self.db = [] 36 | self.img_db = [] 37 | self._init_transform() 38 | 39 | def _init_transform(self): 40 | if self.is_train: 41 | self.transform = preprocess.Compose([ 42 | preprocess.ConvertFromInts(), 43 | preprocess.ToAbsoluteCoords(), 44 | preprocess.RandomSampleCrop(scale=self.preproc_param.CROP_SCALE, 45 | num_attempts=self.preproc_param.CROP_ATTEMPTS), 46 | preprocess.RandomMirror(), 47 | # preprocess.PhotometricDistort(hue_delta=self.preproc_param.HUE_DELTA, 48 | # bri_delta=self.preproc_param.BRI_DELTA, 49 | # contrast_range=self.preproc_param.CONTRAST_RANGE, 50 | # saturation_range=self.preproc_param.SATURATION_RANGE), 51 | preprocess.Expand(mean=self.preproc_param.MEAN, 52 | max_expand_ratio=self.preproc_param.MAX_EXPAND_RATIO), 53 | preprocess.ToPercentCoords(), 54 | preprocess.Resize(tuple(self.image_size)), 55 | preprocess.ToAbsoluteCoords(), 56 | preprocess.ToTensor(), 57 | # preprocess.ToGPU(), 58 | preprocess.Normalize(mean=self.preproc_param.MEAN, std=self.preproc_param.STD), 59 | preprocess.ToXYWH(), 60 | ]) 61 | else: 62 | self.transform = preprocess.Compose([ 63 | preprocess.ConvertFromInts(), 64 | preprocess.Resize(tuple(self.image_size)), 65 | preprocess.ToAbsoluteCoords(), 66 | preprocess.ToTensor(), 67 | # preprocess.ToGPU(), 68 | preprocess.Normalize(mean=self.preproc_param.MEAN, std=self.preproc_param.STD), 69 | preprocess.ToXYWH(), 70 | ]) 71 | 72 | def _get_db(self): 73 | raise NotImplementedError 74 | 75 | def __len__(self): 76 | return len(self.db) 77 | 78 | def __getitem__(self, index): 79 | r''' fetch the image and annotation from self.db[index] 80 | 81 | The data in the self.db can be described as: 82 | 83 | db[index] = { 84 | 85 | 'image': 'Absolute Path', 86 | 87 | 'boxes': np.ndarray, 88 | 89 | 'labels': np.ndarray} 90 | 91 | Args: 92 | index: the index for db 93 | 94 | 95 | Returns: 96 | 'image', torch(c,h,w), 97 | 98 | 'target', np.ndarray(n,5) 99 | 100 | 0~3 is the bounding box in absolute coords with format x,y,w,h 101 | 102 | 4 is the bounding box label 103 | ''' 104 | db_rec = copy.deepcopy(self.db[index]) 105 | 106 | # read the images 107 | if self.using_pickle: 108 | # decode image 109 | encoded_image = copy.deepcopy(self.img_db[index]) 110 | image = Image.open(io.BytesIO(encoded_image)) 111 | image = np.array(image) 112 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 113 | else: 114 | image_file = db_rec['image'] 115 | image = cv2.imread(image_file) 116 | if image is None: 117 | raise ValueError('Fail to read {}'.format(image_file)) 118 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 119 | 120 | boxes = db_rec['boxes'] 121 | labels = db_rec['labels'] 122 | 123 | # preprocess 124 | image, boxes, labels = self.transform(image, boxes, labels) 125 | return image, np.concatenate((boxes, labels[:,None]),axis=1) 126 | 127 | def reorder_data(self, db, cfg_joints_name, ds_joints_name): 128 | ''' reorder the db based on the cfg_joints_name 129 | 130 | :meta private: 131 | ''' 132 | order = [] 133 | for cfg_name in cfg_joints_name: 134 | if cfg_name in ds_joints_name: 135 | order.append(ds_joints_name.index(cfg_name)) 136 | else: 137 | order.append(-1) 138 | order = 
np.array(order) 139 | 140 | raise NotImplementedError 141 | return db 142 | 143 | def saving_pickle(self, pickle_path): 144 | ''' 145 | :meta private: 146 | ''' 147 | img_db = [] 148 | for idx, db_rec in enumerate(self.db): 149 | sys.stdout.write('\rLoading Image: {}/{}'.format(idx, len(self.db))) 150 | sys.stdout.flush() 151 | # load bytes from file 152 | with open(db_rec['image'], 'rb') as f: 153 | img_db.append(f.read()) 154 | 155 | # serialize 156 | sys.stdout.write('\rSaving img_db ({}) to {}\n'.format(len(self.db), pickle_path)) 157 | with open(pickle_path, 'wb') as handle: 158 | return pickle.dump(img_db, handle, protocol=pickle.HIGHEST_PROTOCOL) 159 | 160 | def loading_pickle(self, pickle_path): 161 | ''' 162 | :meta private: 163 | ''' 164 | sys.stdout.write('\rLoading Pickle from {}\n'.format(pickle_path)) 165 | with open(pickle_path, 'rb') as handle: 166 | return pickle.load(handle) -------------------------------------------------------------------------------- /ssds/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/modeling/__init__.py -------------------------------------------------------------------------------- /ssds/modeling/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/modeling/layers/__init__.py -------------------------------------------------------------------------------- /ssds/modeling/layers/basic_layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SepConvBNReLU(nn.Sequential): 6 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, expand_ratio=1): 7 | padding = (kernel_size - 1) // 2 8 | super(SepConvBNReLU, self).__init__( 9 | # dw 10 | nn.Conv2d( 11 | in_planes, 12 | in_planes, 13 | kernel_size, 14 | stride, 15 | padding, 16 | groups=in_planes, 17 | bias=False, 18 | ), 19 | nn.BatchNorm2d(in_planes), 20 | nn.ReLU(inplace=True), 21 | # pw 22 | nn.Conv2d(in_planes, out_planes, 1, 1, 0, bias=False), 23 | nn.BatchNorm2d(out_planes), 24 | nn.ReLU(inplace=True), 25 | ) 26 | 27 | 28 | class ConvBNReLU(nn.Sequential): 29 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1): 30 | padding = (kernel_size - 1) // 2 31 | super(ConvBNReLU, self).__init__( 32 | nn.Conv2d( 33 | in_planes, out_planes, kernel_size, stride, padding=padding, bias=False 34 | ), 35 | nn.BatchNorm2d(out_planes), 36 | nn.ReLU(inplace=True), 37 | ) 38 | 39 | 40 | class ConvBNReLUx2(nn.Sequential): 41 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1): 42 | padding = (kernel_size - 1) // 2 43 | super(ConvBNReLUx2, self).__init__( 44 | nn.Conv2d(in_planes, out_planes // 2, 1, bias=False), 45 | nn.BatchNorm2d(out_planes // 2), 46 | nn.ReLU(inplace=True), 47 | nn.Conv2d( 48 | out_planes // 2, 49 | out_planes, 50 | kernel_size, 51 | stride, 52 | padding=padding, 53 | bias=False, 54 | ), 55 | nn.BatchNorm2d(out_planes), 56 | nn.ReLU(inplace=True), 57 | ) 58 | -------------------------------------------------------------------------------- /ssds/modeling/layers/decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .box import decode, nms 3 | 4 | 5 | class Decoder(object): 6 | r""" 
7 | class Decoder contains the decoder func and nms func 8 | 9 | * decoder 10 | decoder is used to decode the boxes from loc and conf feature map, check :meth:`ssds.modeling.layers.box.decode` for more details. 11 | * nms 12 | nms is used to filter the decoded boxes by its confidence and box location, check :meth:`ssds.modeling.layers.box.nms` for more details. 13 | """ 14 | 15 | def __init__( 16 | self, conf_threshold, nms_threshold, top_n, top_n_per_level, rescore, use_diou 17 | ): 18 | self.conf_threshold = conf_threshold 19 | self.nms_threshold = nms_threshold 20 | self.top_n = top_n 21 | self.top_n_per_level = top_n_per_level 22 | self.rescore = rescore 23 | self.use_diou = use_diou 24 | 25 | def __call__(self, loc, conf, anchors): 26 | r""" 27 | Decode and filter boxes 28 | 29 | Returns: 30 | out_scores, (batch, top_n) 31 | 32 | out_boxes, (batch, top_n, 4) with ltrb format 33 | 34 | out_classes, (batch, top_n) 35 | """ 36 | decoded = [ 37 | decode( 38 | c, 39 | l, 40 | stride, 41 | self.conf_threshold, 42 | self.top_n_per_level, 43 | anchor, 44 | rescore=self.rescore, 45 | ) 46 | for l, c, (stride, anchor) in zip(loc, conf, anchors.items()) 47 | ] 48 | decoded = [torch.cat(tensors, 1) for tensors in zip(*decoded)] 49 | return nms(*decoded, self.nms_threshold, self.top_n, using_diou=self.use_diou) 50 | -------------------------------------------------------------------------------- /ssds/modeling/layers/layers_parser.py: -------------------------------------------------------------------------------- 1 | from ssds.modeling.layers.basic_layers import SepConvBNReLU, ConvBNReLUx2 2 | from ssds.modeling.layers.rfb_layers import BasicRFB, BasicRFB_lite 3 | 4 | 5 | def parse_feature_layer(layer, in_channels, depth): 6 | """ Parse the layers defined in the config files 7 | Args: 8 | layer: The name of the layer 9 | in_channels: The input channel of the layer 10 | depth: The output channel of the layer 11 | Return: 12 | list of operation 13 | """ 14 | if layer == "SepConv:S": 15 | return [SepConvBNReLU(in_channels, depth, stride=2, expand_ratio=1)] 16 | elif layer == "SepConv": 17 | return [SepConvBNReLU(in_channels, depth, stride=1, expand_ratio=1)] 18 | elif layer == "Conv:S": 19 | return [ConvBNReLUx2(in_channels, depth, stride=2)] 20 | elif layer == "Conv": 21 | return [ConvBNReLUx2(in_channels, depth, stride=1)] 22 | elif layer == "RBF:S": 23 | return [BasicRFB(in_channels, depth, stride=2, scale=1.0, visual=2)] 24 | elif layer == "RBF": 25 | return [BasicRFB(in_channels, depth, stride=1, scale=1.0, visual=2)] 26 | elif isinstance(layer, int): 27 | # temp, need TODO improve 28 | return [] 29 | else: 30 | raise AssertionError("Undefined layer: {}".format(layer)) 31 | -------------------------------------------------------------------------------- /ssds/modeling/model_builder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import OrderedDict 3 | 4 | from . 
import ssds, nets 5 | from .layers.box import generate_anchors, configure_ratio_scale 6 | from .layers.decoder import Decoder 7 | 8 | 9 | def create_model(cfg): 10 | """ create the model based on the config files 11 | Returns: 12 | torch ssds model with backbone as net 13 | """ 14 | ratios, scales = configure_ratio_scale(len(cfg.SIZES), cfg.ASPECT_RATIOS, cfg.SIZES) 15 | number_box = [len(r) * len(s) for r, s in zip(ratios, scales)] 16 | nets_outputs, extras, head = getattr(ssds, cfg.SSDS).add_extras( 17 | feature_layer=cfg.FEATURE_LAYER, mbox=number_box, num_classes=cfg.NUM_CLASSES 18 | ) 19 | model = getattr(ssds, cfg.SSDS)( 20 | backbone=getattr(nets, cfg.NETS)( 21 | outputs=nets_outputs, num_images=cfg.NUM_IMAGES 22 | ), 23 | extras=extras, 24 | head=head, 25 | num_classes=cfg.NUM_CLASSES, 26 | ) 27 | return model 28 | 29 | 30 | def create_anchors(cfg, model, image_size, visualize=False): 31 | """ Current version of the anchor generation; it only generates the default anchors for each feature map layer 32 | Returns: 33 | anchors: OrderedDict(key=stride, value=default_anchors) 34 | """ 35 | model.eval() 36 | with torch.no_grad(): 37 | x = torch.rand( 38 | (1, 3, image_size[0], image_size[1]), device=next(model.parameters()).device 39 | ) 40 | conf = model(x)[-1] 41 | strides = [x.shape[-1] // c.shape[-1] for c in conf] 42 | 43 | ratios, scales = configure_ratio_scale(len(strides), cfg.ASPECT_RATIOS, cfg.SIZES) 44 | anchors = OrderedDict( 45 | [ 46 | (strides[i], generate_anchors(strides[i], ratios[i], scales[i])) 47 | for i in range(len(strides)) 48 | ] 49 | ) 50 | if visualize: 51 | print("Anchor Boxes (width, height)") 52 | [ 53 | print("Stride {}: {}".format(k, (v[:, 2:] - v[:, :2] + 1).int().tolist())) 54 | for k, v in anchors.items() 55 | ] 56 | return anchors 57 | 58 | 59 | def create_decoder(cfg): 60 | r""" Generate decoder based on the cfg.POST_PROCESS. 61 | 62 | The generated decoder is the object of class Decoder, check more details by :class:`ssds.modeling.layers.decoder.Decoder`.
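    A hedged end-to-end sketch of the three builders in this module (the cfg
    attribute paths are assumptions, and the model is assumed to return
    (loc, conf) as suggested by :meth:`create_anchors`)::

        model = create_model(cfg.MODEL)
        anchors = create_anchors(cfg.MODEL, model, cfg.MODEL.IMAGE_SIZE)
        decoder = create_decoder(cfg.POST_PROCESS)
        loc, conf = model(images)
        scores, boxes, classes = decoder(loc, conf, anchors)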
63 | 64 | Args: 65 | cfg: defined cfg.POST_PROCESS 66 | """ 67 | return Decoder( 68 | cfg.SCORE_THRESHOLD, 69 | cfg.IOU_THRESHOLD, 70 | cfg.MAX_DETECTIONS, 71 | cfg.MAX_DETECTIONS_PER_LEVEL, 72 | cfg.RESCORE_CENTER, 73 | cfg.USE_DIOU, 74 | ) 75 | -------------------------------------------------------------------------------- /ssds/modeling/nets/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .efficientnet import * 3 | from .mobilenet import * 4 | from .effnet import * 5 | from .shufflenet import * 6 | from .densenet import * 7 | from .inception_v2 import InceptionV2 8 | from .darknet import * 9 | from .regnet import * -------------------------------------------------------------------------------- /ssds/modeling/nets/darknet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .rutils import register 4 | 5 | 6 | def Conv3x3BNReLU(in_channels, out_channels, stride=1): 7 | return nn.Sequential( 8 | nn.Conv2d( 9 | in_channels=in_channels, 10 | out_channels=out_channels, 11 | kernel_size=3, 12 | stride=stride, 13 | padding=1, 14 | ), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True), 17 | ) 18 | 19 | 20 | def Conv1x1BNReLU(in_channels, out_channels): 21 | return nn.Sequential( 22 | nn.Conv2d( 23 | in_channels=in_channels, 24 | out_channels=out_channels, 25 | kernel_size=1, 26 | stride=1, 27 | padding=0, 28 | ), 29 | nn.BatchNorm2d(out_channels), 30 | nn.ReLU6(inplace=True), 31 | ) 32 | 33 | 34 | class Residual(nn.Module): 35 | def __init__(self, nchannels): 36 | super(Residual, self).__init__() 37 | mid_channels = nchannels // 2 38 | self.conv1x1 = Conv1x1BNReLU(in_channels=nchannels, out_channels=mid_channels) 39 | self.conv3x3 = Conv3x3BNReLU(in_channels=mid_channels, out_channels=nchannels) 40 | 41 | def forward(self, x): 42 | out = self.conv3x3(self.conv1x1(x)) 43 | return out + x 44 | 45 | 46 | class DarkNet(nn.Module): 47 | def __init__( 48 | self, 49 | layers=[1, 2, 8, 8, 4], 50 | outputs=[5], 51 | groups=1, 52 | width_per_group=64, 53 | url=None, 54 | ): 55 | super(DarkNet, self).__init__() 56 | self.outputs = outputs 57 | self.url = url 58 | 59 | self.conv1 = Conv3x3BNReLU(in_channels=3, out_channels=32) 60 | 61 | self.block1 = self._make_layers( 62 | in_channels=32, out_channels=64, block_num=layers[0] 63 | ) 64 | self.block2 = self._make_layers( 65 | in_channels=64, out_channels=128, block_num=layers[1] 66 | ) 67 | self.block3 = self._make_layers( 68 | in_channels=128, out_channels=256, block_num=layers[2] 69 | ) 70 | self.block4 = self._make_layers( 71 | in_channels=256, out_channels=512, block_num=layers[3] 72 | ) 73 | self.block5 = self._make_layers( 74 | in_channels=512, out_channels=1024, block_num=layers[4] 75 | ) 76 | 77 | def _make_layers(self, in_channels, out_channels, block_num): 78 | _layers = [] 79 | _layers.append( 80 | Conv3x3BNReLU(in_channels=in_channels, out_channels=out_channels, stride=2) 81 | ) 82 | for _ in range(block_num): 83 | _layers.append(Residual(nchannels=out_channels)) 84 | return nn.Sequential(*_layers) 85 | 86 | def initialize(self): 87 | pass 88 | 89 | def forward(self, x): 90 | outputs = [] 91 | x = self.conv1(x) 92 | for level in range(1, 6): 93 | if level > max(self.outputs): 94 | break 95 | x = getattr(self, "block{}".format(level))(x) 96 | if level in self.outputs: 97 | outputs.append(x) 98 | return outputs 99 | 100 | 101 | @register 102 | def DarkNet53(outputs, 
**kwargs): 103 | return DarkNet(outputs=outputs) 104 | -------------------------------------------------------------------------------- /ssds/modeling/nets/densenet.py: -------------------------------------------------------------------------------- 1 | import re 2 | import torch 3 | import torch.nn as nn 4 | from torchvision.models import densenet 5 | import torch.utils.model_zoo as model_zoo 6 | from collections import OrderedDict 7 | from .rutils import register 8 | 9 | 10 | class DenseNet(nn.Module): 11 | def __init__( 12 | self, 13 | growth_rate=32, 14 | block_config=(6, 12, 24, 16), 15 | num_init_features=64, 16 | bn_size=4, 17 | drop_rate=0, 18 | memory_efficient=False, 19 | outputs=[], 20 | url=None, 21 | ): 22 | super(DenseNet, self).__init__() 23 | self.url = url 24 | self.outputs = outputs 25 | self.block_config = block_config 26 | 27 | # First convolution 28 | self.conv1 = nn.Sequential( 29 | OrderedDict( 30 | [ 31 | ( 32 | "conv", 33 | nn.Conv2d( 34 | 3, 35 | num_init_features, 36 | kernel_size=7, 37 | stride=2, 38 | padding=3, 39 | bias=False, 40 | ), 41 | ), 42 | ("norm", nn.BatchNorm2d(num_init_features)), 43 | ("relu", nn.ReLU(inplace=True)), 44 | ("pool", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), 45 | ] 46 | ) 47 | ) 48 | 49 | # Each denseblock 50 | num_features = num_init_features 51 | for i, num_layers in enumerate(block_config): 52 | block = densenet._DenseBlock( 53 | num_layers=num_layers, 54 | num_input_features=num_features, 55 | bn_size=bn_size, 56 | growth_rate=growth_rate, 57 | drop_rate=drop_rate, 58 | memory_efficient=memory_efficient, 59 | ) 60 | self.add_module("denseblock%d" % (i + 1), block) 61 | num_features = num_features + num_layers * growth_rate 62 | if i != len(block_config) - 1: 63 | trans = densenet._Transition( 64 | num_input_features=num_features, 65 | num_output_features=num_features // 2, 66 | ) 67 | self.add_module("transition%d" % (i + 1), trans) 68 | num_features = num_features // 2 69 | 70 | # Official init from torch repo. 
71 | for m in self.modules(): 72 | if isinstance(m, nn.Conv2d): 73 | nn.init.kaiming_normal_(m.weight) 74 | elif isinstance(m, nn.BatchNorm2d): 75 | nn.init.constant_(m.weight, 1) 76 | nn.init.constant_(m.bias, 0) 77 | elif isinstance(m, nn.Linear): 78 | nn.init.constant_(m.bias, 0) 79 | 80 | def initialize(self): 81 | if self.url: 82 | checkpoint = model_zoo.load_url(self.url) 83 | 84 | pattern = re.compile( 85 | r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$" 86 | ) 87 | for key in list(checkpoint.keys()): 88 | res = pattern.match(key) 89 | if res: 90 | new_key = res.group(1) + res.group(2) 91 | checkpoint[new_key] = checkpoint[key] 92 | del checkpoint[key] 93 | 94 | change_dict = { 95 | "features.conv0.": "conv1.conv.", 96 | "features.norm0.": "conv1.norm.", 97 | } 98 | for i, num_layers in enumerate(self.block_config): 99 | change_dict[ 100 | "features.denseblock{}.".format(i + 1) 101 | ] = "denseblock{}.".format(i + 1) 102 | change_dict[ 103 | "features.transition{}.".format(i + 1) 104 | ] = "transition{}.".format(i + 1) 105 | for k, v in list(checkpoint.items()): 106 | for _k, _v in list(change_dict.items()): 107 | if _k in k: 108 | new_key = k.replace(_k, _v) 109 | checkpoint[new_key] = checkpoint.pop(k) 110 | 111 | remove_dict = ["classifier.", "features.norm5."] 112 | for k, v in list(checkpoint.items()): 113 | for _k in remove_dict: 114 | if _k in k: 115 | checkpoint.pop(k) 116 | self.load_state_dict(checkpoint) 117 | 118 | def forward(self, x): 119 | x = self.conv1(x) 120 | 121 | outputs = [] 122 | for j in range(len(self.block_config)): 123 | level = j + 1 # only 1 conv before 124 | if level > max(self.outputs): 125 | break 126 | if level > 1: 127 | x = getattr(self, "transition{}".format(level - 1))(x) 128 | x = getattr(self, "denseblock{}".format(level))(x) 129 | if level in self.outputs: 130 | outputs.append(x) 131 | 132 | return outputs 133 | 134 | 135 | @register 136 | def DenseNet121(outputs, **kwargs): 137 | return DenseNet( 138 | 32, (6, 12, 24, 16), 64, outputs=outputs, url=densenet.model_urls["densenet121"] 139 | ) 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /ssds/modeling/nets/effnet.py: -------------------------------------------------------------------------------- 1 | """ This file is similar to the efficientnet file, but uses torch.hub (or geffnet) instead of a bundled implementation 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | from .rutils import register 7 | 8 | 9 | class EffNet(nn.Module): 10 | def __init__(self, model_name, outputs, exportable=False, **kwargs): 11 | super(EffNet, self).__init__() 12 | self.outputs = outputs 13 | 14 | if exportable: 15 | import geffnet 16 | 17 | geffnet.config.set_exportable(True) 18 | model = geffnet.create_model(model_name, **kwargs) 19 | else: 20 | model = torch.hub.load( 21 | "rwightman/gen-efficientnet-pytorch", model_name, **kwargs 22 | ) 23 | 24 | self.conv_stem = model.conv_stem 25 | self.bn1 = model.bn1 26 | self.act1 = model.act1 27 | for j in range(7): 28 | self.add_module( 29 | "block{}".format(j + 1), getattr(model.blocks, "{}".format(j)) 30 | ) 31 | 32 | def forward(self, x): 33 | x = self.act1(self.bn1(self.conv_stem(x))) 34 | 35 | outputs = [] 36 | for level in range(1, 8): 37 | if level > max(self.outputs): 38 | break 39 | x = getattr(self, "block{}".format(level))(x) 40 | if level in self.outputs: 41 | outputs.append(x) 42 | 43 | return outputs 44 | 45 | 46 |
| def initialize(self): 47 | pass 48 | 49 | 50 | @register 51 | def EffNetB0(outputs, **kwargs): 52 | return EffNet("efficientnet_b0", outputs, drop_connect_rate=0.2, pretrained=True) 53 | 54 | 55 | @register 56 | def EffNetB1(outputs, **kwargs): 57 | return EffNet("efficientnet_b1", outputs, drop_connect_rate=0.2, pretrained=True) 58 | 59 | 60 | @register 61 | def EffNetB2(outputs, **kwargs): 62 | return EffNet("efficientnet_b2", outputs, drop_connect_rate=0.2, pretrained=True) 63 | 64 | 65 | @register 66 | def EffNetB3(outputs, **kwargs): 67 | return EffNet("efficientnet_b3", outputs, drop_connect_rate=0.2, pretrained=True) 68 | 69 | 70 | @register 71 | def EffNetB4(outputs, **kwargs): 72 | return EffNet("efficientnet_b4", outputs, drop_connect_rate=0.2, pretrained=True) 73 | 74 | 75 | @register 76 | def EffNetB5(outputs, **kwargs): 77 | return EffNet("efficientnet_b5", outputs, drop_connect_rate=0.2, pretrained=True) 78 | 79 | 80 | @register 81 | def EffNetB6(outputs, **kwargs): 82 | return EffNet("efficientnet_b6", outputs, drop_connect_rate=0.2, pretrained=True) 83 | 84 | 85 | @register 86 | def EffNetB7(outputs, **kwargs): 87 | return EffNet("efficientnet_b7", outputs, drop_connect_rate=0.2, pretrained=True) 88 | 89 | 90 | @register 91 | def EffNetB0Ex(outputs, **kwargs): 92 | return EffNet( 93 | "efficientnet_b0", 94 | outputs, 95 | drop_connect_rate=0.2, 96 | pretrained=True, 97 | exportable=True, 98 | ) 99 | -------------------------------------------------------------------------------- /ssds/modeling/nets/mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.models import mobilenet 4 | import torch.utils.model_zoo as model_zoo 5 | from .rutils import register 6 | 7 | 8 | class SepConvBNReLU(nn.Sequential): 9 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, expand_ratio=1): 10 | padding = (kernel_size - 1) // 2 11 | super(SepConvBNReLU, self).__init__( 12 | # dw 13 | nn.Conv2d( 14 | in_planes, 15 | in_planes, 16 | kernel_size, 17 | stride, 18 | padding, 19 | groups=in_planes, 20 | bias=False, 21 | ), 22 | nn.BatchNorm2d(in_planes), 23 | nn.ReLU6(inplace=True), 24 | # pw 25 | nn.Conv2d(in_planes, out_planes, 1, 1, 0, bias=False), 26 | nn.BatchNorm2d(out_planes), 27 | nn.ReLU6(inplace=True), 28 | ) 29 | 30 | 31 | class MobileNet(nn.Module): 32 | def __init__(self, num_classes=1000, width_mult=1.0, version="v1", round_nearest=8): 33 | """ 34 | MobileNet V2 main class 35 | Args: 36 | num_classes (int): Number of classes 37 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 38 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 39 | Set to 1 to turn off rounding 40 | """ 41 | super(MobileNet, self).__init__() 42 | 43 | input_channel = 32 44 | if version == "v2": 45 | settings = [ 46 | # t, c, n, s 47 | [1, 16, 1, 1], 48 | [6, 24, 2, 2], 49 | [6, 32, 3, 2], 50 | [6, 64, 4, 2], 51 | [6, 96, 3, 1], 52 | [6, 160, 3, 2], 53 | [6, 320, 1, 1], 54 | ] 55 | last_channel = 1280 56 | layer = mobilenet.InvertedResidual 57 | elif version == "v1": 58 | settings = [ 59 | # t, c, n, s 60 | [1, 64, 1, 1], 61 | [1, 128, 2, 2], 62 | [1, 256, 2, 2], 63 | [1, 512, 6, 2], 64 | [1, 1024, 2, 2], 65 | ] 66 | last_channel = 1024 67 | layer = SepConvBNReLU 68 | self.settings = settings 69 | self.version = version 70 | 71 | # building first layer 72 | input_channel = mobilenet._make_divisible( 73 | 
input_channel * width_mult, round_nearest 74 | ) 75 | self.last_channel = mobilenet._make_divisible( 76 | last_channel * max(1.0, width_mult), round_nearest 77 | ) 78 | self.conv1 = mobilenet.ConvBNReLU(3, input_channel, stride=2) 79 | # building inverted residual blocks 80 | for j, (t, c, n, s) in enumerate(settings): 81 | output_channel = mobilenet._make_divisible(c * width_mult, round_nearest) 82 | layers = [] 83 | for i in range(n): 84 | stride = s if i == 0 else 1 85 | layers.append( 86 | layer(input_channel, output_channel, stride=stride, expand_ratio=t) 87 | ) 88 | input_channel = output_channel 89 | self.add_module("layer{}".format(j + 1), nn.Sequential(*layers)) 90 | # building last several layers 91 | if self.version == "v2": 92 | self.head_conv = mobilenet.ConvBNReLU( 93 | input_channel, self.last_channel, kernel_size=1 94 | ) 95 | 96 | # building classifier 97 | self.classifier = nn.Sequential( 98 | nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes), 99 | ) 100 | 101 | # weight initialization 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 105 | if m.bias is not None: 106 | nn.init.zeros_(m.bias) 107 | elif isinstance(m, nn.BatchNorm2d): 108 | nn.init.ones_(m.weight) 109 | nn.init.zeros_(m.bias) 110 | elif isinstance(m, nn.Linear): 111 | nn.init.normal_(m.weight, 0, 0.01) 112 | nn.init.zeros_(m.bias) 113 | 114 | def forward(self, x): 115 | x = self.conv1(x) 116 | for j in range(len(self.settings)): 117 | x = getattr(self, "layer{}".format(j + 1))(x) 118 | if self.version == "v2": 119 | x = self.head_conv(x) 120 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 121 | x = self.classifier(x) 122 | return x 123 | 124 | 125 | class MobileNetEx(MobileNet): 126 | def __init__(self, width_mult=1.0, version="v1", outputs=[7], url=None): 127 | super(MobileNetEx, self).__init__(width_mult=width_mult, version=version) 128 | self.url = url 129 | self.outputs = outputs 130 | 131 | def initialize(self): 132 | if self.url: 133 | checkpoint = model_zoo.load_url(self.url) 134 | if self.version == "v2": 135 | change_dict = {"features.0.": "conv1."} 136 | f_idx = 1 137 | for j, (t, c, n, s) in enumerate(self.settings): 138 | for i in range(n): 139 | change_dict[ 140 | "features.{}.".format(f_idx) 141 | ] = "layer{}.{}.".format(j + 1, i) 142 | f_idx += 1 143 | change_dict["features.{}.".format(f_idx)] = "head_conv." 
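                # Illustrative key mapping (an assumption about the torchvision
                # checkpoint layout, not taken from this file itself):
                #   "features.1.conv.0.0.weight" -> "layer1.0.conv.0.0.weight"
                # i.e. the pretrained tensors are renamed so they line up with
                # the layer{j} / head_conv modules declared in __init__ above.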
144 | for k, v in list(checkpoint.items()): 145 | for _k, _v in list(change_dict.items()): 146 | if _k in k: 147 | new_key = k.replace(_k, _v) 148 | checkpoint[new_key] = checkpoint.pop(k) 149 | else: 150 | change_dict = {"features.Conv2d_0.conv.": "conv1."} 151 | f_idx = 1 152 | for j, (t, c, n, s) in enumerate(self.settings): 153 | for i in range(n): 154 | for z in range(2): 155 | change_dict[ 156 | "features.Conv2d_{}.depthwise.{}".format(f_idx, z) 157 | ] = "layer{}.{}.{}".format(j + 1, i, z) 158 | change_dict[ 159 | "features.Conv2d_{}.pointwise.{}".format(f_idx, z) 160 | ] = "layer{}.{}.{}".format(j + 1, i, z + 3) 161 | f_idx += 1 162 | for k, v in list(checkpoint.items()): 163 | for _k, _v in list(change_dict.items()): 164 | if _k in k: 165 | new_key = k.replace(_k, _v) 166 | checkpoint[new_key] = checkpoint.pop(k) 167 | 168 | remove_dict = ["classifier."] 169 | for k, v in list(checkpoint.items()): 170 | for _k in remove_dict: 171 | if _k in k: 172 | checkpoint.pop(k) 173 | 174 | org_checkpoint = self.state_dict() 175 | org_checkpoint.update(checkpoint) 176 | checkpoint = org_checkpoint 177 | 178 | self.load_state_dict(checkpoint) 179 | 180 | def forward(self, x): 181 | x = self.conv1(x) 182 | 183 | outputs = [] 184 | for j in range(len(self.settings)): 185 | level = j + 1 # only 1 conv before 186 | if level > max(self.outputs): 187 | break 188 | x = getattr(self, "layer{}".format(level))(x) 189 | if level in self.outputs: 190 | outputs.append(x) 191 | 192 | return outputs 193 | 194 | 195 | @register 196 | def MobileNetV1(outputs, **kwargs): 197 | return MobileNetEx( 198 | width_mult=1.0, 199 | version="v1", 200 | outputs=outputs, 201 | url="https://www.dropbox.com/s/kygo8l6dwah3djv/mobilenet_v1_1.0_224.pth?dl=1", 202 | ) 203 | 204 | 205 | @register 206 | def MobileNetV2(outputs, **kwargs): 207 | return MobileNetEx( 208 | width_mult=1.0, 209 | version="v2", 210 | outputs=outputs, 211 | url=mobilenet.model_urls["mobilenet_v2"], 212 | ) 213 | -------------------------------------------------------------------------------- /ssds/modeling/nets/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from torchvision.models import resnet 5 | import torch.utils.model_zoo as model_zoo 6 | from .rutils import register 7 | 8 | 9 | class ResNet(resnet.ResNet): 10 | "Deep Residual Network - https://arxiv.org/abs/1512.03385" 11 | 12 | def __init__( 13 | self, 14 | layers=[3, 4, 6, 3], 15 | bottleneck=resnet.Bottleneck, 16 | outputs=[5], 17 | groups=1, 18 | width_per_group=64, 19 | url=None, 20 | ): 21 | self.stride = 128 22 | self.bottleneck = bottleneck 23 | self.outputs = outputs 24 | self.url = url 25 | 26 | # torchvision added support for ResNeXt in version 0.3.0, 27 | # and introduces additional args to torchvision.models.resnet constructor 28 | kwargs_common = {"block": bottleneck, "layers": layers} 29 | kwargs_extra = ( 30 | {"groups": groups, "width_per_group": width_per_group} 31 | if torchvision.__version__ > "0.2.1" 32 | else {} 33 | ) 34 | kwargs = {**kwargs_common, **kwargs_extra} 35 | super().__init__(**kwargs) 36 | 37 | def initialize(self): 38 | if self.url: 39 | self.load_state_dict(model_zoo.load_url(self.url)) 40 | 41 | def forward(self, x): 42 | x = self.conv1(x) 43 | x = self.bn1(x) 44 | x = self.relu(x) 45 | x = self.maxpool(x) 46 | 47 | outputs = [] 48 | for i, layer in enumerate([self.layer1, self.layer2, self.layer3, self.layer4]): 49 | level = i + 2 50 | if level > 
max(self.outputs): 51 | break 52 | x = layer(x) 53 | if level in self.outputs: 54 | outputs.append(x) 55 | 56 | return outputs 57 | 58 | 59 | @register 60 | def ResNet18(outputs, **kwargs): 61 | return ResNet( 62 | layers=[2, 2, 2, 2], 63 | bottleneck=resnet.BasicBlock, 64 | outputs=outputs, 65 | url=resnet.model_urls["resnet18"], 66 | ) 67 | 68 | 69 | @register 70 | def ResNet34(outputs, **kwargs): 71 | return ResNet( 72 | layers=[3, 4, 6, 3], 73 | bottleneck=resnet.BasicBlock, 74 | outputs=outputs, 75 | url=resnet.model_urls["resnet34"], 76 | ) 77 | 78 | 79 | @register 80 | def ResNet50(outputs, **kwargs): 81 | return ResNet( 82 | layers=[3, 4, 6, 3], 83 | bottleneck=resnet.Bottleneck, 84 | outputs=outputs, 85 | url=resnet.model_urls["resnet50"], 86 | ) 87 | 88 | 89 | @register 90 | def ResNet101(outputs, **kwargs): 91 | return ResNet( 92 | layers=[3, 4, 23, 3], 93 | bottleneck=resnet.Bottleneck, 94 | outputs=outputs, 95 | url=resnet.model_urls["resnet101"], 96 | ) 97 | 98 | 99 | @register 100 | def ResNet152(outputs, **kwargs): 101 | return ResNet( 102 | layers=[3, 8, 36, 3], 103 | bottleneck=resnet.Bottleneck, 104 | outputs=outputs, 105 | url=resnet.model_urls["resnet152"], 106 | ) 107 | 108 | 109 | @register 110 | def ResNeXt50_32x4d(outputs, **kwargs): 111 | return ResNet( 112 | layers=[3, 4, 6, 3], 113 | bottleneck=resnet.Bottleneck, 114 | outputs=outputs, 115 | groups=32, 116 | width_per_group=4, 117 | url=resnet.model_urls["resnext50_32x4d"], 118 | ) 119 | 120 | 121 | @register 122 | def ResNeXt101_32x8d(outputs, **kwargs): 123 | return ResNet( 124 | layers=[3, 4, 23, 3], 125 | bottleneck=resnet.Bottleneck, 126 | outputs=outputs, 127 | groups=32, 128 | width_per_group=8, 129 | url=resnet.model_urls["resnext101_32x8d"], 130 | ) 131 | 132 | 133 | @register 134 | def WideResNet50_2(outputs, **kwargs): 135 | return ResNet( 136 | layers=[3, 4, 6, 3], 137 | bottleneck=resnet.Bottleneck, 138 | outputs=outputs, 139 | width_per_group=128, 140 | url=resnet.model_urls["wide_resnet50_2"], 141 | ) 142 | 143 | 144 | @register 145 | def WideResNet101_2(outputs, **kwargs): 146 | return ResNet( 147 | layers=[3, 4, 23, 3], 148 | bottleneck=resnet.Bottleneck, 149 | outputs=outputs, 150 | width_per_group=128, 151 | url=resnet.model_urls["wide_resnet101_2"], 152 | ) -------------------------------------------------------------------------------- /ssds/modeling/nets/rutils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def register(f): 5 | all = sys.modules[f.__module__].__dict__.setdefault("__all__", []) 6 | if f.__name__ in all: 7 | raise RuntimeError("{} already exist!".format(f.__name__)) 8 | all.append(f.__name__) 9 | return f 10 | -------------------------------------------------------------------------------- /ssds/modeling/nets/shufflenet.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torchvision.models import shufflenetv2 3 | import torch.utils.model_zoo as model_zoo 4 | from .rutils import register 5 | 6 | 7 | class ShuffleNetV2(shufflenetv2.ShuffleNetV2): 8 | def __init__(self, stages_repeats, stages_out_channels, outputs=[4], url=None): 9 | super(ShuffleNetV2, self).__init__(stages_repeats, stages_out_channels) 10 | self.outputs = outputs 11 | self.url = url 12 | 13 | def initialize(self): 14 | if self.url: 15 | self.load_state_dict(model_zoo.load_url(self.url)) 16 | 17 | def forward(self, x): 18 | x = self.maxpool(self.conv1(x)) 19 | 20 | outputs = [] 21 | 
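        # Same early-exit pattern as the other backbones in this package
        # (resnet.py, densenet.py): run the stages in order, stop once the
        # deepest requested level has been produced, and collect a map
        # whenever the current level is listed in self.outputs. E.g.
        # outputs=[3, 4] (an illustrative choice) keeps the stride-16 and
        # stride-32 features from stage3 and stage4.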
for i, stage in enumerate([self.stage2, self.stage3, self.stage4]): 22 | level = i + 2 23 | if level > max(self.outputs): 24 | break 25 | x = stage(x) 26 | if level in self.outputs: 27 | outputs.append(x) 28 | return outputs 29 | 30 | 31 | @register 32 | def ShuffleNetV2_x1(outputs, **kwargs): 33 | return ShuffleNetV2( 34 | [4, 8, 4], 35 | [24, 116, 232, 464, 1024], 36 | outputs=outputs, 37 | url=shufflenetv2.model_urls["shufflenetv2_x1.0"], 38 | ) 39 | 40 | 41 | @register 42 | def ShuffleNetV2_x2(outputs, **kwargs): 43 | return ShuffleNetV2([4, 8, 4], [24, 244, 488, 976, 2048], outputs=outputs) 44 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd import SSD 2 | from .fpn import SSDFPN 3 | from .shelf import SSDShelf 4 | from .bifpn import SSDBiFPN 5 | from .yolo import YOLOV3, YOLOV4 -------------------------------------------------------------------------------- /ssds/modeling/ssds/bifpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .ssdsbase import SSDSBase 6 | from .fpn import SharedHead 7 | from ssds.modeling.layers.basic_layers import ConvBNReLU, SepConvBNReLU 8 | 9 | 10 | class BiFPNModule(nn.Module): 11 | def __init__(self, channels, levels, init=0.5, block=ConvBNReLU): 12 | super(BiFPNModule, self).__init__() 13 | 14 | self.levels = levels 15 | self.w1 = nn.Parameter(torch.Tensor(2, levels).fill_(init)) 16 | self.w2 = nn.Parameter(torch.Tensor(3, levels - 2).fill_(init)) 17 | 18 | for i in range(levels - 1, 0, -1): 19 | self.add_module("top-down-{}".format(i - 1), block(channels, channels)) 20 | 21 | for i in range(0, levels - 1, 1): 22 | self.add_module("bottom-up-{}".format(i + 1), block(channels, channels)) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.xavier_uniform_(m.weight) 27 | if m.bias is not None: 28 | nn.init.constant_(m.bias, val=0) 29 | 30 | def forward(self, xx): 31 | assert len(xx) == self.levels 32 | levels = self.levels 33 | 34 | # normalize weights 35 | w1 = F.relu(self.w1) 36 | w1 /= torch.sum(w1, dim=0) + 1e-6 37 | w2 = F.relu(self.w2) 38 | w2 /= torch.sum(w2, dim=0) + 1e-6 39 | 40 | # build top-down 41 | xs = [[]] + [x for x in xx[1:-1]] + [[]] 42 | for i in range(levels - 1, 0, -1): 43 | xx[i - 1] = w1[0, i - 1] * xx[i - 1] + w1[1, i - 1] * F.interpolate( 44 | xx[i], scale_factor=2, mode="nearest" 45 | ) 46 | xx[i - 1] = getattr(self, "top-down-{}".format(i - 1))(xx[i - 1]) 47 | 48 | # build bottom-up 49 | for i in range(0, levels - 2, 1): 50 | xx[i + 1] = ( 51 | w2[0, i] * xx[i + 1] 52 | + w2[1, i] * F.max_pool2d(xx[i], kernel_size=2) 53 | + w2[2, i] * xs[i + 1] 54 | ) 55 | xx[i + 1] = getattr(self, "bottom-up-{}".format(i + 1))(xx[i + 1]) 56 | 57 | xx[levels - 1] = w1[0, levels - 1] * xx[levels - 1] + w1[ 58 | 1, levels - 1 59 | ] * F.max_pool2d(xx[levels - 2], kernel_size=2) 60 | xx[levels - 1] = getattr(self, "bottom-up-{}".format(levels - 1))( 61 | xx[levels - 1] 62 | ) 63 | return xx 64 | 65 | 66 | class SSDBiFPN(SSDSBase): 67 | """EfficientDet: Scalable and Efficient Object Detection 68 | See: https://arxiv.org/abs/1911.09070v6 for more details. 
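    The BiFPNModule above fuses neighbouring pyramid levels with the paper's
    "fast normalized fusion": the learnable weights are rectified and
    normalized before mixing. In the notation of the code (a paraphrase, not
    the paper's exact formula), the top-down step is

        w1 = relu(w1) / (sum(w1, dim=0) + 1e-6)
        P[i-1] <- ConvBNReLU(w1[0, i-1] * P[i-1] + w1[1, i-1] * Up(P[i]))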
69 | 
70 | Compared with the original implementation, the conv2d layers in the extras and heads
71 | are changed to ConvBNReLU, which helps the model converge more easily.
72 | BN & ReLU are not added to the transforms because they are followed by interpolate and an element-wise sum.
73 | 
74 | Args:
75 | backbone: backbone layers for input
76 | extras: contains transforms, extra and stack_bifpn layers that feed to multibox loc and conf layers
77 | head: "multibox head" consists of loc and conf conv layers
78 | num_classes: num of classes
79 | """
80 | 
81 | def __init__(self, backbone, extras, head, num_classes):
82 | super(SSDBiFPN, self).__init__(backbone, num_classes)
83 | 
84 | # SSD network
85 | self.transforms = nn.ModuleList(extras[0])
86 | self.extras = nn.ModuleList(extras[1])
87 | self.stack_bifpn = extras[2]
88 | self.loc = head[0]
89 | self.conf = head[1]
90 | 
91 | self.initialize()
92 | 
93 | def initialize(self):
94 | r"""
95 | :meta private:
96 | """
97 | self.backbone.initialize()
98 | self.transforms.apply(self.initialize_extra)
99 | self.extras.apply(self.initialize_extra)
100 | self.loc.apply(self.initialize_head)
101 | self.conf.apply(self.initialize_head)
102 | self.conf[-1].apply(self.initialize_prior)
103 | 
104 | def forward(self, x):
105 | r"""Applies network layers and ops on input image(s) x.
106 | 
107 | Args:
108 | x: input image or batch of images.
109 | 
110 | Return:
111 | When self.training==True, loc and conf for each anchor box;
112 | 
113 | When self.training==False, loc and conf.sigmoid() for each anchor box;
114 | 
115 | For each pyramid layer, conf with shape [batch, num_anchor*num_classes, height, width];
116 | 
117 | For each pyramid layer, loc with shape [batch, num_anchor*4, height, width].
118 | """
119 | loc, conf = [list() for _ in range(2)]
120 | 
121 | # apply backbone layers and cache source layer outputs
122 | features = self.backbone(x)
123 | 
124 | x = features[-1]
125 | features_len = len(features)
126 | for i in range(features_len):
127 | features[i] = self.transforms[i](features[i])
128 | features = self.stack_bifpn(features)
129 | 
130 | for i, v in enumerate(self.extras):
131 | if i < features_len:
132 | xx = v(features[i])
133 | elif i == features_len:
134 | xx = v(x)
135 | else:
136 | xx = v(xx)
137 | loc.append(self.loc(xx))
138 | conf.append(self.conf(xx))
139 | 
140 | if not self.training:
141 | conf = [c.sigmoid() for c in conf]
142 | return tuple(loc), tuple(conf)
143 | 
144 | @staticmethod
145 | def add_extras(feature_layer, mbox, num_classes):
146 | r"""Define and declare the extras, loc and conf modules for the ssdbifpn model.
147 | 
148 | The feature_layer is defined in cfg.MODEL.FEATURE_LAYER. For the ssdbifpn model it can be an int, a list of int, or a str:
149 | 
150 | * int
151 | The int in the feature_layer represents the output feature in the backbone.
152 | * list of int
153 | The list of int in the feature_layer represents the output feature in the backbone, the first int is the \
154 | backbone output and the second int is the upsampling branch to fuse feature.
155 | * str
156 | The str in the feature_layer represents the extra layers appended at the end of the backbone (see the example below).
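        Example (illustrative values for a ResNet50-style backbone; these
        numbers are assumptions, not a shipped config)::

            FEATURE_LAYER: [[3, 4, 5, 'Conv:S', 'Conv:S'],
                            [512, 1024, 2048, 2048, 256],
                            2]  # optional 3rd entry: number of stacked BiFPNModules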
157 | 
158 | Args:
159 | feature_layer: the feature layers with detection head, defined by cfg.MODEL.FEATURE_LAYER
160 | mbox: the number of boxes for each feature map
161 | num_classes: the number of classes, defined by cfg.MODEL.NUM_CLASSES
162 | """
163 | nets_outputs, transform_layers, extra_layers = [list() for _ in range(3)]
164 | transform_layers = []
165 | if not all(mbox[i] == mbox[i + 1] for i in range(len(mbox) - 1)):
166 | raise ValueError(
167 | "For the SSDBiFPN module, the number of boxes has to be the same in every layer"
168 | )
169 | loc_layers = SharedHead(mbox[0] * 4)
170 | conf_layers = SharedHead(mbox[0] * num_classes)
171 | 
172 | for layer, depth in zip(feature_layer[0], feature_layer[1]):
173 | if isinstance(layer, int):
174 | nets_outputs.append(layer)
175 | transform_layers += [
176 | nn.Conv2d(depth, 256, 1)
177 | ] # [ConvBNReLU(depth, 256, 1)]
178 | extra_layers += [
179 | ConvBNReLU(256, 256, 3)
180 | ] # [nn.Conv2d(256, 256, 3, padding=1)]
181 | elif layer == "Conv:S":
182 | extra_layers += [
183 | ConvBNReLU(depth, 256, 3, stride=2)
184 | ] # [nn.Conv2d(depth, 256, 3, stride=2, padding=1)]
185 | else:
186 | raise ValueError(layer + " is not supported by SSDBiFPN")
187 | num_stack = 1 if len(feature_layer) == 2 else feature_layer[2]
188 | fpn = nn.Sequential(
189 | *[BiFPNModule(256, len(transform_layers)) for _ in range(num_stack)]
190 | )
191 | return (
192 | nets_outputs,
193 | (transform_layers, extra_layers, fpn),
194 | (loc_layers, conf_layers),
195 | )
196 | 
197 | 
198 | if __name__ == "__main__":
199 | model = BiFPNModule(1, 4)
200 | model.eval()
201 | xx = [torch.ones(1, 1, i, i) * i for i in [8, 4, 2, 1]]
202 | model(xx)
203 | 
204 | torch.onnx.export(model, xx, "test.onnx")
205 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/fcos.py: -------------------------------------------------------------------------------- 
1 | import copy
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | 
6 | from .ssdsbase import SSDSBase
7 | from ssds.modeling.layers.basic_layers import ConvBNReLU
8 | 
9 | 
10 | class SharedHead(nn.Sequential):
11 | def __init__(self, out_planes):
12 | layers = []
13 | for _ in range(4):
14 | layers += [
15 | ConvBNReLU(256, 256, 3)
16 | ] # [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU()]
17 | layers += [nn.Conv2d(256, out_planes, 3, padding=1)]
18 | super(SharedHead, self).__init__(*layers)
19 | 
20 | 
21 | class FCOS(SSDSBase):
22 | """FCOS: Fully Convolutional One-Stage Object Detection
23 | See: https://arxiv.org/abs/1904.01355 for more details.
24 | Compared with the original implementation, the conv2d layers in the extras and heads
25 | are changed to ConvBNReLU, which helps the model converge more easily.
26 | BN & ReLU are not added to the transforms because they are followed by interpolate and an element-wise sum.
27 | 
28 | Args:
29 | backbone: backbone layers for input
30 | extras: extra layers that feed to multibox loc and conf layers
31 | head: "multibox head" consists of loc and conf conv layers
32 | num_classes: num of classes
33 | """
34 | 
35 | def __init__(self, backbone, extras, head, num_classes):
36 | super(FCOS, self).__init__(backbone, num_classes)
37 | 
38 | # SSD network
39 | self.transforms = nn.ModuleList(extras[0])
40 | self.extras = nn.ModuleList(extras[1])
41 | self.loc = head[0]
42 | self.conf = head[1]
43 | 
44 | self.initialize()
45 | 
46 | def initialize(self):
47 | self.backbone.initialize()
48 | self.transforms.apply(self.initialize_extra)
49 | self.extras.apply(self.initialize_extra)
50 | self.loc.apply(self.initialize_head)
51 | self.conf.apply(self.initialize_head)
52 | self.conf[-1].apply(self.initialize_prior)
53 | 
54 | def forward(self, x):
55 | loc, conf, center = [list() for _ in range(3)]
56 | 
57 | # apply backbone layers and cache source layer outputs
58 | features = self.backbone(x)
59 | 
60 | x = features[-1]
61 | features_len = len(features)
62 | for i in range(len(features))[::-1]:
63 | if i != features_len - 1:
64 | xx = F.interpolate(
65 | xx, scale_factor=2, mode="nearest"
66 | ) + self.transforms[i](features[i])
67 | else:
68 | xx = self.transforms[i](features[i])
69 | features[i] = xx
70 | 
71 | for i, v in enumerate(self.extras):
72 | if i < features_len:
73 | xx = v(features[i])
74 | elif i == features_len:
75 | xx = v(x)
76 | else:
77 | xx = v(xx)
78 | loc.append(self.loc(xx))
79 | conf_out = self.conf(xx)
80 | conf.append(conf_out[:, : self.num_classes].contiguous())
81 | center.append(conf_out[:, self.num_classes :].contiguous())
82 | 
83 | if not self.training:
84 | conf = [c.sigmoid() for c in conf]
85 | return tuple(loc), tuple(conf), tuple(center)
86 | 
87 | @staticmethod
88 | def add_extras(feature_layer, mbox, num_classes):
89 | nets_outputs, transform_layers, extra_layers = [list() for _ in range(3)]
90 | if not all(mbox[i] == mbox[i + 1] for i in range(len(mbox) - 1)):
91 | raise ValueError(
92 | "For the FCOS module, the number of boxes has to be the same in every layer"
93 | )
94 | loc_layers = SharedHead(4)
95 | conf_layers = SharedHead(num_classes + 1)
96 | 
97 | for layer, depth in zip(feature_layer[0], feature_layer[1]):
98 | if isinstance(layer, int):
99 | nets_outputs.append(layer)
100 | transform_layers += [
101 | nn.Conv2d(depth, 256, 1)
102 | ] # [ConvBNReLU(depth, 256, 1)]
103 | extra_layers += [
104 | ConvBNReLU(256, 256, 3)
105 | ] # [nn.Conv2d(256, 256, 3, padding=1)]
106 | elif layer == "Conv:S":
107 | extra_layers += [
108 | ConvBNReLU(depth, 256, 3, stride=2)
109 | ] # [nn.Conv2d(depth, 256, 3, stride=2, padding=1)]
110 | else:
111 | raise ValueError(layer + " is not supported by FCOS")
112 | return nets_outputs, (transform_layers, extra_layers), (loc_layers, conf_layers)
113 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/fpn.py: -------------------------------------------------------------------------------- 
1 | import copy
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | 
6 | from .ssdsbase import SSDSBase
7 | from ssds.modeling.layers.basic_layers import ConvBNReLU
8 | 
9 | 
10 | class SharedHead(nn.Sequential):
11 |
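    # Descriptive note (not upstream code): this is a RetinaNet-style shared
    # subnet -- four 3x3 ConvBNReLU blocks followed by a 3x3 projection to
    # `out_planes`. A single instance is reused on every pyramid level in the
    # forward pass below, so the head parameters are shared across scales.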
def __init__(self, out_planes):
12 | layers = []
13 | for _ in range(4):
14 | layers += [
15 | ConvBNReLU(256, 256, 3)
16 | ] # [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU()]
17 | layers += [nn.Conv2d(256, out_planes, 3, padding=1)]
18 | super(SharedHead, self).__init__(*layers)
19 | 
20 | 
21 | class SSDFPN(SSDSBase):
22 | """RetinaNet in Focal Loss for Dense Object Detection
23 | See: https://arxiv.org/abs/1708.02002v2 for more details.
24 | 
25 | Compared with the original implementation, the conv2d layers in the extras and heads
26 | are changed to ConvBNReLU, which helps the model converge more easily.
27 | BN & ReLU are not added to the transforms because they are followed by interpolate and an element-wise sum.
28 | 
29 | Args:
30 | backbone: backbone layers for input
31 | extras: contains transforms and extra layers that feed to multibox loc and conf layers
32 | head: "multibox head" consists of loc and conf conv layers
33 | num_classes: num of classes
34 | """
35 | 
36 | def __init__(self, backbone, extras, head, num_classes):
37 | super(SSDFPN, self).__init__(backbone, num_classes)
38 | 
39 | # SSD network
40 | self.transforms = nn.ModuleList(extras[0])
41 | self.extras = nn.ModuleList(extras[1])
42 | self.loc = head[0]
43 | self.conf = head[1]
44 | 
45 | self.initialize()
46 | 
47 | def initialize(self):
48 | r"""
49 | :meta private:
50 | """
51 | self.backbone.initialize()
52 | self.transforms.apply(self.initialize_extra)
53 | self.extras.apply(self.initialize_extra)
54 | self.loc.apply(self.initialize_head)
55 | self.conf.apply(self.initialize_head)
56 | self.conf[-1].apply(self.initialize_prior)
57 | 
58 | def forward(self, x):
59 | r"""Applies network layers and ops on input image(s) x.
60 | 
61 | Args:
62 | x: input image or batch of images.
63 | 
64 | Return:
65 | When self.training==True, loc and conf for each anchor box;
66 | 
67 | When self.training==False, loc and conf.sigmoid() for each anchor box;
68 | 
69 | For each pyramid layer, conf with shape [batch, num_anchor*num_classes, height, width];
70 | 
71 | For each pyramid layer, loc with shape [batch, num_anchor*4, height, width].
72 | """
73 | loc, conf = [list() for _ in range(2)]
74 | 
75 | # apply backbone layers and cache source layer outputs
76 | features = self.backbone(x)
77 | 
78 | x = features[-1]
79 | features_len = len(features)
80 | for i in range(len(features))[::-1]:
81 | if i != features_len - 1:
82 | xx = F.interpolate(
83 | xx, scale_factor=2, mode="nearest"
84 | ) + self.transforms[i](features[i])
85 | else:
86 | xx = self.transforms[i](features[i])
87 | features[i] = xx
88 | 
89 | for i, v in enumerate(self.extras):
90 | if i < features_len:
91 | xx = v(features[i])
92 | elif i == features_len:
93 | xx = v(x)
94 | else:
95 | xx = v(xx)
96 | loc.append(self.loc(xx))
97 | conf.append(self.conf(xx))
98 | 
99 | if not self.training:
100 | conf = [c.sigmoid() for c in conf]
101 | return tuple(loc), tuple(conf)
102 | 
103 | @staticmethod
104 | def add_extras(feature_layer, mbox, num_classes):
105 | r"""Define and declare the extras, loc and conf modules for the ssdfpn model.
106 | 
107 | The feature_layer is defined in cfg.MODEL.FEATURE_LAYER. For the ssdfpn model it can be an int, a list of int, or a str:
108 | 
109 | * int
110 | The int in the feature_layer represents the output feature in the backbone.
111 | * list of int
112 | The list of int in the feature_layer represents the output feature in the backbone, the first int is the \
113 | backbone output and the second int is the upsampling branch to fuse feature.
114 | * str
115 | The str in the feature_layer represents the extra layers appended at the end of the backbone.
116 | 
117 | Args:
118 | feature_layer: the feature layers with detection head, defined by cfg.MODEL.FEATURE_LAYER
119 | mbox: the number of boxes for each feature map
120 | num_classes: the number of classes, defined by cfg.MODEL.NUM_CLASSES
121 | """
122 | 
123 | nets_outputs, transform_layers, extra_layers = [list() for _ in range(3)]
124 | if not all(mbox[i] == mbox[i + 1] for i in range(len(mbox) - 1)):
125 | raise ValueError(
126 | "For the SSDFPN module, the number of boxes has to be the same in every layer"
127 | )
128 | loc_layers = SharedHead(mbox[0] * 4)
129 | conf_layers = SharedHead(mbox[0] * num_classes)
130 | 
131 | for layer, depth in zip(feature_layer[0], feature_layer[1]):
132 | if isinstance(layer, int):
133 | nets_outputs.append(layer)
134 | transform_layers += [
135 | nn.Conv2d(depth, 256, 1)
136 | ] # [ConvBNReLU(depth, 256, 1)]
137 | extra_layers += [
138 | ConvBNReLU(256, 256, 3)
139 | ] # [nn.Conv2d(256, 256, 3, padding=1)]
140 | elif layer == "Conv:S":
141 | extra_layers += [
142 | ConvBNReLU(depth, 256, 3, stride=2)
143 | ] # [nn.Conv2d(depth, 256, 3, stride=2, padding=1)]
144 | else:
145 | raise ValueError(layer + " is not supported by SSDFPN")
146 | return nets_outputs, (transform_layers, extra_layers), (loc_layers, conf_layers)
147 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/fssd.py: -------------------------------------------------------------------------------- 
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | import os
6 | 
7 | from ssds.modeling.layers.basic_layers import _conv
8 | from ssds.modeling.layers.layers_parser import parse_feature_layer
9 | 
10 | 
11 | class FSSD(nn.Module):
12 | """FSSD: Feature Fusion Single Shot Multibox Detector
13 | See: https://arxiv.org/pdf/1712.00960.pdf for more details.
14 | 
15 | Args:
16 | phase: (string) Can be "eval" or "train" or "feature"
17 | base: base layers for input
18 | extras: extra layers that feed to multibox loc and conf layers
19 | head: "multibox head" consists of loc and conf conv layers
20 | features: the feature-fusion and pyramid layers used to fuse features and build the pyramids
21 | feature_layer: the feature layers that feed the loc and conf heads
22 | num_classes: num of classes
23 | """
24 | 
25 | def __init__(self, base, extras, head, features, feature_layer, num_classes):
26 | super(FSSD, self).__init__()
27 | self.num_classes = num_classes
28 | 
29 | # FSSD network
30 | self.base = nn.ModuleList(base)
31 | self.extras = nn.ModuleList(extras)
32 | self.feature_layer = feature_layer[0][0]
33 | self.transforms = nn.ModuleList(features[0])
34 | self.pyramids = nn.ModuleList(features[1])
35 | self.norm = nn.BatchNorm2d(
36 | int(feature_layer[0][1][-1] / 2) * len(self.transforms), affine=True
37 | )
38 | 
39 | self.loc = nn.ModuleList(head[0])
40 | self.conf = nn.ModuleList(head[1])
41 | 
42 | self.softmax = nn.Softmax(dim=-1)
43 | 
44 | def forward(self, x, phase="eval"):
45 | """Applies network layers and ops on input image(s) x.
46 | 
47 | Args:
48 | x: input image or batch of images. Shape: [batch,3,300,300].
49 | 
50 | Return:
51 | Depending on phase:
52 | test:
53 | Variable(tensor) of output class label predictions,
54 | confidence score, and corresponding location predictions for
55 | each object detected.
Shape: [batch,topk,7] 56 | 57 | train: 58 | list of concat outputs from: 59 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 60 | 2: localization layers, Shape: [batch,num_priors*4] 61 | 62 | feature: 63 | the features maps of the feature extractor 64 | """ 65 | sources, transformed, pyramids, loc, conf = [list() for _ in range(5)] 66 | 67 | # apply bases layers and cache source layer outputs 68 | for k in range(len(self.base)): 69 | x = self.base[k](x) 70 | if k in self.feature_layer: 71 | sources.append(x) 72 | 73 | # apply extra layers and cache source layer outputs 74 | for k, v in enumerate(self.extras): 75 | x = v(x) 76 | sources.append(x) 77 | 78 | assert len(self.transforms) == len(sources) 79 | upsize = (sources[0].size()[2], sources[0].size()[3]) 80 | 81 | for k, v in enumerate(self.transforms): 82 | size = None if k == 0 else upsize 83 | transformed.append(v(sources[k], size)) 84 | x = torch.cat(transformed, 1) 85 | x = self.norm(x) 86 | for k, v in enumerate(self.pyramids): 87 | x = v(x) 88 | pyramids.append(x) 89 | 90 | if phase == "feature": 91 | return pyramids 92 | 93 | # apply multibox head to pyramids layers 94 | for (x, l, c) in zip(sources, self.loc, self.conf): 95 | loc.append(l(x).view(x.size(0), 4, -1)) 96 | conf.append(c(x).view(x.size(0), self.num_classes, -1)) 97 | loc = torch.cat(loc, 2).contiguous() 98 | conf = torch.cat(conf, 2).contiguous() 99 | 100 | return loc, conf 101 | 102 | 103 | class BasicConvWithUpSample(nn.Module): 104 | # temp, need TODO improve 105 | def __init__( 106 | self, 107 | in_planes, 108 | out_planes, 109 | kernel_size, 110 | stride=1, 111 | padding=0, 112 | dilation=1, 113 | groups=1, 114 | relu=True, 115 | bn=False, 116 | bias=True, 117 | ): 118 | super(BasicConvWithUpSample, self).__init__() 119 | self.out_channels = out_planes 120 | self.conv = nn.Conv2d( 121 | in_planes, 122 | out_planes, 123 | kernel_size=kernel_size, 124 | stride=stride, 125 | padding=padding, 126 | dilation=dilation, 127 | groups=groups, 128 | bias=bias, 129 | ) 130 | self.bn = ( 131 | nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 132 | if bn 133 | else None 134 | ) 135 | self.relu = nn.ReLU(inplace=True) if relu else None 136 | 137 | def forward(self, x, up_size=None): 138 | x = self.conv(x) 139 | if self.bn is not None: 140 | x = self.bn(x) 141 | if self.relu is not None: 142 | x = self.relu(x) 143 | if up_size is not None: 144 | x = F.upsample(x, size=up_size, mode="bilinear") 145 | return x 146 | 147 | 148 | def add_extras(base, feature_layer, mbox, num_classes, version): 149 | extra_layers = [] 150 | feature_transform_layers = [] 151 | pyramid_feature_layers = [] 152 | loc_layers = [] 153 | conf_layers = [] 154 | in_channels = None 155 | 156 | feature_transform_channel = int(feature_layer[0][1][-1] / 2) 157 | for layer, depth in zip(feature_layer[0][0], feature_layer[0][1]): 158 | extra_layers += parse_feature_layer(layer, in_channels, depth) 159 | in_channels = depth 160 | feature_transform_layers += [ 161 | BasicConvWithUpSample( 162 | in_channels, feature_transform_channel, kernel_size=1, padding=0 163 | ) 164 | ] 165 | 166 | in_channels = len(feature_transform_layers) * feature_transform_channel 167 | for layer, depth, box in zip(feature_layer[1][0], feature_layer[1][1], mbox): 168 | pyramid_feature_layers += parse_feature_layer(layer, in_channels, depth) 169 | in_channels = depth 170 | loc_layers += [nn.Conv2d(in_channels, box * 4, kernel_size=3, padding=1)] 171 | conf_layers += [ 172 | nn.Conv2d(in_channels, box * 
num_classes, kernel_size=3, padding=1) 173 | ] 174 | return ( 175 | base, 176 | extra_layers, 177 | (feature_transform_layers, pyramid_feature_layers), 178 | (loc_layers, conf_layers), 179 | ) 180 | 181 | 182 | def build_fssd(base, feature_layer, mbox, num_classes): 183 | base_, extras_, features_, head_ = add_extras( 184 | base(), feature_layer, mbox, num_classes, version="fssd" 185 | ) 186 | return FSSD(base_, extras_, head_, features_, feature_layer, num_classes) 187 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/shelf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from collections import OrderedDict 4 | 5 | from .ssdsbase import SSDSBase 6 | from ssds.modeling.layers.layers_parser import parse_feature_layer 7 | from ssds.modeling.layers.basic_layers import ConvBNReLU 8 | 9 | 10 | class SharedBlock(nn.Module): 11 | """ The conv params in this block is shared 12 | """ 13 | 14 | def __init__(self, planes): 15 | super(SharedBlock, self).__init__() 16 | 17 | self.planes = planes 18 | self.conv1 = nn.Conv2d( 19 | self.planes, self.planes, kernel_size=3, stride=1, padding=1, bias=False 20 | ) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.relu1 = nn.ReLU(inplace=True) 23 | self.drop = nn.Dropout2d(p=0.25) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | self.relu2 = nn.ReLU(inplace=True) 26 | 27 | def forward(self, x): 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu1(out) 31 | out = self.drop(out) 32 | out = self.conv1(out) 33 | out = self.bn2(out) 34 | out = out + x 35 | return self.relu2(out) 36 | 37 | 38 | class ShelfPyramid(nn.Module): 39 | def __init__(self, settings, conv=nn.ConvTranspose2d, block=SharedBlock): 40 | super().__init__() 41 | 42 | # "output_padding":1 is not work for tensorrt 43 | extra_args = {"padding": 1, "bias": True} if conv == nn.ConvTranspose2d else {} 44 | for i, depth in enumerate(settings): 45 | if i == 0: 46 | self.add_module("block{}".format(i), block(depth)) 47 | else: 48 | self.add_module("block{}".format(i), block(depth)) 49 | self.add_module( 50 | "conv{}".format(i), 51 | conv(settings[i - 1], depth, kernel_size=3, stride=2, **extra_args), 52 | ) 53 | 54 | def forward(self, xx): 55 | out = [] 56 | x = xx[0] 57 | for i in range(len(xx)): 58 | if i != 0: 59 | x = getattr(self, "conv{}".format(i))(x) + xx[i] 60 | x = getattr(self, "block{}".format(i))(x) 61 | out.append(x) 62 | return out[::-1] 63 | 64 | 65 | class Head(nn.Sequential): 66 | def __init__(self, in_channels, out_planes): 67 | super(Head, self).__init__( 68 | ConvBNReLU(in_channels, in_channels, 3), 69 | nn.Conv2d(in_channels, out_planes, 3, padding=1), 70 | ) 71 | 72 | 73 | class SSDShelf(SSDSBase): 74 | """ShelfNet for Fast Semantic Segmentation 75 | See: https://arxiv.org/pdf/1811.11254.pdf for more details. 
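    This detection variant runs the transformed backbone features through a
    three-column shelf (the decoder0 -> encoder0 -> decoder1 ShelfPyramid
    stack built in add_extras), and SharedBlock applies its single conv twice
    so each residual block is weight-shared, as in the paper.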
76 | 77 | Args: 78 | backbone: backbone layers for input 79 | extras: extra layers that feed to multibox loc and conf layers 80 | head: "multibox head" consists of loc and conf conv layers 81 | num_classes: num of classes 82 | """ 83 | 84 | def __init__(self, backbone, extras, head, num_classes): 85 | super(SSDShelf, self).__init__(backbone, num_classes) 86 | 87 | self.transforms = nn.ModuleList(extras[0]) 88 | self.shelf_head = nn.Sequential(extras[1]) 89 | self.loc = nn.ModuleList(head[0]) 90 | self.conf = nn.ModuleList(head[1]) 91 | 92 | self.initialize() 93 | 94 | def initialize(self): 95 | self.backbone.initialize() 96 | self.transforms.apply(self.initialize_extra) 97 | self.shelf_head.apply(self.initialize_extra) 98 | self.loc.apply(self.initialize_head) 99 | self.conf.apply(self.initialize_head) 100 | for c in self.conf: 101 | c[-1].apply(self.initialize_prior) 102 | 103 | def forward(self, x): 104 | loc, conf = [list() for _ in range(2)] 105 | 106 | # apply bases layers and cache source layer outputs 107 | features = self.backbone(x) 108 | 109 | features_len = len(features) 110 | features = [self.transforms[i](x) for i, x in enumerate(features)] 111 | 112 | features = self.shelf_head(features[::-1]) 113 | for i in range(len(features), len(self.transforms)): 114 | features.append(self.transforms[i](features[-1])) 115 | 116 | # apply multibox head to source layers 117 | for (x, l, c) in zip(features, self.loc, self.conf): 118 | loc.append(l(x)) 119 | conf.append(c(x)) 120 | 121 | if not self.training: 122 | conf = [c.sigmoid() for c in conf] 123 | return tuple(loc), tuple(conf) 124 | 125 | @staticmethod 126 | def add_extras(feature_layer, mbox, num_classes): 127 | nets_outputs, transform_layers, loc_layers, conf_layers = [ 128 | list() for _ in range(4) 129 | ] 130 | shelf_depths = [] 131 | for layer, depth, box in zip(feature_layer[0], feature_layer[1], mbox): 132 | if isinstance(layer, int): 133 | if isinstance(depth, list): 134 | if len(depth) == 2: 135 | in_channels = depth[0] 136 | depth = depth[1] 137 | else: 138 | in_channels = depth 139 | nets_outputs.append(layer) 140 | shelf_depths.append(in_channels) 141 | transform_layers += [nn.Conv2d(in_channels, depth, 1)] 142 | else: 143 | transform_layers += parse_feature_layer(layer, in_channels, depth) 144 | in_channels = depth 145 | 146 | loc_layers += [Head(in_channels, box * 4)] 147 | conf_layers += [Head(in_channels, box * num_classes)] 148 | 149 | shelf_head = OrderedDict( 150 | [ 151 | ("decoder0", ShelfPyramid(shelf_depths[::-1])), 152 | ("encoder0", ShelfPyramid(shelf_depths, conv=ConvBNReLU)), 153 | ("decoder1", ShelfPyramid(shelf_depths[::-1])), 154 | ] 155 | ) 156 | return nets_outputs, (transform_layers, shelf_head), (loc_layers, conf_layers) 157 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/ssd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import copy 5 | 6 | from .ssdsbase import SSDSBase 7 | from ssds.modeling.layers.layers_parser import parse_feature_layer 8 | 9 | 10 | class SSD(SSDSBase): 11 | r"""SSD: Single Shot MultiBox Detector 12 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
13 | 
14 | Args:
15 | backbone: backbone layers for input
16 | extras: extra layers that feed to multibox loc and conf layers
17 | head: "multibox head" consists of loc and conf conv layers
18 | num_classes: num of classes
19 | """
20 | 
21 | def __init__(self, backbone, extras, head, num_classes):
22 | super(SSD, self).__init__(backbone, num_classes)
23 | 
24 | # SSD head
25 | self.extras = nn.ModuleList(extras)
26 | self.loc = nn.ModuleList(head[0])
27 | self.conf = nn.ModuleList(head[1])
28 | 
29 | self.initialize()
30 | 
31 | def initialize(self):
32 | r"""
33 | :meta private:
34 | """
35 | self.backbone.initialize()
36 | self.extras.apply(self.initialize_extra)
37 | self.loc.apply(self.initialize_head)
38 | self.conf.apply(self.initialize_head)
39 | for c in self.conf:
40 | c.apply(self.initialize_prior)
41 | 
42 | def forward(self, x):
43 | r"""Applies network layers and ops on input image(s) x.
44 | 
45 | Args:
46 | x: input image or batch of images.
47 | 
48 | Return:
49 | When self.training==True, loc and conf for each anchor box;
50 | 
51 | When self.training==False, loc and conf.sigmoid() for each anchor box;
52 | 
53 | For each pyramid layer, conf with shape [batch, num_anchor*num_classes, height, width];
54 | 
55 | For each pyramid layer, loc with shape [batch, num_anchor*4, height, width].
56 | """
57 | loc, conf = [list() for _ in range(2)]
58 | 
59 | # apply backbone to input and cache outputs
60 | features = self.backbone(x)
61 | 
62 | # apply extra blocks and cache outputs
63 | for v in self.extras:
64 | x = v(features[-1])
65 | features.append(x)
66 | 
67 | # apply multibox head to source layers
68 | for (x, l, c) in zip(features, self.loc, self.conf):
69 | loc.append(l(x))
70 | conf.append(c(x))
71 | 
72 | if not self.training:
73 | conf = [c.sigmoid() for c in conf]
74 | return tuple(loc), tuple(conf)
75 | 
76 | @staticmethod
77 | def add_extras(feature_layer, mbox, num_classes):
78 | r"""Define and declare the extras, loc and conf modules for the ssd model.
79 | 
80 | The feature_layer is defined in cfg.MODEL.FEATURE_LAYER. For the ssd model it can be an int or a str:
81 | 
82 | * int
83 | The int in the feature_layer represents the output feature in the backbone.
84 | * str
85 | The str in the feature_layer represents the extra layers appended at the end of the backbone.
86 | 
87 | Args:
88 | feature_layer: the feature layers with detection head, defined by cfg.MODEL.FEATURE_LAYER
89 | mbox: the number of boxes for each feature map
90 | num_classes: the number of classes, defined by cfg.MODEL.NUM_CLASSES
91 | """
92 | nets_outputs, extra_layers, loc_layers, conf_layers = [list() for _ in range(4)]
93 | in_channels = None
94 | for layer, depth, box in zip(feature_layer[0], feature_layer[1], mbox):
95 | if isinstance(layer, int):
96 | nets_outputs.append(layer)
97 | else:
98 | extra_layers += parse_feature_layer(layer, in_channels, depth)
99 | in_channels = depth
100 | loc_layers += [nn.Conv2d(in_channels, box * 4, kernel_size=3, padding=1)]
101 | conf_layers += [
102 | nn.Conv2d(in_channels, box * num_classes, kernel_size=3, padding=1)
103 | ]
104 | return nets_outputs, extra_layers, (loc_layers, conf_layers)
105 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/ssdsbase.py: -------------------------------------------------------------------------------- 
1 | import torch
2 | import torch.nn as nn
3 | import math
4 | 
5 | 
6 | class SSDSBase(nn.Module):
7 | r"""Base class for all ssds models.
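    The conf heads of the subclasses are initialized with the focal-loss class
    prior (see initialize_prior below): with pi = 0.01 the final bias is set to

        b = -log((1 - pi) / pi) ≈ -4.6

    so every anchor starts with sigmoid(conf) ≈ 0.01 and the easy background
    locations do not dominate the loss early in training.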
8 | """ 9 | def __init__(self, backbone, num_classes): 10 | super(SSDSBase, self).__init__() 11 | self.backbone = backbone 12 | self.num_classes = num_classes 13 | 14 | # Initialize class head prior 15 | def initialize_prior(self, layer): 16 | pi = 0.01 17 | b = -math.log((1 - pi) / pi) 18 | nn.init.constant_(layer.bias, b) 19 | nn.init.normal_(layer.weight, std=0.01) 20 | 21 | def initialize_head(self, layer): 22 | if isinstance(layer, nn.Conv2d): 23 | nn.init.normal_(layer.weight, std=0.01) 24 | if layer.bias is not None: 25 | nn.init.constant_(layer.bias, val=0) 26 | 27 | def initialize_extra(self, layer): 28 | if isinstance(layer, nn.Conv2d): 29 | nn.init.xavier_uniform_(layer.weight) 30 | if layer.bias is not None: 31 | nn.init.constant_(layer.bias, val=0) 32 | -------------------------------------------------------------------------------- /ssds/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/pipeline/__init__.py -------------------------------------------------------------------------------- /ssds/pipeline/pipeline_anchor_apex.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import time 4 | from datetime import timedelta 5 | 6 | from apex import amp 7 | 8 | import ssds.core.tools as tools 9 | import ssds.core.visualize_funcs as vsf 10 | from ssds.modeling.layers.box import extract_targets 11 | 12 | CURSOR_UP_ONE = "\x1b[1A" 13 | ERASE_LINE = "\x1b[2K" 14 | 15 | 16 | class ModelWithLossBasic(torch.nn.Module): 17 | r""" Class use to help the gpu memory becomes more balance in ddp model 18 | """ 19 | 20 | def __init__( 21 | self, 22 | model, 23 | cls_criterion, 24 | loc_criterion, 25 | num_classes, 26 | match, 27 | center_sampling_radius, 28 | ): 29 | super(ModelWithLossBasic, self).__init__() 30 | self.model = model 31 | self.cls_criterion = cls_criterion 32 | self.loc_criterion = loc_criterion 33 | self.num_classes = num_classes 34 | self.match = match 35 | self.center_radius = center_sampling_radius 36 | 37 | def forward(self, images, targets, anchors): 38 | r""" 39 | :meta private: 40 | """ 41 | loc, conf = self.model(images) 42 | 43 | cls_losses, loc_losses, fg_targets = [], [], [] 44 | for j, (stride, anchor) in enumerate(anchors.items()): 45 | size = conf[j].shape[-2:] 46 | conf_target, loc_target, depth = extract_targets( 47 | targets, 48 | anchors, 49 | self.num_classes, 50 | stride, 51 | size, 52 | self.match, 53 | self.center_radius, 54 | ) 55 | fg_targets.append((depth > 0).sum().float().clamp(min=1)) 56 | 57 | c = conf[j].view_as(conf_target).float() 58 | cls_mask = (depth >= 0).expand_as(conf_target).float() 59 | cls_loss = self.cls_criterion(c, conf_target, depth) 60 | cls_loss = cls_mask * cls_loss 61 | cls_losses.append(cls_loss.sum()) 62 | 63 | l = loc[j].view_as(loc_target).float() 64 | loc_loss = self.loc_criterion(l, loc_target) 65 | loc_mask = (depth > 0).expand_as(loc_loss).float() 66 | loc_loss = loc_mask * loc_loss 67 | loc_losses.append(loc_loss.sum()) 68 | 69 | fg_targets = torch.stack(fg_targets).sum() 70 | cls_loss = torch.stack(cls_losses).sum() / fg_targets 71 | loc_loss = torch.stack(loc_losses).sum() / fg_targets 72 | return cls_loss, loc_loss, cls_losses, loc_losses 73 | 74 | 75 | def train_anchor_based_epoch( 76 | model, data_loader, optimizer, anchors, writer, epoch, device, local_rank 77 | ): 78 | r""" the pipeline for 
training 79 | """ 80 | model.train() 81 | title = "Train: " 82 | 83 | if local_rank == 0: 84 | loss_writer = { 85 | "loc_loss": tools.AverageMeter(), 86 | "cls_loss": tools.AverageMeter(), 87 | } 88 | loss_writer.update( 89 | { 90 | "loc_loss_{}".format(j): tools.AverageMeter() 91 | for j, _ in enumerate(anchors.items()) 92 | } 93 | ) 94 | loss_writer.update( 95 | { 96 | "cls_loss_{}".format(j): tools.AverageMeter() 97 | for j, _ in enumerate(anchors.items()) 98 | } 99 | ) 100 | start_time = time.time() 101 | dataset_len = len(data_loader) 102 | 103 | for batch_idx, (images, targets) in enumerate(data_loader): 104 | if images.device != device: 105 | images, targets = images.to(device), targets.to(device) 106 | if targets.dtype != torch.float: 107 | targets = targets.float() 108 | 109 | cls_loss, loc_loss, cls_losses, loc_losses = model(images, targets, anchors) 110 | if torch.isnan(loc_loss) or torch.isnan(cls_loss): 111 | continue 112 | if local_rank == 0: 113 | for j, (cl, ll) in enumerate(zip(cls_losses, loc_losses)): 114 | loss_writer["cls_loss_{}".format(j)].update(cl.item()) 115 | loss_writer["loc_loss_{}".format(j)].update(ll.item()) 116 | loss_writer["cls_loss"].update(cls_loss.item()) 117 | loss_writer["loc_loss"].update(loc_loss.item()) 118 | log = { 119 | "cls_loss": cls_loss.item(), 120 | "loc_loss": loc_loss.item(), 121 | "lr": optimizer.param_groups[0]["lr"], 122 | } 123 | 124 | optimizer.zero_grad() 125 | total_loss = cls_loss + loc_loss 126 | if total_loss.item() == float("Inf") or torch.isnan(total_loss): 127 | continue 128 | with amp.scale_loss(total_loss, optimizer) as scaled_loss: 129 | scaled_loss.backward() 130 | optimizer.step() 131 | 132 | if local_rank == 0: 133 | elapsed_time = time.time() - start_time 134 | estimat_time = elapsed_time * (dataset_len) / (batch_idx + 1) 135 | # log per iter 136 | print( 137 | title + tools.format_dict_of_loss(log), 138 | "|", 139 | batch_idx + 1, 140 | "/", 141 | dataset_len, 142 | "| Time:", 143 | timedelta(seconds=int(elapsed_time)), 144 | "/", 145 | timedelta(seconds=int(estimat_time)), 146 | "\r", 147 | end="", 148 | ) 149 | sys.stdout.flush() 150 | 151 | if local_rank == 0: 152 | log = {"lr": optimizer.param_groups[0]["lr"]} 153 | log.update({k: v.avg for k, v in loss_writer.items()}) 154 | print( 155 | CURSOR_UP_ONE 156 | + ERASE_LINE 157 | + "===>Avg Train: " 158 | + tools.format_dict_of_loss(log), 159 | " | Time: ", 160 | timedelta(seconds=int(time.time() - start_time)), 161 | ) 162 | 163 | # log for tensorboard 164 | for key, value in log.items(): 165 | writer.add_scalar("Train/{}".format(key), value, epoch) 166 | targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4] 167 | vsf.add_imagesWithBoxes( 168 | writer, "Train Image", images[:5], targets[:5], epoch=epoch 169 | ) 170 | 171 | return 172 | -------------------------------------------------------------------------------- /ssds/pipeline/pipeline_anchor_basic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from tqdm import tqdm 3 | import torch 4 | 5 | import ssds.core.tools as tools 6 | import ssds.core.visualize_funcs as vsf 7 | from ssds.core.evaluation_metrics import MeanAveragePrecision 8 | from ssds.modeling.layers.box import extract_targets 9 | 10 | CURSOR_UP_ONE = "\x1b[1A" 11 | ERASE_LINE = "\x1b[2K" 12 | 13 | 14 | def train_anchor_based_epoch( 15 | model, 16 | data_loader, 17 | optimizer, 18 | cls_criterion, 19 | loc_criterion, 20 | anchors, 21 | num_classes, 22 | match, 23 | center_sampling_radius, 
24 | writer, 25 | epoch, 26 | device, 27 | ): 28 | r""" the pipeline for training 29 | """ 30 | model.train() 31 | 32 | title = "Train: " 33 | progress = tqdm( 34 | tools.IteratorTimer(data_loader), 35 | total=len(data_loader), 36 | smoothing=0.9, 37 | miniters=1, 38 | leave=True, 39 | desc=title, 40 | ) 41 | 42 | loss_writer = {"loc_loss": tools.AverageMeter(), "cls_loss": tools.AverageMeter()} 43 | loss_writer.update( 44 | { 45 | "loc_loss_{}".format(j): tools.AverageMeter() 46 | for j, _ in enumerate(anchors.items()) 47 | } 48 | ) 49 | loss_writer.update( 50 | { 51 | "cls_loss_{}".format(j): tools.AverageMeter() 52 | for j, _ in enumerate(anchors.items()) 53 | } 54 | ) 55 | 56 | for batch_idx, (images, targets) in enumerate(progress): 57 | if images.device != device: 58 | images, targets = images.to(device), targets.to(device) 59 | if targets.dtype != torch.float: 60 | targets = targets.float() 61 | 62 | loc, conf = model(images) 63 | 64 | cls_losses, loc_losses, fg_targets = [], [], [] 65 | for j, (stride, anchor) in enumerate(anchors.items()): 66 | size = conf[j].shape[-2:] 67 | conf_target, loc_target, depth = extract_targets( 68 | targets, 69 | anchors, 70 | num_classes, 71 | stride, 72 | size, 73 | match, 74 | center_sampling_radius, 75 | ) 76 | fg_targets.append((depth > 0).sum().float().clamp(min=1)) 77 | 78 | c = conf[j].view_as(conf_target).float() 79 | cls_mask = (depth >= 0).expand_as(conf_target).float() 80 | cls_loss = cls_criterion(c, conf_target, depth) 81 | cls_loss = cls_mask * cls_loss 82 | cls_losses.append(cls_loss.sum()) 83 | 84 | l = loc[j].view_as(loc_target).float() 85 | loc_loss = loc_criterion(l, loc_target) 86 | loc_mask = (depth > 0).expand_as(loc_loss).float() 87 | loc_loss = loc_mask * loc_loss 88 | loc_losses.append(loc_loss.sum()) 89 | 90 | if torch.isnan(loc_loss.sum()) or torch.isnan(cls_loss.sum()): 91 | continue 92 | loss_writer["cls_loss_{}".format(j)].update(cls_losses[-1].item()) 93 | loss_writer["loc_loss_{}".format(j)].update(loc_losses[-1].item()) 94 | 95 | fg_targets = torch.stack(fg_targets).sum() 96 | cls_loss = torch.stack(cls_losses).sum() / fg_targets 97 | loc_loss = torch.stack(loc_losses).sum() / fg_targets 98 | if torch.isnan(loc_loss) or torch.isnan(cls_loss): 99 | continue 100 | loss_writer["cls_loss"].update(cls_loss.item()) 101 | loss_writer["loc_loss"].update(loc_loss.item()) 102 | 103 | log = { 104 | "cls_loss": cls_loss.item(), 105 | "loc_loss": loc_loss.item(), 106 | "lr": optimizer.param_groups[0]["lr"], 107 | } 108 | 109 | optimizer.zero_grad() 110 | total_loss = cls_loss + loc_loss 111 | if total_loss.item() == float("Inf"): 112 | continue 113 | total_loss.backward() 114 | optimizer.step() 115 | 116 | # log per iter 117 | progress.set_description(title + tools.format_dict_of_loss(log)) 118 | progress.update(1) 119 | 120 | progress.close() 121 | log = {"lr": optimizer.param_groups[0]["lr"]} 122 | log.update({k: v.avg for k, v in loss_writer.items()}) 123 | print( 124 | CURSOR_UP_ONE + ERASE_LINE + "===>Avg Train: " + tools.format_dict_of_loss(log) 125 | ) 126 | 127 | # log for tensorboard 128 | for key, value in log.items(): 129 | writer.add_scalar("Train/{}".format(key), value, epoch) 130 | targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4] 131 | vsf.add_imagesWithBoxes(writer, "Train Image", images[:5], targets[:5], epoch=epoch) 132 | 133 | return 134 | 135 | 136 | def eval_anchor_based_epoch( 137 | model, 138 | data_loader, 139 | decoder, 140 | cls_criterion, 141 | loc_criterion, 142 | anchors, 143 | num_classes, 
144 | writer, 145 | epoch, 146 | device, 147 | ): 148 | r""" the pipeline for evaluation 149 | """ 150 | model.eval() 151 | title = "Eval: " 152 | progress = tqdm( 153 | tools.IteratorTimer(data_loader), 154 | total=len(data_loader), 155 | smoothing=0.9, 156 | miniters=1, 157 | leave=True, 158 | desc=title, 159 | ) 160 | 161 | metric = MeanAveragePrecision( 162 | num_classes, decoder.conf_threshold, decoder.nms_threshold 163 | ) 164 | for batch_idx, (images, targets) in enumerate(progress): 165 | if images.device != device: 166 | images, targets = images.to(device), targets.to(device) 167 | if targets.dtype != torch.float: 168 | targets = targets.float() 169 | 170 | loc, conf = model(images) 171 | 172 | # removed loss since the conf is sigmod in the evaluation stage, 173 | # the conf loss is not meaningful anymore 174 | detections = decoder(loc, conf, anchors) 175 | targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4] # from xywh to ltrb 176 | metric(detections, targets) 177 | 178 | # log per iter 179 | progress.update(1) 180 | 181 | progress.close() 182 | mAP, (prec, rec, ap) = metric.get_results() 183 | 184 | log = {"mAP": mAP} 185 | if len(ap) < 5: 186 | for i, a in enumerate(ap): 187 | log["AP@cls{}".format(i)] = a 188 | print( 189 | CURSOR_UP_ONE + ERASE_LINE + "===>Avg Eval: " + tools.format_dict_of_loss(log) 190 | ) 191 | 192 | # log for tensorboard 193 | for key, value in log.items(): 194 | writer.add_scalar("Eval/{}".format(key), value, epoch) 195 | vsf.add_prCurve(writer, prec, rec, epoch=epoch) 196 | boxes = torch.cat((detections[1], detections[0][..., None]), dim=2) 197 | vsf.add_imagesWithMatchedBoxes( 198 | writer, "Eval Image", images[:5], boxes[:5], targets[:5], epoch=epoch 199 | ) 200 | return 201 | -------------------------------------------------------------------------------- /ssds/ssds.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .core import checkpoint, config 4 | from .modeling import model_builder 5 | 6 | 7 | class SSDDetector(object): 8 | r"""The class used for ssds model inference 9 | 10 | Args: 11 | cfg_file (str): The path to the config file 12 | is_print (bool): Whether to print the model arch and the anchor strategy 13 | """ 14 | def __init__(self, cfg_file, is_print=False): 15 | # Config 16 | cfg = config.cfg_from_file(cfg_file) 17 | 18 | # Build model 19 | print("===> Building model") 20 | self.model = model_builder.create_model(cfg.MODEL) 21 | if is_print: 22 | print("Model architectures:\n{}\n".format(self.model)) 23 | self.anchors = model_builder.create_anchors( 24 | cfg.MODEL, self.model, cfg.MODEL.IMAGE_SIZE, is_print 25 | ) 26 | self.decoder = model_builder.create_decoder(cfg.POST_PROCESS) 27 | 28 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 29 | if cfg.RESUME_CHECKPOINT: 30 | print( 31 | "Loading initial model weights from {:s}".format(cfg.RESUME_CHECKPOINT) 32 | ) 33 | checkpoint.resume_checkpoint(self.model, cfg.RESUME_CHECKPOINT, "") 34 | self.model.eval().to(self.device) 35 | 36 | self.image_size = tuple(cfg.MODEL.IMAGE_SIZE) 37 | self.num_classes = cfg.MODEL.NUM_CLASSES 38 | self.mean = cfg.DATASET.PREPROC.MEAN 39 | self.std = cfg.DATASET.PREPROC.STD 40 | 41 | def __call__(self, imgs): 42 | r"""Applied the SSD model to do the inference for the given images 43 | 44 | Args: 45 | imgs (np.ndarray): The images used for inference, with shape [H,W,3], [3,H,W] [N,H,W,3] or [N,3,H,W] 46 | """ 47 | pick1st = False 48 | if len(imgs.shape) == 3: 49 
/ssds/ssds.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .core import checkpoint, config
4 | from .modeling import model_builder
5 |
6 |
7 | class SSDDetector(object):
8 |     r"""The class used for ssds model inference
9 |
10 |     Args:
11 |         cfg_file (str): The path to the config file
12 |         is_print (bool): Whether to print the model arch and the anchor strategy
13 |     """
14 |     def __init__(self, cfg_file, is_print=False):
15 |         # Config
16 |         cfg = config.cfg_from_file(cfg_file)
17 |
18 |         # Build model
19 |         print("===> Building model")
20 |         self.model = model_builder.create_model(cfg.MODEL)
21 |         if is_print:
22 |             print("Model architectures:\n{}\n".format(self.model))
23 |         self.anchors = model_builder.create_anchors(
24 |             cfg.MODEL, self.model, cfg.MODEL.IMAGE_SIZE, is_print
25 |         )
26 |         self.decoder = model_builder.create_decoder(cfg.POST_PROCESS)
27 |
28 |         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
29 |         if cfg.RESUME_CHECKPOINT:
30 |             print(
31 |                 "Loading initial model weights from {:s}".format(cfg.RESUME_CHECKPOINT)
32 |             )
33 |             checkpoint.resume_checkpoint(self.model, cfg.RESUME_CHECKPOINT, "")
34 |         self.model.eval().to(self.device)
35 |
36 |         self.image_size = tuple(cfg.MODEL.IMAGE_SIZE)
37 |         self.num_classes = cfg.MODEL.NUM_CLASSES
38 |         self.mean = cfg.DATASET.PREPROC.MEAN
39 |         self.std = cfg.DATASET.PREPROC.STD
40 |
41 |     def __call__(self, imgs):
42 |         r"""Applies the SSD model to run inference on the given images
43 |
44 |         Args:
45 |             imgs (np.ndarray): The images used for inference, with shape [H,W,3], [3,H,W], [N,H,W,3] or [N,3,H,W]
46 |         """
47 |         pick1st = False
48 |         if len(imgs.shape) == 3:
49 |             imgs = imgs[None, ...]
50 |             pick1st = True
51 |         if len(imgs.shape) != 4:
52 |             raise AssertionError("image dims have to be 3 or 4")
53 |         if imgs.shape[3] == 3:
54 |             imgs = imgs.transpose(0, 3, 1, 2)
55 |
56 |         imgs_tensor = torch.Tensor(imgs).to(self.device)
57 |         imgs_tensor = (imgs_tensor - self.mean) / self.std
58 |
59 |         loc, conf = self.model(imgs_tensor)
60 |         detections = self.decoder(loc, conf, self.anchors)
61 |         out_scores, out_boxes, out_classes = (
62 |             d.cpu().detach().numpy() for d in detections
63 |         )
64 |
65 |         if pick1st:
66 |             return out_scores[0], out_boxes[0].astype(int), out_classes[0].astype(int)
67 |         else:
68 |             return out_scores, out_boxes.astype(int), out_classes.astype(int)
69 |
--------------------------------------------------------------------------------
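SSDDetector above is the package's single-entry inference API: build it from a config file, then call it directly on a numpy image. A minimal usage sketch, assuming the config's RESUME_CHECKPOINT points at trained weights (the paths below are placeholders from the experiments folder; see also demo.py in the repository root):

import cv2
from ssds.ssds import SSDDetector

detector = SSDDetector("experiments/cfgs/tests/test.yml")
image = cv2.imread("experiments/person.jpg")  # HWC ndarray; CHW and batched inputs are accepted too
scores, boxes, classes = detector(image)      # per-image arrays, since a single image was passed
for score, box, cls in zip(scores, boxes, classes):
    print("class {}  score {:.2f}  box {}".format(cls, score, box))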
/ssds/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/utils/__init__.py
--------------------------------------------------------------------------------
/ssds/utils/export.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import sys
4 | import glob
5 | import json
6 | import argparse
7 |
8 | import torch
9 | import torch.nn as nn
10 | import ssds
11 | from ssds.core import checkpoint, config
12 | from ssds.modeling import model_builder
13 |
14 |
15 | class ExportModel(nn.Module):
16 |     def __init__(self, model, nhwc):
17 |         super(ExportModel, self).__init__()
18 |         self.model = model
19 |         self.nhwc = nhwc
20 |
21 |     def forward(self, x):
22 |         if self.nhwc:
23 |             x = x.permute(0, 3, 1, 2).contiguous() / 255.0
24 |         return self.model(x)
25 |
26 |
27 | class Solver(object):
28 |     """
29 |     A wrapper class for the export process
30 |     """
31 |
32 |     def __init__(self, cfg, nhwc, render=False):
33 |         self.cfg = cfg
34 |         self.render = render
35 |         self.nhwc = nhwc
36 |
37 |         # Build model
38 |         print("===> Building model")
39 |         self.model = model_builder.create_model(cfg.MODEL)
40 |         self.model.eval().cuda()
41 |         self.anchors = model_builder.create_anchors(
42 |             self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE
43 |         )
44 |
45 |         # Print the model architecture and parameters
46 |         if self.render:
47 |             print("Model architectures:\n{}\n".format(self.model))
48 |             model_builder.create_anchors(
49 |                 self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE, self.render
50 |             )
51 |
52 |     def export_onnx(self, weights, export_path, batch):
53 |         if weights is not None:
54 |             checkpoint.resume_checkpoint(self.model, weights)
55 |         export_model = ExportModel(self.model, self.nhwc)
56 |
57 |         import torch.onnx.symbolic_opset9 as onnx_symbolic
58 |
59 |         def upsample_nearest2d(g, input, output_size, *args):
60 |             # Currently, the TRT 5.1/6.0 ONNX Parser does not support all ONNX ops
61 |             # needed for the dynamic upsampling ONNX formulation.
62 |             # Here we hardcode scale=2 as a temporary workaround
63 |             scales = g.op("Constant", value_t=torch.tensor([1.0, 1.0, 2.0, 2.0]))
64 |             return g.op("Upsample", input, scales, mode_s="nearest")
65 |
66 |         onnx_symbolic.upsample_nearest2d = upsample_nearest2d
67 |
68 |         export_model.eval().cuda()
69 |         if self.nhwc:
70 |             dummy_input = torch.rand(
71 |                 batch,
72 |                 self.cfg.MODEL.IMAGE_SIZE[1],
73 |                 self.cfg.MODEL.IMAGE_SIZE[0],
74 |                 3,
75 |                 requires_grad=False,
76 |             ).cuda()
77 |         else:
78 |             dummy_input = torch.rand(
79 |                 batch,
80 |                 3,
81 |                 self.cfg.MODEL.IMAGE_SIZE[1],
82 |                 self.cfg.MODEL.IMAGE_SIZE[0],
83 |                 requires_grad=False,
84 |             ).cuda()
85 |
86 |         outputs = export_model(dummy_input)
87 |         optional_args = dict(keep_initializers_as_inputs=True)
88 |         input_names = ["input"]
89 |         output_names = [
90 |             n.format(i) for n in ["loc_{}", "conf_{}"] for i in range(len(outputs[0]))
91 |         ]
92 |         if export_path:
93 |             print("Saving model weights & graph to {:s}".format(export_path))
94 |             param = {
95 |                 "image_size": self.cfg.MODEL.IMAGE_SIZE,
96 |                 "score": self.cfg.POST_PROCESS.SCORE_THRESHOLD,
97 |                 "iou": self.cfg.POST_PROCESS.IOU_THRESHOLD,
98 |                 "max_detects": self.cfg.POST_PROCESS.MAX_DETECTIONS,
99 |                 "max_detects_per_level": self.cfg.POST_PROCESS.MAX_DETECTIONS_PER_LEVEL,
100 |                 "rescore": self.cfg.POST_PROCESS.RESCORE_CENTER,
101 |                 "use_diou": self.cfg.POST_PROCESS.USE_DIOU,
102 |                 "NHWC": self.nhwc,
103 |                 "anchors": [v.view(-1).tolist() for k, v in self.anchors.items()],
104 |             }
105 |             with open(export_path + ".json", "w") as output_json:
106 |                 json.dump(param, output_json, indent=2)
107 |
108 |             torch.onnx.export(
109 |                 export_model,
110 |                 dummy_input,
111 |                 export_path,
112 |                 verbose=self.render,
113 |                 export_params=True,
114 |                 input_names=input_names,
115 |                 output_names=output_names,
116 |                 **optional_args
117 |             )
118 |             return False
119 |         else:
120 |             onnx_bytes = io.BytesIO()
121 |             torch.onnx.export(
122 |                 export_model.cuda(),
123 |                 dummy_input,
124 |                 onnx_bytes,
125 |                 verbose=self.render,
126 |                 input_names=input_names,
127 |                 output_names=output_names,
128 |             )
129 |             return onnx_bytes
130 |
131 |     def export_trt(
132 |         self, weights, export_path, batch, precision, calibration_files, workspace_size
133 |     ):
134 |         if not hasattr(ssds, "_C"):
135 |             raise AssertionError(
136 |                 "Currently the ssds lib is not installed with the external cpp plugin "
137 |                 "and cannot export to a tensorrt model. "
138 |                 "Please reinstall the ssds lib by `python setup_cpp.py clean -a install`"
139 |             )
140 |         onnx_bytes = self.export_onnx(weights, None, batch)
141 |         del self.model
142 |
143 |         model_name = self.cfg.MODEL.SSDS + "_" + self.cfg.MODEL.NETS
144 |         anchors = [v.view(-1).tolist() for k, v in self.anchors.items()]
145 |         if calibration_files != "":
146 |             calibration_files = glob.glob(calibration_files)
147 |             num_files = (len(calibration_files) // batch) * batch
148 |             calibration_files = calibration_files[:num_files]
149 |         else:
150 |             calibration_files = []
151 |             batch = 1
152 |
153 |         from ssds._C import trtConvert
154 |
155 |         trtConvert(
156 |             export_path,
157 |             onnx_bytes.getvalue(),
158 |             len(onnx_bytes.getvalue()),
159 |             batch,
160 |             precision,
161 |             self.cfg.POST_PROCESS.SCORE_THRESHOLD,
162 |             self.cfg.POST_PROCESS.MAX_DETECTIONS,
163 |             anchors,
164 |             self.cfg.POST_PROCESS.IOU_THRESHOLD,
165 |             self.cfg.POST_PROCESS.MAX_DETECTIONS,
166 |             calibration_files,
167 |             model_name,
168 |             "",
169 |             self.nhwc,
170 |             self.render,
171 |             workspace_size,
172 |         )
173 |         return False
174 |
175 |
176 | if __name__ == "__main__":
177 |     parser = argparse.ArgumentParser(description="export a ssds.pytorch network")
178 |     parser.add_argument(
179 |         "-cfg",
180 |         "--config",
181 |         dest="config_file",
182 |         help="optional config file",
183 |         default=None,
184 |         type=str,
185 |     )
186 |     parser.add_argument(
187 |         "-c", "--checkpoint", help="optional checkpoint file", default=None, type=str
188 |     )
189 |     parser.add_argument("-o", "--onnx", help="output onnx file", default=None, type=str)
190 |     parser.add_argument("-t", "--trt", help="output trt file", default=None, type=str)
191 |     parser.add_argument(
192 |         "-b", "--batch", help="batch size for output model", default=1, type=int
193 |     )
194 |     parser.add_argument(
195 |         "-p",
196 |         "--precision",
197 |         help="precision for output trt model",
198 |         default="FP32",
199 |         choices=["FP32", "FP16", "INT8"],
200 |     )
201 |     parser.add_argument(
202 |         "-i",
203 |         "--image-files",
204 |         help="image files to calibrate the output trt model in int8 mode",
205 |         default="",
206 |         type=str,
207 |     )
208 |     parser.add_argument(
209 |         "-w",
210 |         "--max-workspace-size",
211 |         help="The max workspace size for the output plan file. The final size is "
212 |         "1 << max-workspace-size, e.g. 30 for 1 GB.",
213 |         type=int,
214 |     )
215 |     parser.add_argument("--nhwc", action="store_true")
216 |     parser.add_argument("-r", "--render", action="store_true")
217 |
218 |     if len(sys.argv) == 1:
219 |         parser.print_help()
220 |         sys.exit(1)
221 |     args = parser.parse_args()
222 |
223 |     cfg = config.cfg_from_file(args.config_file)
224 |     solver = Solver(cfg, args.nhwc, args.render)
225 |     if args.onnx:
226 |         solver.export_onnx(args.checkpoint, args.onnx, args.batch)
227 |     if args.trt:
228 |         solver.export_trt(
229 |             args.checkpoint,
230 |             args.trt,
231 |             args.batch,
232 |             args.precision,
233 |             args.image_files,
234 |             1 << args.max_workspace_size,
235 |         )
--------------------------------------------------------------------------------
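The argparse entry point above drives both export paths. A hedged invocation sketch (the file paths are placeholders):

# ONNX export
python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c weights/model.pth -o model.onnx

# TensorRT export at FP16; -w is a shift amount, so 30 requests a 1 << 30 byte (1 GiB) workspace
python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c weights/model.pth -t model.plan -p FP16 -w 30

Note that -w has no default: with the flag omitted, `1 << args.max_workspace_size` is evaluated on None and raises a TypeError, so always pass -w together with -t.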
/ssds/utils/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.backends.cudnn as cudnn
8 | from torch.utils.tensorboard import SummaryWriter
9 |
10 | from ssds.core import optimizer, checkpoint, criterion, config, data_parallel
11 | from ssds.modeling import model_builder
12 | from ssds.pipeline.pipeline_anchor_basic import (
13 |     train_anchor_based_epoch,
14 |     eval_anchor_based_epoch,
15 | )
16 | from ssds.dataset.dataset_factory import load_data
17 |
18 |
19 | class Solver(object):
20 |     """
21 |     A wrapper class for the training process
22 |     """
23 |
24 |     def __init__(self, cfg, render=False):
25 |         self.cfg = cfg
26 |         self.render = render
27 |
28 |         # Build model
29 |         print("===> Building model")
30 |         self.model = model_builder.create_model(cfg.MODEL)
31 |
32 |         # Utilize GPUs for computation
33 |         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34 |
35 |         # Print the model architecture and parameters
36 |         if self.render:
37 |             print("Model architectures:\n{}\n".format(self.model))
38 |             # print('Parameters and size:')
39 |             # for name, param in self.model.named_parameters():
40 |             #     print('{}: {}'.format(name, list(param.size())))
41 |
42 |         # print trainable scope
43 |         print("Trainable scope: {}".format(cfg.TRAIN.TRAINABLE_SCOPE))
44 |         trainable_param_ = optimizer.trainable_param(
45 |             self.model, cfg.TRAIN.TRAINABLE_SCOPE
46 |         )
47 |         self.optimizer = optimizer.configure_optimizer(
48 |             trainable_param_, cfg.TRAIN.OPTIMIZER
49 |         )
50 |         self.lr_scheduler = optimizer.configure_lr_scheduler(
51 |             self.optimizer, cfg.TRAIN.LR_SCHEDULER
52 |         )
53 |         self.max_epochs = cfg.TRAIN.MAX_EPOCHS
54 |
55 |         # metric
56 |         self.cls_criterion = getattr(criterion, self.cfg.MATCHER.CLASSIFY_LOSS)(
57 |             alpha=self.cfg.MATCHER.FOCAL_ALPHA,
58 |             gamma=self.cfg.MATCHER.FOCAL_GAMMA,
59 |             negpos_ratio=self.cfg.MATCHER.NEGPOS_RATIO,
60 |         )
61 |         self.loc_criterion = getattr(criterion, self.cfg.MATCHER.LOCATE_LOSS)()
62 |
63 |         # Set the logger
64 |         self.writer = SummaryWriter(log_dir=cfg.LOG_DIR)
65 |
66 |     def train_model(self):
67 |         previous = checkpoint.find_previous_checkpoint(self.cfg.EXP_DIR)
68 |         if previous:
69 |             start_epoch = previous[0][-1]
70 |             checkpoint.resume_checkpoint(
71 |                 self.model, previous[1][-1], self.cfg.TRAIN.RESUME_SCOPE
72 |             )
73 |         else:
74 |             start_epoch = 0
75 |             if self.cfg.RESUME_CHECKPOINT:
76 |                 print(
77 |                     "Loading initial model weights from {:s}".format(
78 |                         self.cfg.RESUME_CHECKPOINT
79 |                     )
80 |                 )
81 |                 checkpoint.resume_checkpoint(
82 |                     self.model, self.cfg.RESUME_CHECKPOINT, self.cfg.TRAIN.RESUME_SCOPE
83 |                 )
84 |
85 |         if torch.cuda.is_available():
86 |             print("Utilize GPUs for computation")
87 |             print("Number of GPUs available:", torch.cuda.device_count())
88 |             if len(self.cfg.DEVICE_ID) > 1:
89 |                 gpu0_bsz = self.cfg.TRAIN.BATCH_SIZE // (
90 |                     8 * (len(self.cfg.DEVICE_ID) - 1) + 1
91 |                 )  # e.g. 4 GPUs with batch size 100 -> 100 // 25 = 4 samples on gpu0
92 |                 self.model = data_parallel.BalancedDataParallel(
93 |                     gpu0_bsz, self.model, device_ids=self.cfg.DEVICE_ID
94 |                 )
95 |                 # self.model = nn.DataParallel(self.model, device_ids=self.cfg.DEVICE_ID)
96 |             self.model.to(self.device)
97 |             cudnn.benchmark = True
98 |
99 |         # Load data
100 |         print("===> Loading data")
101 |         train_loader = load_data(self.cfg.DATASET, "train")
102 |         eval_loader = load_data(self.cfg.DATASET, "eval") if "eval" in self.cfg.PHASE else None
103 |
104 |         warm_up = self.cfg.TRAIN.LR_SCHEDULER.WARM_UP_EPOCHS
105 |         for epoch in iter(range(start_epoch + 1, self.max_epochs + 1)):
106 |             sys.stdout.write(
107 |                 "\rEpoch {epoch:d}/{max_epochs:d}:\n".format(
108 |                     epoch=epoch, max_epochs=self.max_epochs
109 |                 )
110 |             )
111 |             torch.cuda.empty_cache()
112 |             # start phases for epoch
113 |             anchors = model_builder.create_anchors(
114 |                 self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE, self.render
115 |             )
116 |             train_anchor_based_epoch(
117 |                 self.model,
118 |                 train_loader,
119 |                 self.optimizer,
120 |                 self.cls_criterion,
121 |                 self.loc_criterion,
122 |                 anchors,
123 |                 self.cfg.MODEL.NUM_CLASSES,
124 |                 self.cfg.MATCHER.MATCH_THRESHOLD,
125 |                 self.cfg.MATCHER.CENTER_SAMPLING_RADIUS,
126 |                 self.writer,
127 |                 epoch,
128 |                 self.device,
129 |             )
130 |             # save checkpoint
131 |             if epoch % self.cfg.TRAIN.CHECKPOINTS_EPOCHS == 0:
132 |                 checkpoint.save_checkpoints(
133 |                     self.model, self.cfg.EXP_DIR, self.cfg.CHECKPOINTS_PREFIX, epoch
134 |                 )
135 |             # multi scale training
136 |             if len(self.cfg.DATASET.MULTISCALE) > 1:
137 |                 batch_size, target_size = self.cfg.DATASET.MULTISCALE[
138 |                     epoch % len(self.cfg.DATASET.MULTISCALE)
139 |                 ]
140 |                 train_loader.reset_size(batch_size, target_size)
141 |             if "eval" in self.cfg.PHASE:
142 |                 anchors = model_builder.create_anchors(
143 |                     self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE
144 |                 )
145 |                 decoder = model_builder.create_decoder(self.cfg.POST_PROCESS)
146 |                 eval_anchor_based_epoch(
147 |                     self.model,
148 |                     eval_loader,
149 |                     decoder,
150 |                     self.cls_criterion,
151 |                     self.loc_criterion,
152 |                     anchors,
153 |                     self.cfg.MODEL.NUM_CLASSES,
154 |                     self.writer,
155 |                     epoch,
156 |                     self.device,
157 |                 )
158 |
159 |             self.lr_scheduler.step()
160 |
161 |     def eval_model(self):
162 |         eval_loader = load_data(self.cfg.DATASET, "eval")
163 |         self.model.to(self.device)
164 |         anchors = model_builder.create_anchors(
165 |             self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE
166 |         )
167 |         decoder = model_builder.create_decoder(self.cfg.POST_PROCESS)
168 |
169 |         previous = checkpoint.find_previous_checkpoint(self.cfg.EXP_DIR)
170 |         if previous:
171 |             for epoch, resume_checkpoint in zip(previous[0], previous[1]):
172 |                 if self.cfg.TEST.TEST_SCOPE[0] <= epoch <= self.cfg.TEST.TEST_SCOPE[1]:
173 |                     checkpoint.resume_checkpoint(
174 |                         self.model, resume_checkpoint, self.cfg.TRAIN.RESUME_SCOPE
175 |                     )
176 |                     eval_anchor_based_epoch(
177 |                         self.model,
178 |                         eval_loader,
179 |                         decoder,
180 |                         self.cls_criterion,
181 |                         self.loc_criterion,
182 |                         anchors,
183 |                         self.cfg.MODEL.NUM_CLASSES,
184 |                         self.writer,
185 |                         epoch,
186 |                         self.device,
187 |                     )
188 |         else:
189 |             if self.cfg.RESUME_CHECKPOINT:
190 |                 print(
191 |                     "Loading initial model weights from {:s}".format(
192 |                         self.cfg.RESUME_CHECKPOINT
193 |                     )
194 |                 )
195 |                 checkpoint.resume_checkpoint(
196 |                     self.model, self.cfg.RESUME_CHECKPOINT, self.cfg.TRAIN.RESUME_SCOPE
197 |                 )
198 |             eval_anchor_based_epoch(
199 |                 self.model,
200 |                 eval_loader,
201 |                 decoder,
202 |                 self.cls_criterion,
203 |                 self.loc_criterion,
204 |                 anchors,
205 |                 self.cfg.MODEL.NUM_CLASSES,
206 |                 self.writer,
207 |                 0,
208 |                 self.device,
209 |             )
210 |
211 |
212 | if __name__ == "__main__":
213 |     """
214 |     Parse input arguments
215 |     """
216 |     parser = argparse.ArgumentParser(description="Train/Eval a ssds.pytorch network")
217 |     parser.add_argument(
218 |         "-cfg",
219 |         "--config",
220 |         dest="config_file",
221 |         help="optional config file",
222 |         default=None,
223 |         type=str,
224 |     )
225 |     parser.add_argument("-e", "--eval", action="store_true")
226 |     parser.add_argument("-r", "--render", action="store_true")
227 |
228 |     if len(sys.argv) == 1:
229 |         parser.print_help()
230 |         sys.exit(1)
231 |     args = parser.parse_args()
232 |
233 |     cfg = config.cfg_from_file(args.config_file)
234 |     solver = Solver(cfg, args.render)
235 |     if args.eval:
236 |         solver.eval_model()
237 |     else:
238 |         solver.train_model()
239 |
--------------------------------------------------------------------------------
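A hedged invocation sketch for the trainer above (the config path is a placeholder). Without flags it trains, resuming from the newest checkpoint found in EXP_DIR if one exists; -e instead evaluates the saved checkpoints whose epochs fall inside TEST.TEST_SCOPE; -r additionally prints the model architecture and anchor setup:

# train
python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml

# evaluate previously saved checkpoints
python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml -e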
/ssds/utils/train_ddp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | from pynvml.smi import nvidia_smi
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.backends.cudnn as cudnn
9 | from torch.utils.tensorboard import SummaryWriter
10 |
11 | from apex import amp
12 | from apex.parallel import convert_syncbn_model
13 | from apex.parallel import DistributedDataParallel as DDP
14 |
15 | from ssds.core import optimizer, checkpoint, criterion, config
16 | from ssds.modeling import model_builder
17 | from ssds.pipeline.pipeline_anchor_apex import (
18 |     train_anchor_based_epoch,
19 |     ModelWithLossBasic,
20 | )
21 | from ssds.dataset.dataset_factory import load_data
22 |
23 | nvsmi = nvidia_smi.getInstance()
24 |
25 |
26 | def getMemoryUsage(idx=0):
27 |     usage = nvsmi.DeviceQuery("memory.used")["gpu"][idx]["fb_memory_usage"]
28 |     return "%d %s" % (usage["used"], usage["unit"])
29 |
30 |
31 | class Solver(object):
32 |     """
33 |     A wrapper class for the training process
34 |     """
35 |
36 |     def __init__(self, cfg, local_rank, render=False):
37 |         self.cfg = cfg
38 |         self.local_rank = local_rank
39 |         self.render = render
40 |
41 |         # Build model
42 |         if self.local_rank == 0:
43 |             print("===> Building model")
44 |         self.model = model_builder.create_model(cfg.MODEL)
45 |         self.load_model()
46 |
47 |         # Utilize GPUs for computation
48 |         self.device = torch.device("cuda:{}".format(local_rank))
49 |
50 |         # Convert to sync model
51 |         self.model = convert_syncbn_model(self.model)
52 |         self.model.to(self.device)
53 |
54 |         # Print the model architecture and parameters
55 |         if self.render and self.local_rank == 0:
56 |             print("Model architectures:\n{}\n".format(self.model))
57 |
58 |         # print trainable scope
59 |         if self.local_rank == 0:
60 |             print("Trainable scope: {}".format(cfg.TRAIN.TRAINABLE_SCOPE))
61 |         trainable_param_ = optimizer.trainable_param(
62 |             self.model, cfg.TRAIN.TRAINABLE_SCOPE
63 |         )
64 |         self.optimizer = optimizer.configure_optimizer(
65 |             trainable_param_, cfg.TRAIN.OPTIMIZER
66 |         )
67 |
68 |         # to apex version
69 |         self.model, self.optimizer = amp.initialize(
70 |             self.model, self.optimizer, opt_level="O1", loss_scale=128.0
71 |         )  # keep_batchnorm_fp32 = True,
72 |
73 |         # add scheduler
74 |         self.lr_scheduler = optimizer.configure_lr_scheduler(
75 |             self.optimizer, cfg.TRAIN.LR_SCHEDULER
76 |         )
77 |         self.max_epochs = cfg.TRAIN.MAX_EPOCHS
78 |
79 |         # metric
80 |         self.cls_criterion = getattr(criterion, self.cfg.MATCHER.CLASSIFY_LOSS)(
81 |             alpha=self.cfg.MATCHER.FOCAL_ALPHA,
82 |             gamma=self.cfg.MATCHER.FOCAL_GAMMA,
83 |             negpos_ratio=self.cfg.MATCHER.NEGPOS_RATIO,
84 |         )
85 |         self.loc_criterion = getattr(criterion, self.cfg.MATCHER.LOCATE_LOSS)()
86 |
87 |         # Set the logger
88 |         self.writer = (
89 |             SummaryWriter(log_dir=cfg.LOG_DIR) if self.local_rank == 0 else None
90 |         )
91 |
92 |     def train_model(self):
93 |         modelWithLoss = ModelWithLossBasic(
94 |             self.model,
95 |             self.cls_criterion,
96 |             self.loc_criterion,
97 |             self.cfg.MODEL.NUM_CLASSES,
98 |             self.cfg.MATCHER.MATCH_THRESHOLD,
99 |             self.cfg.MATCHER.CENTER_SAMPLING_RADIUS,
100 |         )
101 |
102 |         if torch.cuda.is_available():
103 |             print("Utilize GPUs for computation")
104 |             print("Number of GPUs available:", torch.cuda.device_count())
105 |             if self.cfg.DEVICE_ID:
106 |                 modelWithLoss = DDP(
107 |                     modelWithLoss, delay_allreduce=True
108 |                 )  # , device_ids=self.cfg.DEVICE_ID)
109 |             cudnn.benchmark = True
110 |
111 |         # Load data
112 |         if self.local_rank == 0:
113 |             print("===> Loading data")
114 |         train_loader = load_data(self.cfg.DATASET, "train")
115 |
116 |         # multi scale training
117 |         if len(self.cfg.DATASET.MULTISCALE) > 1:
118 |             batch_size, target_size = self.cfg.DATASET.MULTISCALE[
119 |                 self.start_epoch % len(self.cfg.DATASET.MULTISCALE)
120 |             ]
121 |             train_loader.reset_size(batch_size, target_size)
122 |
123 |         for epoch in iter(range(self.start_epoch + 1, self.max_epochs + 1)):
124 |             if self.local_rank == 0:
125 |                 sys.stdout.write(
126 |                     "\rEpoch {epoch:d}/{max_epochs:d}:\n".format(
127 |                         epoch=epoch, max_epochs=self.max_epochs
128 |                     )
129 |                 )
130 |             torch.cuda.empty_cache()
131 |             # start phases for epoch
132 |             anchors = model_builder.create_anchors(
133 |                 self.cfg.MODEL,
134 |                 modelWithLoss.module.model,
135 |                 self.cfg.MODEL.IMAGE_SIZE,
136 |                 self.render,
137 |             )
138 |             train_anchor_based_epoch(
139 |                 modelWithLoss,
140 |                 train_loader,
141 |                 self.optimizer,
142 |                 anchors,
143 |                 self.writer,
144 |                 epoch,
145 |                 self.device,
146 |                 self.local_rank,
147 |             )
148 |             # save checkpoint
149 |             if epoch % self.cfg.TRAIN.CHECKPOINTS_EPOCHS == 0 and self.local_rank == 0:
150 |                 checkpoint.save_checkpoints(
151 |                     modelWithLoss.module.model,
152 |                     self.cfg.EXP_DIR,
153 |                     self.cfg.CHECKPOINTS_PREFIX,
154 |                     epoch,
155 |                 )
156 |
157 |             # multi scale training
158 |             if len(self.cfg.DATASET.MULTISCALE) > 1:
159 |                 batch_size, target_size = self.cfg.DATASET.MULTISCALE[
160 |                     epoch % len(self.cfg.DATASET.MULTISCALE)
161 |                 ]
162 |                 train_loader.reset_size(batch_size, target_size)
163 |
164 |             if "eval" in self.cfg.PHASE:
165 |                 pass
166 |
167 |             self.lr_scheduler.step()
168 |
169 |     def load_model(self):
170 |         previous = checkpoint.find_previous_checkpoint(self.cfg.EXP_DIR)
171 |         if previous:
172 |             self.start_epoch = previous[0][-1]
173 |             self.model = checkpoint.resume_checkpoint(
174 |                 self.model, previous[1][-1], self.cfg.TRAIN.RESUME_SCOPE
175 |             )
176 |         else:
177 |             self.start_epoch = 0
178 |             if self.cfg.RESUME_CHECKPOINT:
179 |                 if self.local_rank == 0:
180 |                     print(
181 |                         "Loading initial model weights from {:s}".format(
182 |                             self.cfg.RESUME_CHECKPOINT
183 |                         )
184 |                     )
185 |                 self.model = checkpoint.resume_checkpoint(
186 |                     self.model, self.cfg.RESUME_CHECKPOINT, self.cfg.TRAIN.RESUME_SCOPE
187 |                 )
188 |
189 |     def eval_model(self):
190 |         return
191 |
192 |
193 | if __name__ == "__main__":
194 |     """
195 |     Parse input arguments
196 |     """
197 |     parser = argparse.ArgumentParser(description="Train/Eval a ssds.pytorch network")
198 |     parser.add_argument(
199 |         "-cfg",
200 |         "--config",
201 |         dest="config_file",
202 |         help="optional config file",
203 |         default=None,
204 |         type=str,
205 |     )
206 |     parser.add_argument("--local_rank", type=int, default=0)
207 |     parser.add_argument("-e", "--eval", action="store_true")
208 |     parser.add_argument("-r", "--render", action="store_true")
209 |
210 |     if len(sys.argv) == 1:
211 |         parser.print_help()
212 |         sys.exit(1)
213 |     args = parser.parse_args()
214 |
215 |     torch.cuda.set_device(args.local_rank)
216 |     torch.distributed.init_process_group(backend="nccl")  # , init_method='env://')
217 |
218 |     cfg = config.cfg_from_file(args.config_file)
219 |     solver = Solver(cfg, args.local_rank, args.render)
220 |     solver.train_model()
221 |
--------------------------------------------------------------------------------
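The apex-based trainer above expects one process per GPU, with --local_rank injected by the launcher before init_process_group is called. A hedged launch sketch for a single node with two GPUs (the config path is a placeholder):

python -m torch.distributed.launch --nproc_per_node=2 ssds/utils/train_ddp.py -cfg experiments/cfgs/tests/test.yml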
/ssds/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | from tqdm import tqdm
5 |
6 | import torch
7 | from torch.utils.tensorboard import SummaryWriter
8 |
9 | from ssds.core import checkpoint, config
10 | from ssds.modeling import model_builder
11 | from ssds.dataset.dataset_factory import load_data
12 | import ssds.core.visualize_funcs as vsf
13 | import ssds.core.tools as tools
14 |
15 | if __name__ == "__main__":
16 |     parser = argparse.ArgumentParser(description="visualize a ssds.pytorch network")
17 |     parser.add_argument(
18 |         "-cfg",
19 |         "--config",
20 |         dest="config_file",
21 |         help="optional config file",
22 |         default=None,
23 |         type=str,
24 |     )
25 |     parser.add_argument(
26 |         "-a",
27 |         "--anchor-strategy",
28 |         help="analyze the anchor strategy on the validation dataset",
29 |         action="store_true",
30 |     )
31 |
32 |     if len(sys.argv) == 1:
33 |         parser.print_help()
34 |         sys.exit(1)
35 |     args = parser.parse_args()
36 |
37 |     cfg = config.cfg_from_file(args.config_file)
38 |
39 |     # Build model
40 |     print("===> Building model")
41 |     model = model_builder.create_model(cfg.MODEL)
42 |     print("Model architectures:\n{}\n".format(model))
43 |     anchors = model_builder.create_anchors(cfg.MODEL, model, cfg.MODEL.IMAGE_SIZE, True)
44 |     decoder = model_builder.create_decoder(cfg.POST_PROCESS)
45 |
46 |     print("Log details to {}".format(cfg.LOG_DIR))
47 |     writer = SummaryWriter(log_dir=cfg.LOG_DIR)
48 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
49 |
50 |     if cfg.RESUME_CHECKPOINT:
51 |         print("Loading initial model weights from {:s}".format(cfg.RESUME_CHECKPOINT))
52 |         checkpoint.resume_checkpoint(model, cfg.RESUME_CHECKPOINT, "")
53 |
54 |     model.eval().to(device)
55 |     data_loader = load_data(cfg.DATASET, "train")
56 |
57 |     images, targets = next(iter(data_loader))
58 |     if images.device != device:
59 |         images = images.to(device)
60 |     loc, conf = model(images)
61 |     detections = decoder(loc, conf, anchors)
62 |
63 |     # visualize anchor
64 |     if len(cfg.DATASET.MULTISCALE) > 1:
65 |         # multi scale training
66 |         for i in range(len(cfg.DATASET.MULTISCALE)):
67 |             batch_size, target_size = cfg.DATASET.MULTISCALE[i]
68 |             data_loader.reset_size(batch_size, target_size)
69 |             images, targets = next(iter(data_loader))
70 |             vsf.add_defaultAnchors(writer, images[0], anchors, epoch=i)
71 |     else:
72 |         vsf.add_defaultAnchors(writer, images[0], anchors, epoch=0)
73 |
74 |     for j, (stride, anchor) in enumerate(anchors.items()):
75 |         size = conf[j].shape[-2:]
76 |         from ssds.modeling.layers.box import extract_targets
77 |
78 |         _, _, depth = extract_targets(
79 |             targets,
80 |             anchors,
81 |             cfg.MODEL.NUM_CLASSES,
82 |             stride,
83 |             size,
84 |             cfg.MATCHER.MATCH_THRESHOLD,
85 |             cfg.MATCHER.CENTER_SAMPLING_RADIUS,
86 |         )
87 |         for i in range(images.shape[0]):
88 |             vsf.add_matchedAnchorsWithBox(
89 |                 writer, images[i], anchor, stride, depth[i], epoch=i
90 |             )
91 |
92 |     # visualize box
93 |     targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4]  # from xywh to ltrb
94 |     boxes = torch.cat((detections[1], detections[0][..., None]), dim=2)
95 |     vsf.add_imagesWithMatchedBoxes(
96 |         writer, "Images", images[:5], boxes[:5], targets[:5], epoch=0
97 |     )
98 |
99 |     if args.anchor_strategy:
100 |         data_loader = load_data(cfg.DATASET, "eval")
101 |         title = "Load Data"
102 |         progress = tqdm(
103 |             tools.IteratorTimer(data_loader),
104 |             total=len(data_loader),
105 |             smoothing=0.9,
106 |             miniters=1,
107 |             leave=True,
108 |             desc=title,
109 |         )
110 |         all_targets = []
111 |         for images, targets in progress:
112 |             targets = targets.view(-1, 5)
113 |             targets = targets[targets[:, 4] != -1]
114 |             all_targets.append(targets)
115 |         all_targets = torch.cat(all_targets, dim=0)
116 |         vsf.add_anchorStrategy(writer, all_targets)
117 |
118 |     # visualize graph
119 |     writer.add_graph(model, images)
120 |     writer.close()
121 |
--------------------------------------------------------------------------------
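A hedged invocation sketch for the visualization script above (the config path is a placeholder). It writes the default anchors, the matched anchors, the decoded boxes, and the model graph to TensorBoard under cfg.LOG_DIR; the optional -a flag additionally aggregates every ground-truth box in the eval split for the anchor-strategy summary:

python -m ssds.utils.visualize -cfg experiments/cfgs/tests/test.yml -a
tensorboard --logdir <LOG_DIR from the config>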