├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── demo.py ├── doc ├── Makefile ├── imgs │ ├── archor_box.jpg │ ├── feature_map_visualize.png │ ├── graph.jpg │ ├── pr_curve.jpg │ ├── preprocess.jpg │ └── train_process.jpg └── source │ ├── _static │ ├── css │ │ └── ssds.css │ └── img │ │ └── ssds.pytorch.png │ ├── api │ ├── ssds.core.rst │ ├── ssds.dataset.rst │ ├── ssds.modeling.layers.rst │ ├── ssds.modeling.nets.rst │ ├── ssds.modeling.rst │ ├── ssds.modeling.ssds.rst │ ├── ssds.pipeline.rst │ └── ssds.rst │ ├── conf.py │ ├── config.md │ ├── index.rst │ ├── install.md │ └── usage.md ├── experiments ├── cfgs │ └── tests │ │ └── test.yml └── person.jpg ├── requirements.txt ├── setup.py └── ssds ├── __init__.py ├── core ├── __init__.py ├── checkpoint.py ├── config.py ├── criterion.py ├── data_parallel.py ├── evaluation_metrics.py ├── optimizer.py ├── tools.py └── visualize_funcs.py ├── dataset ├── __init__.py ├── coco.py ├── dali_coco.py ├── dali_dataiterator.py ├── dali_tfrecord.py ├── dataset_factory.py ├── detection_dataset.py └── transforms.py ├── modeling ├── __init__.py ├── layers │ ├── __init__.py │ ├── basic_layers.py │ ├── box.py │ ├── decoder.py │ ├── layers_parser.py │ └── rfb_layers.py ├── model_builder.py ├── nets │ ├── __init__.py │ ├── darknet.py │ ├── densenet.py │ ├── efficientnet.py │ ├── effnet.py │ ├── inception_v2.py │ ├── mobilenet.py │ ├── regnet.py │ ├── resnet.py │ ├── rutils.py │ └── shufflenet.py └── ssds │ ├── __init__.py │ ├── bifpn.py │ ├── fcos.py │ ├── fpn.py │ ├── fssd.py │ ├── shelf.py │ ├── ssd.py │ ├── ssdsbase.py │ └── yolo.py ├── pipeline ├── __init__.py ├── pipeline_anchor_apex.py └── pipeline_anchor_basic.py ├── ssds.py └── utils ├── __init__.py ├── export.py ├── train.py ├── train_ddp.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | weights/ 3 | data/ 4 | data 5 | experiments/models/ 6 | run.sh 7 | __pycache__ 8 | *.pyc 9 | log* 10 | ssds.egg-info 11 | doc/build 12 | doc/node_modules 13 | doc/package-lock.json 14 | dist 15 | build/lib/ssds -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:20.06-py3 2 | 3 | RUN pip install opencv-python \ 4 | pynvml \ 5 | git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI 6 | 7 | COPY . ssds.pytorch/ 8 | RUN pip install --no-cache-dir -e ssds.pytorch/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Irene 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | 
5 | # ssds.pytorch
6 | Repository for Single Shot MultiBox Detector and its variants, implemented with PyTorch and Python 3. This repo is easy to set up and ships with plenty of visualization methods. We hope it helps people gain a better understanding of SSD-like models and makes training and deploying ssds models easy.
7 | 
8 | Currently, it contains these features:
9 | - **Multiple SSD Variants**: ssd, fpn, bifpn, yolo, etc.
10 | - **Multiple Base Networks**: resnet, regnet, mobilenet, etc.
11 | - **Visualization** of the features of the ssd-like models, to help the user understand the model design and performance.
12 | - **Fast Training and Inference**: utilizes NVIDIA Apex and DALI to speed up training, and supports converting the model to ONNX or TensorRT for deployment.
13 | 
14 | This repo builds on the work of [ODTK](https://github.com/NVIDIA/retinanet-examples), [Detectron](https://github.com/facebookresearch/Detectron) and the [Tensorflow Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection). Thanks for their work.
15 | 
16 | **Notice** The pretrained models for the current version are not finished yet; please check the [previous version](https://github.com/ShuangXieIrene/ssds.pytorch/tree/v0.3.1) for a richer set of pretrained models.
17 | 
18 | ### Table of Contents
19 | - Installation
20 | - Usage
21 | - Performance and Model Zoo
22 | - Visualization
23 | - [Documentation](https://foreveryounggithub.github.io/ssds.doc)
24 | 
25 | ## Installation
26 | ### requirements
27 | * python>=3.7
28 | * CUDA>=10.0
29 | * pytorch>=1.4
30 | ### basic installation:
31 | ```bash
32 | conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
33 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
34 | cd ssds.pytorch
35 | python setup.py clean -a install
36 | ```
37 | #### extra python libs for parallel training
38 | Currently, NVIDIA DALI and Apex are not included in requirements.txt and need to be installed manually.
39 | 
40 | * [DALI](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/installation.html)
41 | ```bash
42 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 nvidia-dali
43 | ```
44 | * [apex](https://github.com/NVIDIA/apex#linux)
45 | ```bash
46 | git clone https://github.com/NVIDIA/apex
47 | cd apex
48 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
49 | ```
50 | 
51 | ## Docker
52 | ```bash
53 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
54 | docker build -t ssds:local ./ssds.pytorch/
55 | docker run --gpus all -it --rm -v /data:/data ssds:local
56 | ```
57 | 
58 | ## Usage
59 | ### 0. Check the config file by visualization
60 | Define the network in a [config file](experiments/cfgs/tests/test.yml) and tweak the config file based on the visualized anchor boxes:
61 | ```bash
62 | python -m ssds.utils.visualize -cfg experiments/cfgs/tests/test.yml
63 | ```
64 | 
65 | ### 1. Training
66 | ```bash
67 | # basic training
68 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml
69 | # parallel training
70 | python -m torch.distributed.launch --nproc_per_node={num_gpus} -m ssds.utils.train_ddp -cfg experiments/cfgs/tests/test.yml
71 | ```
72 | 
73 | ### 2. Evaluation
74 | ```bash
75 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml -e
76 | ```
77 | 
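For scripted inference, a minimal sketch based on `demo.py` (the constructor arguments mirror `demo.py`; the config file is assumed to point at a trained checkpoint):
```python
import cv2
from ssds.ssds import SSDDetector

# build the detector; the second argument is the `shift` flag used by demo.py
model = SSDDetector("experiments/cfgs/tests/test.yml", False)

# the detector expects images resized to the model input size
image = cv2.imread("experiments/person.jpg")
image = cv2.resize(image, model.image_size)

# per-detection scores, boxes (x1, y1, x2, y2) and class ids
scores, boxes, classes = model(image)
```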
78 | ### 3. Export to ONNX or TRT model
79 | ```bash
80 | python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c best_mAP.pth -h
81 | ```
82 | 
83 | ## Performance
84 | 
85 | 
86 | ## Visualization
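To visualize detections from a trained model on a single image or video, the bundled demo script can be used (flags per `demo.py`'s argparse; the config is assumed to point at a trained checkpoint):
```bash
# image demo; add -d to display the result instead of writing <name>_result.jpg
python demo.py -cfg experiments/cfgs/tests/test.yml -i experiments/person.jpg -t image
```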
--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | import random
5 | import cv2
6 | from tqdm import tqdm
7 | from ssds.ssds import SSDDetector
8 | 
9 | COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
10 | FONT = cv2.FONT_HERSHEY_SIMPLEX
11 | 
12 | def plot_one_box(img, x, color=None, label=None, line_thickness=None):
13 |     # Plots one bounding box on image img
14 |     tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
15 |     color = color or [random.randint(0, 255) for _ in range(3)]
16 |     c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
17 |     cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
18 |     if label:
19 |         tf = max(tl - 1, 1)  # font thickness
20 |         t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
21 |         c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
22 |         cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
23 |         cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
24 | 
25 | def demo_image(model, image_path, display):
26 |     # 1. prepare image
27 |     image = cv2.imread(image_path)
28 |     image = cv2.resize(image, model.image_size)
29 | 
30 |     # 2. model infer
31 |     scores, boxes, classes = model(image)
32 | 
33 |     # 3. draw bounding boxes on the image
34 |     for score, box, labels in zip(scores, boxes, classes):
35 |         plot_one_box(image, box, COLORS[labels % 3], '{label}: {score:.3f}'.format(label=labels, score=score))
36 | 
37 |     # 4. visualize result
38 |     if display:
39 |         cv2.imshow('result', image)
40 |         cv2.waitKey(0)
41 |     else:
42 |         path, _ = os.path.splitext(image_path)
43 |         cv2.imwrite(path + '_result.jpg', image)
44 |         print("output file saved at '{}'".format(path + '_result.jpg'))
45 | 
46 | def demo_video(model, video_path, display):
47 |     # 0. prepare video
48 |     cap = cv2.VideoCapture(video_path)
49 |     frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
50 |     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
51 |     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
52 |     if cap.isOpened() and (not display):
53 |         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
54 |         fps = int(cap.get(cv2.CAP_PROP_FPS))
55 |         writer = cv2.VideoWriter(video_path+"_output.mp4", fourcc, fps, (width,height))
56 | 
57 |     for fid in tqdm(range(frames)):
58 |         # 1. prepare image
59 |         flag, image = cap.read()
60 |         image = cv2.resize(image, model.image_size)
61 | 
62 |         # 2. model infer
63 |         scores, boxes, classes = model(image)
64 | 
65 |         # 3. draw bounding boxes on the image
66 |         for score, box, labels in zip(scores, boxes, classes):
67 |             plot_one_box(image, box, COLORS[labels % 3], '{label}: {score:.3f}'.format(label=labels, score=score))
68 | 
69 |         image = cv2.resize(image, (width,height))
70 | 
71 |         # 4. visualize result
72 |         if display:
73 |             cv2.imshow("Image", image)
74 |             if cv2.waitKey(1) & 0xFF == ord('q'):
75 |                 break
76 |         else:
77 |             writer.write(image)
78 | 
79 |     # 5. release the video resources
80 |     cap.release()
81 |     if display:
82 |         cv2.destroyAllWindows()
83 |     else:
84 |         writer.release()
85 | 
86 | 
87 | if __name__ == '__main__':
88 |     parser = argparse.ArgumentParser(description='Demo a ssds.pytorch network')
89 |     parser.add_argument('-cfg', '--config-file',
90 |                         help='path to the config file', default=None, type=str, required=True)
91 |     parser.add_argument('-i', '--demo-file',
92 |                         help='path to the demo file', default=None, type=str, required=True)
93 |     parser.add_argument('-t', '--type',
94 |                         default='image', choices=['image', 'video'])
95 |     parser.add_argument('-d', '--display',
96 |                         help='whether to display the detection result', action="store_true")
97 |     parser.add_argument('-s', '--shift', action="store_true")
98 | 
99 |     if len(sys.argv) == 1:
100 |         parser.print_help()
101 |         sys.exit(1)
102 |     args = parser.parse_args()
103 | 
104 |     model = SSDDetector(args.config_file, args.shift)
105 |     getattr(sys.modules[__name__], "demo_"+args.type)(model, args.demo_file, args.display)
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 | 
4 | # You can set these variables from the command line.
5 | SPHINXOPTS ?= -j auto
6 | SPHINXBUILD ?= sphinx-build
7 | SPHINXPROJ ?= ssds.pytorch
8 | SOURCEDIR ?= source
9 | BUILDDIR ?= build
10 | PYCMD ?= python
11 | 
12 | # Put it first so that "make" without argument is like "make help".
13 | help:
14 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
15 | 
16 | # figures:
17 | # 	@$(PYCMD) source/scripts/build_activation_images.py
18 | 
19 | docset: html
20 | 	doc2dash --name $(SPHINXPROJ) --enable-js --force $(BUILDDIR)/html/
21 | 
22 | 	# Manually fix because Zeal doesn't deal well with `icon.png`-only at 2x resolution.
23 | 	cp $(SPHINXPROJ).docset/icon.png $(SPHINXPROJ).docset/icon@2x.png
24 | 	convert $(SPHINXPROJ).docset/icon@2x.png -resize 16x16 $(SPHINXPROJ).docset/icon.png
25 | 
26 | html-stable:
27 | 	# stable differs from `make html` in two ways:
28 | 	# 1) The stable logo is used instead of the unstable logo
29 | 	# 2) There will not be a link to the stable docs.
30 | 	# See conf.py for more details.
31 | 	RELEASE=1 make html
32 | 
33 | .PHONY: help Makefile docset
34 | 
35 | # Catch-all target: route all unknown targets to Sphinx using the new
36 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
37 | %: Makefile
38 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
39 | 
40 | clean:
41 | 	@echo "Removing everything under 'build'.."
42 | @rm -rf $(BUILDDIR)/html/ $(BUILDDIR)/doctrees -------------------------------------------------------------------------------- /doc/imgs/archor_box.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/archor_box.jpg -------------------------------------------------------------------------------- /doc/imgs/feature_map_visualize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/feature_map_visualize.png -------------------------------------------------------------------------------- /doc/imgs/graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/graph.jpg -------------------------------------------------------------------------------- /doc/imgs/pr_curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/pr_curve.jpg -------------------------------------------------------------------------------- /doc/imgs/preprocess.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/preprocess.jpg -------------------------------------------------------------------------------- /doc/imgs/train_process.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/imgs/train_process.jpg -------------------------------------------------------------------------------- /doc/source/_static/css/ssds.css: -------------------------------------------------------------------------------- 1 | .rst-content .hidden-section { 2 | display: none; 3 | } 4 | 5 | .rst-toc .hidden-section { 6 | display: none; 7 | } 8 | 9 | nav .hidden-section { 10 | display: inherit; 11 | } 12 | 13 | .wy-side-nav-search { 14 | background-color: #fff; 15 | color: #333; 16 | } 17 | 18 | .version{ 19 | color: #404040 !important; 20 | } -------------------------------------------------------------------------------- /doc/source/_static/img/ssds.pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/doc/source/_static/img/ssds.pytorch.png -------------------------------------------------------------------------------- /doc/source/api/ssds.core.rst: -------------------------------------------------------------------------------- 1 | ssds.core 2 | ================= 3 | 4 | ssds.core.checkpoint 5 | --------------------------- 6 | 7 | .. automodule:: ssds.core.checkpoint 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.core.config 13 | ----------------------- 14 | 15 | .. automodule:: ssds.core.config 16 | :members: cfg_from_file 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ssds.core.criterion 21 | -------------------------- 22 | 23 | .. 
automodule:: ssds.core.criterion 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | :member-order: bysource 28 | 29 | ssds.core.data\_parallel 30 | ------------------------------- 31 | 32 | .. autoclass:: ssds.core.data_parallel.BalancedDataParallel 33 | 34 | ssds.core.evaluation\_metrics 35 | ------------------------------------ 36 | 37 | .. autoclass:: ssds.core.evaluation_metrics.MeanAveragePrecision 38 | :members: __call__, get_results 39 | :undoc-members: 40 | :show-inheritance: 41 | 42 | ssds.core.optimizer 43 | -------------------------- 44 | 45 | .. automodule:: ssds.core.optimizer 46 | :members: 47 | :undoc-members: 48 | :show-inheritance: 49 | 50 | ssds.core.visualize\_funcs 51 | --------------------------------- 52 | 53 | .. automodule:: ssds.core.visualize_funcs 54 | :members: 55 | :undoc-members: 56 | :show-inheritance: 57 | -------------------------------------------------------------------------------- /doc/source/api/ssds.dataset.rst: -------------------------------------------------------------------------------- 1 | ssds.dataset 2 | ==================== 3 | 4 | ssds.dataset.dataset\_factory 5 | ------------------------------------ 6 | 7 | .. automodule:: ssds.dataset.dataset_factory 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.dataset.detection\_dataset 13 | -------------------------------------- 14 | 15 | .. autoclass:: ssds.dataset.detection_dataset.DetectionDataset 16 | :members: __getitem__ 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ssds.dataset.coco 21 | ------------------------ 22 | 23 | .. automodule:: ssds.dataset.coco 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | ssds.dataset.dali\_dataiterator 29 | -------------------------------------- 30 | 31 | .. automodule:: ssds.dataset.dali_dataiterator 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | ssds.dataset.dali\_coco 37 | ------------------------------ 38 | 39 | .. automodule:: ssds.dataset.dali_coco 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | ssds.dataset.dali\_tfrecord 45 | ---------------------------------- 46 | 47 | .. automodule:: ssds.dataset.dali_tfrecord 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | ssds.dataset.transforms 53 | ------------------------------ 54 | 55 | .. automodule:: ssds.dataset.transforms 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.layers.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling.layers 2 | ============================ 3 | 4 | ssds.modeling.layers.box 5 | ------------------------------- 6 | 7 | .. automodule:: ssds.modeling.layers.box 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.modeling.layers.decoder 13 | ----------------------------------- 14 | 15 | .. autoclass:: ssds.modeling.layers.decoder.Decoder 16 | :members: __call__ 17 | :undoc-members: 18 | :show-inheritance: -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.nets.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling.nets 2 | ========================== 3 | 4 | ssds.modeling.nets.darknet 5 | --------------------------------- 6 | 7 | .. 
automodule:: ssds.modeling.nets.darknet 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | :member-order: bysource 12 | 13 | ssds.modeling.nets.densenet 14 | ---------------------------------- 15 | 16 | .. automodule:: ssds.modeling.nets.densenet 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | :member-order: bysource 21 | 22 | ssds.modeling.nets.efficientnet 23 | -------------------------------------- 24 | 25 | .. automodule:: ssds.modeling.nets.efficientnet 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | :member-order: bysource 30 | 31 | ssds.modeling.nets.effnet 32 | -------------------------------- 33 | 34 | .. automodule:: ssds.modeling.nets.effnet 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | :member-order: bysource 39 | 40 | ssds.modeling.nets.inception\_v2 41 | --------------------------------------- 42 | 43 | .. autoclass:: ssds.modeling.nets.inception_v2.InceptionV2 44 | 45 | ssds.modeling.nets.mobilenet 46 | ----------------------------------- 47 | 48 | .. automodule:: ssds.modeling.nets.mobilenet 49 | :members: 50 | :undoc-members: 51 | :show-inheritance: 52 | :member-order: bysource 53 | 54 | ssds.modeling.nets.regnet 55 | -------------------------------- 56 | 57 | .. automodule:: ssds.modeling.nets.regnet 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | :member-order: bysource 62 | 63 | ssds.modeling.nets.resnet 64 | -------------------------------- 65 | 66 | .. automodule:: ssds.modeling.nets.resnet 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | :member-order: bysource 71 | 72 | ssds.modeling.nets.shufflenet 73 | ------------------------------------ 74 | 75 | .. automodule:: ssds.modeling.nets.shufflenet 76 | :members: 77 | :undoc-members: 78 | :show-inheritance: 79 | :member-order: bysource -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling 2 | ===================== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | ssds.modeling.layers 8 | ssds.modeling.nets 9 | ssds.modeling.ssds 10 | 11 | ssds.modeling.model\_builder 12 | ----------------------------------- 13 | 14 | .. automodule:: ssds.modeling.model_builder 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: -------------------------------------------------------------------------------- /doc/source/api/ssds.modeling.ssds.rst: -------------------------------------------------------------------------------- 1 | ssds.modeling.ssds 2 | ========================== 3 | 4 | ssds.modeling.ssds.ssdsbase 5 | ---------------------------------- 6 | 7 | .. autoclass:: ssds.modeling.ssds.ssdsbase.SSDSBase 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.modeling.ssds.ssd 13 | ----------------------------- 14 | 15 | .. autoclass:: ssds.modeling.ssds.SSD 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | ssds.modeling.ssds.yolo 21 | ------------------------------ 22 | 23 | .. autoclass:: ssds.modeling.ssds.YOLOV3 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | .. autoclass:: ssds.modeling.ssds.YOLOV4 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | ssds.modeling.ssds.fpn 34 | ----------------------------- 35 | 36 | .. 
autoclass:: ssds.modeling.ssds.SSDFPN 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | ssds.modeling.ssds.bifpn 42 | ------------------------------- 43 | 44 | .. autoclass:: ssds.modeling.ssds.SSDBiFPN 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: -------------------------------------------------------------------------------- /doc/source/api/ssds.pipeline.rst: -------------------------------------------------------------------------------- 1 | ssds.pipeline 2 | ===================== 3 | 4 | ssds.pipeline.pipeline\_anchor\_apex 5 | ------------------------------------------- 6 | 7 | .. automodule:: ssds.pipeline.pipeline_anchor_apex 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | ssds.pipeline.pipeline\_anchor\_basic 13 | -------------------------------------------- 14 | 15 | .. automodule:: ssds.pipeline.pipeline_anchor_basic 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /doc/source/api/ssds.rst: -------------------------------------------------------------------------------- 1 | ssds.pytorch API 2 | ==================== 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | ssds.core 8 | ssds.dataset 9 | ssds.modeling 10 | ssds.pipeline 11 | 12 | ssds.ssds 13 | --------------------------- 14 | 15 | .. autoclass:: ssds.ssds.SSDDetector 16 | :members: __call__ -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | # source code directory, relative to this file, for sphinx-autobuild 17 | sys.path.insert(0, os.path.abspath('../..')) 18 | 19 | import ssds 20 | 21 | import sphinx_rtd_theme 22 | 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = 'ssds.pytorch' 27 | copyright = '2020, Shuang Xie, Yang Liu' 28 | author = 'Shuang Xie, Yang Liu' 29 | 30 | # The full version, including alpha/beta/rc tags 31 | release = 'v1.5' 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 
39 | extensions = [
40 |     'sphinx.ext.autodoc',
41 |     'sphinx.ext.autosummary',
42 |     'sphinx.ext.doctest',
43 |     'sphinx.ext.intersphinx',
44 |     'sphinx.ext.todo',
45 |     'sphinx.ext.coverage',
46 |     'sphinx.ext.napoleon',
47 |     'sphinx.ext.viewcode',
48 |     'sphinxcontrib.katex',
49 |     'sphinx.ext.githubpages',
50 |     'sphinx.ext.autosectionlabel',
51 |     'sphinx_markdown_tables',
52 |     'recommonmark',
53 | ]
54 | 
55 | # build the templated autosummary files
56 | autosummary_generate = True
57 | numpydoc_show_class_members = False
58 | 
59 | # autosectionlabel throws warnings if section names are duplicated.
60 | # The following tells autosectionlabel to not throw a warning for
61 | # duplicated section names that are in different documents.
62 | autosectionlabel_prefix_document = True
63 | 
64 | # katex options
65 | katex_prerender = True
66 | 
67 | napoleon_use_ivar = True
68 | 
69 | # Add any paths that contain templates here, relative to this directory.
70 | templates_path = ['_templates']
71 | 
72 | # List of patterns, relative to source directory, that match files and
73 | # directories to ignore when looking for source files.
74 | # This pattern also affects html_static_path and html_extra_path.
75 | exclude_patterns = []
76 | 
77 | # The suffix(es) of source filenames.
78 | # You can specify multiple suffixes as a list of strings:
79 | #
80 | source_suffix = {
81 |     '.rst': 'restructuredtext',
82 |     '.md': 'markdown',
83 | }
84 | 
85 | # The master toctree document.
86 | master_doc = 'index'
87 | 
88 | # -- Options for HTML output -------------------------------------------------
89 | 
90 | # The theme to use for HTML and HTML Help pages. See the documentation for
91 | # a list of builtin themes.
92 | #
93 | html_theme = 'sphinx_rtd_theme'
94 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
95 | 
96 | # Theme options are theme-specific and customize the look and feel of a theme
97 | # further. For a list of options available for each theme, see the
98 | # documentation.
99 | 
100 | html_theme_options = {
101 |     'collapse_navigation': False,
102 |     'display_version': True,
103 |     'logo_only': True,
104 | }
105 | 
106 | html_logo = '_static/img/ssds.pytorch.png'
107 | 
108 | # Add any paths that contain custom static files (such as style sheets) here,
109 | # relative to this directory. They are copied after the builtin static files,
110 | # so a file named "default.css" will overwrite the builtin "default.css".
111 | html_static_path = ['_static']
112 | 
113 | html_css_files = [
114 |     'css/ssds.css',
115 | ]
--------------------------------------------------------------------------------
/doc/source/config.md:
--------------------------------------------------------------------------------
1 | ## Config System
2 | 
3 | The config system for ssds.pytorch.
4 | 
5 | ### MODEL
6 | 
7 | | MODEL parameters | description |
8 | |---|---|
9 | | MODEL.NETS | type of the backbone used to extract the features |
10 | | MODEL.SSDS | type of the ssds model used to detect bounding boxes |
11 | | MODEL.IMAGE_SIZE | image size for ssd |
12 | | MODEL.NUM_CLASSES | number of classes for the model |
13 | | MODEL.FEATURE_LAYER | the feature layers used to extract the proposed bounding boxes; the first dimension is the feature layer/type, while the second dimension is the feature map channel |
14 | | MODEL.SIZES | sizes for the proposed anchor boxes (a size of 1 is included by default) |
15 | | MODEL.ASPECT_RATIOS | aspect ratios for the proposed anchor boxes (a ratio of 1 is included by default) |
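For example, the bundled test config (`experiments/cfgs/tests/test.yml`) defines its MODEL section as:
```yaml
MODEL:
  SSDS: YOLOV3
  NETS: ResNet18
  IMAGE_SIZE: [320, 320]
  NUM_CLASSES: 80
  FEATURE_LAYER: [[3, 4, 5], [128, 256, 512]]
  SIZES: [[2.0, 2.828], [2.0, 2.828], [2.0, 4.0, 8.0]]
  ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
```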
16 | 
17 | ### TRAIN
18 | 
19 | | TRAIN parameters | description |
20 | |---|---|
21 | | TRAIN.BATCH_SIZE | batch size for training |
22 | | TRAIN.TRAINABLE_SCOPE | trainable scope |
23 | | TRAIN.RESUME_SCOPE | resuming scope |
24 | | TRAIN.MAX_EPOCHS | the maximum number of epochs |
25 | | TRAIN.CHECKPOINTS_EPOCHS | the epoch interval for checkpoint saving |
26 | | TRAIN.CHECKPOINTS_KEPT | the number of checkpoints kept; older ones are deleted to save space |
27 | 
28 | #### TRAIN.OPTIMIZER
29 | 
30 | | TRAIN.OPTIMIZER parameters | description |
31 | |---|---|
32 | | TRAIN.OPTIMIZER.OPTIMIZER | type of the optimizer |
33 | | TRAIN.OPTIMIZER.LEARNING_RATE | initial learning rate |
34 | | TRAIN.OPTIMIZER.DIFFERENTIAL_LEARNING_RATE | initial differential learning rates for different layers |
35 | | TRAIN.OPTIMIZER.MOMENTUM | momentum |
36 | | TRAIN.OPTIMIZER.MOMENTUM_2 | second momentum term (beta2 for adam, alpha for rmsprop) |
37 | | TRAIN.OPTIMIZER.EPS | epsilon |
38 | | TRAIN.OPTIMIZER.WEIGHT_DECAY | weight decay, for regularization |
39 | 
40 | #### TRAIN.LR_SCHEDULER
41 | 
42 | | TRAIN.LR_SCHEDULER parameters | description |
43 | |---|---|
44 | | TRAIN.LR_SCHEDULER.SCHEDULER | type of the lr scheduler |
45 | | TRAIN.LR_SCHEDULER.STEPS | step size for reducing the learning rate |
46 | | TRAIN.LR_SCHEDULER.GAMMA | factor for reducing the learning rate |
47 | | TRAIN.LR_SCHEDULER.LR_MIN | minimum learning rate |
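A matching TRAIN section, again taken from the bundled test config:
```yaml
TRAIN:
  MAX_EPOCHS: 50
  CHECKPOINTS_EPOCHS: 1
  BATCH_SIZE: 1
  TRAINABLE_SCOPE: 'transforms,extras,loc,conf'
  OPTIMIZER:
    OPTIMIZER: adam
    LEARNING_RATE: 0.0001
    MOMENTUM: 0.9
    WEIGHT_DECAY: 0.0001
  LR_SCHEDULER:
    SCHEDULER: exponential
    GAMMA: 0.97
```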
48 | 
49 | ### TEST
50 | 
51 | | TEST parameters | description |
52 | |---|---|
53 | | TEST.BATCH_SIZE | batch size for testing |
54 | | TEST.TEST_SCOPE | the epoch range for testing |
55 | 
56 | ### POST_PROCESS
57 | 
58 | POST_PROCESS controls the parameters for ssds.modeling.layers.decoder.Decoder, which is used to decode the loc and conf feature maps
59 | into predicted boxes.
60 | 
61 | | POST_PROCESS parameters | description |
62 | |---|---|
63 | | POST_PROCESS.SCORE_THRESHOLD | the score threshold for filtering the predicted boxes; set it to 0.01 for evaluation |
64 | | POST_PROCESS.IOU_THRESHOLD | the iou threshold for filtering the predicted boxes |
65 | | POST_PROCESS.MAX_DETECTIONS | the maximum number of detection boxes in the final output of the ssds model |
66 | | POST_PROCESS.MAX_DETECTIONS_PER_LEVEL | the maximum number of detection boxes for each level of the ssds detection heads |
67 | | POST_PROCESS.USE_DIOU | whether to replace iou with diou in the nms step |
68 | | POST_PROCESS.RESCORE_CENTER | whether to rescore the boxes based on their anchor center locations |
69 | 
70 | ### DATASET
71 | 
72 | | DATASET parameters | description |
73 | |---|---|
74 | | DATASET.DATASET | type of the dataset |
75 | | DATASET.DATASET_DIR | path to the dataset folder |
76 | | DATASET.TRAIN_SETS | train set scope |
77 | | DATASET.TEST_SETS | test set scope |
78 | | DATASET.PICKLE | whether to use pickle to cache the images and annotations (only works for non-DALI datasets) |
79 | | DATASET.NUM_WORKERS | the number of data loading workers, e.g. 8 (only works for non-DALI datasets) |
80 | | DATASET.DEVICE_ID | the list of devices used to distribute the data loading (only works for apex parallel training) |
81 | | DATASET.MULTISCALE | list of image sizes used for multiscale training |
82 | 
83 | 
84 | ### DATASET.PREPROC
85 | 
86 | | DATASET.PREPROC parameters | description |
87 | |---|---|
88 | | DATASET.PREPROC.MEAN | float, the mean for normalization |
89 | | DATASET.PREPROC.STD | float, the std for normalization |
90 | | DATASET.PREPROC.CROP_SCALE | list, the lower and upper size bounds for the ssd random crop |
91 | | DATASET.PREPROC.CROP_ASPECT_RATIO | list, the lower and upper aspect-ratio bounds for the ssd random crop |
92 | | DATASET.PREPROC.CROP_ATTEMPTS | int, the number of attempts for the ssd random crop |
93 | | DATASET.PREPROC.HUE_DELTA | float, hue delta |
94 | | DATASET.PREPROC.BRI_DELTA | float, brightness delta |
95 | | DATASET.PREPROC.CONTRAST_RANGE | list, the lower and upper bounds for contrast |
96 | | DATASET.PREPROC.SATURATION_RANGE | list, the lower and upper bounds for saturation |
97 | | DATASET.PREPROC.MAX_EXPAND_RATIO | float, the max expand ratio for padding |
98 | 
99 | ### Others
100 | 
101 | | Others parameters | description |
102 | |---|---|
103 | | EXP_DIR | the directory where checkpoints and experiment outputs are saved |
104 | | LOG_DIR | the log directory |
105 | | RESUME_CHECKPOINT | the checkpoint used to resume |
106 | | PHASE | the phases to run, e.g. ['train'] |
107 | | DEVICE_ID | the list of devices used to distribute the model training |
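### Loading a config
A minimal sketch of reading one of these config files from Python, assuming `cfg_from_file` (documented in `ssds.core.config`) takes the yml path and returns the merged config:
```python
from ssds.core.config import cfg_from_file

cfg = cfg_from_file("experiments/cfgs/tests/test.yml")
print(cfg.MODEL.SSDS, cfg.MODEL.NETS, cfg.MODEL.IMAGE_SIZE)
```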
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | .. ssds.pytorch documentation master file, created by
2 |    sphinx-quickstart on Sun Jul 19 17:03:00 2020.
3 |    You can adapt this file completely to your liking, but it should at least
4 |    contain the root `toctree` directive.
5 | 
6 | ssds.pytorch documentation
7 | ========================================
8 | 
9 | .. toctree::
10 |    :maxdepth: 2
11 | 
12 |    install.md
13 |    usage.md
14 |    config.md
15 | 
16 | 
17 | API
18 | ===
19 | .. toctree::
20 |    :maxdepth: 2
21 | 
22 |    api/ssds
23 | 
24 | Indices and tables
25 | ==================
26 | 
27 | * :ref:`genindex`
28 | * :ref:`search`
--------------------------------------------------------------------------------
/doc/source/install.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | 
3 | ### Python Installation
4 | 
5 | #### requirements
6 | * python>=3.7
7 | * CUDA>=10.0
8 | * pytorch>=1.4
9 | #### basic installation:
10 | ```bash
11 | conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
12 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
13 | cd ssds.pytorch
14 | python setup.py clean -a install
15 | ```
16 | #### extra python libs for parallel training
17 | Currently, NVIDIA DALI and Apex are not included in requirements.txt and need to be installed manually.
18 | 
19 | * [DALI](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/installation.html)
20 | ```bash
21 | pip install --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 nvidia-dali
22 | ```
23 | * [apex](https://github.com/NVIDIA/apex#linux)
24 | ```bash
25 | git clone https://github.com/NVIDIA/apex
26 | cd apex
27 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
28 | ```
29 | 
30 | ### Docker
31 | ```bash
32 | git clone https://github.com/ShuangXieIrene/ssds.pytorch.git
33 | docker build -t ssds:local ./ssds.pytorch/
34 | docker run --gpus all -it --rm -v /data:/data ssds:local
35 | ```
--------------------------------------------------------------------------------
/doc/source/usage.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 | ### 0. Check the config file by visualization
3 | Define the network in a [config file](../experiments/cfgs/tests/test.yml) and tweak the config file based on the visualized anchor boxes:
4 | ```bash
5 | python -m ssds.utils.visualize -cfg experiments/cfgs/tests/test.yml
6 | ```
7 | 
8 | ### 1. Training
9 | ```bash
10 | # basic training
11 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml
12 | # parallel training
13 | python -m torch.distributed.launch --nproc_per_node={num_gpus} -m ssds.utils.train_ddp -cfg experiments/cfgs/tests/test.yml
14 | ```
15 | 
16 | ### 2. Evaluation
17 | ```bash
18 | python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml -e
19 | ```
20 | 
21 | ### 3.
Export to ONNX or TRT model 22 | ```bash 23 | python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c best_mAP.pth -h 24 | ``` 25 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/test.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: YOLOV3 3 | NETS: ResNet18 4 | IMAGE_SIZE: [320, 320] 5 | NUM_CLASSES: 80 6 | FEATURE_LAYER: [[3, 4, 5], [128, 256, 512]] 7 | SIZES: [[2.0, 2.828], [2.0, 2.828], [2.0, 4.0, 8.0]] 8 | ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 50 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 1 14 | TRAINABLE_SCOPE: 'transforms,extras,loc,conf' 15 | RESUME_SCOPE: '' 16 | OPTIMIZER: 17 | OPTIMIZER: adam 18 | LEARNING_RATE: 0.0001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: exponential 23 | GAMMA: 0.97 24 | WARM_UP_EPOCHS: 0 25 | 26 | TEST: 27 | BATCH_SIZE: 1 28 | TEST_SCOPE: [49, 50] 29 | 30 | MATCHER: 31 | LOCATE_LOSS: "CIOULoss" 32 | 33 | POST_PROCESS: 34 | SCORE_THRESHOLD: 0.01 35 | IOU_THRESHOLD: 0.6 36 | MAX_DETECTIONS: 100 37 | 38 | DATASET: 39 | DATASET: 'DaliCOCO' 40 | DATASET_DIR: '/data1/dataset/tiny_coco' 41 | TRAIN_SETS: ["train2017"] 42 | TEST_SETS: ["val2017"] 43 | DEVICE_ID: [0] 44 | 45 | EXP_DIR: './experiments/models/pretrain/yolo_resnet18_coco_test2' 46 | LOG_DIR: './experiments/models/pretrain/yolo_resnet18_coco_test2' 47 | # RESUME_CHECKPOINT: './experiments/weights/pretrain/SSD_ResNet18_300x300_COCO_20.4.pth' 48 | PHASE: ['train'] 49 | DEVICE_ID: [0] -------------------------------------------------------------------------------- /experiments/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/experiments/person.jpg -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy 3 | pyyaml 4 | pynvml 5 | cython 6 | opencv-python 7 | matplotlib 8 | tensorboard 9 | git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | with open('./requirements.txt') as f: 5 | required_packages = f.read().splitlines() 6 | dependency_links = [required_packages.pop()[4:]] 7 | 8 | setup(name='ssds', 9 | version='1.5', 10 | description='Single Shot Detector and its variants', 11 | install_requires=required_packages, 12 | dependency_links=dependency_links, 13 | python_requires='>=3.6', 14 | packages=find_packages() 15 | ) -------------------------------------------------------------------------------- /ssds/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/__init__.py -------------------------------------------------------------------------------- /ssds/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/core/__init__.py 
--------------------------------------------------------------------------------
/ssds/core/checkpoint.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | import os
4 | from collections import OrderedDict
5 | 
6 | 
7 | def model_to_cpu(model_state):
8 |     r"""Make sure the model state is loaded into CPU memory, so that the loaded model does not occupy GPU memory.
9 | 
10 |     :meta private:
11 |     """
12 |     new_state = OrderedDict()
13 |     for k, v in model_state.items():
14 |         new_state[k] = v.cpu()
15 |     return new_state
16 | 
17 | 
18 | def save_checkpoints(model, output_dir, checkpoint_prefix, epochs):
19 |     r"""Save the model parameters to a pth file.
20 | 
21 |     Args:
22 |         model: the ssds model
23 |         output_dir (str): the folder for model saving, usually defined by cfg.EXP_DIR
24 |         checkpoint_prefix (str): the prefix for the checkpoint, usually the combination of the ssds model and the dataset
25 |         epochs (int): the epoch of the current training
26 |     """
27 |     if not os.path.exists(output_dir):
28 |         os.makedirs(output_dir)
29 | 
30 |     filename = checkpoint_prefix + "_epoch_{:d}".format(epochs) + ".pth"
31 |     filename = os.path.join(output_dir, filename)
32 |     torch.save(model_to_cpu(model.state_dict()), filename)
33 |     with open(os.path.join(output_dir, "checkpoint_list.txt"), "a") as f:
34 |         f.write("epoch {epoch:d}: {filename}\n".format(epoch=epochs, filename=filename))
35 |     print("Wrote snapshot to: {:s}".format(filename))
36 | 
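# Usage sketch (illustrative names, not from the repo):
#     save_checkpoints(net, cfg.EXP_DIR, "yolo_resnet18_coco", epoch)
# writes <EXP_DIR>/yolo_resnet18_coco_epoch_<epoch>.pth and appends the entry to
# checkpoint_list.txt, which find_previous_checkpoint() below reads to resume the latest run.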
37 | 
38 | def find_previous_checkpoint(output_dir):
39 |     r"""Return the most recent checkpoint in the checkpoint_list.txt
40 | 
41 |     checkpoint_list.txt is usually saved at cfg.EXP_DIR
42 | 
43 |     Args:
44 |         output_dir (str): the folder that contains the previous checkpoints and checkpoint_list.txt
45 |     """
46 |     if not os.path.exists(os.path.join(output_dir, "checkpoint_list.txt")):
47 |         return False
48 |     with open(os.path.join(output_dir, "checkpoint_list.txt"), "r") as f:
49 |         lineList = f.readlines()
50 |     epoches, resume_checkpoints = [list() for _ in range(2)]
51 |     for line in lineList:
52 |         epoch = int(line[line.find("epoch ") + len("epoch ") : line.find(":")])
53 |         checkpoint = line[line.find(":") + 2 : -1]
54 |         epoches.append(epoch)
55 |         resume_checkpoints.append(checkpoint)
56 |     return epoches, resume_checkpoints
57 | 
58 | 
59 | def resume_checkpoint(model, resume_checkpoint, resume_scope=""):
60 |     r"""Resume the checkpoint parameters to the given ssds model based on the resume_scope.
61 | 
62 |     The resume_scope is defined by cfg.TRAIN.RESUME_SCOPE.
63 | 
64 |     When:
65 | 
66 |     * cfg.TRAIN.RESUME_SCOPE = ""
67 |         All the parameters in the resume_checkpoint are resumed to the model
68 |     * cfg.TRAIN.RESUME_SCOPE = "a,b,c"
69 |         Only the parameters in a, b and c are resumed to the model
70 | 
71 |     Args:
72 |         model: the ssds model
73 |         resume_checkpoint (str): the file address of the checkpoint which contains the resumed parameters
74 |         resume_scope: the scope of the resumed parameters, defined at cfg.TRAIN.RESUME_SCOPE
75 |     """
76 |     if resume_checkpoint == "" or not os.path.isfile(resume_checkpoint):
77 |         print(("=> no checkpoint found at '{}'".format(resume_checkpoint)))
78 |         return False
79 |     print(("=> loading checkpoint '{:s}'".format(resume_checkpoint)))
80 |     checkpoint = torch.load(resume_checkpoint, map_location=torch.device("cpu"))
81 |     if "state_dict" in checkpoint:
82 |         checkpoint = checkpoint["state_dict"]
83 | 
84 |     # print("=> Weights in the checkpoint:")
85 |     # print([k for k, v in list(checkpoint.items())])
86 | 
87 |     # remove the "module." prefix added by a parallel model wrapper
88 |     if "module." in list(checkpoint.items())[0][0]:
89 |         pretrained_dict = {
90 |             ".".join(k.split(".")[1:]): v for k, v in list(checkpoint.items())
91 |         }
92 |         checkpoint = pretrained_dict
93 | 
94 |     # rename weights that exist under a different name in another model
95 |     # change_dict = {
96 |     # }
97 |     # for k, v in list(checkpoint.items()):
98 |     #     for _k, _v in list(change_dict.items()):
99 |     #         if _k in k:
100 |     #             new_key = k.replace(_k, _v)
101 |     #             checkpoint[new_key] = checkpoint.pop(k)
102 | 
103 |     # remove the output layers from the checkpoint
104 |     # remove_list = {
105 |     # }
106 |     # for k in remove_list:
107 |     #     checkpoint.pop(k+'.weight', None)
108 |     #     checkpoint.pop(k+'.bias', None)
109 | 
110 |     # extract the weights based on the resume scope
111 |     if resume_scope != "":
112 |         pretrained_dict = {}
113 |         for k, v in list(checkpoint.items()):
114 |             for resume_key in resume_scope.split(","):
115 |                 if resume_key in k:
116 |                     pretrained_dict[k] = v
117 |                     break
118 |         checkpoint = pretrained_dict
119 | 
120 |     pretrained_dict = {k: v for k, v in checkpoint.items() if k in model.state_dict()}
121 |     # print("=> Resumed weights:")
122 |     # print([k for k, v in list(pretrained_dict.items())])
123 | 
124 |     checkpoint = model.state_dict()
125 |     unresume_dict = set(checkpoint) - set(pretrained_dict)
126 |     if len(unresume_dict) != 0:
127 |         print("=> Unresumed weights:")
128 |         print(unresume_dict)
129 | 
130 |     checkpoint.update(pretrained_dict)
131 | 
132 |     model.load_state_dict(checkpoint)
133 |     return model
--------------------------------------------------------------------------------
/ssds/core/data_parallel.py:
--------------------------------------------------------------------------------
1 | from torch.nn.parallel import DataParallel
2 | import torch
3 | from torch.nn.parallel._functions import Scatter
4 | from torch.nn.parallel.parallel_apply import parallel_apply
5 | 
6 | 
7 | def scatter(inputs, target_gpus, chunk_sizes, dim=0):
8 |     r"""
9 |     Slices tensors into approximately equal chunks and
10 |     distributes them across given GPUs. Duplicates
11 |     references to objects that are not tensors.
12 | """ 13 | 14 | def scatter_map(obj): 15 | if isinstance(obj, torch.Tensor): 16 | try: 17 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 18 | except: 19 | print("obj", obj.size()) 20 | print("dim", dim) 21 | print("chunk_sizes", chunk_sizes) 22 | quit() 23 | if isinstance(obj, tuple) and len(obj) > 0: 24 | return list(zip(*map(scatter_map, obj))) 25 | if isinstance(obj, list) and len(obj) > 0: 26 | return list(map(list, zip(*map(scatter_map, obj)))) 27 | if isinstance(obj, dict) and len(obj) > 0: 28 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 29 | return [obj for targets in target_gpus] 30 | 31 | # After scatter_map is called, a scatter_map cell will exist. This cell 32 | # has a reference to the actual function scatter_map, which has references 33 | # to a closure that has a reference to the scatter_map cell (because the 34 | # fn is recursive). To avoid this reference cycle, we set the function to 35 | # None, clearing the cell 36 | try: 37 | return scatter_map(inputs) 38 | finally: 39 | scatter_map = None 40 | 41 | 42 | def scatter_kwargs(inputs, kwargs, target_gpus, chunk_sizes, dim=0): 43 | r"""Scatter with support for kwargs dictionary""" 44 | inputs = scatter(inputs, target_gpus, chunk_sizes, dim) if inputs else [] 45 | kwargs = scatter(kwargs, target_gpus, chunk_sizes, dim) if kwargs else [] 46 | if len(inputs) < len(kwargs): 47 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 48 | elif len(kwargs) < len(inputs): 49 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 50 | inputs = tuple(inputs) 51 | kwargs = tuple(kwargs) 52 | return inputs, kwargs 53 | 54 | 55 | class BalancedDataParallel(DataParallel): 56 | """ This class is used to replace the original pytorch DataParallel and balance the first GPU memory usage. 
57 | 58 | The original script is from: https://github.com/kimiyoung/transformer-xl/blob/master/pytorch/utils/data_parallel.py 59 | """ 60 | def __init__(self, gpu0_bsz, *args, **kwargs): 61 | self.gpu0_bsz = gpu0_bsz 62 | super().__init__(*args, **kwargs) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | if self.gpu0_bsz == 0: 68 | device_ids = self.device_ids[1:] 69 | else: 70 | device_ids = self.device_ids 71 | inputs, kwargs = self.scatter(inputs, kwargs, device_ids) 72 | # print('len(inputs)1: ', str(len(inputs))) 73 | # print('self.device_ids[:len(inputs)]', str(self.device_ids[:len(inputs)])) 74 | if len(self.device_ids) == 1: 75 | return self.module(*inputs[0], **kwargs[0]) 76 | replicas = self.replicate(self.module, self.device_ids[: len(inputs)]) 77 | if self.gpu0_bsz == 0: 78 | replicas = replicas[1:] 79 | outputs = self.parallel_apply(replicas, device_ids, inputs, kwargs) 80 | return self.gather(outputs, self.output_device) 81 | 82 | def parallel_apply(self, replicas, device_ids, inputs, kwargs): 83 | return parallel_apply(replicas, inputs, kwargs, device_ids[: len(inputs)]) 84 | 85 | def scatter(self, inputs, kwargs, device_ids): 86 | bsz = inputs[0].size(self.dim) 87 | num_dev = len(self.device_ids) 88 | gpu0_bsz = self.gpu0_bsz 89 | bsz_unit = (bsz - gpu0_bsz) // (num_dev - 1) 90 | if gpu0_bsz < bsz_unit: 91 | chunk_sizes = [gpu0_bsz] + [bsz_unit] * (num_dev - 1) 92 | delta = bsz - sum(chunk_sizes) 93 | for i in range(delta): 94 | chunk_sizes[i + 1] += 1 95 | if gpu0_bsz == 0: 96 | chunk_sizes = chunk_sizes[1:] 97 | else: 98 | return super().scatter(inputs, kwargs, device_ids) 99 | 100 | # print('bsz: ', bsz) 101 | # print('num_dev: ', num_dev) 102 | # print('gpu0_bsz: ', gpu0_bsz) 103 | # print('bsz_unit: ', bsz_unit) 104 | # print('chunk_sizes: ', chunk_sizes) 105 | return scatter_kwargs(inputs, kwargs, device_ids, chunk_sizes, dim=self.dim) 106 | -------------------------------------------------------------------------------- /ssds/core/evaluation_metrics.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class MeanAveragePrecision(object): 6 | def __init__(self, num_classes, conf_threshold, iou_threshold): 7 | self.num_classes = num_classes 8 | self.conf_threshold = conf_threshold 9 | self.iou_threshold = iou_threshold 10 | self.score, self.detect_ismatched, self.target_ismatched = [ 11 | [[] for _ in range(self.num_classes)] for _ in range(3) 12 | ] 13 | self.npos = [0 for _ in range(self.num_classes)] 14 | 15 | def __call__(self, detections, targets): 16 | def matrix_iou(a, b): 17 | """ 18 | return iou of a and b, numpy version for data augenmentation 19 | """ 20 | lt = torch.max(a[:, None, :2], b[:, :2]) 21 | rb = torch.min(a[:, None, 2:], b[:, 2:]) 22 | 23 | area_i = torch.prod(rb - lt, dim=2) * (lt < rb).all(dim=2) 24 | area_a = torch.prod(a[:, 2:] - a[:, :2], dim=1) 25 | area_b = torch.prod(b[:, 2:] - b[:, :2], dim=1) 26 | return area_i / (area_a[:, None] + area_b - area_i) 27 | 28 | for out_score, out_box, out_class, target in zip(*detections, targets): 29 | out_class = out_class[out_score > self.conf_threshold] 30 | out_box = out_box[out_score > self.conf_threshold] 31 | out_score = out_score[out_score > self.conf_threshold] 32 | for c in range(self.num_classes): 33 | target_c = target[target[:, 4] == c] 34 | out_score_c = out_score[out_class == c] 35 | out_box_c = out_box[out_class == c] 36 | if 
len(out_score_c) == 0: 37 | self.npos[c] += len(target_c) 38 | self.target_ismatched[c] += np.zeros( 39 | len(target_c), dtype=bool 40 | ).tolist() 41 | continue 42 | if len(target_c) == 0: 43 | self.score[c] += out_score_c.cpu().tolist() 44 | self.detect_ismatched[c] += np.zeros( 45 | len(out_score_c), dtype=bool 46 | ).tolist() 47 | continue 48 | iou_c = matrix_iou(out_box_c, target_c[:, :4]) 49 | max_overlap_tids = torch.argmax(iou_c, dim=1) 50 | is_box_detected = np.zeros(len(target_c), dtype=bool) 51 | lable_c = np.zeros(len(out_score_c), dtype=bool) 52 | for i in range(len(max_overlap_tids)): 53 | tid = max_overlap_tids[i] 54 | if iou_c[i][tid] >= self.iou_threshold and not is_box_detected[tid]: 55 | is_box_detected[tid] = True 56 | lable_c[i] = True 57 | self.npos[c] += len(target_c) 58 | self.detect_ismatched[c] += lable_c.tolist() 59 | self.score[c] += out_score_c.cpu().tolist() 60 | self.target_ismatched[c] += is_box_detected.tolist() 61 | return 62 | 63 | def get_results(self): 64 | def compute_average_precision(precision, recall): 65 | """Compute Average Precision according to the definition in VOCdevkit. 66 | 67 | Precision is modified to ensure that it does not decrease as recall 68 | decrease. 69 | 70 | Args: 71 | precision: A float [N, 1] numpy array of precisions 72 | recall: A float [N, 1] numpy array of recalls 73 | 74 | Raises: 75 | ValueError: if the input is not of the correct format 76 | 77 | Returns: 78 | average_precison: The area under the precision recall curve. NaN if 79 | precision and recall are None. 80 | 81 | """ 82 | if precision is None: 83 | if recall is not None: 84 | raise ValueError("If precision is None, recall must also be None") 85 | return np.NAN 86 | 87 | if not isinstance(precision, np.ndarray) or not isinstance( 88 | recall, np.ndarray 89 | ): 90 | raise ValueError("precision and recall must be numpy array") 91 | if precision.dtype != np.float or recall.dtype != np.float: 92 | raise ValueError("input must be float numpy array.") 93 | if len(precision) != len(recall): 94 | raise ValueError("precision and recall must be of the same size.") 95 | if not precision.size: 96 | return 0.0 97 | if np.amin(precision) < 0 or np.amax(precision) > 1: 98 | raise ValueError("Precision must be in the range of [0, 1].") 99 | if np.amin(recall) < 0 or np.amax(recall) > 1: 100 | raise ValueError("recall must be in the range of [0, 1].") 101 | if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)): 102 | raise ValueError("recall must be a non-decreasing array") 103 | 104 | recall = np.concatenate([[0], recall, [1]]) 105 | precision = np.concatenate([[0], precision, [0]]) 106 | 107 | # Preprocess precision to be a non-decreasing array 108 | for i in range(len(precision) - 2, -1, -1): 109 | precision[i] = np.maximum(precision[i], precision[i + 1]) 110 | 111 | indices = np.where(recall[1:] != recall[:-1])[0] + 1 112 | average_precision = np.sum( 113 | (recall[indices] - recall[indices - 1]) * precision[indices] 114 | ) 115 | return average_precision 116 | 117 | recall, precision, ap = [], [], [] 118 | for labels_c, scores_c, npos_c in zip( 119 | self.detect_ismatched, self.score, self.npos 120 | ): 121 | # to avoid missing ground truth in that class 122 | if npos_c == 0: 123 | ap += [np.NAN] 124 | recall += [[0], [1]] 125 | precision += [[0], [0]] 126 | continue 127 | 128 | sorted_indices = np.argsort(scores_c) 129 | sorted_indices = sorted_indices[::-1] 130 | labels_c = np.array(labels_c).astype(int) 131 | true_positive_labels = 
labels_c[sorted_indices]
132 |             false_positive_labels = 1 - true_positive_labels
133 |             tp = np.cumsum(true_positive_labels)
134 |             fp = np.cumsum(false_positive_labels)
135 | 
136 |             rec = tp.astype(float) / float(npos_c)
137 |             prec = tp.astype(float) / np.maximum(tp + fp, np.finfo(np.float64).eps)
138 |             ap += [compute_average_precision(prec, rec)]
139 |             recall += [rec]
140 |             precision += [prec]
141 |         mAP = np.nanmean(ap)
142 |         return mAP, (precision, recall, ap)
143 | 
--------------------------------------------------------------------------------
/ssds/core/optimizer.py:
--------------------------------------------------------------------------------
1 | import torch.optim as optim
2 | from torch.optim import lr_scheduler
3 | 
4 | 
5 | class InvertedExponentialLR(lr_scheduler._LRScheduler):
6 |     """Exponentially anneals the learning rate from the base learning rate to ``end_lr`` over a number of
7 |     iterations.
8 |     Arguments:
9 |         optimizer (torch.optim.Optimizer): wrapped optimizer.
10 |         end_lr (float): the final learning rate.
11 |         num_iter (int): the number of iterations over which the annealing occurs.
12 |         last_epoch (int, optional): the index of last epoch. Default: -1.
13 | 
14 |     :meta private:
15 |     """
16 | 
17 |     def __init__(self, optimizer, end_lr, num_iter=100, last_epoch=-1):
18 |         self.end_lr = end_lr
19 |         self.num_iter = num_iter
20 |         super(InvertedExponentialLR, self).__init__(optimizer, last_epoch)
21 | 
22 |     def get_lr(self):
23 |         curr_iter = self.last_epoch + 1
24 |         r = curr_iter / self.num_iter
25 |         return [base_lr * (self.end_lr / base_lr) ** r for base_lr in self.base_lrs]
26 | 
27 | 
28 | def trainable_param(model, trainable_scope):
29 |     r"""Return the trainable parameters for the optimizers by :attr:`cfg.TRAIN.TRAINABLE_SCOPE`
30 | 
31 |     If a module is in the trainable scope, its parameters are trained.
32 | 
33 |     When:
34 | 
35 |     * cfg.TRAIN.TRAINABLE_SCOPE = ""
36 |         All the parameters in the model are trained
37 |     * cfg.TRAIN.TRAINABLE_SCOPE = "a,b,c.d"
38 |         Only the parameters in a, b and c.d are trained
39 |     * cfg.TRAIN.TRAINABLE_SCOPE = "a;b,c.d"
40 |         Only the parameters in a, b and c.d are trained. Module a and modules b & c.d are placed in separate groups, so they can be assigned different learning rates (differential learning rate)
41 | 
42 |     Args:
43 |         model: the ssds model for training
44 |         trainable_scope (str): the scope of the trainable parameters in the given ssds model, defined by cfg.TRAIN.TRAINABLE_SCOPE
45 |     """
46 |     trainable_param = []
47 | 
48 |     if trainable_scope == "":
49 |         for param in model.parameters():
50 |             param.requires_grad = True
51 |         trainable_param.append(model.parameters())
52 |     else:
53 |         for param in model.parameters():
54 |             param.requires_grad = False
55 | 
56 |         for train_scope in trainable_scope.split(";"):
57 |             param_temp = []
58 |             for module in train_scope.split(","):
59 |                 submodule = module.split(".")
60 |                 tmp_model = model
61 |                 for subm in submodule:
62 |                     if hasattr(tmp_model, subm):
63 |                         tmp_model = getattr(tmp_model, subm)
64 |                     else:
65 |                         raise ValueError(module + " is not in the model")
66 |                 for param in tmp_model.parameters():
67 |                     param.requires_grad = True
68 |                 param_temp.extend(tmp_model.parameters())
69 |             trainable_param.append(param_temp)
70 |     return trainable_param
71 | 
72 | 
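# Usage sketch (illustrative): with cfg.TRAIN.TRAINABLE_SCOPE = "extras;loc,conf",
#     params = trainable_param(model, "extras;loc,conf")
# returns two parameter groups -- one for `extras`, one for `loc` + `conf` -- so each
# group can be given its own entry in cfg.TRAIN.OPTIMIZER.DIFFERENTIAL_LEARNING_RATE.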
79 | Currently, there are 4 popular optimizers supported: sgd, rmsprop, adam and amsgrad. 80 | 81 | TODO: directly fetch the optimizer by getattr(optim, cfg.OPTIMIZER) and pass the relevant parameters as a dict. 82 | 83 | Args: 84 | trainable_param: the trainable parameters in the given ssds model, check :meth:`trainable_param` for more details. 85 | cfg: the config dict, which is defined in :attr:`cfg.TRAIN.OPTIMIZER`. 86 | """ 87 | 88 | 89 | if len(cfg.DIFFERENTIAL_LEARNING_RATE) == 0 or len(trainable_param) == 1: 90 | trainable_param = trainable_param[0] 91 | else: 92 | assert len(cfg.DIFFERENTIAL_LEARNING_RATE) == len(trainable_param) 93 | trainable_param = [ 94 | {"params": _param, "lr": _lr} 95 | for _param, _lr in zip(trainable_param, cfg.DIFFERENTIAL_LEARNING_RATE) 96 | ] 97 | 98 | if cfg.OPTIMIZER == "sgd": 99 | optimizer = optim.SGD( 100 | trainable_param, 101 | lr=cfg.LEARNING_RATE, 102 | momentum=cfg.MOMENTUM, 103 | weight_decay=cfg.WEIGHT_DECAY, 104 | ) 105 | elif cfg.OPTIMIZER == "rmsprop": 106 | optimizer = optim.RMSprop( 107 | trainable_param, 108 | lr=cfg.LEARNING_RATE, 109 | momentum=cfg.MOMENTUM, 110 | alpha=cfg.MOMENTUM_2, 111 | eps=cfg.EPS, 112 | weight_decay=cfg.WEIGHT_DECAY, 113 | ) 114 | elif cfg.OPTIMIZER == "adam": 115 | optimizer = optim.Adam( 116 | trainable_param, 117 | lr=cfg.LEARNING_RATE, 118 | betas=(cfg.MOMENTUM, cfg.MOMENTUM_2), 119 | weight_decay=cfg.WEIGHT_DECAY, 120 | ) 121 | elif cfg.OPTIMIZER == "amsgrad": 122 | optimizer = optim.Adam( 123 | trainable_param, 124 | lr=cfg.LEARNING_RATE, 125 | betas=(cfg.MOMENTUM, cfg.MOMENTUM_2), 126 | weight_decay=cfg.WEIGHT_DECAY, 127 | amsgrad=True, 128 | ) 129 | else: 130 | raise AssertionError("optimizer can not be recognized") 131 | return optimizer 132 | 133 | 134 | def configure_lr_scheduler(optimizer, cfg): 135 | r""" Return the learning rate scheduler for the trainable parameters 136 | 137 | Basically, it returns the learning rate scheduler defined by :attr:`cfg.TRAIN.LR_SCHEDULER.SCHEDULER`. 138 | Some parameters for the learning rate scheduler are also defined in :attr:`cfg.TRAIN.LR_SCHEDULER`. 139 | 140 | Currently, there are 5 learning rate schedulers supported: step, multi_step, exponential, inverted_exponential and sgdr. 141 | 142 | TODO: directly fetch the scheduler by getattr(lr_scheduler, cfg.SCHEDULER) and pass the relevant parameters as a dict. 143 | 144 | Args: 145 | optimizer: the optimizer in the given ssds model, check :meth:`configure_optimizer` for more details. 146 | cfg: the config dict, which is defined in :attr:`cfg.TRAIN.LR_SCHEDULER`.
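        A hedged usage sketch (the scheduler names come from this function; the milestone values and the per-epoch training call are illustrative assumptions)::

            # cfg.TRAIN.LR_SCHEDULER with SCHEDULER: multi_step, STEPS: [90, 120], GAMMA: 0.1
            scheduler = configure_lr_scheduler(optimizer, cfg.TRAIN.LR_SCHEDULER)
            for epoch in range(max_epoch):
                train_one_epoch(...)  # hypothetical per-epoch training step
                scheduler.step()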
147 | """ 148 | if cfg.SCHEDULER == "step": 149 | scheduler = lr_scheduler.StepLR( 150 | optimizer, step_size=cfg.STEPS[0], gamma=cfg.GAMMA 151 | ) 152 | elif cfg.SCHEDULER == "multi_step": 153 | scheduler = lr_scheduler.MultiStepLR( 154 | optimizer, milestones=cfg.STEPS, gamma=cfg.GAMMA 155 | ) 156 | elif cfg.SCHEDULER == "exponential": 157 | scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=cfg.GAMMA) 158 | elif cfg.SCHEDULER == "inverted_exponential": 159 | scheduler = InvertedExponentialLR(optimizer, end_lr=cfg.LR_MIN) 160 | elif cfg.SCHEDULER == "sgdr": 161 | scheduler = lr_scheduler.CosineAnnealingWarmRestarts( 162 | optimizer, T_0=2, T_mult=2, eta_min=cfg.LR_MIN 163 | ) 164 | else: 165 | raise AssertionError("scheduler can not be recognized.") 166 | return scheduler 167 | -------------------------------------------------------------------------------- /ssds/core/tools.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class IteratorTimer: 5 | def __init__(self, iterable): 6 | self.iterable = iterable 7 | self.iterator = self.iterable.__iter__() 8 | 9 | def __iter__(self): 10 | return self 11 | 12 | def __len__(self): 13 | return len(self.iterable) 14 | 15 | def __next__(self): 16 | start = time.time() 17 | n = next(self.iterator) 18 | self.last_duration = time.time() - start 19 | return n 20 | 21 | next = __next__ 22 | 23 | 24 | class AverageMeter(object): 25 | """ Computes and stores the average and current value""" 26 | 27 | def __init__(self): 28 | self.reset() 29 | 30 | def reset(self): 31 | self.val = 0.0 32 | self.avg = 0.0 33 | self.sum = 0.0 34 | self.count = 0 35 | 36 | def update(self, val, n=1): 37 | self.val = val 38 | self.sum += val * n 39 | self.count += n 40 | self.avg = self.sum / self.count 41 | 42 | def __call__(self): 43 | return self.val 44 | 45 | 46 | def format_dict_of_loss(dict_loss): 47 | try: 48 | string = ", ".join( 49 | [ 50 | ("{}: {:" + (".3f" if value >= 0.001 else ".1e") + "}").format( 51 | name, value 52 | ) 53 | for name, value in dict_loss.items() 54 | ] 55 | ) 56 | except (TypeError, ValueError) as e: 57 | print(dict_loss) 58 | string = "[Log Error] " + str(e) 59 | 60 | return string 61 | -------------------------------------------------------------------------------- /ssds/core/visualize_funcs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | from scipy.optimize import linear_sum_assignment 5 | 6 | 7 | def matrix_iou(a, b): 8 | """ 9 | return iou of a and b, numpy version for data augmentation 10 | 11 | :meta private: 12 | """ 13 | lt = np.maximum(a[:, None, :2], b[:, :2]) 14 | rb = np.minimum(a[:, None, 2:], b[:, 2:]) 15 | 16 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 17 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 18 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 19 | with np.errstate(divide="ignore", invalid="ignore"): 20 | return np.nan_to_num(area_i / (area_a[:, None] + area_b - area_i)) 21 | 22 | 23 | def add_prCurve(writer, precision, recall, class_names=[], epoch=0): 24 | def add_pr_curve_raw(writer, tag, precision, recall, epoch=0): 25 | """ the raw data that pr_curve_raw_data_pb() needs 26 | Args: 27 | precision: ascending array 28 | recall: descending array 29 | """ 30 | num_thresholds = len(precision) 31 | writer.add_pr_curve_raw( 32 | tag=tag, 33 | true_positive_counts=-np.ones(num_thresholds), 34 | false_positive_counts=-np.ones(num_thresholds), 35 | 
true_negative_counts=-np.ones(num_thresholds), 36 | false_negative_counts=-np.ones(num_thresholds), 37 | precision=precision, 38 | recall=recall, 39 | global_step=epoch, 40 | num_thresholds=num_thresholds, 41 | ) 42 | 43 | for i, (_prec, _rec) in enumerate(zip(precision, recall)): 44 | num_thresholds = min(500, len(_prec)) 45 | if num_thresholds != len(_prec): 46 | gap = int(len(_prec) / num_thresholds) 47 | _prec = np.append(_prec[::gap], _prec[-1]) 48 | _rec = np.append(_rec[::gap], _rec[-1]) 49 | num_thresholds = len(_prec) 50 | _prec.sort() 51 | _rec[::-1].sort() 52 | tag = class_names[i] if class_names else "pr_curve/{}".format(i + 1) 53 | add_pr_curve_raw( 54 | writer=writer, tag=tag, precision=_prec, recall=_rec, epoch=epoch 55 | ) 56 | 57 | 58 | def add_defaultAnchors(writer, image, anchors, epoch=0): 59 | if isinstance(image, torch.Tensor): 60 | image = (image * 255).int().permute(1, 2, 0).cpu().numpy().astype(np.uint8) 61 | 62 | size = np.array(image.shape[1::-1]) 63 | for stride, anchor in anchors.items(): 64 | image_show = image.copy() 65 | 66 | anchor_wh = anchor[:, 2:] - anchor[:, :2] + 1 67 | anchor_ctr = anchor[:, :2] + 0.5 * anchor_wh 68 | size_anchor = (size // stride) * stride 69 | x, y = torch.meshgrid( 70 | [torch.arange(0, size_anchor[i], stride, device="cpu") for i in range(2)] 71 | ) 72 | xyxy = torch.stack((x, y, x, y), 2).view(-1, 4) 73 | 74 | xy = (xyxy[:, :2] + anchor_ctr[0]).int() 75 | for _xy in xy: 76 | cv2.circle(image_show, tuple(_xy.tolist()), 2, (255, 0, 0), -1) 77 | shift_anchor = (anchor + xyxy[xyxy.shape[0] // 2]).int().tolist() 78 | for an in shift_anchor: 79 | cv2.rectangle(image_show, tuple(an[:2]), tuple(an[2:]), (0, 255, 0), 1) 80 | writer.add_image( 81 | "anchors/stride_{}".format(stride), image_show, epoch, dataformats="HWC" 82 | ) 83 | 84 | 85 | def add_matchedAnchorsWithBox(writer, image, anchor, stride, depth, epoch=0): 86 | if isinstance(image, torch.Tensor): 87 | image = (image * 255).int().permute(1, 2, 0).cpu().numpy().astype(np.uint8) 88 | if isinstance(depth, torch.Tensor): 89 | depth = depth.clamp(-1, 1).cpu().numpy().astype(np.int8) 90 | 91 | size = np.array(image.shape[1::-1]) 92 | anchor_wh = anchor[:, 2:] - anchor[:, :2] + 1 93 | anchor_ctr = anchor[:, :2] + 0.5 * anchor_wh 94 | size_anchor = (size // stride) * stride 95 | x, y = torch.meshgrid( 96 | [torch.arange(0, size_anchor[i], stride, device="cpu") for i in range(2)] 97 | ) 98 | xyxy = torch.stack((x, y, x, y), 2).view(-1, 4) 99 | 100 | xy = (xyxy[:, :2] + anchor_ctr[0]).int() 101 | depth_xy = (xyxy[:, :2] // stride).int() 102 | color = [(255, 0, 0), (0, 255, 0), (0, 0, 255)] # neg, pos, ignore 103 | 104 | for an_idx, an in enumerate(anchor): 105 | image_show = image.copy() 106 | for _xy, _xyxy, _depth_xy in zip(xy, xyxy, depth_xy): 107 | _depth = depth[an_idx, 0, _depth_xy[1], _depth_xy[0]] 108 | cv2.circle(image_show, tuple(_xy.tolist()), 2, color[_depth], -1) 109 | writer.add_image( 110 | "matched_anchors/stride_{}_anchor_{}".format(stride, an_idx), 111 | image_show, 112 | epoch, 113 | dataformats="HWC", 114 | ) 115 | 116 | 117 | def add_imagesWithBoxes(writer, tag, images, boxes, class_names=[], epoch=0): 118 | if isinstance(images, torch.Tensor): 119 | images = (images * 255).int().permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8) 120 | boxes = boxes.int().cpu().numpy() 121 | for i, (image, box) in enumerate(zip(images, boxes)): 122 | image = np.ascontiguousarray(image) 123 | for b in box: 124 | if b[4] == -1: 125 | continue 126 | cv2.rectangle(image, tuple(b[:2]), 
tuple(b[2:4]), (0, 255, 0), 1) 127 | c = class_names[b[4]] if class_names else b[4] 128 | cv2.putText( 129 | image, 130 | str(c), 131 | tuple(b[:2]), 132 | cv2.FONT_HERSHEY_SIMPLEX, 133 | 1, 134 | (0, 255, 0), 135 | 1, 136 | cv2.LINE_AA, 137 | ) 138 | writer.add_image(tag + "/{}".format(i), image, epoch, dataformats="HWC") 139 | 140 | 141 | def add_imagesWithMatchedBoxes( 142 | writer, tag, images, boxes, targets, class_names=[], epoch=0 143 | ): 144 | if isinstance(images, torch.Tensor): 145 | images = (images * 255).int().permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8) 146 | boxes = boxes.cpu().detach().numpy() 147 | targets = targets.int().cpu().numpy() 148 | for i, (image, box, target) in enumerate(zip(images, boxes, targets)): 149 | image = np.ascontiguousarray(image) 150 | box = box[box[:, 4] > 0.5] 151 | iou_c = matrix_iou(box[:, :4], target[:, :4]) 152 | matched = np.any(iou_c > 0.6, axis=1) 153 | for b in box[matched].astype(int): 154 | cv2.rectangle(image, tuple(b[:2]), tuple(b[2:4]), (255, 0, 255), 1) 155 | for b in target: 156 | if b[4] == -1: 157 | continue 158 | cv2.rectangle(image, tuple(b[:2]), tuple(b[2:4]), (0, 255, 0), 1) 159 | c = class_names[b[4]] if class_names else b[4] 160 | cv2.putText( 161 | image, 162 | str(c), 163 | tuple(b[:2]), 164 | cv2.FONT_HERSHEY_SIMPLEX, 165 | 1, 166 | (0, 255, 0), 167 | 1, 168 | cv2.LINE_AA, 169 | ) 170 | writer.add_image(tag + "/{}".format(i), image, epoch, dataformats="HWC") 171 | 172 | 173 | def add_anchorStrategy(writer, targets, num_thresholds=100): 174 | scale = torch.sqrt(targets[:, 2] * targets[:, 3]).cpu().numpy() 175 | ratio = (targets[:, 3] / targets[:, 2]).cpu().numpy() # h/w 176 | scale[scale > 1000] = -1 177 | ratio[np.isinf(ratio)] = -1 178 | 179 | scale.sort(); ratio.sort() 180 | 181 | import matplotlib.pyplot as plt 182 | 183 | plt.switch_backend("agg") 184 | plt.style.use("ggplot") 185 | 186 | fig = plt.figure() 187 | plt.hist(scale, bins=num_thresholds) 188 | plt.xlabel("scale") 189 | plt.ylabel("frequency") 190 | # plt.xticks((np.arange(num_thresholds+1)[::-1]/num_thresholds+1) * scale.max()) 191 | writer.add_figure("archor_strategy/scale_distribute", fig) 192 | fig.clf() 193 | 194 | fig = plt.figure() 195 | plt.hist(ratio, bins=num_thresholds) 196 | plt.xlabel("ratio") 197 | plt.ylabel("frequency") 198 | # plt.xticks([0.2,0.25,0.333,0.5,1,2,3,4,5]) 199 | writer.add_figure("archor_strategy/ratio_distribute", fig) 200 | fig.clf() 201 | 202 | 203 | def add_matchedAnchor(writer): 204 | raise NotImplementedError 205 | -------------------------------------------------------------------------------- /ssds/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dali_tfrecord import DaliTFRecord 2 | from .dali_coco import DaliCOCO 3 | from .coco import COCODetection -------------------------------------------------------------------------------- /ssds/dataset/coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import pickle 5 | from pycocotools.coco import COCO 6 | 7 | from .detection_dataset import DetectionDataset 8 | 9 | class COCODataset(object): 10 | r"""COCO Dataset, used to extract the data from the annotation file only. 11 | 12 | For the dataset defined in the cfg.DATASET.DATASET, please refer to :class:`.COCODetection`.
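    The directory layout this class expects (inferred from the loading code below; the image index in the file name is zero-padded to 12 digits)::

        dataset_dir/
            annotations/instances_<image_set>.json
            images/<image_set>/000000119993.jpg
            cache/   # created automatically to cache image paths and annotations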
13 | 14 | Saves the image paths and the related annotations to self.img_paths and self.anno 15 | 16 | Arguments: 17 | dataset_dir (str): the directory of coco dataset 18 | image_sets (list): list of the image set names (e.g. 'train2017') whose annotation files are loaded 19 | """ 20 | def __init__(self, dataset_dir, image_sets): 21 | self.dataset_dir = dataset_dir 22 | self.cache_path = os.path.join(dataset_dir, 'cache') 23 | self.image_sets = image_sets 24 | self.img_paths = [] 25 | self.anno = [] 26 | self.classes_names = [] 27 | 28 | if not os.path.exists(self.cache_path): 29 | os.makedirs(self.cache_path) 30 | self._load_anno_files(dataset_dir, image_sets) 31 | 32 | def _load_anno_files(self, dataset_dir, image_sets): 33 | for coco_name in image_sets: 34 | annofile = os.path.join(dataset_dir, 'annotations', 'instances_' + coco_name + '.json') 35 | _COCO = COCO(annofile) 36 | cats = _COCO.loadCats(_COCO.getCatIds()) 37 | indexes = _COCO.getImgIds() 38 | 39 | self.classes_names = tuple(c['name'] for c in cats) 40 | self.num_classes = len(self.classes_names) 41 | self._class_to_ind = dict(zip(self.classes_names, range(self.num_classes))) 42 | self._class_to_coco_cat_id = dict(zip([c['name'] for c in cats], 43 | _COCO.getCatIds())) 44 | self.img_paths.extend(self._load_coco_img_path(coco_name, indexes)) 45 | self.anno.extend(self._load_coco_annotations(coco_name, indexes, _COCO)) 46 | 47 | def _load_coco_img_path(self, coco_name, indexes): 48 | cache_file = os.path.join(self.cache_path, coco_name+'_img_path.pkl') 49 | if os.path.exists(cache_file): 50 | with open(cache_file, 'rb') as fid: 51 | img_path = pickle.load(fid) 52 | print('{} img path loaded from {}'.format(coco_name, cache_file)) 53 | return img_path 54 | 55 | print('parsing img path for {}'.format(coco_name)) 56 | img_path = [self.image_path_from_index(coco_name, index) 57 | for index in indexes] 58 | with open(cache_file, 'wb') as fid: 59 | pickle.dump(img_path, fid, pickle.HIGHEST_PROTOCOL) 60 | print('wrote img path to {}'.format(cache_file)) 61 | return img_path 62 | 63 | def _load_coco_annotations(self, coco_name, indexes, _COCO): 64 | cache_file = os.path.join(self.cache_path, coco_name+'_gt_db.pkl') 65 | if os.path.exists(cache_file): 66 | with open(cache_file, 'rb') as fid: 67 | roidb = pickle.load(fid) 68 | print('{} gt loaded from {}'.format(coco_name, cache_file)) 69 | return roidb 70 | 71 | print('parsing gt for {}'.format(coco_name)) 72 | gt_roidb = [self.annotation_from_index(index, _COCO) 73 | for index in indexes] 74 | with open(cache_file, 'wb') as fid: 75 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 76 | print('wrote gt to {}'.format(cache_file)) 77 | return gt_roidb 78 | 79 | def image_path_from_index(self, name, index): 80 | """ 81 | Construct an image path from the image's "index" identifier. 82 | Example image path for index=119993: 83 | images/train2017/000000119993.jpg 84 | """ 85 | file_name = (str(index).zfill(12) + '.jpg') 86 | image_path = os.path.join(self.dataset_dir, 'images', 87 | name, file_name) 88 | assert os.path.exists(image_path), \ 89 | 'Path does not exist: {}'.format(image_path) 90 | return image_path 91 | 92 | def annotation_from_index(self, index, _COCO, toPercent=True): 93 | """ 94 | Loads COCO bounding-box instance annotations. Crowd instances are 95 | handled by marking their overlaps (with all categories) to -1. This 96 | overlap value means that crowd "instances" are excluded from training.
97 | Returns the result in percent coordinates 98 | """ 99 | im_ann = _COCO.loadImgs(index)[0] 100 | width = im_ann['width'] 101 | height = im_ann['height'] 102 | 103 | annIds = _COCO.getAnnIds(imgIds=index, iscrowd=None) 104 | objs = _COCO.loadAnns(annIds) 105 | # Sanitize bboxes -- some are invalid 106 | valid_objs = [] 107 | for obj in objs: 108 | x1 = np.max((0, obj['bbox'][0])) 109 | y1 = np.max((0, obj['bbox'][1])) 110 | x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1)))) 111 | y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1)))) 112 | if obj['area'] > 0 and x2 >= x1 and y2 >= y1: 113 | obj['clean_bbox'] = [x1, y1, x2, y2] 114 | valid_objs.append(obj) 115 | 116 | # Lookup table to map from COCO category ids to our internal class 117 | # indices 118 | coco_cat_id_to_class_ind = dict([(self._class_to_coco_cat_id[name], 119 | self._class_to_ind[name]) 120 | for name in self.classes_names]) 121 | 122 | res = np.zeros((len(valid_objs), 5), dtype=np.float32) 123 | for ix, obj in enumerate(valid_objs): 124 | clss = coco_cat_id_to_class_ind[obj['category_id']] 125 | res[ix, 0:4] = obj['clean_bbox'] 126 | res[ix, 4] = clss 127 | 128 | if toPercent: 129 | res[:,:4:2] /= width 130 | res[:,1:4:2] /= height 131 | return res 132 | 133 | class COCODetection(COCODataset, DetectionDataset): 134 | r"""COCO Object Detection Dataset 135 | 136 | The derivative class for COCODataset and DetectionDataset. 137 | 138 | Loads the image paths and the related annotations from :class:`.COCODataset` and saves them to the annotation database. 139 | Then fetches the data by the data pipeline in :class:`ssds.dataset.detection_dataset.DetectionDataset`. 140 | 141 | Arguments: 142 | dataset_dir (str): the directory of coco dataset 143 | image_sets (list): list of the image set names (e.g. 'train2017') to load 144 | """ 145 | def __init__(self, cfg, dataset_dir, image_sets, training=False, transform=None): 146 | DetectionDataset.__init__(self, cfg, training, transform) 147 | COCODataset.__init__(self, dataset_dir, image_sets) 148 | 149 | self.db = self._get_db() 150 | # self.db = self.reorder_data(self.db, self.cfg_joints_name, self.ds_joints_name) 151 | 152 | # loading img db to boost up the speed 153 | if self.using_pickle: 154 | pickle_path = os.path.join(dataset_dir, 'pickle', 'img_db_' + '_'.join(image_sets) + '.pickle') 155 | if not os.path.exists(os.path.dirname(pickle_path)): 156 | os.makedirs(os.path.dirname(pickle_path)) 157 | if not os.path.exists(pickle_path): 158 | self.saving_pickle(pickle_path) 159 | self.img_db = self.loading_pickle(pickle_path) 160 | 161 | def _get_db(self): 162 | gt_db = [{ 163 | 'image': img_path, 164 | 'boxes': anno[:,:4], 165 | 'labels': anno[:,4] 166 | } for img_path, anno in zip(self.img_paths, self.anno)] 167 | return gt_db -------------------------------------------------------------------------------- /ssds/dataset/dali_coco.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import math 4 | import ctypes 5 | from contextlib import redirect_stdout 6 | from pycocotools.coco import COCO 7 | 8 | import nvidia.dali.ops as ops 9 | import nvidia.dali.types as types 10 | from nvidia.dali.pipeline import Pipeline 11 | 12 | from .dali_dataiterator import DaliDataset, DaliPipeline 13 | 14 | class COCOPipeline(Pipeline, DaliPipeline): 15 | 'Dali pipeline for COCO' 16 | 17 | def __init__(self, image_dir, annotations_file, cache_path, batch_size, target_size, preproc_param, num_threads, num_shards, device_ids, training=False): 18 | 
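        # Descriptive note (added): the two parent __init__ calls split the work.
        # DALI's Pipeline owns batching, threading and the device, while
        # DaliPipeline (see dali_dataiterator.py) predefines the shared
        # augmentation graph that define_graph() below plugs the reader into.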
Pipeline.__init__(self, batch_size=batch_size, num_threads=num_threads, device_id=device_ids, prefetch_queue_depth=num_threads, seed=42) 19 | DaliPipeline.__init__(self, target_size=target_size, preproc_param=preproc_param, training=training) 20 | 21 | self.reader = ops.COCOReader(annotations_file=annotations_file, file_root=image_dir, num_shards=num_shards, shard_id=0, 22 | ltrb=True, ratio=True, shuffle_after_epoch=training, save_img_ids=True, 23 | dump_meta_files=True, dump_meta_files_path=cache_path) 24 | 25 | def define_graph(self): 26 | images, bboxes, labels, img_ids = self.reader() 27 | return self.predefined_graph(images, bboxes, labels) 28 | 29 | class DaliCOCO(DaliDataset): 30 | 'Data loader for data parallel using Dali for COCO-format datasets' 31 | def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False): 32 | super(DaliCOCO, self).__init__(cfg, dataset_dir, image_sets, batch_size, training) 33 | 34 | if len(image_sets) != 1: 35 | raise ValueError("For the DaliCOCO dataset, the number of image_sets has to be 1, currently it is {}".format(image_sets)) 36 | 37 | self.image_dir = os.path.join(dataset_dir, "images", image_sets[0]) 38 | self.annotations_file = os.path.join(dataset_dir, "annotations", "instances_{}.json".format(image_sets[0])) 39 | self.cache_path = os.path.join(dataset_dir, "cache") 40 | if not os.path.exists(self.cache_path): 41 | os.makedirs(self.cache_path) 42 | 43 | self.pipe = COCOPipeline(image_dir=self.image_dir, annotations_file=self.annotations_file, cache_path=self.cache_path, **self.pipeline_args) 44 | self.pipe.build() 45 | 46 | with redirect_stdout(None): 47 | self.coco = COCO(self.annotations_file) 48 | self.ids = list(self.coco.imgs.keys()) 49 | 50 | def __len__(self): 51 | return math.ceil(len(self.ids) // self.num_shards / self.batch_size) 52 | 53 | def reset_size(self, batch_size, target_size): 54 | self.batch_size = batch_size 55 | self.target_size = target_size 56 | self.pipeline_args["batch_size"] = batch_size 57 | self.pipeline_args["target_size"] = target_size 58 | 59 | del self.pipe 60 | 61 | self.pipe = COCOPipeline(image_dir=self.image_dir, annotations_file=self.annotations_file, cache_path=self.cache_path, **self.pipeline_args) 62 | self.pipe.build() -------------------------------------------------------------------------------- /ssds/dataset/dali_dataiterator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import ctypes 4 | 5 | import nvidia.dali.ops as ops 6 | import nvidia.dali.types as types 7 | 8 | 9 | class DaliPipeline(object): 10 | r""" The data pipeline for the Dali dataset 11 | """ 12 | 13 | def __init__(self, target_size, preproc_param, training=False): 14 | self.training = training 15 | mean = preproc_param.MEAN 16 | std = preproc_param.STD 17 | bri_delta = preproc_param.BRI_DELTA 18 | hue_delta = preproc_param.HUE_DELTA 19 | max_expand_ratio = preproc_param.MAX_EXPAND_RATIO 20 | contrast_range = preproc_param.CONTRAST_RANGE 21 | saturation_range = preproc_param.SATURATION_RANGE 22 | crop_aspect_ratio = preproc_param.CROP_ASPECT_RATIO 23 | crop_scale = preproc_param.CROP_SCALE 24 | crop_attempts = preproc_param.CROP_ATTEMPTS 25 | 26 | # decoder 27 | self.decode_train = ops.ImageDecoderSlice(device="mixed", output_type=types.RGB) 28 | self.decode_infer = ops.ImageDecoder(device="mixed", output_type=types.RGB) 29 | 30 | # ssd crop 31 | self.bbox_crop = ops.RandomBBoxCrop( 32 | device="cpu", 33 | bbox_layout="xyXY", 34 | scaling=crop_scale, 35 | 
aspect_ratio=crop_aspect_ratio, 36 | allow_no_crop=True, 37 | thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9], 38 | num_attempts=crop_attempts, 39 | ) 40 | 41 | # color twist 42 | self.uniform_con = ops.Uniform(range=contrast_range) 43 | self.uniform_bri = ops.Uniform( 44 | range=(1.0 - bri_delta / 256.0, 1.0 + bri_delta / 256.0) 45 | ) 46 | self.uniform_sat = ops.Uniform(range=saturation_range) 47 | self.uniform_hue = ops.Uniform(range=(-hue_delta, hue_delta)) 48 | self.hsv = ops.Hsv(device="gpu") 49 | self.contrast = ops.BrightnessContrast(device="gpu") 50 | 51 | # hflip 52 | self.bbox_flip = ops.BbFlip(device="cpu", ltrb=True) 53 | self.img_flip = ops.Flip(device="gpu") 54 | self.coin_flip = ops.CoinFlip(probability=0.5) 55 | 56 | # paste 57 | self.paste_pos = ops.Uniform(range=(0, 1)) 58 | self.paste_ratio = ops.Uniform(range=(1, max_expand_ratio)) 59 | self.paste = ops.Paste(device="gpu", fill_value=mean) 60 | self.bbox_paste = ops.BBoxPaste(device="cpu", ltrb=True) 61 | 62 | # resize and normalize 63 | self.resize = ops.Resize( 64 | device="gpu", 65 | interp_type=types.DALIInterpType.INTERP_CUBIC, 66 | resize_x=target_size[0], 67 | resize_y=target_size[1], 68 | save_attrs=True, 69 | ) 70 | self.normalize = ops.CropMirrorNormalize(device="gpu", mean=mean, std=std) 71 | 72 | def predefined_graph(self, images, bboxes, labels): 73 | if self.training: 74 | # crop 75 | crop_begin, crop_size, bboxes, labels = self.bbox_crop(bboxes, labels) 76 | images = self.decode_train(images, crop_begin, crop_size) 77 | 78 | # color twist 79 | images = self.hsv( 80 | images, hue=self.uniform_hue(), saturation=self.uniform_sat() 81 | ) 82 | images = self.contrast( 83 | images, brightness=self.uniform_bri(), contrast=self.uniform_con() 84 | ) 85 | 86 | # hflip 87 | flip = self.coin_flip() 88 | bboxes = self.bbox_flip(bboxes, horizontal=flip) 89 | images = self.img_flip(images, horizontal=flip) 90 | 91 | # paste 92 | ratio = self.paste_ratio() 93 | px = self.paste_pos() 94 | py = self.paste_pos() 95 | images = self.paste(images.gpu(), paste_x=px, paste_y=py, ratio=ratio) 96 | bboxes = self.bbox_paste(bboxes, paste_x=px, paste_y=py, ratio=ratio) 97 | else: 98 | images = self.decode_infer(images) 99 | 100 | images, attrs = self.resize(images) 101 | images = self.normalize(images) 102 | 103 | return images, bboxes, labels 104 | 105 | 106 | class DaliDataset(object): 107 | r""" Data loader for data parallel using Dali 108 | """ 109 | 110 | def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False): 111 | 112 | self.training = training 113 | self.batch_size = batch_size 114 | self.target_size = cfg.IMAGE_SIZE 115 | self.preproc_param = cfg.PREPROC 116 | 117 | self.device_ids = ( 118 | torch.cuda.current_device() if len(cfg.DEVICE_ID) != 1 else cfg.DEVICE_ID[0] 119 | ) # ",".join([str(d) for d in device_ids]) 120 | self.num_shards = max(len(cfg.DEVICE_ID), 1) 121 | self.num_threads = cfg.NUM_WORKERS 122 | 123 | self.pipeline_args = { 124 | "target_size": self.target_size, 125 | "num_threads": self.num_threads, 126 | "num_shards": self.num_shards, 127 | "batch_size": self.batch_size, 128 | "training": self.training, 129 | "device_ids": self.device_ids, 130 | "preproc_param": self.preproc_param, 131 | } 132 | 133 | def __repr__(self): 134 | return "\n".join( 135 | [ 136 | " loader: dali", 137 | " length: {}".format(self.__len__()), 138 | " target_size: {}".format(self.target_size), 139 | ] 140 | ) 141 | 142 | def __len__(self): 143 | return math.ceil(len(self.pipe) // self.num_shards / self.batch_size) 144 | 145 | 
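    # A hedged usage sketch for the derived loaders (the constructor signature is
    # from this file; the cfg attribute paths and the image set name are assumptions):
    #
    #     loader = DaliCOCO(cfg.DATASET, cfg.DATASET.DATASET_DIR, ["train2017"],
    #                       batch_size=32, training=True)
    #     for images, targets in loader:
    #         # images:  (N, 3, H, W) cuda float tensor
    #         # targets: (N, max_dets, 5) padded with -1; columns are x, y, w, h, label
    #         ...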
def __iter__(self): 146 | for _ in range(self.__len__()): 147 | data, num_detections = [], [] 148 | dali_data, dali_boxes, dali_labels = self.pipe.run() 149 | 150 | for l in range(len(dali_boxes)): 151 | num_detections.append(dali_boxes.at(l).shape[0]) 152 | 153 | torch_targets = -1 * torch.ones( 154 | [len(dali_boxes), max(max(num_detections), 1), 5] 155 | ) 156 | 157 | for batch in range(self.batch_size): 158 | # Convert dali tensor to pytorch 159 | dali_tensor = dali_data[batch] 160 | tensor_shape = dali_tensor.shape() 161 | 162 | datum = torch.zeros( 163 | dali_tensor.shape(), dtype=torch.float, device=torch.device("cuda") 164 | ) 165 | c_type_pointer = ctypes.c_void_p(datum.data_ptr()) 166 | dali_tensor.copy_to_external(c_type_pointer) 167 | 168 | # Rescale boxes 169 | b_arr = dali_boxes.at(batch) 170 | num_dets = b_arr.shape[0] 171 | if num_dets != 0: 172 | torch_bbox = torch.from_numpy(b_arr).float() 173 | 174 | torch_bbox[:, ::2] *= self.target_size[0] 175 | torch_bbox[:, 1::2] *= self.target_size[1] 176 | # (l,t,r,b) -> (x,y,w,h) == (l,t, r-l, b-t) 177 | torch_bbox[:, 2] -= torch_bbox[:, 0] 178 | torch_bbox[:, 3] -= torch_bbox[:, 1] 179 | torch_targets[batch, :num_dets, :4] = torch_bbox # * ratio 180 | 181 | # Arrange labels in target tensor 182 | l_arr = dali_labels.at(batch) 183 | if num_dets != 0: 184 | torch_label = torch.from_numpy(l_arr).float() 185 | torch_label -= 1 # Rescale labels to [0,n-1] instead of [1,n] 186 | torch_targets[batch, :num_dets, 4] = torch_label.squeeze() 187 | 188 | data.append(datum.unsqueeze(0)) 189 | 190 | data = torch.cat(data, dim=0) 191 | torch_targets = torch_targets.cuda(non_blocking=True) 192 | yield data, torch_targets 193 | 194 | def reset_size(self, batch_size, target_size): 195 | r""" 196 | :meta private: 197 | """ 198 | raise NotImplementedError() 199 | -------------------------------------------------------------------------------- /ssds/dataset/dali_tfrecord.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import math 4 | import ctypes 5 | from subprocess import call 6 | from glob import glob 7 | 8 | import nvidia.dali.ops as ops 9 | import nvidia.dali.types as types 10 | import nvidia.dali.tfrecord as tfrec 11 | from nvidia.dali.pipeline import Pipeline 12 | import nvidia.dali.plugin.pytorch as dalitorch 13 | 14 | from .dali_dataiterator import DaliDataset, DaliPipeline 15 | 16 | class TFRecordPipeline(Pipeline, DaliPipeline): 17 | """ Currently the easiest way to process a dataset with DALI is to use TFRecord files 18 | """ 19 | def __init__(self, tfrecords, batch_size, target_size, preproc_param, num_threads, num_shards, device_ids, training=False): 20 | Pipeline.__init__(self, batch_size=batch_size, num_threads=num_threads, 21 | device_id=device_ids, prefetch_queue_depth=num_threads, seed=42, 22 | exec_async=False, exec_pipelined=False) 23 | DaliPipeline.__init__(self, target_size=target_size, preproc_param=preproc_param, training=training) 24 | 25 | tfrecords_idx = [tfrecord+"_idx" for tfrecord in tfrecords] 26 | for tfrecord, tfrecord_idx in zip(tfrecords, tfrecords_idx): 27 | if os.path.exists(tfrecord_idx): 28 | continue 29 | call(["tfrecord2idx", tfrecord, tfrecord+"_idx"]) 30 | self.length = sum([len(open(f).readlines()) for f in tfrecords_idx]) 31 | 32 | self.input = ops.TFRecordReader(path = tfrecords, 33 | index_path = tfrecords_idx, 34 | features = { 35 | 'image/height' : tfrec.FixedLenFeature([1], tfrec.int64, -1), 36 | 'image/width' : 
tfrec.FixedLenFeature([1], tfrec.int64, -1), 37 | 'image/encoded' : tfrec.FixedLenFeature((), tfrec.string, ""), 38 | 'image/format' : tfrec.FixedLenFeature((), tfrec.string, ""), 39 | 'image/object/bbox/xmin': tfrec.VarLenFeature(tfrec.float32, 0.0), 40 | 'image/object/bbox/ymin': tfrec.VarLenFeature(tfrec.float32, 0.0), 41 | 'image/object/bbox/xmax': tfrec.VarLenFeature(tfrec.float32, 0.0), 42 | 'image/object/bbox/ymax': tfrec.VarLenFeature(tfrec.float32, 0.0), 43 | 'image/object/class/text': tfrec.FixedLenFeature([ ], tfrec.string, ''), 44 | 'image/object/class/label': tfrec.VarLenFeature(tfrec.int64, -1) 45 | }, 46 | num_shards = num_shards, 47 | random_shuffle = training) 48 | self.training = training 49 | self.cat = dalitorch.TorchPythonFunction(function=lambda l,t,r,b: torch.cat([l,t,r,b]).view(4,-1).permute(1,0)) #[l*w,t*h,r*w,b*h], [l,t,r,b] 50 | self.cast = ops.Cast(dtype=types.DALIDataType.INT32) 51 | 52 | def define_graph(self): 53 | inputs = self.input() 54 | images = inputs["image/encoded"] 55 | bboxes = self.cat(inputs["image/object/bbox/xmin"], inputs["image/object/bbox/ymin"], 56 | inputs["image/object/bbox/xmax"], inputs["image/object/bbox/ymax"]) 57 | labels = self.cast(inputs["image/object/class/label"]) 58 | return self.predefined_graph(images, bboxes, labels) 59 | 60 | def __len__(self): 61 | return self.length 62 | 63 | 64 | class DaliTFRecord(DaliDataset): 65 | 'Data loader for data parallel using Dali for TFRecord files' 66 | def __init__(self, cfg, dataset_dir, image_sets, batch_size, training=False): 67 | super(DaliTFRecord, self).__init__(cfg, dataset_dir, image_sets, batch_size, training) 68 | 69 | self.tfrecords = [path for sets in image_sets for path in glob(os.path.join(dataset_dir, sets))] 70 | self.pipe = TFRecordPipeline(tfrecords=self.tfrecords, **self.pipeline_args) 71 | self.pipe.build() 72 | 73 | 74 | def reset_size(self, batch_size, target_size): 75 | self.batch_size = batch_size 76 | self.target_size = target_size 77 | self.pipeline_args["batch_size"] = batch_size 78 | self.pipeline_args["target_size"] = target_size 79 | 80 | del self.pipe 81 | 82 | self.pipe = TFRecordPipeline(tfrecords=self.tfrecords, **self.pipeline_args) 83 | self.pipe.build() -------------------------------------------------------------------------------- /ssds/dataset/dataset_factory.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import numpy as np 4 | import os 5 | from glob import glob 6 | 7 | from ssds import dataset 8 | 9 | 10 | def detection_collate(batch): 11 | """Custom collate fn for dealing with batches of images that have a different 12 | number of associated object annotations (bounding boxes). 
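    For example, with a batch of two images having 3 and 1 annotations
    respectively, the returned target tensor has shape (2, 3, 5) and the
    two unused rows of the second image are filled with -1 (shapes illustrative).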
13 | Arguments: 14 | batch: (tuple) A tuple of tensor images and lists of annotations 15 | Return: 16 | A tuple containing: 17 | 1) (tensor) batch of images stacked on their 0 dim 18 | 2) (tensors) annotations for a given image are stacked on 0 dim 19 | """ 20 | targets = [] 21 | imgs = [] 22 | num_detections = [] 23 | for img, target in batch: 24 | # for tup in sample: 25 | imgs.append(img) 26 | targets.append(target) 27 | num_detections.append(target.shape[0]) 28 | 29 | torch_targets = -1 * torch.ones( 30 | [len(targets), max(max(num_detections), 1), 5], dtype=torch.float, device="cpu" 31 | ) 32 | for i, target in enumerate(targets): 33 | num_dets = target.shape[0] 34 | torch_targets[i, :num_dets] = torch.from_numpy(target).float() 35 | return torch.stack(imgs, 0), torch_targets 36 | 37 | 38 | def load_data(cfg, phase): 39 | r""" create the dataloader based on the config file. 40 | 41 | * If the phase == "train", 42 | it returns the dataloader for cfg.DATASET.TRAIN_SETS and fetches the data randomly; 43 | * If the phase == "test", 44 | it returns the dataloader for cfg.DATASET.TEST_SETS and fetches the data sequentially; 45 | 46 | Args: 47 | cfg: the configs defined by cfg.DATASET 48 | phase (str): "train" or "test" 49 | 50 | Returns: 51 | dataloader 52 | """ 53 | training = phase == "train" 54 | image_sets = cfg.TRAIN_SETS if training else cfg.TEST_SETS 55 | batch_size = cfg.TRAIN_BATCH_SIZE if training else cfg.TEST_BATCH_SIZE 56 | 57 | if "Dali" in cfg.DATASET: 58 | data_loader = getattr(dataset, cfg.DATASET)( 59 | cfg=cfg, 60 | dataset_dir=cfg.DATASET_DIR, 61 | image_sets=image_sets, 62 | batch_size=batch_size, 63 | training=training, 64 | ) 65 | else: 66 | _dataset = getattr(dataset, cfg.DATASET)( 67 | cfg=cfg, 68 | dataset_dir=cfg.DATASET_DIR, 69 | image_sets=image_sets, 70 | training=training, 71 | ) 72 | data_loader = data.DataLoader( 73 | _dataset, 74 | batch_size, 75 | num_workers=cfg.NUM_WORKERS, 76 | shuffle=training, 77 | collate_fn=detection_collate, 78 | pin_memory=True, 79 | ) 80 | return data_loader 81 | -------------------------------------------------------------------------------- /ssds/dataset/detection_dataset.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import sys 3 | import pickle 4 | import glob 5 | 6 | import cv2 7 | import numpy as np 8 | from PIL import Image 9 | import io 10 | 11 | import torch 12 | import torch.utils.data as data 13 | 14 | from . import transforms as preprocess 15 | 16 | class DetectionDataset(data.Dataset): 17 | '''The base class for the detection 2d dataset. 18 | 19 | It contains the data pipeline which is defined by :meth:`_init_transform`. 20 | 21 | DetectionDataset is the base class and does not contain the actual data; derived classes 22 | need to fill the annotations into self.db.
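    A db record is expected to look like the following (field names from
    :meth:`__getitem__` below; the path and shapes are illustrative)::

        {'image': '/abs/path/to/img.jpg',
         'boxes': np.ndarray of shape (n, 4),   # percent coords
         'labels': np.ndarray of shape (n,)}    # class indices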
23 | ''' 24 | def __init__(self, cfg, is_train, transform=None): 25 | # super(DetectionDataset, self).__init__() 26 | self.is_train = is_train 27 | 28 | self.image_size = cfg.IMAGE_SIZE 29 | # self.num_classes = cfg.NUM_CLASSES 30 | # self.classes_names = cfg.CLASSES_NAME 31 | self.preproc_param = cfg.PREPROC 32 | self.using_pickle = cfg.PICKLE 33 | self.transform = transform 34 | 35 | self.db = [] 36 | self.img_db = [] 37 | self._init_transform() 38 | 39 | def _init_transform(self): 40 | if self.is_train: 41 | self.transform = preprocess.Compose([ 42 | preprocess.ConvertFromInts(), 43 | preprocess.ToAbsoluteCoords(), 44 | preprocess.RandomSampleCrop(scale=self.preproc_param.CROP_SCALE, 45 | num_attempts=self.preproc_param.CROP_ATTEMPTS), 46 | preprocess.RandomMirror(), 47 | # preprocess.PhotometricDistort(hue_delta=self.preproc_param.HUE_DELTA, 48 | # bri_delta=self.preproc_param.BRI_DELTA, 49 | # contrast_range=self.preproc_param.CONTRAST_RANGE, 50 | # saturation_range=self.preproc_param.SATURATION_RANGE), 51 | preprocess.Expand(mean=self.preproc_param.MEAN, 52 | max_expand_ratio=self.preproc_param.MAX_EXPAND_RATIO), 53 | preprocess.ToPercentCoords(), 54 | preprocess.Resize(tuple(self.image_size)), 55 | preprocess.ToAbsoluteCoords(), 56 | preprocess.ToTensor(), 57 | # preprocess.ToGPU(), 58 | preprocess.Normalize(mean=self.preproc_param.MEAN, std=self.preproc_param.STD), 59 | preprocess.ToXYWH(), 60 | ]) 61 | else: 62 | self.transform = preprocess.Compose([ 63 | preprocess.ConvertFromInts(), 64 | preprocess.Resize(tuple(self.image_size)), 65 | preprocess.ToAbsoluteCoords(), 66 | preprocess.ToTensor(), 67 | # preprocess.ToGPU(), 68 | preprocess.Normalize(mean=self.preproc_param.MEAN, std=self.preproc_param.STD), 69 | preprocess.ToXYWH(), 70 | ]) 71 | 72 | def _get_db(self): 73 | raise NotImplementedError 74 | 75 | def __len__(self): 76 | return len(self.db) 77 | 78 | def __getitem__(self, index): 79 | r''' fetch the image and annotation from self.db[index] 80 | 81 | The data in the self.db can be described as: 82 | 83 | db[index] = { 84 | 85 | 'image': 'Absolute Path', 86 | 87 | 'boxes': np.ndarray, 88 | 89 | 'labels': np.ndarray} 90 | 91 | Args: 92 | index: the index for db 93 | 94 | 95 | Returns: 96 | 'image', torch(c,h,w), 97 | 98 | 'target', np.ndarray(n,5) 99 | 100 | 0~3 is the bounding box in absolute coords with format x,y,w,h 101 | 102 | 4 is the bounding box label 103 | ''' 104 | db_rec = copy.deepcopy(self.db[index]) 105 | 106 | # read the images 107 | if self.using_pickle: 108 | # decode image 109 | encoded_image = copy.deepcopy(self.img_db[index]) 110 | image = Image.open(io.BytesIO(encoded_image)) 111 | image = np.array(image) 112 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 113 | else: 114 | image_file = db_rec['image'] 115 | image = cv2.imread(image_file) 116 | if image is None: 117 | raise ValueError('Fail to read {}'.format(image_file)) 118 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 119 | 120 | boxes = db_rec['boxes'] 121 | labels = db_rec['labels'] 122 | 123 | # preprocess 124 | image, boxes, labels = self.transform(image, boxes, labels) 125 | return image, np.concatenate((boxes, labels[:,None]),axis=1) 126 | 127 | def reorder_data(self, db, cfg_joints_name, ds_joints_name): 128 | ''' reorder the db based on the cfg_joints_name 129 | 130 | :meta private: 131 | ''' 132 | order = [] 133 | for cfg_name in cfg_joints_name: 134 | if cfg_name in ds_joints_name: 135 | order.append(ds_joints_name.index(cfg_name)) 136 | else: 137 | order.append(-1) 138 | order = 
np.array(order) 139 | 140 | raise NotImplementedError 141 | return db 142 | 143 | def saving_pickle(self, pickle_path): 144 | ''' 145 | :meta private: 146 | ''' 147 | img_db = [] 148 | for idx, db_rec in enumerate(self.db): 149 | sys.stdout.write('\rLoading Image: {}/{}'.format(idx, len(self.db))) 150 | sys.stdout.flush() 151 | # load bytes from file 152 | with open(db_rec['image'], 'rb') as f: 153 | img_db.append(f.read()) 154 | 155 | # serialize 156 | sys.stdout.write('\rSaving img_db ({}) to {}\n'.format(len(self.db), pickle_path)) 157 | with open(pickle_path, 'wb') as handle: 158 | return pickle.dump(img_db, handle, protocol=pickle.HIGHEST_PROTOCOL) 159 | 160 | def loading_pickle(self, pickle_path): 161 | ''' 162 | :meta private: 163 | ''' 164 | sys.stdout.write('\rLoading Pickle from {}\n'.format(pickle_path)) 165 | with open(pickle_path, 'rb') as handle: 166 | return pickle.load(handle) -------------------------------------------------------------------------------- /ssds/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/modeling/__init__.py -------------------------------------------------------------------------------- /ssds/modeling/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/modeling/layers/__init__.py -------------------------------------------------------------------------------- /ssds/modeling/layers/basic_layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SepConvBNReLU(nn.Sequential): 6 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, expand_ratio=1): 7 | padding = (kernel_size - 1) // 2 8 | super(SepConvBNReLU, self).__init__( 9 | # dw 10 | nn.Conv2d( 11 | in_planes, 12 | in_planes, 13 | kernel_size, 14 | stride, 15 | padding, 16 | groups=in_planes, 17 | bias=False, 18 | ), 19 | nn.BatchNorm2d(in_planes), 20 | nn.ReLU(inplace=True), 21 | # pw 22 | nn.Conv2d(in_planes, out_planes, 1, 1, 0, bias=False), 23 | nn.BatchNorm2d(out_planes), 24 | nn.ReLU(inplace=True), 25 | ) 26 | 27 | 28 | class ConvBNReLU(nn.Sequential): 29 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1): 30 | padding = (kernel_size - 1) // 2 31 | super(ConvBNReLU, self).__init__( 32 | nn.Conv2d( 33 | in_planes, out_planes, kernel_size, stride, padding=padding, bias=False 34 | ), 35 | nn.BatchNorm2d(out_planes), 36 | nn.ReLU(inplace=True), 37 | ) 38 | 39 | 40 | class ConvBNReLUx2(nn.Sequential): 41 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1): 42 | padding = (kernel_size - 1) // 2 43 | super(ConvBNReLUx2, self).__init__( 44 | nn.Conv2d(in_planes, out_planes // 2, 1, bias=False), 45 | nn.BatchNorm2d(out_planes // 2), 46 | nn.ReLU(inplace=True), 47 | nn.Conv2d( 48 | out_planes // 2, 49 | out_planes, 50 | kernel_size, 51 | stride, 52 | padding=padding, 53 | bias=False, 54 | ), 55 | nn.BatchNorm2d(out_planes), 56 | nn.ReLU(inplace=True), 57 | ) 58 | -------------------------------------------------------------------------------- /ssds/modeling/layers/decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .box import decode, nms 3 | 4 | 5 | class Decoder(object): 6 | r""" 
7 | class Decoder contains the decoder func and nms func 8 | 9 | * decoder 10 | decoder is used to decode the boxes from loc and conf feature map, check :meth:`ssds.modeling.layers.box.decode` for more details. 11 | * nms 12 | nms is used to filter the decoded boxes by its confidence and box location, check :meth:`ssds.modeling.layers.box.nms` for more details. 13 | """ 14 | 15 | def __init__( 16 | self, conf_threshold, nms_threshold, top_n, top_n_per_level, rescore, use_diou 17 | ): 18 | self.conf_threshold = conf_threshold 19 | self.nms_threshold = nms_threshold 20 | self.top_n = top_n 21 | self.top_n_per_level = top_n_per_level 22 | self.rescore = rescore 23 | self.use_diou = use_diou 24 | 25 | def __call__(self, loc, conf, anchors): 26 | r""" 27 | Decode and filter boxes 28 | 29 | Returns: 30 | out_scores, (batch, top_n) 31 | 32 | out_boxes, (batch, top_n, 4) with ltrb format 33 | 34 | out_classes, (batch, top_n) 35 | """ 36 | decoded = [ 37 | decode( 38 | c, 39 | l, 40 | stride, 41 | self.conf_threshold, 42 | self.top_n_per_level, 43 | anchor, 44 | rescore=self.rescore, 45 | ) 46 | for l, c, (stride, anchor) in zip(loc, conf, anchors.items()) 47 | ] 48 | decoded = [torch.cat(tensors, 1) for tensors in zip(*decoded)] 49 | return nms(*decoded, self.nms_threshold, self.top_n, using_diou=self.use_diou) 50 | -------------------------------------------------------------------------------- /ssds/modeling/layers/layers_parser.py: -------------------------------------------------------------------------------- 1 | from ssds.modeling.layers.basic_layers import SepConvBNReLU, ConvBNReLUx2 2 | from ssds.modeling.layers.rfb_layers import BasicRFB, BasicRFB_lite 3 | 4 | 5 | def parse_feature_layer(layer, in_channels, depth): 6 | """ Parse the layers defined in the config files 7 | Args: 8 | layer: The name of the layer 9 | in_channels: The input channel of the layer 10 | depth: The output channel of the layer 11 | Return: 12 | list of operation 13 | """ 14 | if layer == "SepConv:S": 15 | return [SepConvBNReLU(in_channels, depth, stride=2, expand_ratio=1)] 16 | elif layer == "SepConv": 17 | return [SepConvBNReLU(in_channels, depth, stride=1, expand_ratio=1)] 18 | elif layer == "Conv:S": 19 | return [ConvBNReLUx2(in_channels, depth, stride=2)] 20 | elif layer == "Conv": 21 | return [ConvBNReLUx2(in_channels, depth, stride=1)] 22 | elif layer == "RBF:S": 23 | return [BasicRFB(in_channels, depth, stride=2, scale=1.0, visual=2)] 24 | elif layer == "RBF": 25 | return [BasicRFB(in_channels, depth, stride=1, scale=1.0, visual=2)] 26 | elif isinstance(layer, int): 27 | # temp, need TODO improve 28 | return [] 29 | else: 30 | raise AssertionError("Undefined layer: {}".format(layer)) 31 | -------------------------------------------------------------------------------- /ssds/modeling/model_builder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import OrderedDict 3 | 4 | from . 
import ssds, nets 5 | from .layers.box import generate_anchors, configure_ratio_scale 6 | from .layers.decoder import Decoder 7 | 8 | 9 | def create_model(cfg): 10 | """ create the model based on the config files 11 | Returns: 12 | torch ssds model with backbone as net 13 | """ 14 | ratios, scales = configure_ratio_scale(len(cfg.SIZES), cfg.ASPECT_RATIOS, cfg.SIZES) 15 | number_box = [len(r) * len(s) for r, s in zip(ratios, scales)] 16 | nets_outputs, extras, head = getattr(ssds, cfg.SSDS).add_extras( 17 | feature_layer=cfg.FEATURE_LAYER, mbox=number_box, num_classes=cfg.NUM_CLASSES 18 | ) 19 | model = getattr(ssds, cfg.SSDS)( 20 | backbone=getattr(nets, cfg.NETS)( 21 | outputs=nets_outputs, num_images=cfg.NUM_IMAGES 22 | ), 23 | extras=extras, 24 | head=head, 25 | num_classes=cfg.NUM_CLASSES, 26 | ) 27 | return model 28 | 29 | 30 | def create_anchors(cfg, model, image_size, visualize=False): 31 | """ Current version of the anchor generation; it only generates the default anchors for each feature map layer 32 | Returns: 33 | anchors: OrderedDict(key=stride, value=default_anchors) 34 | """ 35 | model.eval() 36 | with torch.no_grad(): 37 | x = torch.rand( 38 | (1, 3, image_size[0], image_size[1]), device=next(model.parameters()).device 39 | ) 40 | conf = model(x)[-1] 41 | strides = [x.shape[-1] // c.shape[-1] for c in conf] 42 | 43 | ratios, scales = configure_ratio_scale(len(strides), cfg.ASPECT_RATIOS, cfg.SIZES) 44 | anchors = OrderedDict( 45 | [ 46 | (strides[i], generate_anchors(strides[i], ratios[i], scales[i])) 47 | for i in range(len(strides)) 48 | ] 49 | ) 50 | if visualize: 51 | print("Anchor Boxes (width, height)") 52 | [ 53 | print("Stride {}: {}".format(k, (v[:, 2:] - v[:, :2] + 1).int().tolist())) 54 | for k, v in anchors.items() 55 | ] 56 | return anchors 57 | 58 | 59 | def create_decoder(cfg): 60 | r""" Generate decoder based on the cfg.POST_PROCESS. 61 | 62 | The generated decoder is the object of class Decoder, check more details by :class:`ssds.modeling.layers.decoder.Decoder`.
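    A hedged end-to-end sketch of the three builders in this module (the cfg
    attribute paths are assumptions, and the model is assumed to return
    (loc, conf) as suggested by :meth:`create_anchors`)::

        model = create_model(cfg.MODEL)
        anchors = create_anchors(cfg.MODEL, model, cfg.MODEL.IMAGE_SIZE)
        decoder = create_decoder(cfg.POST_PROCESS)
        loc, conf = model(images)
        scores, boxes, classes = decoder(loc, conf, anchors)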
63 | 64 | Args: 65 | cfg: defined cfg.POST_PROCESS 66 | """ 67 | return Decoder( 68 | cfg.SCORE_THRESHOLD, 69 | cfg.IOU_THRESHOLD, 70 | cfg.MAX_DETECTIONS, 71 | cfg.MAX_DETECTIONS_PER_LEVEL, 72 | cfg.RESCORE_CENTER, 73 | cfg.USE_DIOU, 74 | ) 75 | -------------------------------------------------------------------------------- /ssds/modeling/nets/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .efficientnet import * 3 | from .mobilenet import * 4 | from .effnet import * 5 | from .shufflenet import * 6 | from .densenet import * 7 | from .inception_v2 import InceptionV2 8 | from .darknet import * 9 | from .regnet import * -------------------------------------------------------------------------------- /ssds/modeling/nets/darknet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .rutils import register 4 | 5 | 6 | def Conv3x3BNReLU(in_channels, out_channels, stride=1): 7 | return nn.Sequential( 8 | nn.Conv2d( 9 | in_channels=in_channels, 10 | out_channels=out_channels, 11 | kernel_size=3, 12 | stride=stride, 13 | padding=1, 14 | ), 15 | nn.BatchNorm2d(out_channels), 16 | nn.ReLU6(inplace=True), 17 | ) 18 | 19 | 20 | def Conv1x1BNReLU(in_channels, out_channels): 21 | return nn.Sequential( 22 | nn.Conv2d( 23 | in_channels=in_channels, 24 | out_channels=out_channels, 25 | kernel_size=1, 26 | stride=1, 27 | padding=0, 28 | ), 29 | nn.BatchNorm2d(out_channels), 30 | nn.ReLU6(inplace=True), 31 | ) 32 | 33 | 34 | class Residual(nn.Module): 35 | def __init__(self, nchannels): 36 | super(Residual, self).__init__() 37 | mid_channels = nchannels // 2 38 | self.conv1x1 = Conv1x1BNReLU(in_channels=nchannels, out_channels=mid_channels) 39 | self.conv3x3 = Conv3x3BNReLU(in_channels=mid_channels, out_channels=nchannels) 40 | 41 | def forward(self, x): 42 | out = self.conv3x3(self.conv1x1(x)) 43 | return out + x 44 | 45 | 46 | class DarkNet(nn.Module): 47 | def __init__( 48 | self, 49 | layers=[1, 2, 8, 8, 4], 50 | outputs=[5], 51 | groups=1, 52 | width_per_group=64, 53 | url=None, 54 | ): 55 | super(DarkNet, self).__init__() 56 | self.outputs = outputs 57 | self.url = url 58 | 59 | self.conv1 = Conv3x3BNReLU(in_channels=3, out_channels=32) 60 | 61 | self.block1 = self._make_layers( 62 | in_channels=32, out_channels=64, block_num=layers[0] 63 | ) 64 | self.block2 = self._make_layers( 65 | in_channels=64, out_channels=128, block_num=layers[1] 66 | ) 67 | self.block3 = self._make_layers( 68 | in_channels=128, out_channels=256, block_num=layers[2] 69 | ) 70 | self.block4 = self._make_layers( 71 | in_channels=256, out_channels=512, block_num=layers[3] 72 | ) 73 | self.block5 = self._make_layers( 74 | in_channels=512, out_channels=1024, block_num=layers[4] 75 | ) 76 | 77 | def _make_layers(self, in_channels, out_channels, block_num): 78 | _layers = [] 79 | _layers.append( 80 | Conv3x3BNReLU(in_channels=in_channels, out_channels=out_channels, stride=2) 81 | ) 82 | for _ in range(block_num): 83 | _layers.append(Residual(nchannels=out_channels)) 84 | return nn.Sequential(*_layers) 85 | 86 | def initialize(self): 87 | pass 88 | 89 | def forward(self, x): 90 | outputs = [] 91 | x = self.conv1(x) 92 | for level in range(1, 6): 93 | if level > max(self.outputs): 94 | break 95 | x = getattr(self, "block{}".format(level))(x) 96 | if level in self.outputs: 97 | outputs.append(x) 98 | return outputs 99 | 100 | 101 | @register 102 | def DarkNet53(outputs, 
**kwargs): 103 | return DarkNet(outputs=outputs) 104 | -------------------------------------------------------------------------------- /ssds/modeling/nets/densenet.py: -------------------------------------------------------------------------------- 1 | import re 2 | import torch 3 | import torch.nn as nn 4 | from torchvision.models import densenet 5 | import torch.utils.model_zoo as model_zoo 6 | from collections import OrderedDict 7 | from .rutils import register 8 | 9 | 10 | class DenseNet(nn.Module): 11 | def __init__( 12 | self, 13 | growth_rate=32, 14 | block_config=(6, 12, 24, 16), 15 | num_init_features=64, 16 | bn_size=4, 17 | drop_rate=0, 18 | memory_efficient=False, 19 | outputs=[], 20 | url=None, 21 | ): 22 | super(DenseNet, self).__init__() 23 | self.url = url 24 | self.outputs = outputs 25 | self.block_config = block_config 26 | 27 | # First convolution 28 | self.conv1 = nn.Sequential( 29 | OrderedDict( 30 | [ 31 | ( 32 | "conv", 33 | nn.Conv2d( 34 | 3, 35 | num_init_features, 36 | kernel_size=7, 37 | stride=2, 38 | padding=3, 39 | bias=False, 40 | ), 41 | ), 42 | ("norm", nn.BatchNorm2d(num_init_features)), 43 | ("relu", nn.ReLU(inplace=True)), 44 | ("pool", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), 45 | ] 46 | ) 47 | ) 48 | 49 | # Each denseblock 50 | num_features = num_init_features 51 | for i, num_layers in enumerate(block_config): 52 | block = densenet._DenseBlock( 53 | num_layers=num_layers, 54 | num_input_features=num_features, 55 | bn_size=bn_size, 56 | growth_rate=growth_rate, 57 | drop_rate=drop_rate, 58 | memory_efficient=memory_efficient, 59 | ) 60 | self.add_module("denseblock%d" % (i + 1), block) 61 | num_features = num_features + num_layers * growth_rate 62 | if i != len(block_config) - 1: 63 | trans = densenet._Transition( 64 | num_input_features=num_features, 65 | num_output_features=num_features // 2, 66 | ) 67 | self.add_module("transition%d" % (i + 1), trans) 68 | num_features = num_features // 2 69 | 70 | # Official init from torch repo. 
71 | for m in self.modules(): 72 | if isinstance(m, nn.Conv2d): 73 | nn.init.kaiming_normal_(m.weight) 74 | elif isinstance(m, nn.BatchNorm2d): 75 | nn.init.constant_(m.weight, 1) 76 | nn.init.constant_(m.bias, 0) 77 | elif isinstance(m, nn.Linear): 78 | nn.init.constant_(m.bias, 0) 79 | 80 | def initialize(self): 81 | if self.url: 82 | checkpoint = model_zoo.load_url(self.url) 83 | 84 | pattern = re.compile( 85 | r"^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$" 86 | ) 87 | for key in list(checkpoint.keys()): 88 | res = pattern.match(key) 89 | if res: 90 | new_key = res.group(1) + res.group(2) 91 | checkpoint[new_key] = checkpoint[key] 92 | del checkpoint[key] 93 | 94 | change_dict = { 95 | "features.conv0.": "conv1.conv.", 96 | "features.norm0.": "conv1.norm.", 97 | } 98 | for i, num_layers in enumerate(self.block_config): 99 | change_dict[ 100 | "features.denseblock{}.".format(i + 1) 101 | ] = "denseblock{}.".format(i + 1) 102 | change_dict[ 103 | "features.transition{}.".format(i + 1) 104 | ] = "transition{}.".format(i + 1) 105 | for k, v in list(checkpoint.items()): 106 | for _k, _v in list(change_dict.items()): 107 | if _k in k: 108 | new_key = k.replace(_k, _v) 109 | checkpoint[new_key] = checkpoint.pop(k) 110 | 111 | remove_dict = ["classifier.", "features.norm5."] 112 | for k, v in list(checkpoint.items()): 113 | for _k in remove_dict: 114 | if _k in k: 115 | checkpoint.pop(k) 116 | self.load_state_dict(checkpoint) 117 | 118 | def forward(self, x): 119 | x = self.conv1(x) 120 | 121 | outputs = [] 122 | for j in range(len(self.block_config)): 123 | level = j + 1 # only 1 conv before 124 | if level > max(self.outputs): 125 | break 126 | if level > 1: 127 | x = getattr(self, "transition{}".format(level - 1))(x) 128 | x = getattr(self, "denseblock{}".format(level))(x) 129 | if level in self.outputs: 130 | outputs.append(x) 131 | 132 | return outputs 133 | 134 | 135 | @register 136 | def DenseNet121(outputs, **kwargs): 137 | return DenseNet( 138 | 32, (6, 12, 24, 16), 64, outputs=outputs, url=densenet.model_urls["densenet121"] 139 | ) 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /ssds/modeling/nets/effnet.py: -------------------------------------------------------------------------------- 1 | """ This file is similar to the efficientnet file, but uses torch.hub (or geffnet) instead of a bundled implementation 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | from .rutils import register 7 | 8 | 9 | class EffNet(nn.Module): 10 | def __init__(self, model_name, outputs, exportable=False, **kwargs): 11 | super(EffNet, self).__init__() 12 | self.outputs = outputs 13 | 14 | if exportable: 15 | import geffnet 16 | 17 | geffnet.config.set_exportable(True) 18 | model = geffnet.create_model(model_name, **kwargs) 19 | else: 20 | model = torch.hub.load( 21 | "rwightman/gen-efficientnet-pytorch", model_name, **kwargs 22 | ) 23 | 24 | self.conv_stem = model.conv_stem 25 | self.bn1 = model.bn1 26 | self.act1 = model.act1 27 | for j in range(7): 28 | self.add_module( 29 | "block{}".format(j + 1), getattr(model.blocks, "{}".format(j)) 30 | ) 31 | 32 | def forward(self, x): 33 | x = self.act1(self.bn1(self.conv_stem(x))) 34 | 35 | outputs = [] 36 | for level in range(1, 8): 37 | if level > max(self.outputs): 38 | break 39 | x = getattr(self, "block{}".format(level))(x) 40 | if level in self.outputs: 41 | outputs.append(x) 42 | 43 | return outputs 44 | 45 | 46 |
| def initialize(self): 47 | pass 48 | 49 | 50 | @register 51 | def EffNetB0(outputs, **kwargs): 52 | return EffNet("efficientnet_b0", outputs, drop_connect_rate=0.2, pretrained=True) 53 | 54 | 55 | @register 56 | def EffNetB1(outputs, **kwargs): 57 | return EffNet("efficientnet_b1", outputs, drop_connect_rate=0.2, pretrained=True) 58 | 59 | 60 | @register 61 | def EffNetB2(outputs, **kwargs): 62 | return EffNet("efficientnet_b2", outputs, drop_connect_rate=0.2, pretrained=True) 63 | 64 | 65 | @register 66 | def EffNetB3(outputs, **kwargs): 67 | return EffNet("efficientnet_b3", outputs, drop_connect_rate=0.2, pretrained=True) 68 | 69 | 70 | @register 71 | def EffNetB4(outputs, **kwargs): 72 | return EffNet("efficientnet_b4", outputs, drop_connect_rate=0.2, pretrained=True) 73 | 74 | 75 | @register 76 | def EffNetB5(outputs, **kwargs): 77 | return EffNet("efficientnet_b5", outputs, drop_connect_rate=0.2, pretrained=True) 78 | 79 | 80 | @register 81 | def EffNetB6(outputs, **kwargs): 82 | return EffNet("efficientnet_b6", outputs, drop_connect_rate=0.2, pretrained=True) 83 | 84 | 85 | @register 86 | def EffNetB7(outputs, **kwargs): 87 | return EffNet("efficientnet_b7", outputs, drop_connect_rate=0.2, pretrained=True) 88 | 89 | 90 | @register 91 | def EffNetB0Ex(outputs, **kwargs): 92 | return EffNet( 93 | "efficientnet_b0", 94 | outputs, 95 | drop_connect_rate=0.2, 96 | pretrained=True, 97 | exportable=True, 98 | ) 99 | -------------------------------------------------------------------------------- /ssds/modeling/nets/mobilenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision.models import mobilenet 4 | import torch.utils.model_zoo as model_zoo 5 | from .rutils import register 6 | 7 | 8 | class SepConvBNReLU(nn.Sequential): 9 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, expand_ratio=1): 10 | padding = (kernel_size - 1) // 2 11 | super(SepConvBNReLU, self).__init__( 12 | # dw 13 | nn.Conv2d( 14 | in_planes, 15 | in_planes, 16 | kernel_size, 17 | stride, 18 | padding, 19 | groups=in_planes, 20 | bias=False, 21 | ), 22 | nn.BatchNorm2d(in_planes), 23 | nn.ReLU6(inplace=True), 24 | # pw 25 | nn.Conv2d(in_planes, out_planes, 1, 1, 0, bias=False), 26 | nn.BatchNorm2d(out_planes), 27 | nn.ReLU6(inplace=True), 28 | ) 29 | 30 | 31 | class MobileNet(nn.Module): 32 | def __init__(self, num_classes=1000, width_mult=1.0, version="v1", round_nearest=8): 33 | """ 34 | MobileNet V2 main class 35 | Args: 36 | num_classes (int): Number of classes 37 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 38 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 39 | Set to 1 to turn off rounding 40 | """ 41 | super(MobileNet, self).__init__() 42 | 43 | input_channel = 32 44 | if version == "v2": 45 | settings = [ 46 | # t, c, n, s 47 | [1, 16, 1, 1], 48 | [6, 24, 2, 2], 49 | [6, 32, 3, 2], 50 | [6, 64, 4, 2], 51 | [6, 96, 3, 1], 52 | [6, 160, 3, 2], 53 | [6, 320, 1, 1], 54 | ] 55 | last_channel = 1280 56 | layer = mobilenet.InvertedResidual 57 | elif version == "v1": 58 | settings = [ 59 | # t, c, n, s 60 | [1, 64, 1, 1], 61 | [1, 128, 2, 2], 62 | [1, 256, 2, 2], 63 | [1, 512, 6, 2], 64 | [1, 1024, 2, 2], 65 | ] 66 | last_channel = 1024 67 | layer = SepConvBNReLU 68 | self.settings = settings 69 | self.version = version 70 | 71 | # building first layer 72 | input_channel = mobilenet._make_divisible( 73 | 
input_channel * width_mult, round_nearest 74 | ) 75 | self.last_channel = mobilenet._make_divisible( 76 | last_channel * max(1.0, width_mult), round_nearest 77 | ) 78 | self.conv1 = mobilenet.ConvBNReLU(3, input_channel, stride=2) 79 | # building inverted residual blocks 80 | for j, (t, c, n, s) in enumerate(settings): 81 | output_channel = mobilenet._make_divisible(c * width_mult, round_nearest) 82 | layers = [] 83 | for i in range(n): 84 | stride = s if i == 0 else 1 85 | layers.append( 86 | layer(input_channel, output_channel, stride=stride, expand_ratio=t) 87 | ) 88 | input_channel = output_channel 89 | self.add_module("layer{}".format(j + 1), nn.Sequential(*layers)) 90 | # building last several layers 91 | if self.version == "v2": 92 | self.head_conv = mobilenet.ConvBNReLU( 93 | input_channel, self.last_channel, kernel_size=1 94 | ) 95 | 96 | # building classifier 97 | self.classifier = nn.Sequential( 98 | nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes), 99 | ) 100 | 101 | # weight initialization 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode="fan_out") 105 | if m.bias is not None: 106 | nn.init.zeros_(m.bias) 107 | elif isinstance(m, nn.BatchNorm2d): 108 | nn.init.ones_(m.weight) 109 | nn.init.zeros_(m.bias) 110 | elif isinstance(m, nn.Linear): 111 | nn.init.normal_(m.weight, 0, 0.01) 112 | nn.init.zeros_(m.bias) 113 | 114 | def forward(self, x): 115 | x = self.conv1(x) 116 | for j in range(len(self.settings)): 117 | x = getattr(self, "layer{}".format(j + 1))(x) 118 | if self.version == "v2": 119 | x = self.head_conv(x) 120 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 121 | x = self.classifier(x) 122 | return x 123 | 124 | 125 | class MobileNetEx(MobileNet): 126 | def __init__(self, width_mult=1.0, version="v1", outputs=[7], url=None): 127 | super(MobileNetEx, self).__init__(width_mult=width_mult, version=version) 128 | self.url = url 129 | self.outputs = outputs 130 | 131 | def initialize(self): 132 | if self.url: 133 | checkpoint = model_zoo.load_url(self.url) 134 | if self.version == "v2": 135 | change_dict = {"features.0.": "conv1."} 136 | f_idx = 1 137 | for j, (t, c, n, s) in enumerate(self.settings): 138 | for i in range(n): 139 | change_dict[ 140 | "features.{}.".format(f_idx) 141 | ] = "layer{}.{}.".format(j + 1, i) 142 | f_idx += 1 143 | change_dict["features.{}.".format(f_idx)] = "head_conv." 
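                # Illustrative key mapping (an assumption about the torchvision
                # checkpoint layout, not taken from this file itself):
                #   "features.1.conv.0.0.weight" -> "layer1.0.conv.0.0.weight"
                # i.e. the pretrained tensors are renamed so they line up with
                # the layer{j} / head_conv modules declared in __init__ above.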
144 | for k, v in list(checkpoint.items()): 145 | for _k, _v in list(change_dict.items()): 146 | if _k in k: 147 | new_key = k.replace(_k, _v) 148 | checkpoint[new_key] = checkpoint.pop(k) 149 | else: 150 | change_dict = {"features.Conv2d_0.conv.": "conv1."} 151 | f_idx = 1 152 | for j, (t, c, n, s) in enumerate(self.settings): 153 | for i in range(n): 154 | for z in range(2): 155 | change_dict[ 156 | "features.Conv2d_{}.depthwise.{}".format(f_idx, z) 157 | ] = "layer{}.{}.{}".format(j + 1, i, z) 158 | change_dict[ 159 | "features.Conv2d_{}.pointwise.{}".format(f_idx, z) 160 | ] = "layer{}.{}.{}".format(j + 1, i, z + 3) 161 | f_idx += 1 162 | for k, v in list(checkpoint.items()): 163 | for _k, _v in list(change_dict.items()): 164 | if _k in k: 165 | new_key = k.replace(_k, _v) 166 | checkpoint[new_key] = checkpoint.pop(k) 167 | 168 | remove_dict = ["classifier."] 169 | for k, v in list(checkpoint.items()): 170 | for _k in remove_dict: 171 | if _k in k: 172 | checkpoint.pop(k) 173 | 174 | org_checkpoint = self.state_dict() 175 | org_checkpoint.update(checkpoint) 176 | checkpoint = org_checkpoint 177 | 178 | self.load_state_dict(checkpoint) 179 | 180 | def forward(self, x): 181 | x = self.conv1(x) 182 | 183 | outputs = [] 184 | for j in range(len(self.settings)): 185 | level = j + 1 # only 1 conv before 186 | if level > max(self.outputs): 187 | break 188 | x = getattr(self, "layer{}".format(level))(x) 189 | if level in self.outputs: 190 | outputs.append(x) 191 | 192 | return outputs 193 | 194 | 195 | @register 196 | def MobileNetV1(outputs, **kwargs): 197 | return MobileNetEx( 198 | width_mult=1.0, 199 | version="v1", 200 | outputs=outputs, 201 | url="https://www.dropbox.com/s/kygo8l6dwah3djv/mobilenet_v1_1.0_224.pth?dl=1", 202 | ) 203 | 204 | 205 | @register 206 | def MobileNetV2(outputs, **kwargs): 207 | return MobileNetEx( 208 | width_mult=1.0, 209 | version="v2", 210 | outputs=outputs, 211 | url=mobilenet.model_urls["mobilenet_v2"], 212 | ) 213 | -------------------------------------------------------------------------------- /ssds/modeling/nets/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | from torchvision.models import resnet 5 | import torch.utils.model_zoo as model_zoo 6 | from .rutils import register 7 | 8 | 9 | class ResNet(resnet.ResNet): 10 | "Deep Residual Network - https://arxiv.org/abs/1512.03385" 11 | 12 | def __init__( 13 | self, 14 | layers=[3, 4, 6, 3], 15 | bottleneck=resnet.Bottleneck, 16 | outputs=[5], 17 | groups=1, 18 | width_per_group=64, 19 | url=None, 20 | ): 21 | self.stride = 128 22 | self.bottleneck = bottleneck 23 | self.outputs = outputs 24 | self.url = url 25 | 26 | # torchvision added support for ResNeXt in version 0.3.0, 27 | # and introduces additional args to torchvision.models.resnet constructor 28 | kwargs_common = {"block": bottleneck, "layers": layers} 29 | kwargs_extra = ( 30 | {"groups": groups, "width_per_group": width_per_group} 31 | if torchvision.__version__ > "0.2.1" 32 | else {} 33 | ) 34 | kwargs = {**kwargs_common, **kwargs_extra} 35 | super().__init__(**kwargs) 36 | 37 | def initialize(self): 38 | if self.url: 39 | self.load_state_dict(model_zoo.load_url(self.url)) 40 | 41 | def forward(self, x): 42 | x = self.conv1(x) 43 | x = self.bn1(x) 44 | x = self.relu(x) 45 | x = self.maxpool(x) 46 | 47 | outputs = [] 48 | for i, layer in enumerate([self.layer1, self.layer2, self.layer3, self.layer4]): 49 | level = i + 2 50 | if level > 
max(self.outputs): 51 | break 52 | x = layer(x) 53 | if level in self.outputs: 54 | outputs.append(x) 55 | 56 | return outputs 57 | 58 | 59 | @register 60 | def ResNet18(outputs, **kwargs): 61 | return ResNet( 62 | layers=[2, 2, 2, 2], 63 | bottleneck=resnet.BasicBlock, 64 | outputs=outputs, 65 | url=resnet.model_urls["resnet18"], 66 | ) 67 | 68 | 69 | @register 70 | def ResNet34(outputs, **kwargs): 71 | return ResNet( 72 | layers=[3, 4, 6, 3], 73 | bottleneck=resnet.BasicBlock, 74 | outputs=outputs, 75 | url=resnet.model_urls["resnet34"], 76 | ) 77 | 78 | 79 | @register 80 | def ResNet50(outputs, **kwargs): 81 | return ResNet( 82 | layers=[3, 4, 6, 3], 83 | bottleneck=resnet.Bottleneck, 84 | outputs=outputs, 85 | url=resnet.model_urls["resnet50"], 86 | ) 87 | 88 | 89 | @register 90 | def ResNet101(outputs, **kwargs): 91 | return ResNet( 92 | layers=[3, 4, 23, 3], 93 | bottleneck=resnet.Bottleneck, 94 | outputs=outputs, 95 | url=resnet.model_urls["resnet101"], 96 | ) 97 | 98 | 99 | @register 100 | def ResNet152(outputs, **kwargs): 101 | return ResNet( 102 | layers=[3, 8, 36, 3], 103 | bottleneck=resnet.Bottleneck, 104 | outputs=outputs, 105 | url=resnet.model_urls["resnet152"], 106 | ) 107 | 108 | 109 | @register 110 | def ResNeXt50_32x4d(outputs, **kwargs): 111 | return ResNet( 112 | layers=[3, 4, 6, 3], 113 | bottleneck=resnet.Bottleneck, 114 | outputs=outputs, 115 | groups=32, 116 | width_per_group=4, 117 | url=resnet.model_urls["resnext50_32x4d"], 118 | ) 119 | 120 | 121 | @register 122 | def ResNeXt101_32x8d(outputs, **kwargs): 123 | return ResNet( 124 | layers=[3, 4, 23, 3], 125 | bottleneck=resnet.Bottleneck, 126 | outputs=outputs, 127 | groups=32, 128 | width_per_group=8, 129 | url=resnet.model_urls["resnext101_32x8d"], 130 | ) 131 | 132 | 133 | @register 134 | def WideResNet50_2(outputs, **kwargs): 135 | return ResNet( 136 | layers=[3, 4, 6, 3], 137 | bottleneck=resnet.Bottleneck, 138 | outputs=outputs, 139 | width_per_group=128, 140 | url=resnet.model_urls["wide_resnet50_2"], 141 | ) 142 | 143 | 144 | @register 145 | def WideResNet101_2(outputs, **kwargs): 146 | return ResNet( 147 | layers=[3, 4, 23, 3], 148 | bottleneck=resnet.Bottleneck, 149 | outputs=outputs, 150 | width_per_group=128, 151 | url=resnet.model_urls["wide_resnet101_2"], 152 | ) -------------------------------------------------------------------------------- /ssds/modeling/nets/rutils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | def register(f): 5 | all = sys.modules[f.__module__].__dict__.setdefault("__all__", []) 6 | if f.__name__ in all: 7 | raise RuntimeError("{} already exist!".format(f.__name__)) 8 | all.append(f.__name__) 9 | return f 10 | -------------------------------------------------------------------------------- /ssds/modeling/nets/shufflenet.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | from torchvision.models import shufflenetv2 3 | import torch.utils.model_zoo as model_zoo 4 | from .rutils import register 5 | 6 | 7 | class ShuffleNetV2(shufflenetv2.ShuffleNetV2): 8 | def __init__(self, stages_repeats, stages_out_channels, outputs=[4], url=None): 9 | super(ShuffleNetV2, self).__init__(stages_repeats, stages_out_channels) 10 | self.outputs = outputs 11 | self.url = url 12 | 13 | def initialize(self): 14 | if self.url: 15 | self.load_state_dict(model_zoo.load_url(self.url)) 16 | 17 | def forward(self, x): 18 | x = self.maxpool(self.conv1(x)) 19 | 20 | outputs = [] 21 | 
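        # Same early-exit pattern as the other backbones in this package
        # (resnet.py, densenet.py): run the stages in order, stop once the
        # deepest requested level has been produced, and collect a map
        # whenever the current level is listed in self.outputs. E.g.
        # outputs=[3, 4] (an illustrative choice) keeps the stride-16 and
        # stride-32 features from stage3 and stage4.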
for i, stage in enumerate([self.stage2, self.stage3, self.stage4]): 22 | level = i + 2 23 | if level > max(self.outputs): 24 | break 25 | x = stage(x) 26 | if level in self.outputs: 27 | outputs.append(x) 28 | return outputs 29 | 30 | 31 | @register 32 | def ShuffleNetV2_x1(outputs, **kwargs): 33 | return ShuffleNetV2( 34 | [4, 8, 4], 35 | [24, 116, 232, 464, 1024], 36 | outputs=outputs, 37 | url=shufflenetv2.model_urls["shufflenetv2_x1.0"], 38 | ) 39 | 40 | 41 | @register 42 | def ShuffleNetV2_x2(outputs, **kwargs): 43 | return ShuffleNetV2([4, 8, 4], [24, 244, 488, 976, 2048], outputs=outputs) 44 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/__init__.py: -------------------------------------------------------------------------------- 1 | from .ssd import SSD 2 | from .fpn import SSDFPN 3 | from .shelf import SSDShelf 4 | from .bifpn import SSDBiFPN 5 | from .yolo import YOLOV3, YOLOV4 -------------------------------------------------------------------------------- /ssds/modeling/ssds/bifpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .ssdsbase import SSDSBase 6 | from .fpn import SharedHead 7 | from ssds.modeling.layers.basic_layers import ConvBNReLU, SepConvBNReLU 8 | 9 | 10 | class BiFPNModule(nn.Module): 11 | def __init__(self, channels, levels, init=0.5, block=ConvBNReLU): 12 | super(BiFPNModule, self).__init__() 13 | 14 | self.levels = levels 15 | self.w1 = nn.Parameter(torch.Tensor(2, levels).fill_(init)) 16 | self.w2 = nn.Parameter(torch.Tensor(3, levels - 2).fill_(init)) 17 | 18 | for i in range(levels - 1, 0, -1): 19 | self.add_module("top-down-{}".format(i - 1), block(channels, channels)) 20 | 21 | for i in range(0, levels - 1, 1): 22 | self.add_module("bottom-up-{}".format(i + 1), block(channels, channels)) 23 | 24 | for m in self.modules(): 25 | if isinstance(m, nn.Conv2d): 26 | nn.init.xavier_uniform_(m.weight) 27 | if m.bias is not None: 28 | nn.init.constant_(m.bias, val=0) 29 | 30 | def forward(self, xx): 31 | assert len(xx) == self.levels 32 | levels = self.levels 33 | 34 | # normalize weights 35 | w1 = F.relu(self.w1) 36 | w1 /= torch.sum(w1, dim=0) + 1e-6 37 | w2 = F.relu(self.w2) 38 | w2 /= torch.sum(w2, dim=0) + 1e-6 39 | 40 | # build top-down 41 | xs = [[]] + [x for x in xx[1:-1]] + [[]] 42 | for i in range(levels - 1, 0, -1): 43 | xx[i - 1] = w1[0, i - 1] * xx[i - 1] + w1[1, i - 1] * F.interpolate( 44 | xx[i], scale_factor=2, mode="nearest" 45 | ) 46 | xx[i - 1] = getattr(self, "top-down-{}".format(i - 1))(xx[i - 1]) 47 | 48 | # build bottom-up 49 | for i in range(0, levels - 2, 1): 50 | xx[i + 1] = ( 51 | w2[0, i] * xx[i + 1] 52 | + w2[1, i] * F.max_pool2d(xx[i], kernel_size=2) 53 | + w2[2, i] * xs[i + 1] 54 | ) 55 | xx[i + 1] = getattr(self, "bottom-up-{}".format(i + 1))(xx[i + 1]) 56 | 57 | xx[levels - 1] = w1[0, levels - 1] * xx[levels - 1] + w1[ 58 | 1, levels - 1 59 | ] * F.max_pool2d(xx[levels - 2], kernel_size=2) 60 | xx[levels - 1] = getattr(self, "bottom-up-{}".format(levels - 1))( 61 | xx[levels - 1] 62 | ) 63 | return xx 64 | 65 | 66 | class SSDBiFPN(SSDSBase): 67 | """EfficientDet: Scalable and Efficient Object Detection 68 | See: https://arxiv.org/abs/1911.09070v6 for more details. 
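    The BiFPNModule above fuses neighbouring pyramid levels with the paper's
    "fast normalized fusion": the learnable weights are rectified and
    normalized before mixing. In the notation of the code (a paraphrase, not
    the paper's exact formula), the top-down step is

        w1 = relu(w1) / (sum(w1, dim=0) + 1e-6)
        P[i-1] <- ConvBNReLU(w1[0, i-1] * P[i-1] + w1[1, i-1] * Up(P[i]))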
69 | 
70 | Compared with the original implementation, the conv2d layers in the extras and heads
71 | are changed to ConvBNReLU, which helps the model converge more easily.
72 | BN & ReLU are not added to the transforms because they are followed by interpolate and an element-wise sum.
73 | 
74 | Args:
75 | backbone: backbone layers for input
76 | extras: contains transforms, extra and stack_bifpn layers that feed to multibox loc and conf layers
77 | head: "multibox head" consists of loc and conf conv layers
78 | num_classes: num of classes
79 | """
80 | 
81 | def __init__(self, backbone, extras, head, num_classes):
82 | super(SSDBiFPN, self).__init__(backbone, num_classes)
83 | 
84 | # SSD network
85 | self.transforms = nn.ModuleList(extras[0])
86 | self.extras = nn.ModuleList(extras[1])
87 | self.stack_bifpn = extras[2]
88 | self.loc = head[0]
89 | self.conf = head[1]
90 | 
91 | self.initialize()
92 | 
93 | def initialize(self):
94 | r"""
95 | :meta private:
96 | """
97 | self.backbone.initialize()
98 | self.transforms.apply(self.initialize_extra)
99 | self.extras.apply(self.initialize_extra)
100 | self.loc.apply(self.initialize_head)
101 | self.conf.apply(self.initialize_head)
102 | self.conf[-1].apply(self.initialize_prior)
103 | 
104 | def forward(self, x):
105 | r"""Applies network layers and ops on input image(s) x.
106 | 
107 | Args:
108 | x: input image or batch of images.
109 | 
110 | Return:
111 | When self.training==True, loc and conf for each anchor box;
112 | 
113 | When self.training==False, loc and conf.sigmoid() for each anchor box;
114 | 
115 | For each pyramid layer, conf with shape [batch, num_anchor*num_classes, height, width];
116 | 
117 | For each pyramid layer, loc with shape [batch, num_anchor*4, height, width].
118 | """
119 | loc, conf = [list() for _ in range(2)]
120 | 
121 | # apply backbone layers and cache source layer outputs
122 | features = self.backbone(x)
123 | 
124 | x = features[-1]
125 | features_len = len(features)
126 | for i in range(features_len):
127 | features[i] = self.transforms[i](features[i])
128 | features = self.stack_bifpn(features)
129 | 
130 | for i, v in enumerate(self.extras):
131 | if i < features_len:
132 | xx = v(features[i])
133 | elif i == features_len:
134 | xx = v(x)
135 | else:
136 | xx = v(xx)
137 | loc.append(self.loc(xx))
138 | conf.append(self.conf(xx))
139 | 
140 | if not self.training:
141 | conf = [c.sigmoid() for c in conf]
142 | return tuple(loc), tuple(conf)
143 | 
144 | @staticmethod
145 | def add_extras(feature_layer, mbox, num_classes):
146 | r"""Define and declare the extras, loc and conf modules for the ssdbifpn model.
147 | 
148 | The feature_layer is defined in cfg.MODEL.FEATURE_LAYER. For the ssdbifpn model it can be an int, a list of int, or a str:
149 | 
150 | * int
151 | The int in the feature_layer represents the output feature in the backbone.
152 | * list of int
153 | The list of int in the feature_layer represents the output feature in the backbone, the first int is the \
154 | backbone output and the second int is the upsampling branch to fuse feature.
155 | * str
156 | The str in the feature_layer represents the extra layers appended at the end of the backbone (see the example below).
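        Example (illustrative values for a ResNet50-style backbone; these
        numbers are assumptions, not a shipped config)::

            FEATURE_LAYER: [[3, 4, 5, 'Conv:S', 'Conv:S'],
                            [512, 1024, 2048, 2048, 256],
                            2]  # optional 3rd entry: number of stacked BiFPNModules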
157 | 
158 | Args:
159 | feature_layer: the feature layers with detection head, defined by cfg.MODEL.FEATURE_LAYER
160 | mbox: the number of boxes for each feature map
161 | num_classes: the number of classes, defined by cfg.MODEL.NUM_CLASSES
162 | """
163 | nets_outputs, transform_layers, extra_layers = [list() for _ in range(3)]
164 | transform_layers = []
165 | if not all(mbox[i] == mbox[i + 1] for i in range(len(mbox) - 1)):
166 | raise ValueError(
167 | "For the SSDBiFPN module, the number of boxes has to be the same in every layer"
168 | )
169 | loc_layers = SharedHead(mbox[0] * 4)
170 | conf_layers = SharedHead(mbox[0] * num_classes)
171 | 
172 | for layer, depth in zip(feature_layer[0], feature_layer[1]):
173 | if isinstance(layer, int):
174 | nets_outputs.append(layer)
175 | transform_layers += [
176 | nn.Conv2d(depth, 256, 1)
177 | ] # [ConvBNReLU(depth, 256, 1)]
178 | extra_layers += [
179 | ConvBNReLU(256, 256, 3)
180 | ] # [nn.Conv2d(256, 256, 3, padding=1)]
181 | elif layer == "Conv:S":
182 | extra_layers += [
183 | ConvBNReLU(depth, 256, 3, stride=2)
184 | ] # [nn.Conv2d(depth, 256, 3, stride=2, padding=1)]
185 | else:
186 | raise ValueError(layer + " is not supported by SSDBiFPN")
187 | num_stack = 1 if len(feature_layer) == 2 else feature_layer[2]
188 | fpn = nn.Sequential(
189 | *[BiFPNModule(256, len(transform_layers)) for _ in range(num_stack)]
190 | )
191 | return (
192 | nets_outputs,
193 | (transform_layers, extra_layers, fpn),
194 | (loc_layers, conf_layers),
195 | )
196 | 
197 | 
198 | if __name__ == "__main__":
199 | model = BiFPNModule(1, 4)
200 | model.eval()
201 | xx = [torch.ones(1, 1, i, i) * i for i in [8, 4, 2, 1]]
202 | model(xx)
203 | 
204 | torch.onnx.export(model, xx, "test.onnx")
205 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/fcos.py: -------------------------------------------------------------------------------- 
1 | import copy
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | 
6 | from .ssdsbase import SSDSBase
7 | from ssds.modeling.layers.basic_layers import ConvBNReLU
8 | 
9 | 
10 | class SharedHead(nn.Sequential):
11 | def __init__(self, out_planes):
12 | layers = []
13 | for _ in range(4):
14 | layers += [
15 | ConvBNReLU(256, 256, 3)
16 | ] # [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU()]
17 | layers += [nn.Conv2d(256, out_planes, 3, padding=1)]
18 | super(SharedHead, self).__init__(*layers)
19 | 
20 | 
21 | class FCOS(SSDSBase):
22 | """FCOS: Fully Convolutional One-Stage Object Detection
23 | See: https://arxiv.org/abs/1904.01355 for more details.
24 | Compared with the original implementation, the conv2d layers in the extras and heads
25 | are changed to ConvBNReLU, which helps the model converge more easily.
26 | BN & ReLU are not added to the transforms because they are followed by interpolate and an element-wise sum.
27 | 
28 | Args:
29 | backbone: backbone layers for input
30 | extras: extra layers that feed to multibox loc and conf layers
31 | head: "multibox head" consists of loc and conf conv layers
32 | num_classes: num of classes
33 | """
34 | 
35 | def __init__(self, backbone, extras, head, num_classes):
36 | super(FCOS, self).__init__(backbone, num_classes)
37 | 
38 | # SSD network
39 | self.transforms = nn.ModuleList(extras[0])
40 | self.extras = nn.ModuleList(extras[1])
41 | self.loc = head[0]
42 | self.conf = head[1]
43 | 
44 | self.initialize()
45 | 
46 | def initialize(self):
47 | self.backbone.initialize()
48 | self.transforms.apply(self.initialize_extra)
49 | self.extras.apply(self.initialize_extra)
50 | self.loc.apply(self.initialize_head)
51 | self.conf.apply(self.initialize_head)
52 | self.conf[-1].apply(self.initialize_prior)
53 | 
54 | def forward(self, x):
55 | loc, conf, center = [list() for _ in range(3)]
56 | 
57 | # apply backbone layers and cache source layer outputs
58 | features = self.backbone(x)
59 | 
60 | x = features[-1]
61 | features_len = len(features)
62 | for i in range(len(features))[::-1]:
63 | if i != features_len - 1:
64 | xx = F.interpolate(
65 | xx, scale_factor=2, mode="nearest"
66 | ) + self.transforms[i](features[i])
67 | else:
68 | xx = self.transforms[i](features[i])
69 | features[i] = xx
70 | 
71 | for i, v in enumerate(self.extras):
72 | if i < features_len:
73 | xx = v(features[i])
74 | elif i == features_len:
75 | xx = v(x)
76 | else:
77 | xx = v(xx)
78 | loc.append(self.loc(xx))
79 | conf_out = self.conf(xx)
80 | conf.append(conf_out[:, : self.num_classes].contiguous())
81 | center.append(conf_out[:, self.num_classes :].contiguous())
82 | 
83 | if not self.training:
84 | conf = [c.sigmoid() for c in conf]
85 | return tuple(loc), tuple(conf), tuple(center)
86 | 
87 | @staticmethod
88 | def add_extras(feature_layer, mbox, num_classes):
89 | nets_outputs, transform_layers, extra_layers = [list() for _ in range(3)]
90 | if not all(mbox[i] == mbox[i + 1] for i in range(len(mbox) - 1)):
91 | raise ValueError(
92 | "For the FCOS module, the number of boxes has to be the same in every layer"
93 | )
94 | loc_layers = SharedHead(4)
95 | conf_layers = SharedHead(num_classes + 1)
96 | 
97 | for layer, depth in zip(feature_layer[0], feature_layer[1]):
98 | if isinstance(layer, int):
99 | nets_outputs.append(layer)
100 | transform_layers += [
101 | nn.Conv2d(depth, 256, 1)
102 | ] # [ConvBNReLU(depth, 256, 1)]
103 | extra_layers += [
104 | ConvBNReLU(256, 256, 3)
105 | ] # [nn.Conv2d(256, 256, 3, padding=1)]
106 | elif layer == "Conv:S":
107 | extra_layers += [
108 | ConvBNReLU(depth, 256, 3, stride=2)
109 | ] # [nn.Conv2d(depth, 256, 3, stride=2, padding=1)]
110 | else:
111 | raise ValueError(layer + " is not supported by FCOS")
112 | return nets_outputs, (transform_layers, extra_layers), (loc_layers, conf_layers)
113 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/fpn.py: -------------------------------------------------------------------------------- 
1 | import copy
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | 
6 | from .ssdsbase import SSDSBase
7 | from ssds.modeling.layers.basic_layers import ConvBNReLU
8 | 
9 | 
10 | class SharedHead(nn.Sequential):
11 |
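    # Descriptive note (not upstream code): this is a RetinaNet-style shared
    # subnet -- four 3x3 ConvBNReLU blocks followed by a 3x3 projection to
    # `out_planes`. A single instance is reused on every pyramid level in the
    # forward pass below, so the head parameters are shared across scales.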
def __init__(self, out_planes):
12 | layers = []
13 | for _ in range(4):
14 | layers += [
15 | ConvBNReLU(256, 256, 3)
16 | ] # [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU()]
17 | layers += [nn.Conv2d(256, out_planes, 3, padding=1)]
18 | super(SharedHead, self).__init__(*layers)
19 | 
20 | 
21 | class SSDFPN(SSDSBase):
22 | """RetinaNet in Focal Loss for Dense Object Detection
23 | See: https://arxiv.org/abs/1708.02002v2 for more details.
24 | 
25 | Compared with the original implementation, the conv2d layers in the extras and heads
26 | are changed to ConvBNReLU, which helps the model converge more easily.
27 | BN & ReLU are not added to the transforms because they are followed by interpolate and an element-wise sum.
28 | 
29 | Args:
30 | backbone: backbone layers for input
31 | extras: contains transforms and extra layers that feed to multibox loc and conf layers
32 | head: "multibox head" consists of loc and conf conv layers
33 | num_classes: num of classes
34 | """
35 | 
36 | def __init__(self, backbone, extras, head, num_classes):
37 | super(SSDFPN, self).__init__(backbone, num_classes)
38 | 
39 | # SSD network
40 | self.transforms = nn.ModuleList(extras[0])
41 | self.extras = nn.ModuleList(extras[1])
42 | self.loc = head[0]
43 | self.conf = head[1]
44 | 
45 | self.initialize()
46 | 
47 | def initialize(self):
48 | r"""
49 | :meta private:
50 | """
51 | self.backbone.initialize()
52 | self.transforms.apply(self.initialize_extra)
53 | self.extras.apply(self.initialize_extra)
54 | self.loc.apply(self.initialize_head)
55 | self.conf.apply(self.initialize_head)
56 | self.conf[-1].apply(self.initialize_prior)
57 | 
58 | def forward(self, x):
59 | r"""Applies network layers and ops on input image(s) x.
60 | 
61 | Args:
62 | x: input image or batch of images.
63 | 
64 | Return:
65 | When self.training==True, loc and conf for each anchor box;
66 | 
67 | When self.training==False, loc and conf.sigmoid() for each anchor box;
68 | 
69 | For each pyramid layer, conf with shape [batch, num_anchor*num_classes, height, width];
70 | 
71 | For each pyramid layer, loc with shape [batch, num_anchor*4, height, width].
72 | """
73 | loc, conf = [list() for _ in range(2)]
74 | 
75 | # apply backbone layers and cache source layer outputs
76 | features = self.backbone(x)
77 | 
78 | x = features[-1]
79 | features_len = len(features)
80 | for i in range(len(features))[::-1]:
81 | if i != features_len - 1:
82 | xx = F.interpolate(
83 | xx, scale_factor=2, mode="nearest"
84 | ) + self.transforms[i](features[i])
85 | else:
86 | xx = self.transforms[i](features[i])
87 | features[i] = xx
88 | 
89 | for i, v in enumerate(self.extras):
90 | if i < features_len:
91 | xx = v(features[i])
92 | elif i == features_len:
93 | xx = v(x)
94 | else:
95 | xx = v(xx)
96 | loc.append(self.loc(xx))
97 | conf.append(self.conf(xx))
98 | 
99 | if not self.training:
100 | conf = [c.sigmoid() for c in conf]
101 | return tuple(loc), tuple(conf)
102 | 
103 | @staticmethod
104 | def add_extras(feature_layer, mbox, num_classes):
105 | r"""Define and declare the extras, loc and conf modules for the ssdfpn model.
106 | 
107 | The feature_layer is defined in cfg.MODEL.FEATURE_LAYER. For the ssdfpn model it can be an int, a list of int, or a str:
108 | 
109 | * int
110 | The int in the feature_layer represents the output feature in the backbone.
111 | * list of int
112 | The list of int in the feature_layer represents the output feature in the backbone, the first int is the \
113 | backbone output and the second int is the upsampling branch to fuse feature.
114 | * str
115 | The str in the feature_layer represents the extra layers appended at the end of the backbone.
116 | 
117 | Args:
118 | feature_layer: the feature layers with detection head, defined by cfg.MODEL.FEATURE_LAYER
119 | mbox: the number of boxes for each feature map
120 | num_classes: the number of classes, defined by cfg.MODEL.NUM_CLASSES
121 | """
122 | 
123 | nets_outputs, transform_layers, extra_layers = [list() for _ in range(3)]
124 | if not all(mbox[i] == mbox[i + 1] for i in range(len(mbox) - 1)):
125 | raise ValueError(
126 | "For the SSDFPN module, the number of boxes has to be the same in every layer"
127 | )
128 | loc_layers = SharedHead(mbox[0] * 4)
129 | conf_layers = SharedHead(mbox[0] * num_classes)
130 | 
131 | for layer, depth in zip(feature_layer[0], feature_layer[1]):
132 | if isinstance(layer, int):
133 | nets_outputs.append(layer)
134 | transform_layers += [
135 | nn.Conv2d(depth, 256, 1)
136 | ] # [ConvBNReLU(depth, 256, 1)]
137 | extra_layers += [
138 | ConvBNReLU(256, 256, 3)
139 | ] # [nn.Conv2d(256, 256, 3, padding=1)]
140 | elif layer == "Conv:S":
141 | extra_layers += [
142 | ConvBNReLU(depth, 256, 3, stride=2)
143 | ] # [nn.Conv2d(depth, 256, 3, stride=2, padding=1)]
144 | else:
145 | raise ValueError(layer + " is not supported by SSDFPN")
146 | return nets_outputs, (transform_layers, extra_layers), (loc_layers, conf_layers)
147 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/fssd.py: -------------------------------------------------------------------------------- 
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | 
5 | import os
6 | 
7 | from ssds.modeling.layers.basic_layers import _conv
8 | from ssds.modeling.layers.layers_parser import parse_feature_layer
9 | 
10 | 
11 | class FSSD(nn.Module):
12 | """FSSD: Feature Fusion Single Shot Multibox Detector
13 | See: https://arxiv.org/pdf/1712.00960.pdf for more details.
14 | 
15 | Args:
16 | phase: (string) Can be "eval" or "train" or "feature"
17 | base: base layers for input
18 | extras: extra layers that feed to multibox loc and conf layers
19 | head: "multibox head" consists of loc and conf conv layers
20 | features: the feature-fusion and pyramid layers used to fuse features and build the pyramids
21 | feature_layer: the feature layers that feed the loc and conf heads
22 | num_classes: num of classes
23 | """
24 | 
25 | def __init__(self, base, extras, head, features, feature_layer, num_classes):
26 | super(FSSD, self).__init__()
27 | self.num_classes = num_classes
28 | 
29 | # FSSD network
30 | self.base = nn.ModuleList(base)
31 | self.extras = nn.ModuleList(extras)
32 | self.feature_layer = feature_layer[0][0]
33 | self.transforms = nn.ModuleList(features[0])
34 | self.pyramids = nn.ModuleList(features[1])
35 | self.norm = nn.BatchNorm2d(
36 | int(feature_layer[0][1][-1] / 2) * len(self.transforms), affine=True
37 | )
38 | 
39 | self.loc = nn.ModuleList(head[0])
40 | self.conf = nn.ModuleList(head[1])
41 | 
42 | self.softmax = nn.Softmax(dim=-1)
43 | 
44 | def forward(self, x, phase="eval"):
45 | """Applies network layers and ops on input image(s) x.
46 | 
47 | Args:
48 | x: input image or batch of images. Shape: [batch,3,300,300].
49 | 
50 | Return:
51 | Depending on phase:
52 | test:
53 | Variable(tensor) of output class label predictions,
54 | confidence score, and corresponding location predictions for
55 | each object detected.
Shape: [batch,topk,7] 56 | 57 | train: 58 | list of concat outputs from: 59 | 1: confidence layers, Shape: [batch*num_priors,num_classes] 60 | 2: localization layers, Shape: [batch,num_priors*4] 61 | 62 | feature: 63 | the features maps of the feature extractor 64 | """ 65 | sources, transformed, pyramids, loc, conf = [list() for _ in range(5)] 66 | 67 | # apply bases layers and cache source layer outputs 68 | for k in range(len(self.base)): 69 | x = self.base[k](x) 70 | if k in self.feature_layer: 71 | sources.append(x) 72 | 73 | # apply extra layers and cache source layer outputs 74 | for k, v in enumerate(self.extras): 75 | x = v(x) 76 | sources.append(x) 77 | 78 | assert len(self.transforms) == len(sources) 79 | upsize = (sources[0].size()[2], sources[0].size()[3]) 80 | 81 | for k, v in enumerate(self.transforms): 82 | size = None if k == 0 else upsize 83 | transformed.append(v(sources[k], size)) 84 | x = torch.cat(transformed, 1) 85 | x = self.norm(x) 86 | for k, v in enumerate(self.pyramids): 87 | x = v(x) 88 | pyramids.append(x) 89 | 90 | if phase == "feature": 91 | return pyramids 92 | 93 | # apply multibox head to pyramids layers 94 | for (x, l, c) in zip(sources, self.loc, self.conf): 95 | loc.append(l(x).view(x.size(0), 4, -1)) 96 | conf.append(c(x).view(x.size(0), self.num_classes, -1)) 97 | loc = torch.cat(loc, 2).contiguous() 98 | conf = torch.cat(conf, 2).contiguous() 99 | 100 | return loc, conf 101 | 102 | 103 | class BasicConvWithUpSample(nn.Module): 104 | # temp, need TODO improve 105 | def __init__( 106 | self, 107 | in_planes, 108 | out_planes, 109 | kernel_size, 110 | stride=1, 111 | padding=0, 112 | dilation=1, 113 | groups=1, 114 | relu=True, 115 | bn=False, 116 | bias=True, 117 | ): 118 | super(BasicConvWithUpSample, self).__init__() 119 | self.out_channels = out_planes 120 | self.conv = nn.Conv2d( 121 | in_planes, 122 | out_planes, 123 | kernel_size=kernel_size, 124 | stride=stride, 125 | padding=padding, 126 | dilation=dilation, 127 | groups=groups, 128 | bias=bias, 129 | ) 130 | self.bn = ( 131 | nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) 132 | if bn 133 | else None 134 | ) 135 | self.relu = nn.ReLU(inplace=True) if relu else None 136 | 137 | def forward(self, x, up_size=None): 138 | x = self.conv(x) 139 | if self.bn is not None: 140 | x = self.bn(x) 141 | if self.relu is not None: 142 | x = self.relu(x) 143 | if up_size is not None: 144 | x = F.upsample(x, size=up_size, mode="bilinear") 145 | return x 146 | 147 | 148 | def add_extras(base, feature_layer, mbox, num_classes, version): 149 | extra_layers = [] 150 | feature_transform_layers = [] 151 | pyramid_feature_layers = [] 152 | loc_layers = [] 153 | conf_layers = [] 154 | in_channels = None 155 | 156 | feature_transform_channel = int(feature_layer[0][1][-1] / 2) 157 | for layer, depth in zip(feature_layer[0][0], feature_layer[0][1]): 158 | extra_layers += parse_feature_layer(layer, in_channels, depth) 159 | in_channels = depth 160 | feature_transform_layers += [ 161 | BasicConvWithUpSample( 162 | in_channels, feature_transform_channel, kernel_size=1, padding=0 163 | ) 164 | ] 165 | 166 | in_channels = len(feature_transform_layers) * feature_transform_channel 167 | for layer, depth, box in zip(feature_layer[1][0], feature_layer[1][1], mbox): 168 | pyramid_feature_layers += parse_feature_layer(layer, in_channels, depth) 169 | in_channels = depth 170 | loc_layers += [nn.Conv2d(in_channels, box * 4, kernel_size=3, padding=1)] 171 | conf_layers += [ 172 | nn.Conv2d(in_channels, box * 
num_classes, kernel_size=3, padding=1) 173 | ] 174 | return ( 175 | base, 176 | extra_layers, 177 | (feature_transform_layers, pyramid_feature_layers), 178 | (loc_layers, conf_layers), 179 | ) 180 | 181 | 182 | def build_fssd(base, feature_layer, mbox, num_classes): 183 | base_, extras_, features_, head_ = add_extras( 184 | base(), feature_layer, mbox, num_classes, version="fssd" 185 | ) 186 | return FSSD(base_, extras_, head_, features_, feature_layer, num_classes) 187 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/shelf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from collections import OrderedDict 4 | 5 | from .ssdsbase import SSDSBase 6 | from ssds.modeling.layers.layers_parser import parse_feature_layer 7 | from ssds.modeling.layers.basic_layers import ConvBNReLU 8 | 9 | 10 | class SharedBlock(nn.Module): 11 | """ The conv params in this block is shared 12 | """ 13 | 14 | def __init__(self, planes): 15 | super(SharedBlock, self).__init__() 16 | 17 | self.planes = planes 18 | self.conv1 = nn.Conv2d( 19 | self.planes, self.planes, kernel_size=3, stride=1, padding=1, bias=False 20 | ) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.relu1 = nn.ReLU(inplace=True) 23 | self.drop = nn.Dropout2d(p=0.25) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | self.relu2 = nn.ReLU(inplace=True) 26 | 27 | def forward(self, x): 28 | out = self.conv1(x) 29 | out = self.bn1(out) 30 | out = self.relu1(out) 31 | out = self.drop(out) 32 | out = self.conv1(out) 33 | out = self.bn2(out) 34 | out = out + x 35 | return self.relu2(out) 36 | 37 | 38 | class ShelfPyramid(nn.Module): 39 | def __init__(self, settings, conv=nn.ConvTranspose2d, block=SharedBlock): 40 | super().__init__() 41 | 42 | # "output_padding":1 is not work for tensorrt 43 | extra_args = {"padding": 1, "bias": True} if conv == nn.ConvTranspose2d else {} 44 | for i, depth in enumerate(settings): 45 | if i == 0: 46 | self.add_module("block{}".format(i), block(depth)) 47 | else: 48 | self.add_module("block{}".format(i), block(depth)) 49 | self.add_module( 50 | "conv{}".format(i), 51 | conv(settings[i - 1], depth, kernel_size=3, stride=2, **extra_args), 52 | ) 53 | 54 | def forward(self, xx): 55 | out = [] 56 | x = xx[0] 57 | for i in range(len(xx)): 58 | if i != 0: 59 | x = getattr(self, "conv{}".format(i))(x) + xx[i] 60 | x = getattr(self, "block{}".format(i))(x) 61 | out.append(x) 62 | return out[::-1] 63 | 64 | 65 | class Head(nn.Sequential): 66 | def __init__(self, in_channels, out_planes): 67 | super(Head, self).__init__( 68 | ConvBNReLU(in_channels, in_channels, 3), 69 | nn.Conv2d(in_channels, out_planes, 3, padding=1), 70 | ) 71 | 72 | 73 | class SSDShelf(SSDSBase): 74 | """ShelfNet for Fast Semantic Segmentation 75 | See: https://arxiv.org/pdf/1811.11254.pdf for more details. 
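    This detection variant runs the transformed backbone features through a
    three-column shelf (the decoder0 -> encoder0 -> decoder1 ShelfPyramid
    stack built in add_extras), and SharedBlock applies its single conv twice
    so each residual block is weight-shared, as in the paper.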
76 | 77 | Args: 78 | backbone: backbone layers for input 79 | extras: extra layers that feed to multibox loc and conf layers 80 | head: "multibox head" consists of loc and conf conv layers 81 | num_classes: num of classes 82 | """ 83 | 84 | def __init__(self, backbone, extras, head, num_classes): 85 | super(SSDShelf, self).__init__(backbone, num_classes) 86 | 87 | self.transforms = nn.ModuleList(extras[0]) 88 | self.shelf_head = nn.Sequential(extras[1]) 89 | self.loc = nn.ModuleList(head[0]) 90 | self.conf = nn.ModuleList(head[1]) 91 | 92 | self.initialize() 93 | 94 | def initialize(self): 95 | self.backbone.initialize() 96 | self.transforms.apply(self.initialize_extra) 97 | self.shelf_head.apply(self.initialize_extra) 98 | self.loc.apply(self.initialize_head) 99 | self.conf.apply(self.initialize_head) 100 | for c in self.conf: 101 | c[-1].apply(self.initialize_prior) 102 | 103 | def forward(self, x): 104 | loc, conf = [list() for _ in range(2)] 105 | 106 | # apply bases layers and cache source layer outputs 107 | features = self.backbone(x) 108 | 109 | features_len = len(features) 110 | features = [self.transforms[i](x) for i, x in enumerate(features)] 111 | 112 | features = self.shelf_head(features[::-1]) 113 | for i in range(len(features), len(self.transforms)): 114 | features.append(self.transforms[i](features[-1])) 115 | 116 | # apply multibox head to source layers 117 | for (x, l, c) in zip(features, self.loc, self.conf): 118 | loc.append(l(x)) 119 | conf.append(c(x)) 120 | 121 | if not self.training: 122 | conf = [c.sigmoid() for c in conf] 123 | return tuple(loc), tuple(conf) 124 | 125 | @staticmethod 126 | def add_extras(feature_layer, mbox, num_classes): 127 | nets_outputs, transform_layers, loc_layers, conf_layers = [ 128 | list() for _ in range(4) 129 | ] 130 | shelf_depths = [] 131 | for layer, depth, box in zip(feature_layer[0], feature_layer[1], mbox): 132 | if isinstance(layer, int): 133 | if isinstance(depth, list): 134 | if len(depth) == 2: 135 | in_channels = depth[0] 136 | depth = depth[1] 137 | else: 138 | in_channels = depth 139 | nets_outputs.append(layer) 140 | shelf_depths.append(in_channels) 141 | transform_layers += [nn.Conv2d(in_channels, depth, 1)] 142 | else: 143 | transform_layers += parse_feature_layer(layer, in_channels, depth) 144 | in_channels = depth 145 | 146 | loc_layers += [Head(in_channels, box * 4)] 147 | conf_layers += [Head(in_channels, box * num_classes)] 148 | 149 | shelf_head = OrderedDict( 150 | [ 151 | ("decoder0", ShelfPyramid(shelf_depths[::-1])), 152 | ("encoder0", ShelfPyramid(shelf_depths, conv=ConvBNReLU)), 153 | ("decoder1", ShelfPyramid(shelf_depths[::-1])), 154 | ] 155 | ) 156 | return nets_outputs, (transform_layers, shelf_head), (loc_layers, conf_layers) 157 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/ssd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import copy 5 | 6 | from .ssdsbase import SSDSBase 7 | from ssds.modeling.layers.layers_parser import parse_feature_layer 8 | 9 | 10 | class SSD(SSDSBase): 11 | r"""SSD: Single Shot MultiBox Detector 12 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 
13 | 
14 | Args:
15 | backbone: backbone layers for input
16 | extras: extra layers that feed to multibox loc and conf layers
17 | head: "multibox head" consists of loc and conf conv layers
18 | num_classes: num of classes
19 | """
20 | 
21 | def __init__(self, backbone, extras, head, num_classes):
22 | super(SSD, self).__init__(backbone, num_classes)
23 | 
24 | # SSD head
25 | self.extras = nn.ModuleList(extras)
26 | self.loc = nn.ModuleList(head[0])
27 | self.conf = nn.ModuleList(head[1])
28 | 
29 | self.initialize()
30 | 
31 | def initialize(self):
32 | r"""
33 | :meta private:
34 | """
35 | self.backbone.initialize()
36 | self.extras.apply(self.initialize_extra)
37 | self.loc.apply(self.initialize_head)
38 | self.conf.apply(self.initialize_head)
39 | for c in self.conf:
40 | c.apply(self.initialize_prior)
41 | 
42 | def forward(self, x):
43 | r"""Applies network layers and ops on input image(s) x.
44 | 
45 | Args:
46 | x: input image or batch of images.
47 | 
48 | Return:
49 | When self.training==True, loc and conf for each anchor box;
50 | 
51 | When self.training==False, loc and conf.sigmoid() for each anchor box;
52 | 
53 | For each pyramid layer, conf with shape [batch, num_anchor*num_classes, height, width];
54 | 
55 | For each pyramid layer, loc with shape [batch, num_anchor*4, height, width].
56 | """
57 | loc, conf = [list() for _ in range(2)]
58 | 
59 | # apply backbone to input and cache outputs
60 | features = self.backbone(x)
61 | 
62 | # apply extra blocks and cache outputs
63 | for v in self.extras:
64 | x = v(features[-1])
65 | features.append(x)
66 | 
67 | # apply multibox head to source layers
68 | for (x, l, c) in zip(features, self.loc, self.conf):
69 | loc.append(l(x))
70 | conf.append(c(x))
71 | 
72 | if not self.training:
73 | conf = [c.sigmoid() for c in conf]
74 | return tuple(loc), tuple(conf)
75 | 
76 | @staticmethod
77 | def add_extras(feature_layer, mbox, num_classes):
78 | r"""Define and declare the extras, loc and conf modules for the ssd model.
79 | 
80 | The feature_layer is defined in cfg.MODEL.FEATURE_LAYER. For the ssd model it can be an int or a str:
81 | 
82 | * int
83 | The int in the feature_layer represents the output feature in the backbone.
84 | * str
85 | The str in the feature_layer represents the extra layers appended at the end of the backbone.
86 | 
87 | Args:
88 | feature_layer: the feature layers with detection head, defined by cfg.MODEL.FEATURE_LAYER
89 | mbox: the number of boxes for each feature map
90 | num_classes: the number of classes, defined by cfg.MODEL.NUM_CLASSES
91 | """
92 | nets_outputs, extra_layers, loc_layers, conf_layers = [list() for _ in range(4)]
93 | in_channels = None
94 | for layer, depth, box in zip(feature_layer[0], feature_layer[1], mbox):
95 | if isinstance(layer, int):
96 | nets_outputs.append(layer)
97 | else:
98 | extra_layers += parse_feature_layer(layer, in_channels, depth)
99 | in_channels = depth
100 | loc_layers += [nn.Conv2d(in_channels, box * 4, kernel_size=3, padding=1)]
101 | conf_layers += [
102 | nn.Conv2d(in_channels, box * num_classes, kernel_size=3, padding=1)
103 | ]
104 | return nets_outputs, extra_layers, (loc_layers, conf_layers)
105 | -------------------------------------------------------------------------------- /ssds/modeling/ssds/ssdsbase.py: -------------------------------------------------------------------------------- 
1 | import torch
2 | import torch.nn as nn
3 | import math
4 | 
5 | 
6 | class SSDSBase(nn.Module):
7 | r"""Base class for all ssds models.
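    The conf heads of the subclasses are initialized with the focal-loss class
    prior (see initialize_prior below): with pi = 0.01 the final bias is set to

        b = -log((1 - pi) / pi) ≈ -4.6

    so every anchor starts with sigmoid(conf) ≈ 0.01 and the easy background
    locations do not dominate the loss early in training.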
8 | """ 9 | def __init__(self, backbone, num_classes): 10 | super(SSDSBase, self).__init__() 11 | self.backbone = backbone 12 | self.num_classes = num_classes 13 | 14 | # Initialize class head prior 15 | def initialize_prior(self, layer): 16 | pi = 0.01 17 | b = -math.log((1 - pi) / pi) 18 | nn.init.constant_(layer.bias, b) 19 | nn.init.normal_(layer.weight, std=0.01) 20 | 21 | def initialize_head(self, layer): 22 | if isinstance(layer, nn.Conv2d): 23 | nn.init.normal_(layer.weight, std=0.01) 24 | if layer.bias is not None: 25 | nn.init.constant_(layer.bias, val=0) 26 | 27 | def initialize_extra(self, layer): 28 | if isinstance(layer, nn.Conv2d): 29 | nn.init.xavier_uniform_(layer.weight) 30 | if layer.bias is not None: 31 | nn.init.constant_(layer.bias, val=0) 32 | -------------------------------------------------------------------------------- /ssds/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/pipeline/__init__.py -------------------------------------------------------------------------------- /ssds/pipeline/pipeline_anchor_apex.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import time 4 | from datetime import timedelta 5 | 6 | from apex import amp 7 | 8 | import ssds.core.tools as tools 9 | import ssds.core.visualize_funcs as vsf 10 | from ssds.modeling.layers.box import extract_targets 11 | 12 | CURSOR_UP_ONE = "\x1b[1A" 13 | ERASE_LINE = "\x1b[2K" 14 | 15 | 16 | class ModelWithLossBasic(torch.nn.Module): 17 | r""" Class use to help the gpu memory becomes more balance in ddp model 18 | """ 19 | 20 | def __init__( 21 | self, 22 | model, 23 | cls_criterion, 24 | loc_criterion, 25 | num_classes, 26 | match, 27 | center_sampling_radius, 28 | ): 29 | super(ModelWithLossBasic, self).__init__() 30 | self.model = model 31 | self.cls_criterion = cls_criterion 32 | self.loc_criterion = loc_criterion 33 | self.num_classes = num_classes 34 | self.match = match 35 | self.center_radius = center_sampling_radius 36 | 37 | def forward(self, images, targets, anchors): 38 | r""" 39 | :meta private: 40 | """ 41 | loc, conf = self.model(images) 42 | 43 | cls_losses, loc_losses, fg_targets = [], [], [] 44 | for j, (stride, anchor) in enumerate(anchors.items()): 45 | size = conf[j].shape[-2:] 46 | conf_target, loc_target, depth = extract_targets( 47 | targets, 48 | anchors, 49 | self.num_classes, 50 | stride, 51 | size, 52 | self.match, 53 | self.center_radius, 54 | ) 55 | fg_targets.append((depth > 0).sum().float().clamp(min=1)) 56 | 57 | c = conf[j].view_as(conf_target).float() 58 | cls_mask = (depth >= 0).expand_as(conf_target).float() 59 | cls_loss = self.cls_criterion(c, conf_target, depth) 60 | cls_loss = cls_mask * cls_loss 61 | cls_losses.append(cls_loss.sum()) 62 | 63 | l = loc[j].view_as(loc_target).float() 64 | loc_loss = self.loc_criterion(l, loc_target) 65 | loc_mask = (depth > 0).expand_as(loc_loss).float() 66 | loc_loss = loc_mask * loc_loss 67 | loc_losses.append(loc_loss.sum()) 68 | 69 | fg_targets = torch.stack(fg_targets).sum() 70 | cls_loss = torch.stack(cls_losses).sum() / fg_targets 71 | loc_loss = torch.stack(loc_losses).sum() / fg_targets 72 | return cls_loss, loc_loss, cls_losses, loc_losses 73 | 74 | 75 | def train_anchor_based_epoch( 76 | model, data_loader, optimizer, anchors, writer, epoch, device, local_rank 77 | ): 78 | r""" the pipeline for 
training 79 | """ 80 | model.train() 81 | title = "Train: " 82 | 83 | if local_rank == 0: 84 | loss_writer = { 85 | "loc_loss": tools.AverageMeter(), 86 | "cls_loss": tools.AverageMeter(), 87 | } 88 | loss_writer.update( 89 | { 90 | "loc_loss_{}".format(j): tools.AverageMeter() 91 | for j, _ in enumerate(anchors.items()) 92 | } 93 | ) 94 | loss_writer.update( 95 | { 96 | "cls_loss_{}".format(j): tools.AverageMeter() 97 | for j, _ in enumerate(anchors.items()) 98 | } 99 | ) 100 | start_time = time.time() 101 | dataset_len = len(data_loader) 102 | 103 | for batch_idx, (images, targets) in enumerate(data_loader): 104 | if images.device != device: 105 | images, targets = images.to(device), targets.to(device) 106 | if targets.dtype != torch.float: 107 | targets = targets.float() 108 | 109 | cls_loss, loc_loss, cls_losses, loc_losses = model(images, targets, anchors) 110 | if torch.isnan(loc_loss) or torch.isnan(cls_loss): 111 | continue 112 | if local_rank == 0: 113 | for j, (cl, ll) in enumerate(zip(cls_losses, loc_losses)): 114 | loss_writer["cls_loss_{}".format(j)].update(cl.item()) 115 | loss_writer["loc_loss_{}".format(j)].update(ll.item()) 116 | loss_writer["cls_loss"].update(cls_loss.item()) 117 | loss_writer["loc_loss"].update(loc_loss.item()) 118 | log = { 119 | "cls_loss": cls_loss.item(), 120 | "loc_loss": loc_loss.item(), 121 | "lr": optimizer.param_groups[0]["lr"], 122 | } 123 | 124 | optimizer.zero_grad() 125 | total_loss = cls_loss + loc_loss 126 | if total_loss.item() == float("Inf") or torch.isnan(total_loss): 127 | continue 128 | with amp.scale_loss(total_loss, optimizer) as scaled_loss: 129 | scaled_loss.backward() 130 | optimizer.step() 131 | 132 | if local_rank == 0: 133 | elapsed_time = time.time() - start_time 134 | estimat_time = elapsed_time * (dataset_len) / (batch_idx + 1) 135 | # log per iter 136 | print( 137 | title + tools.format_dict_of_loss(log), 138 | "|", 139 | batch_idx + 1, 140 | "/", 141 | dataset_len, 142 | "| Time:", 143 | timedelta(seconds=int(elapsed_time)), 144 | "/", 145 | timedelta(seconds=int(estimat_time)), 146 | "\r", 147 | end="", 148 | ) 149 | sys.stdout.flush() 150 | 151 | if local_rank == 0: 152 | log = {"lr": optimizer.param_groups[0]["lr"]} 153 | log.update({k: v.avg for k, v in loss_writer.items()}) 154 | print( 155 | CURSOR_UP_ONE 156 | + ERASE_LINE 157 | + "===>Avg Train: " 158 | + tools.format_dict_of_loss(log), 159 | " | Time: ", 160 | timedelta(seconds=int(time.time() - start_time)), 161 | ) 162 | 163 | # log for tensorboard 164 | for key, value in log.items(): 165 | writer.add_scalar("Train/{}".format(key), value, epoch) 166 | targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4] 167 | vsf.add_imagesWithBoxes( 168 | writer, "Train Image", images[:5], targets[:5], epoch=epoch 169 | ) 170 | 171 | return 172 | -------------------------------------------------------------------------------- /ssds/pipeline/pipeline_anchor_basic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from tqdm import tqdm 3 | import torch 4 | 5 | import ssds.core.tools as tools 6 | import ssds.core.visualize_funcs as vsf 7 | from ssds.core.evaluation_metrics import MeanAveragePrecision 8 | from ssds.modeling.layers.box import extract_targets 9 | 10 | CURSOR_UP_ONE = "\x1b[1A" 11 | ERASE_LINE = "\x1b[2K" 12 | 13 | 14 | def train_anchor_based_epoch( 15 | model, 16 | data_loader, 17 | optimizer, 18 | cls_criterion, 19 | loc_criterion, 20 | anchors, 21 | num_classes, 22 | match, 23 | center_sampling_radius, 
24 | writer, 25 | epoch, 26 | device, 27 | ): 28 | r""" the pipeline for training 29 | """ 30 | model.train() 31 | 32 | title = "Train: " 33 | progress = tqdm( 34 | tools.IteratorTimer(data_loader), 35 | total=len(data_loader), 36 | smoothing=0.9, 37 | miniters=1, 38 | leave=True, 39 | desc=title, 40 | ) 41 | 42 | loss_writer = {"loc_loss": tools.AverageMeter(), "cls_loss": tools.AverageMeter()} 43 | loss_writer.update( 44 | { 45 | "loc_loss_{}".format(j): tools.AverageMeter() 46 | for j, _ in enumerate(anchors.items()) 47 | } 48 | ) 49 | loss_writer.update( 50 | { 51 | "cls_loss_{}".format(j): tools.AverageMeter() 52 | for j, _ in enumerate(anchors.items()) 53 | } 54 | ) 55 | 56 | for batch_idx, (images, targets) in enumerate(progress): 57 | if images.device != device: 58 | images, targets = images.to(device), targets.to(device) 59 | if targets.dtype != torch.float: 60 | targets = targets.float() 61 | 62 | loc, conf = model(images) 63 | 64 | cls_losses, loc_losses, fg_targets = [], [], [] 65 | for j, (stride, anchor) in enumerate(anchors.items()): 66 | size = conf[j].shape[-2:] 67 | conf_target, loc_target, depth = extract_targets( 68 | targets, 69 | anchors, 70 | num_classes, 71 | stride, 72 | size, 73 | match, 74 | center_sampling_radius, 75 | ) 76 | fg_targets.append((depth > 0).sum().float().clamp(min=1)) 77 | 78 | c = conf[j].view_as(conf_target).float() 79 | cls_mask = (depth >= 0).expand_as(conf_target).float() 80 | cls_loss = cls_criterion(c, conf_target, depth) 81 | cls_loss = cls_mask * cls_loss 82 | cls_losses.append(cls_loss.sum()) 83 | 84 | l = loc[j].view_as(loc_target).float() 85 | loc_loss = loc_criterion(l, loc_target) 86 | loc_mask = (depth > 0).expand_as(loc_loss).float() 87 | loc_loss = loc_mask * loc_loss 88 | loc_losses.append(loc_loss.sum()) 89 | 90 | if torch.isnan(loc_loss.sum()) or torch.isnan(cls_loss.sum()): 91 | continue 92 | loss_writer["cls_loss_{}".format(j)].update(cls_losses[-1].item()) 93 | loss_writer["loc_loss_{}".format(j)].update(loc_losses[-1].item()) 94 | 95 | fg_targets = torch.stack(fg_targets).sum() 96 | cls_loss = torch.stack(cls_losses).sum() / fg_targets 97 | loc_loss = torch.stack(loc_losses).sum() / fg_targets 98 | if torch.isnan(loc_loss) or torch.isnan(cls_loss): 99 | continue 100 | loss_writer["cls_loss"].update(cls_loss.item()) 101 | loss_writer["loc_loss"].update(loc_loss.item()) 102 | 103 | log = { 104 | "cls_loss": cls_loss.item(), 105 | "loc_loss": loc_loss.item(), 106 | "lr": optimizer.param_groups[0]["lr"], 107 | } 108 | 109 | optimizer.zero_grad() 110 | total_loss = cls_loss + loc_loss 111 | if total_loss.item() == float("Inf"): 112 | continue 113 | total_loss.backward() 114 | optimizer.step() 115 | 116 | # log per iter 117 | progress.set_description(title + tools.format_dict_of_loss(log)) 118 | progress.update(1) 119 | 120 | progress.close() 121 | log = {"lr": optimizer.param_groups[0]["lr"]} 122 | log.update({k: v.avg for k, v in loss_writer.items()}) 123 | print( 124 | CURSOR_UP_ONE + ERASE_LINE + "===>Avg Train: " + tools.format_dict_of_loss(log) 125 | ) 126 | 127 | # log for tensorboard 128 | for key, value in log.items(): 129 | writer.add_scalar("Train/{}".format(key), value, epoch) 130 | targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4] 131 | vsf.add_imagesWithBoxes(writer, "Train Image", images[:5], targets[:5], epoch=epoch) 132 | 133 | return 134 | 135 | 136 | def eval_anchor_based_epoch( 137 | model, 138 | data_loader, 139 | decoder, 140 | cls_criterion, 141 | loc_criterion, 142 | anchors, 143 | num_classes, 
144 | writer, 145 | epoch, 146 | device, 147 | ): 148 | r""" the pipeline for evaluation 149 | """ 150 | model.eval() 151 | title = "Eval: " 152 | progress = tqdm( 153 | tools.IteratorTimer(data_loader), 154 | total=len(data_loader), 155 | smoothing=0.9, 156 | miniters=1, 157 | leave=True, 158 | desc=title, 159 | ) 160 | 161 | metric = MeanAveragePrecision( 162 | num_classes, decoder.conf_threshold, decoder.nms_threshold 163 | ) 164 | for batch_idx, (images, targets) in enumerate(progress): 165 | if images.device != device: 166 | images, targets = images.to(device), targets.to(device) 167 | if targets.dtype != torch.float: 168 | targets = targets.float() 169 | 170 | loc, conf = model(images) 171 | 172 | # removed loss since the conf is sigmod in the evaluation stage, 173 | # the conf loss is not meaningful anymore 174 | detections = decoder(loc, conf, anchors) 175 | targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4] # from xywh to ltrb 176 | metric(detections, targets) 177 | 178 | # log per iter 179 | progress.update(1) 180 | 181 | progress.close() 182 | mAP, (prec, rec, ap) = metric.get_results() 183 | 184 | log = {"mAP": mAP} 185 | if len(ap) < 5: 186 | for i, a in enumerate(ap): 187 | log["AP@cls{}".format(i)] = a 188 | print( 189 | CURSOR_UP_ONE + ERASE_LINE + "===>Avg Eval: " + tools.format_dict_of_loss(log) 190 | ) 191 | 192 | # log for tensorboard 193 | for key, value in log.items(): 194 | writer.add_scalar("Eval/{}".format(key), value, epoch) 195 | vsf.add_prCurve(writer, prec, rec, epoch=epoch) 196 | boxes = torch.cat((detections[1], detections[0][..., None]), dim=2) 197 | vsf.add_imagesWithMatchedBoxes( 198 | writer, "Eval Image", images[:5], boxes[:5], targets[:5], epoch=epoch 199 | ) 200 | return 201 | -------------------------------------------------------------------------------- /ssds/ssds.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .core import checkpoint, config 4 | from .modeling import model_builder 5 | 6 | 7 | class SSDDetector(object): 8 | r"""The class used for ssds model inference 9 | 10 | Args: 11 | cfg_file (str): The path to the config file 12 | is_print (bool): Whether to print the model arch and the anchor strategy 13 | """ 14 | def __init__(self, cfg_file, is_print=False): 15 | # Config 16 | cfg = config.cfg_from_file(cfg_file) 17 | 18 | # Build model 19 | print("===> Building model") 20 | self.model = model_builder.create_model(cfg.MODEL) 21 | if is_print: 22 | print("Model architectures:\n{}\n".format(self.model)) 23 | self.anchors = model_builder.create_anchors( 24 | cfg.MODEL, self.model, cfg.MODEL.IMAGE_SIZE, is_print 25 | ) 26 | self.decoder = model_builder.create_decoder(cfg.POST_PROCESS) 27 | 28 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 29 | if cfg.RESUME_CHECKPOINT: 30 | print( 31 | "Loading initial model weights from {:s}".format(cfg.RESUME_CHECKPOINT) 32 | ) 33 | checkpoint.resume_checkpoint(self.model, cfg.RESUME_CHECKPOINT, "") 34 | self.model.eval().to(self.device) 35 | 36 | self.image_size = tuple(cfg.MODEL.IMAGE_SIZE) 37 | self.num_classes = cfg.MODEL.NUM_CLASSES 38 | self.mean = cfg.DATASET.PREPROC.MEAN 39 | self.std = cfg.DATASET.PREPROC.STD 40 | 41 | def __call__(self, imgs): 42 | r"""Applied the SSD model to do the inference for the given images 43 | 44 | Args: 45 | imgs (np.ndarray): The images used for inference, with shape [H,W,3], [3,H,W] [N,H,W,3] or [N,3,H,W] 46 | """ 47 | pick1st = False 48 | if len(imgs.shape) == 3: 49 
/ssds/ssds.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .core import checkpoint, config
4 | from .modeling import model_builder
5 |
6 |
7 | class SSDDetector(object):
8 |     r"""The class used for ssds model inference
9 |
10 |     Args:
11 |         cfg_file (str): The path to the config file
12 |         is_print (bool): Whether to print the model arch and the anchor strategy
13 |     """
14 |     def __init__(self, cfg_file, is_print=False):
15 |         # Config
16 |         cfg = config.cfg_from_file(cfg_file)
17 |
18 |         # Build model
19 |         print("===> Building model")
20 |         self.model = model_builder.create_model(cfg.MODEL)
21 |         if is_print:
22 |             print("Model architectures:\n{}\n".format(self.model))
23 |         self.anchors = model_builder.create_anchors(
24 |             cfg.MODEL, self.model, cfg.MODEL.IMAGE_SIZE, is_print
25 |         )
26 |         self.decoder = model_builder.create_decoder(cfg.POST_PROCESS)
27 |
28 |         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
29 |         if cfg.RESUME_CHECKPOINT:
30 |             print(
31 |                 "Loading initial model weights from {:s}".format(cfg.RESUME_CHECKPOINT)
32 |             )
33 |             checkpoint.resume_checkpoint(self.model, cfg.RESUME_CHECKPOINT, "")
34 |         self.model.eval().to(self.device)
35 |
36 |         self.image_size = tuple(cfg.MODEL.IMAGE_SIZE)
37 |         self.num_classes = cfg.MODEL.NUM_CLASSES
38 |         self.mean = cfg.DATASET.PREPROC.MEAN
39 |         self.std = cfg.DATASET.PREPROC.STD
40 |
41 |     def __call__(self, imgs):
42 |         r"""Applies the SSD model to run inference on the given images
43 |
44 |         Args:
45 |             imgs (np.ndarray): The images used for inference, with shape [H,W,3], [3,H,W], [N,H,W,3] or [N,3,H,W]
46 |         """
47 |         pick1st = False
48 |         if len(imgs.shape) == 3:
49 |             imgs = imgs[None, ...]
50 |             pick1st = True
51 |         if len(imgs.shape) != 4:
52 |             raise AssertionError("image dims have to be 3 or 4")
53 |         if imgs.shape[3] == 3:
54 |             imgs = imgs.transpose(0, 3, 1, 2)
55 |
56 |         imgs_tensor = torch.Tensor(imgs).to(self.device)
57 |         imgs_tensor = (imgs_tensor - self.mean) / self.std
58 |
59 |         loc, conf = self.model(imgs_tensor)
60 |         detections = self.decoder(loc, conf, self.anchors)
61 |         out_scores, out_boxes, out_classes = (
62 |             d.cpu().detach().numpy() for d in detections
63 |         )
64 |
65 |         if pick1st:
66 |             return out_scores[0], out_boxes[0].astype(int), out_classes[0].astype(int)
67 |         else:
68 |             return out_scores, out_boxes.astype(int), out_classes.astype(int)
69 |
--------------------------------------------------------------------------------
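SSDDetector above is the package's single-entry inference API: build it from a config file, then call it directly on a numpy image. A minimal usage sketch, assuming the config's RESUME_CHECKPOINT points at trained weights (the paths below are placeholders from the experiments folder; see also demo.py in the repository root):

import cv2
from ssds.ssds import SSDDetector

detector = SSDDetector("experiments/cfgs/tests/test.yml")
image = cv2.imread("experiments/person.jpg")  # HWC ndarray; CHW and batched inputs are accepted too
scores, boxes, classes = detector(image)      # per-image arrays, since a single image was passed
for score, box, cls in zip(scores, boxes, classes):
    print("class {}  score {:.2f}  box {}".format(cls, score, box))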
/ssds/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ShuangXieIrene/ssds.pytorch/b5ec682a42c923afe964205b21448e9f141d55bc/ssds/utils/__init__.py
--------------------------------------------------------------------------------
/ssds/utils/export.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import sys
4 | import glob
5 | import json
6 | import argparse
7 |
8 | import torch
9 | import torch.nn as nn
10 | import ssds
11 | from ssds.core import checkpoint, config
12 | from ssds.modeling import model_builder
13 |
14 |
15 | class ExportModel(nn.Module):
16 |     def __init__(self, model, nhwc):
17 |         super(ExportModel, self).__init__()
18 |         self.model = model
19 |         self.nhwc = nhwc
20 |
21 |     def forward(self, x):
22 |         if self.nhwc:
23 |             x = x.permute(0, 3, 1, 2).contiguous() / 255.0
24 |         return self.model(x)
25 |
26 |
27 | class Solver(object):
28 |     """
29 |     A wrapper class for the export process
30 |     """
31 |
32 |     def __init__(self, cfg, nhwc, render=False):
33 |         self.cfg = cfg
34 |         self.render = render
35 |         self.nhwc = nhwc
36 |
37 |         # Build model
38 |         print("===> Building model")
39 |         self.model = model_builder.create_model(cfg.MODEL)
40 |         self.model.eval().cuda()
41 |         self.anchors = model_builder.create_anchors(
42 |             self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE
43 |         )
44 |
45 |         # Print the model architecture and parameters
46 |         if self.render:
47 |             print("Model architectures:\n{}\n".format(self.model))
48 |             model_builder.create_anchors(
49 |                 self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE, self.render
50 |             )
51 |
52 |     def export_onnx(self, weights, export_path, batch):
53 |         if weights is not None:
54 |             checkpoint.resume_checkpoint(self.model, weights)
55 |         export_model = ExportModel(self.model, self.nhwc)
56 |
57 |         import torch.onnx.symbolic_opset9 as onnx_symbolic
58 |
59 |         def upsample_nearest2d(g, input, output_size, *args):
60 |             # Currently, the TRT 5.1/6.0 ONNX Parser does not support all ONNX ops
61 |             # needed for the dynamic upsampling ONNX formulation.
62 |             # Here we hardcode scale=2 as a temporary workaround
63 |             scales = g.op("Constant", value_t=torch.tensor([1.0, 1.0, 2.0, 2.0]))
64 |             return g.op("Upsample", input, scales, mode_s="nearest")
65 |
66 |         onnx_symbolic.upsample_nearest2d = upsample_nearest2d
67 |
68 |         export_model.eval().cuda()
69 |         if self.nhwc:
70 |             dummy_input = torch.rand(
71 |                 batch,
72 |                 self.cfg.MODEL.IMAGE_SIZE[1],
73 |                 self.cfg.MODEL.IMAGE_SIZE[0],
74 |                 3,
75 |                 requires_grad=False,
76 |             ).cuda()
77 |         else:
78 |             dummy_input = torch.rand(
79 |                 batch,
80 |                 3,
81 |                 self.cfg.MODEL.IMAGE_SIZE[1],
82 |                 self.cfg.MODEL.IMAGE_SIZE[0],
83 |                 requires_grad=False,
84 |             ).cuda()
85 |
86 |         outputs = export_model(dummy_input)
87 |         optional_args = dict(keep_initializers_as_inputs=True)
88 |         input_names = ["input"]
89 |         output_names = [
90 |             n.format(i) for n in ["loc_{}", "conf_{}"] for i in range(len(outputs[0]))
91 |         ]
92 |         if export_path:
93 |             print("Saving model weights & graph to {:s}".format(export_path))
94 |             param = {
95 |                 "image_size": self.cfg.MODEL.IMAGE_SIZE,
96 |                 "score": self.cfg.POST_PROCESS.SCORE_THRESHOLD,
97 |                 "iou": self.cfg.POST_PROCESS.IOU_THRESHOLD,
98 |                 "max_detects": self.cfg.POST_PROCESS.MAX_DETECTIONS,
99 |                 "max_detects_per_level": self.cfg.POST_PROCESS.MAX_DETECTIONS_PER_LEVEL,
100 |                 "rescore": self.cfg.POST_PROCESS.RESCORE_CENTER,
101 |                 "use_diou": self.cfg.POST_PROCESS.USE_DIOU,
102 |                 "NHWC": self.nhwc,
103 |                 "anchors": [v.view(-1).tolist() for k, v in self.anchors.items()],
104 |             }
105 |             with open(export_path + ".json", "w") as output_json:
106 |                 json.dump(param, output_json, indent=2)
107 |
108 |             torch.onnx.export(
109 |                 export_model,
110 |                 dummy_input,
111 |                 export_path,
112 |                 verbose=self.render,
113 |                 export_params=True,
114 |                 input_names=input_names,
115 |                 output_names=output_names,
116 |                 **optional_args
117 |             )
118 |             return False
119 |         else:
120 |             onnx_bytes = io.BytesIO()
121 |             torch.onnx.export(
122 |                 export_model.cuda(),
123 |                 dummy_input,
124 |                 onnx_bytes,
125 |                 verbose=self.render,
126 |                 input_names=input_names,
127 |                 output_names=output_names,
128 |             )
129 |             return onnx_bytes
130 |
131 |     def export_trt(
132 |         self, weights, export_path, batch, precision, calibration_files, workspace_size
133 |     ):
134 |         if not hasattr(ssds, "_C"):
135 |             raise AssertionError(
136 |                 "Currently the ssds lib is not installed with the external cpp plugin "
137 |                 "and cannot export to a tensorrt model. "
138 |                 "Please reinstall the ssds lib by `python setup_cpp.py clean -a install`"
139 |             )
140 |         onnx_bytes = self.export_onnx(weights, None, batch)
141 |         del self.model
142 |
143 |         model_name = self.cfg.MODEL.SSDS + "_" + self.cfg.MODEL.NETS
144 |         anchors = [v.view(-1).tolist() for k, v in self.anchors.items()]
145 |         if calibration_files != "":
146 |             calibration_files = glob.glob(calibration_files)
147 |             num_files = (len(calibration_files) // batch) * batch
148 |             calibration_files = calibration_files[:num_files]
149 |         else:
150 |             calibration_files = []
151 |             batch = 1
152 |
153 |         from ssds._C import trtConvert
154 |
155 |         trtConvert(
156 |             export_path,
157 |             onnx_bytes.getvalue(),
158 |             len(onnx_bytes.getvalue()),
159 |             batch,
160 |             precision,
161 |             self.cfg.POST_PROCESS.SCORE_THRESHOLD,
162 |             self.cfg.POST_PROCESS.MAX_DETECTIONS,
163 |             anchors,
164 |             self.cfg.POST_PROCESS.IOU_THRESHOLD,
165 |             self.cfg.POST_PROCESS.MAX_DETECTIONS,
166 |             calibration_files,
167 |             model_name,
168 |             "",
169 |             self.nhwc,
170 |             self.render,
171 |             workspace_size,
172 |         )
173 |         return False
174 |
175 |
176 | if __name__ == "__main__":
177 |     parser = argparse.ArgumentParser(description="export a ssds.pytorch network")
178 |     parser.add_argument(
179 |         "-cfg",
180 |         "--config",
181 |         dest="config_file",
182 |         help="optional config file",
183 |         default=None,
184 |         type=str,
185 |     )
186 |     parser.add_argument(
187 |         "-c", "--checkpoint", help="optional checkpoint file", default=None, type=str
188 |     )
189 |     parser.add_argument("-o", "--onnx", help="output onnx file", default=None, type=str)
190 |     parser.add_argument("-t", "--trt", help="output trt file", default=None, type=str)
191 |     parser.add_argument(
192 |         "-b", "--batch", help="batch size for output model", default=1, type=int
193 |     )
194 |     parser.add_argument(
195 |         "-p",
196 |         "--precision",
197 |         help="precision for output trt model",
198 |         default="FP32",
199 |         choices=["FP32", "FP16", "INT8"],
200 |     )
201 |     parser.add_argument(
202 |         "-i",
203 |         "--image-files",
204 |         help="image files to calibrate the output trt model in int8 mode",
205 |         default="",
206 |         type=str,
207 |     )
208 |     parser.add_argument(
209 |         "-w",
210 |         "--max-workspace-size",
211 |         help="The max workspace size for the output plan file. The final size is "
212 |         "1 << max-workspace-size, e.g. 30 for 1 GB.",
213 |         type=int,
214 |     )
215 |     parser.add_argument("--nhwc", action="store_true")
216 |     parser.add_argument("-r", "--render", action="store_true")
217 |
218 |     if len(sys.argv) == 1:
219 |         parser.print_help()
220 |         sys.exit(1)
221 |     args = parser.parse_args()
222 |
223 |     cfg = config.cfg_from_file(args.config_file)
224 |     solver = Solver(cfg, args.nhwc, args.render)
225 |     if args.onnx:
226 |         solver.export_onnx(args.checkpoint, args.onnx, args.batch)
227 |     if args.trt:
228 |         solver.export_trt(
229 |             args.checkpoint,
230 |             args.trt,
231 |             args.batch,
232 |             args.precision,
233 |             args.image_files,
234 |             1 << args.max_workspace_size,
235 |         )
--------------------------------------------------------------------------------
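The argparse entry point above drives both export paths. A hedged invocation sketch (the file paths are placeholders):

# ONNX export
python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c weights/model.pth -o model.onnx

# TensorRT export at FP16; -w is a shift amount, so 30 requests a 1 << 30 byte (1 GiB) workspace
python -m ssds.utils.export -cfg experiments/cfgs/tests/test.yml -c weights/model.pth -t model.plan -p FP16 -w 30

Note that -w has no default: with the flag omitted, `1 << args.max_workspace_size` is evaluated on None and raises a TypeError, so always pass -w together with -t.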
/ssds/utils/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 |
5 | import torch
6 | import torch.nn as nn
7 | import torch.backends.cudnn as cudnn
8 | from torch.utils.tensorboard import SummaryWriter
9 |
10 | from ssds.core import optimizer, checkpoint, criterion, config, data_parallel
11 | from ssds.modeling import model_builder
12 | from ssds.pipeline.pipeline_anchor_basic import (
13 |     train_anchor_based_epoch,
14 |     eval_anchor_based_epoch,
15 | )
16 | from ssds.dataset.dataset_factory import load_data
17 |
18 |
19 | class Solver(object):
20 |     """
21 |     A wrapper class for the training process
22 |     """
23 |
24 |     def __init__(self, cfg, render=False):
25 |         self.cfg = cfg
26 |         self.render = render
27 |
28 |         # Build model
29 |         print("===> Building model")
30 |         self.model = model_builder.create_model(cfg.MODEL)
31 |
32 |         # Utilize GPUs for computation
33 |         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
34 |
35 |         # Print the model architecture and parameters
36 |         if self.render:
37 |             print("Model architectures:\n{}\n".format(self.model))
38 |             # print('Parameters and size:')
39 |             # for name, param in self.model.named_parameters():
40 |             #     print('{}: {}'.format(name, list(param.size())))
41 |
42 |         # print trainable scope
43 |         print("Trainable scope: {}".format(cfg.TRAIN.TRAINABLE_SCOPE))
44 |         trainable_param_ = optimizer.trainable_param(
45 |             self.model, cfg.TRAIN.TRAINABLE_SCOPE
46 |         )
47 |         self.optimizer = optimizer.configure_optimizer(
48 |             trainable_param_, cfg.TRAIN.OPTIMIZER
49 |         )
50 |         self.lr_scheduler = optimizer.configure_lr_scheduler(
51 |             self.optimizer, cfg.TRAIN.LR_SCHEDULER
52 |         )
53 |         self.max_epochs = cfg.TRAIN.MAX_EPOCHS
54 |
55 |         # metric
56 |         self.cls_criterion = getattr(criterion, self.cfg.MATCHER.CLASSIFY_LOSS)(
57 |             alpha=self.cfg.MATCHER.FOCAL_ALPHA,
58 |             gamma=self.cfg.MATCHER.FOCAL_GAMMA,
59 |             negpos_ratio=self.cfg.MATCHER.NEGPOS_RATIO,
60 |         )
61 |         self.loc_criterion = getattr(criterion, self.cfg.MATCHER.LOCATE_LOSS)()
62 |
63 |         # Set the logger
64 |         self.writer = SummaryWriter(log_dir=cfg.LOG_DIR)
65 |
66 |     def train_model(self):
67 |         previous = checkpoint.find_previous_checkpoint(self.cfg.EXP_DIR)
68 |         if previous:
69 |             start_epoch = previous[0][-1]
70 |             checkpoint.resume_checkpoint(
71 |                 self.model, previous[1][-1], self.cfg.TRAIN.RESUME_SCOPE
72 |             )
73 |         else:
74 |             start_epoch = 0
75 |             if self.cfg.RESUME_CHECKPOINT:
76 |                 print(
77 |                     "Loading initial model weights from {:s}".format(
78 |                         self.cfg.RESUME_CHECKPOINT
79 |                     )
80 |                 )
81 |                 checkpoint.resume_checkpoint(
82 |                     self.model, self.cfg.RESUME_CHECKPOINT, self.cfg.TRAIN.RESUME_SCOPE
83 |                 )
84 |
85 |         if torch.cuda.is_available():
86 |             print("Utilize GPUs for computation")
87 |             print("Number of GPUs available:", torch.cuda.device_count())
88 |             if len(self.cfg.DEVICE_ID) > 1:
89 |                 gpu0_bsz = self.cfg.TRAIN.BATCH_SIZE // (
90 |                     8 * (len(self.cfg.DEVICE_ID) - 1) + 1
91 |                 )  # e.g. 4 GPUs with batch size 100 -> 100 // 25 = 4 samples on gpu0
92 |                 self.model = data_parallel.BalancedDataParallel(
93 |                     gpu0_bsz, self.model, device_ids=self.cfg.DEVICE_ID
94 |                 )
95 |                 # self.model = nn.DataParallel(self.model, device_ids=self.cfg.DEVICE_ID)
96 |             self.model.to(self.device)
97 |             cudnn.benchmark = True
98 |
99 |         # Load data
100 |         print("===> Loading data")
101 |         train_loader = load_data(self.cfg.DATASET, "train")
102 |         eval_loader = load_data(self.cfg.DATASET, "eval") if "eval" in self.cfg.PHASE else None
103 |
104 |         warm_up = self.cfg.TRAIN.LR_SCHEDULER.WARM_UP_EPOCHS
105 |         for epoch in iter(range(start_epoch + 1, self.max_epochs + 1)):
106 |             sys.stdout.write(
107 |                 "\rEpoch {epoch:d}/{max_epochs:d}:\n".format(
108 |                     epoch=epoch, max_epochs=self.max_epochs
109 |                 )
110 |             )
111 |             torch.cuda.empty_cache()
112 |             # start phases for epoch
113 |             anchors = model_builder.create_anchors(
114 |                 self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE, self.render
115 |             )
116 |             train_anchor_based_epoch(
117 |                 self.model,
118 |                 train_loader,
119 |                 self.optimizer,
120 |                 self.cls_criterion,
121 |                 self.loc_criterion,
122 |                 anchors,
123 |                 self.cfg.MODEL.NUM_CLASSES,
124 |                 self.cfg.MATCHER.MATCH_THRESHOLD,
125 |                 self.cfg.MATCHER.CENTER_SAMPLING_RADIUS,
126 |                 self.writer,
127 |                 epoch,
128 |                 self.device,
129 |             )
130 |             # save checkpoint
131 |             if epoch % self.cfg.TRAIN.CHECKPOINTS_EPOCHS == 0:
132 |                 checkpoint.save_checkpoints(
133 |                     self.model, self.cfg.EXP_DIR, self.cfg.CHECKPOINTS_PREFIX, epoch
134 |                 )
135 |             # multi scale training
136 |             if len(self.cfg.DATASET.MULTISCALE) > 1:
137 |                 batch_size, target_size = self.cfg.DATASET.MULTISCALE[
138 |                     epoch % len(self.cfg.DATASET.MULTISCALE)
139 |                 ]
140 |                 train_loader.reset_size(batch_size, target_size)
141 |             if "eval" in self.cfg.PHASE:
142 |                 anchors = model_builder.create_anchors(
143 |                     self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE
144 |                 )
145 |                 decoder = model_builder.create_decoder(self.cfg.POST_PROCESS)
146 |                 eval_anchor_based_epoch(
147 |                     self.model,
148 |                     eval_loader,
149 |                     decoder,
150 |                     self.cls_criterion,
151 |                     self.loc_criterion,
152 |                     anchors,
153 |                     self.cfg.MODEL.NUM_CLASSES,
154 |                     self.writer,
155 |                     epoch,
156 |                     self.device,
157 |                 )
158 |
159 |             self.lr_scheduler.step()
160 |
161 |     def eval_model(self):
162 |         eval_loader = load_data(self.cfg.DATASET, "eval")
163 |         self.model.to(self.device)
164 |         anchors = model_builder.create_anchors(
165 |             self.cfg.MODEL, self.model, self.cfg.MODEL.IMAGE_SIZE
166 |         )
167 |         decoder = model_builder.create_decoder(self.cfg.POST_PROCESS)
168 |
169 |         previous = checkpoint.find_previous_checkpoint(self.cfg.EXP_DIR)
170 |         if previous:
171 |             for epoch, resume_checkpoint in zip(previous[0], previous[1]):
172 |                 if self.cfg.TEST.TEST_SCOPE[0] <= epoch <= self.cfg.TEST.TEST_SCOPE[1]:
173 |                     checkpoint.resume_checkpoint(
174 |                         self.model, resume_checkpoint, self.cfg.TRAIN.RESUME_SCOPE
175 |                     )
176 |                     eval_anchor_based_epoch(
177 |                         self.model,
178 |                         eval_loader,
179 |                         decoder,
180 |                         self.cls_criterion,
181 |                         self.loc_criterion,
182 |                         anchors,
183 |                         self.cfg.MODEL.NUM_CLASSES,
184 |                         self.writer,
185 |                         epoch,
186 |                         self.device,
187 |                     )
188 |         else:
189 |             if self.cfg.RESUME_CHECKPOINT:
190 |                 print(
191 |                     "Loading initial model weights from {:s}".format(
192 |                         self.cfg.RESUME_CHECKPOINT
193 |                     )
194 |                 )
195 |                 checkpoint.resume_checkpoint(
196 |                     self.model, self.cfg.RESUME_CHECKPOINT, self.cfg.TRAIN.RESUME_SCOPE
197 |                 )
198 |             eval_anchor_based_epoch(
199 |                 self.model,
200 |                 eval_loader,
201 |                 decoder,
202 |                 self.cls_criterion,
203 |                 self.loc_criterion,
204 |                 anchors,
205 |                 self.cfg.MODEL.NUM_CLASSES,
206 |                 self.writer,
207 |                 0,
208 |                 self.device,
209 |             )
210 |
211 |
212 | if __name__ == "__main__":
213 |     """
214 |     Parse input arguments
215 |     """
216 |     parser = argparse.ArgumentParser(description="Train/Eval a ssds.pytorch network")
217 |     parser.add_argument(
218 |         "-cfg",
219 |         "--config",
220 |         dest="config_file",
221 |         help="optional config file",
222 |         default=None,
223 |         type=str,
224 |     )
225 |     parser.add_argument("-e", "--eval", action="store_true")
226 |     parser.add_argument("-r", "--render", action="store_true")
227 |
228 |     if len(sys.argv) == 1:
229 |         parser.print_help()
230 |         sys.exit(1)
231 |     args = parser.parse_args()
232 |
233 |     cfg = config.cfg_from_file(args.config_file)
234 |     solver = Solver(cfg, args.render)
235 |     if args.eval:
236 |         solver.eval_model()
237 |     else:
238 |         solver.train_model()
239 |
--------------------------------------------------------------------------------
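A hedged invocation sketch for the trainer above (the config path is a placeholder). Without flags it trains, resuming from the newest checkpoint found in EXP_DIR if one exists; -e instead evaluates the saved checkpoints whose epochs fall inside TEST.TEST_SCOPE; -r additionally prints the model architecture and anchor setup:

# train
python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml

# evaluate previously saved checkpoints
python -m ssds.utils.train -cfg experiments/cfgs/tests/test.yml -e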
/ssds/utils/train_ddp.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | from pynvml.smi import nvidia_smi
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.backends.cudnn as cudnn
9 | from torch.utils.tensorboard import SummaryWriter
10 |
11 | from apex import amp
12 | from apex.parallel import convert_syncbn_model
13 | from apex.parallel import DistributedDataParallel as DDP
14 |
15 | from ssds.core import optimizer, checkpoint, criterion, config
16 | from ssds.modeling import model_builder
17 | from ssds.pipeline.pipeline_anchor_apex import (
18 |     train_anchor_based_epoch,
19 |     ModelWithLossBasic,
20 | )
21 | from ssds.dataset.dataset_factory import load_data
22 |
23 | nvsmi = nvidia_smi.getInstance()
24 |
25 |
26 | def getMemoryUsage(idx=0):
27 |     usage = nvsmi.DeviceQuery("memory.used")["gpu"][idx]["fb_memory_usage"]
28 |     return "%d %s" % (usage["used"], usage["unit"])
29 |
30 |
31 | class Solver(object):
32 |     """
33 |     A wrapper class for the training process
34 |     """
35 |
36 |     def __init__(self, cfg, local_rank, render=False):
37 |         self.cfg = cfg
38 |         self.local_rank = local_rank
39 |         self.render = render
40 |
41 |         # Build model
42 |         if self.local_rank == 0:
43 |             print("===> Building model")
44 |         self.model = model_builder.create_model(cfg.MODEL)
45 |         self.load_model()
46 |
47 |         # Utilize GPUs for computation
48 |         self.device = torch.device("cuda:{}".format(local_rank))
49 |
50 |         # Convert to sync model
51 |         self.model = convert_syncbn_model(self.model)
52 |         self.model.to(self.device)
53 |
54 |         # Print the model architecture and parameters
55 |         if self.render and self.local_rank == 0:
56 |             print("Model architectures:\n{}\n".format(self.model))
57 |
58 |         # print trainable scope
59 |         if self.local_rank == 0:
60 |             print("Trainable scope: {}".format(cfg.TRAIN.TRAINABLE_SCOPE))
61 |         trainable_param_ = optimizer.trainable_param(
62 |             self.model, cfg.TRAIN.TRAINABLE_SCOPE
63 |         )
64 |         self.optimizer = optimizer.configure_optimizer(
65 |             trainable_param_, cfg.TRAIN.OPTIMIZER
66 |         )
67 |
68 |         # to apex version
69 |         self.model, self.optimizer = amp.initialize(
70 |             self.model, self.optimizer, opt_level="O1", loss_scale=128.0
71 |         )  # keep_batchnorm_fp32 = True,
72 |
73 |         # add scheduler
74 |         self.lr_scheduler = optimizer.configure_lr_scheduler(
75 |             self.optimizer, cfg.TRAIN.LR_SCHEDULER
76 |         )
77 |         self.max_epochs = cfg.TRAIN.MAX_EPOCHS
78 |
79 |         # metric
80 |         self.cls_criterion = getattr(criterion, self.cfg.MATCHER.CLASSIFY_LOSS)(
81 |             alpha=self.cfg.MATCHER.FOCAL_ALPHA,
82 |             gamma=self.cfg.MATCHER.FOCAL_GAMMA,
83 |             negpos_ratio=self.cfg.MATCHER.NEGPOS_RATIO,
84 |         )
85 |         self.loc_criterion = getattr(criterion, self.cfg.MATCHER.LOCATE_LOSS)()
86 |
87 |         # Set the logger
88 |         self.writer = (
89 |             SummaryWriter(log_dir=cfg.LOG_DIR) if self.local_rank == 0 else None
90 |         )
91 |
92 |     def train_model(self):
93 |         modelWithLoss = ModelWithLossBasic(
94 |             self.model,
95 |             self.cls_criterion,
96 |             self.loc_criterion,
97 |             self.cfg.MODEL.NUM_CLASSES,
98 |             self.cfg.MATCHER.MATCH_THRESHOLD,
99 |             self.cfg.MATCHER.CENTER_SAMPLING_RADIUS,
100 |         )
101 |
102 |         if torch.cuda.is_available():
103 |             print("Utilize GPUs for computation")
104 |             print("Number of GPUs available:", torch.cuda.device_count())
105 |             if self.cfg.DEVICE_ID:
106 |                 modelWithLoss = DDP(
107 |                     modelWithLoss, delay_allreduce=True
108 |                 )  # , device_ids=self.cfg.DEVICE_ID)
109 |             cudnn.benchmark = True
110 |
111 |         # Load data
112 |         if self.local_rank == 0:
113 |             print("===> Loading data")
114 |         train_loader = load_data(self.cfg.DATASET, "train")
115 |
116 |         # multi scale training
117 |         if len(self.cfg.DATASET.MULTISCALE) > 1:
118 |             batch_size, target_size = self.cfg.DATASET.MULTISCALE[
119 |                 self.start_epoch % len(self.cfg.DATASET.MULTISCALE)
120 |             ]
121 |             train_loader.reset_size(batch_size, target_size)
122 |
123 |         for epoch in iter(range(self.start_epoch + 1, self.max_epochs + 1)):
124 |             if self.local_rank == 0:
125 |                 sys.stdout.write(
126 |                     "\rEpoch {epoch:d}/{max_epochs:d}:\n".format(
127 |                         epoch=epoch, max_epochs=self.max_epochs
128 |                     )
129 |                 )
130 |             torch.cuda.empty_cache()
131 |             # start phases for epoch
132 |             anchors = model_builder.create_anchors(
133 |                 self.cfg.MODEL,
134 |                 modelWithLoss.module.model,
135 |                 self.cfg.MODEL.IMAGE_SIZE,
136 |                 self.render,
137 |             )
138 |             train_anchor_based_epoch(
139 |                 modelWithLoss,
140 |                 train_loader,
141 |                 self.optimizer,
142 |                 anchors,
143 |                 self.writer,
144 |                 epoch,
145 |                 self.device,
146 |                 self.local_rank,
147 |             )
148 |             # save checkpoint
149 |             if epoch % self.cfg.TRAIN.CHECKPOINTS_EPOCHS == 0 and self.local_rank == 0:
150 |                 checkpoint.save_checkpoints(
151 |                     modelWithLoss.module.model,
152 |                     self.cfg.EXP_DIR,
153 |                     self.cfg.CHECKPOINTS_PREFIX,
154 |                     epoch,
155 |                 )
156 |
157 |             # multi scale training
158 |             if len(self.cfg.DATASET.MULTISCALE) > 1:
159 |                 batch_size, target_size = self.cfg.DATASET.MULTISCALE[
160 |                     epoch % len(self.cfg.DATASET.MULTISCALE)
161 |                 ]
162 |                 train_loader.reset_size(batch_size, target_size)
163 |
164 |             if "eval" in self.cfg.PHASE:
165 |                 pass
166 |
167 |             self.lr_scheduler.step()
168 |
169 |     def load_model(self):
170 |         previous = checkpoint.find_previous_checkpoint(self.cfg.EXP_DIR)
171 |         if previous:
172 |             self.start_epoch = previous[0][-1]
173 |             self.model = checkpoint.resume_checkpoint(
174 |                 self.model, previous[1][-1], self.cfg.TRAIN.RESUME_SCOPE
175 |             )
176 |         else:
177 |             self.start_epoch = 0
178 |             if self.cfg.RESUME_CHECKPOINT:
179 |                 if self.local_rank == 0:
180 |                     print(
181 |                         "Loading initial model weights from {:s}".format(
182 |                             self.cfg.RESUME_CHECKPOINT
183 |                         )
184 |                     )
185 |                 self.model = checkpoint.resume_checkpoint(
186 |                     self.model, self.cfg.RESUME_CHECKPOINT, self.cfg.TRAIN.RESUME_SCOPE
187 |                 )
188 |
189 |     def eval_model(self):
190 |         return
191 |
192 |
193 | if __name__ == "__main__":
194 |     """
195 |     Parse input arguments
196 |     """
197 |     parser = argparse.ArgumentParser(description="Train/Eval a ssds.pytorch network")
198 |     parser.add_argument(
199 |         "-cfg",
200 |         "--config",
201 |         dest="config_file",
202 |         help="optional config file",
203 |         default=None,
204 |         type=str,
205 |     )
206 |     parser.add_argument("--local_rank", type=int, default=0)
207 |     parser.add_argument("-e", "--eval", action="store_true")
208 |     parser.add_argument("-r", "--render", action="store_true")
209 |
210 |     if len(sys.argv) == 1:
211 |         parser.print_help()
212 |         sys.exit(1)
213 |     args = parser.parse_args()
214 |
215 |     torch.cuda.set_device(args.local_rank)
216 |     torch.distributed.init_process_group(backend="nccl")  # , init_method='env://')
217 |
218 |     cfg = config.cfg_from_file(args.config_file)
219 |     solver = Solver(cfg, args.local_rank, args.render)
220 |     solver.train_model()
221 |
--------------------------------------------------------------------------------
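The apex-based trainer above expects one process per GPU, with --local_rank injected by the launcher before init_process_group is called. A hedged launch sketch for a single node with two GPUs (the config path is a placeholder):

python -m torch.distributed.launch --nproc_per_node=2 ssds/utils/train_ddp.py -cfg experiments/cfgs/tests/test.yml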
/ssds/utils/visualize.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import argparse
4 | from tqdm import tqdm
5 |
6 | import torch
7 | from torch.utils.tensorboard import SummaryWriter
8 |
9 | from ssds.core import checkpoint, config
10 | from ssds.modeling import model_builder
11 | from ssds.dataset.dataset_factory import load_data
12 | import ssds.core.visualize_funcs as vsf
13 | import ssds.core.tools as tools
14 |
15 | if __name__ == "__main__":
16 |     parser = argparse.ArgumentParser(description="visualize a ssds.pytorch network")
17 |     parser.add_argument(
18 |         "-cfg",
19 |         "--config",
20 |         dest="config_file",
21 |         help="optional config file",
22 |         default=None,
23 |         type=str,
24 |     )
25 |     parser.add_argument(
26 |         "-a",
27 |         "--anchor-strategy",
28 |         help="analyze the anchor strategy on the validation dataset",
29 |         action="store_true",
30 |     )
31 |
32 |     if len(sys.argv) == 1:
33 |         parser.print_help()
34 |         sys.exit(1)
35 |     args = parser.parse_args()
36 |
37 |     cfg = config.cfg_from_file(args.config_file)
38 |
39 |     # Build model
40 |     print("===> Building model")
41 |     model = model_builder.create_model(cfg.MODEL)
42 |     print("Model architectures:\n{}\n".format(model))
43 |     anchors = model_builder.create_anchors(cfg.MODEL, model, cfg.MODEL.IMAGE_SIZE, True)
44 |     decoder = model_builder.create_decoder(cfg.POST_PROCESS)
45 |
46 |     print("Log details to {}".format(cfg.LOG_DIR))
47 |     writer = SummaryWriter(log_dir=cfg.LOG_DIR)
48 |     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
49 |
50 |     if cfg.RESUME_CHECKPOINT:
51 |         print("Loading initial model weights from {:s}".format(cfg.RESUME_CHECKPOINT))
52 |         checkpoint.resume_checkpoint(model, cfg.RESUME_CHECKPOINT, "")
53 |
54 |     model.eval().to(device)
55 |     data_loader = load_data(cfg.DATASET, "train")
56 |
57 |     images, targets = next(iter(data_loader))
58 |     if images.device != device:
59 |         images = images.to(device)
60 |     loc, conf = model(images)
61 |     detections = decoder(loc, conf, anchors)
62 |
63 |     # visualize anchor
64 |     if len(cfg.DATASET.MULTISCALE) > 1:
65 |         # multi scale training
66 |         for i in range(len(cfg.DATASET.MULTISCALE)):
67 |             batch_size, target_size = cfg.DATASET.MULTISCALE[i]
68 |             data_loader.reset_size(batch_size, target_size)
69 |             images, targets = next(iter(data_loader))
70 |             vsf.add_defaultAnchors(writer, images[0], anchors, epoch=i)
71 |     else:
72 |         vsf.add_defaultAnchors(writer, images[0], anchors, epoch=0)
73 |
74 |     for j, (stride, anchor) in enumerate(anchors.items()):
75 |         size = conf[j].shape[-2:]
76 |         from ssds.modeling.layers.box import extract_targets
77 |
78 |         _, _, depth = extract_targets(
79 |             targets,
80 |             anchors,
81 |             cfg.MODEL.NUM_CLASSES,
82 |             stride,
83 |             size,
84 |             cfg.MATCHER.MATCH_THRESHOLD,
85 |             cfg.MATCHER.CENTER_SAMPLING_RADIUS,
86 |         )
87 |         for i in range(images.shape[0]):
88 |             vsf.add_matchedAnchorsWithBox(
89 |                 writer, images[i], anchor, stride, depth[i], epoch=i
90 |             )
91 |
92 |     # visualize box
93 |     targets[:, :, 2:4] = targets[:, :, :2] + targets[:, :, 2:4]  # from xywh to ltrb
94 |     boxes = torch.cat((detections[1], detections[0][..., None]), dim=2)
95 |     vsf.add_imagesWithMatchedBoxes(
96 |         writer, "Images", images[:5], boxes[:5], targets[:5], epoch=0
97 |     )
98 |
99 |     if args.anchor_strategy:
100 |         data_loader = load_data(cfg.DATASET, "eval")
101 |         title = "Load Data"
102 |         progress = tqdm(
103 |             tools.IteratorTimer(data_loader),
104 |             total=len(data_loader),
105 |             smoothing=0.9,
106 |             miniters=1,
107 |             leave=True,
108 |             desc=title,
109 |         )
110 |         all_targets = []
111 |         for images, targets in progress:
112 |             targets = targets.view(-1, 5)
113 |             targets = targets[targets[:, 4] != -1]
114 |             all_targets.append(targets)
115 |         all_targets = torch.cat(all_targets, dim=0)
116 |         vsf.add_anchorStrategy(writer, all_targets)
117 |
118 |     # visualize graph
119 |     writer.add_graph(model, images)
120 |     writer.close()
121 |
--------------------------------------------------------------------------------
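A hedged invocation sketch for the visualization script above (the config path is a placeholder). It writes the default anchors, the matched anchors, the decoded boxes, and the model graph to TensorBoard under cfg.LOG_DIR; the optional -a flag additionally aggregates every ground-truth box in the eval split for the anchor-strategy summary:

python -m ssds.utils.visualize -cfg experiments/cfgs/tests/test.yml -a
tensorboard --logdir <LOG_DIR from the config>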