├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── configs
│   ├── config.py
│   ├── drf_res101_voc.yaml
│   ├── drf_vgg_voc.yaml
│   ├── refine_drf_res101_voc.yaml
│   ├── refine_drf_vgg_voc.yaml
│   ├── refine_res101_voc.yaml
│   ├── refine_vgg_voc.yaml
│   ├── refine_vgg_voc_512.yaml
│   ├── ssd_darknet19_voc.yaml
│   ├── ssd_darknet53_voc.yaml
│   ├── ssd_mobilenetv2_voc.yaml
│   ├── ssd_res101_voc.yaml
│   ├── ssd_res18_voc.yaml
│   ├── ssd_res50_coco.yaml
│   ├── ssd_res50_voc.yaml
│   ├── ssd_vgg_voc.yaml
│   ├── weave_vgg_voc.yaml
│   └── weave_vgg_voc_512.yaml
├── data
│   ├── __init__.py
│   ├── coco.py
│   ├── data_augment.py
│   ├── drf_net.jpg
│   ├── scripts
│   │   ├── VOC2007.sh
│   │   └── VOC2012.sh
│   ├── voc0712.py
│   └── voc_eval.py
├── demo.py
├── eval.py
├── images
│   ├── dog.jpg
│   ├── eagle.jpg
│   └── person.jpg
├── layers
│   ├── __init__.py
│   ├── functions
│   │   ├── __init__.py
│   │   ├── detection.py
│   │   ├── prior_box.py
│   │   └── prior_layer.py
│   └── modules
│       ├── __init__.py
│       ├── focal_loss_sigmoid.py
│       ├── focal_loss_softmax.py
│       ├── multibox_loss.py
│       ├── refine_multibox_loss.py
│       ├── weight_smooth_l1_loss.py
│       └── weight_softmax_loss.py
├── make.sh
├── models
│   ├── darknet.py
│   ├── dense_conv.py
│   ├── drf_res.py
│   ├── drf_vgg.py
│   ├── mobilenetv2.py
│   ├── model_builder.py
│   ├── model_helper.py
│   ├── refine_dense_conv.py
│   ├── refine_drf_res.py
│   ├── refine_drf_vgg.py
│   ├── refine_res.py
│   ├── refine_vgg.py
│   ├── resnet.py
│   ├── vgg.py
│   ├── weave_res.py
│   └── weave_vgg.py
├── train.py
└── utils
    ├── __init__.py
    ├── augmentations.py
    ├── averageMeter.py
    ├── box_utils.py
    ├── build.py
    ├── collections.py
    ├── convert_darknet.py
    ├── get_class_map.py
    ├── nms
    │   ├── __init__.py
    │   ├── cpu_nms.pyx
    │   ├── gpu_nms.hpp
    │   ├── gpu_nms.pyx
    │   ├── nms_kernel.cu
    │   └── py_cpu_nms.py
    ├── nms_wrapper.py
    └── timer.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.ipynb linguist-language=Python
2 | .ipynb_checkpoints/* linguist-documentation
3 | dev.ipynb linguist-documentation
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # sftp
7 | sftp-config.json
8 |
9 | # coco
10 | utils/nms/*.so
11 | utils/build
12 |
13 | # C extensions
14 | *.so
15 |
16 | # Distribution / packaging
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 |
34 |
35 | # PyInstaller
36 | # Usually these files are written by a python script from a template
37 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
38 | *.manifest
39 | *.spec
40 |
41 | # Installer logs
42 | pip-log.txt
43 | pip-delete-this-directory.txt
44 |
45 | # Unit test / coverage reports
46 | htmlcov/
47 | .tox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | .hypothesis/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 | *.json
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # IPython Notebook
79 | .ipynb_checkpoints
80 |
81 | # pyenv
82 | .python-version
83 |
84 | # celery beat schedule file
85 | celerybeat-schedule
86 |
87 | # dotenv
88 | .env
89 |
90 | # virtualenv
91 | venv/
92 | ENV/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # atom remote-sync package
101 | .remote-sync.json
102 |
103 | # weights
104 | weights/
105 |
106 | #DS_Store
107 | .DS_Store
108 |
109 | # dev stuff
110 | eval/
111 | eval.ipynb
112 | dev.ipynb
113 | .vscode/
114 |
115 | # not ready
116 | videos/
117 | templates/
118 | data/ssd_dataloader.py
119 | data/datasets/
120 | doc/visualize.py
121 | read_results.py
122 | ssd300_120000/
123 | demos/live
124 | webdemo.py
125 | test_data_aug.py
126 | weights/
127 |
128 | # attributes
129 |
130 | # pycharm
131 | .idea/
132 |
133 | # temp checkout soln
134 | data/datasets/
135 | data/ssd_dataloader.py
136 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Max deGroot, Ellis Brown
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SSD Pytorch
2 | A [PyTorch](http://pytorch.org/) implementation of SSD variants, including the original SSD, DRFNet, and RefineDet.
3 |
4 |
5 |
6 | ### Table of Contents
7 | - Installation
8 | - Datasets
9 | - Train
10 | - Evaluate
11 | - Performance
12 | - Reference
13 |
14 |
15 |
16 |
17 |
18 |
19 | ## Installation
20 | - Install [PyTorch-0.4.0](http://pytorch.org/) by selecting your environment on the website and running the appropriate command.
21 | - Clone this repository.
22 | * Note: We currently only support Python 3+.
23 | - Then download the dataset by following the [instructions](#download-voc2007-trainval--test) below.
24 | - Compile the NMS extension and install the COCO tools:
25 |
26 | ```shell
27 | cd SSD_Pytorch
28 | # if you use anaconda3, maybe you need https://github.com/rbgirshick/py-faster-rcnn/issues/706
29 | ./make.sh
30 | pip install pycocotools
31 |
32 | ```
33 |
34 | *Note*: Check your GPU architecture support in `utils/build.py`, line 131. The default is:
35 |
36 | ```Shell
37 | 'nvcc': ['-arch=sm_52',
38 |
39 | ```
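
If you are unsure of your card's compute capability, a quick check with PyTorch (assuming a CUDA build and at least one visible GPU) prints the matching `-arch` flag:

```python
import torch

# Compute capability of GPU 0, e.g. (6, 1) -> pass '-arch=sm_61' in utils/build.py
major, minor = torch.cuda.get_device_capability(0)
print('-arch=sm_%d%d' % (major, minor))
```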
40 |
41 | ## Datasets
42 | To make things easy, we provide a simple VOC dataset loader that inherits from `torch.utils.data.Dataset`, making it fully compatible with the `torchvision.datasets` [API](http://pytorch.org/docs/torchvision/datasets.html).
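
A minimal sketch of driving it with `torch.utils.data.DataLoader` (the `VOCDetection` constructor arguments below are assumptions; check `data/voc0712.py` for the exact signature):

```python
import torch.utils.data as data
from data import VOCDetection, detection_collate

# Hypothetical arguments -- see data/voc0712.py for the real constructor.
dataset = VOCDetection(root='data/datasets/VOCdevkit0712/',
                       image_sets=[['0712', '0712_trainval']])
loader = data.DataLoader(dataset, batch_size=32, shuffle=True,
                         collate_fn=detection_collate)  # stacks images, keeps targets as a list
images, targets = next(iter(loader))  # images: [B, 3, H, W]; targets: one box/label tensor per image
```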
43 |
44 | ### VOC Dataset
45 | ##### Download VOC2007 trainval & test
46 |
47 | ```Shell
48 | # specify a directory for dataset to be downloaded into, else default is ~/data/
49 | sh data/scripts/VOC2007.sh #
50 | ```
51 |
52 | ##### Download VOC2012 trainval
53 |
54 | ```Shell
55 | # specify a directory for dataset to be downloaded into, else default is ~/data/
56 | sh data/scripts/VOC2012.sh #
57 | ```
58 |
59 | ##### Merge VOC2007 and VOC2012
60 |
61 | - Move all images from VOC2007 and VOC2012 into `VOCROOT/VOC0712/JPEGImages`.
62 | - Move all annotations from VOC2007 and VOC2012 into `VOCROOT/VOC0712/Annotations`.
63 | - Rename and merge the VOC2007 and VOC2012 `ImageSets/Main/*.txt` lists into
64 |   `VOCROOT/VOC0712/ImageSets/Main/*.txt`; a Python sketch of this merge follows.
65 |
66 | The merged txt lists are: `2012_test.txt`, `2007_test.txt`, `0712_trainval_test.txt`,
67 | `2012_trainval.txt`, `0712_trainval.txt`.
68 |
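
Under the standard VOCdevkit layout produced by the download scripts above, the merge can be scripted; a sketch (the exact txt renaming is an assumption based on the list names used in the configs):

```python
import shutil
from pathlib import Path

src = Path('~/data/VOCdevkit').expanduser()        # where the download scripts put VOC2007/VOC2012
dst = Path('data/datasets/VOCdevkit0712/VOC0712')  # VOCROOT/VOC0712 expected by the configs

for sub in ('JPEGImages', 'Annotations'):
    (dst / sub).mkdir(parents=True, exist_ok=True)
    for year in ('VOC2007', 'VOC2012'):
        for f in (src / year / sub).iterdir():
            shutil.copy(f, dst / sub / f.name)

main = dst / 'ImageSets' / 'Main'
main.mkdir(parents=True, exist_ok=True)
shutil.copy(src / 'VOC2007/ImageSets/Main/test.txt', main / '2007_test.txt')
shutil.copy(src / 'VOC2012/ImageSets/Main/trainval.txt', main / '2012_trainval.txt')
# 0712_trainval.txt is the concatenation of both trainval lists
with open(main / '0712_trainval.txt', 'w') as out:
    for year in ('VOC2007', 'VOC2012'):
        out.write((src / year / 'ImageSets/Main/trainval.txt').read_text())
```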
69 | ### COCO Dataset
70 | Install the MS COCO dataset at /path/to/coco from the [official website](http://mscoco.org/); the default location is ~/data/COCO. Follow the [instructions](https://github.com/rbgirshick/py-faster-rcnn/blob/77b773655505599b94fd8f3f9928dbf1a9a776c7/data/README.md) to prepare the *minival2014* and *valminusminival2014* annotations. All label files (.json) should be under the COCO/annotations/ folder. It should have this basic structure:
71 | ```Shell
72 | $COCO/
73 | $COCO/cache/
74 | $COCO/annotations/
75 | $COCO/images/
76 | $COCO/images/test2015/
77 | $COCO/images/train2014/
78 | $COCO/images/val2014/
79 | ```
80 | *UPDATE*: COCO has since released *train2017* and *val2017* sets, which are simply new splits of the same images.
81 |
82 |
83 | ## Training
84 | - First download the fc-reduced [VGG-16](https://arxiv.org/abs/1409.1556) PyTorch base network weights at: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
85 | - ResNet pre-trained basenet weight file is available at [ResNet50](https://download.pytorch.org/models/resnet50-19c8e357.pth), [ResNet101](https://download.pytorch.org/models/resnet101-5d3b4d8f.pth), [ResNet152](https://download.pytorch.org/models/resnet152-b121ed2d.pth)
86 | - By default, we assume you have downloaded the files into the `SSD_Pytorch/weights/pretrained_models` dir:
87 |
88 | ```Shell
89 | mkdir weights
90 | cd weights
91 | mkdir pretrained_models
92 |
93 | wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
94 | wget https://download.pytorch.org/models/resnet50-19c8e357.pth
95 | wget https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
96 | wget https://download.pytorch.org/models/resnet152-b121ed2d.pth
97 | mv *.pth pretrained_models/
98 | ```
99 |
100 | - To train SSD_Pytorch, run the training script and specify the parameters listed in `train.py` as flags, or edit them manually.
101 |
102 | ```Shell
103 | python train.py --cfg ./configs/ssd_vgg_voc.yaml
104 | ```
105 |
106 | - Note:
107 | All training settings live in the YAML config (here `ssd_vgg_voc.yaml`); edit it to suit your setup.
108 |
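To sanity-check a config before launching, the YAML can be read directly (a sketch using PyYAML; keys taken from `configs/ssd_vgg_voc.yaml`):

```python
import yaml

with open('configs/ssd_vgg_voc.yaml') as f:
    cfg = yaml.safe_load(f)
print(cfg['MODEL']['CONV_BODY'], cfg['SOLVER']['BASE_LR'], cfg['TRAIN']['BATCH_SIZE'])
# -> vgg.SSDVgg 0.001 32
```
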
109 | - To evaluate a trained network:
110 |
111 | ```Shell
112 | python eval.py --cfg ./configs/ssd_vgg_voc.yaml --weights ./eval_weights
113 | ```
114 |
115 | - To run detection on images:
116 |
117 | ```Shell
118 | # first put some images in ./images
119 | python demo.py --cfg ./configs/ssd_vgg_voc.yaml --images ./images --save_folder ./output
120 |
121 | ```
122 | You can specify the parameters listed in `eval.py` or `demo.py` via flags, or edit them manually.
123 |
124 | ## Performance
125 |
126 | #### VOC2007 Test
127 |
128 | ##### mAP
129 |
130 | We retrained some models, so the numbers differ from the original papers.
131 | Input size = 300.
132 |
133 | | ssd_vgg | ssd_res | ssd_darknet | drf_ssd_vgg | drf_ssd_res | refine_drf_vgg | refine_ssd_vgg |
134 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|
135 | | 77.5% | 77.0% | 77.6% | 79.6% | 79.0% | 80.2% | 80.4% |
136 |
137 |
138 |
139 |
140 | ## References
141 | - Wei Liu, et al. "SSD: Single Shot MultiBox Detector." [ECCV 2016](http://arxiv.org/abs/1512.02325).
142 | - [Original Implementation (Caffe)](https://github.com/weiliu89/caffe/tree/ssd)
143 | - A list of other great SSD ports that were sources of inspiration (especially the Chainer repo):
144 |   * [ssd.pytorch](https://github.com/amdegroot/ssd.pytorch)
145 |   * [RFBNet](https://github.com/ruinmessi/RFBNet)
146 |   * [Chainer](https://github.com/Hakuyume/chainer-ssd)
147 |   * [torchcv](https://github.com/kuangliu/torchcv)
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
--------------------------------------------------------------------------------
/configs/drf_res101_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: drf_res101
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: drf_res.DRFSSDRes101
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/resnet101-5d3b4d8f.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 | SMALL:
21 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5], [3, 3], [1, 1]]
22 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
23 | NUM_ANCHORS: [6, 6, 6, 6, 4, 4]
24 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
25 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
26 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
27 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
28 | VARIANCE : [0.1, 0.2]
29 | CLIP: True
30 | IMG_WH: [320, 320]
31 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
32 | USE_MAX_SIZE: True
33 |
34 | BIG:
35 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
36 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
37 | NUM_ANCHORS: [6, 6, 6, 6, 6, 4, 4]
38 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
39 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
40 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
41 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
42 |
43 | CLIP: True
44 | IMG_WH: [512, 512]
45 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
46 | USE_MAX_SIZE: True
47 |
48 | SOLVER:
49 | WEIGHT_DECAY: 0.0005
50 | BASE_LR: 0.001
51 | GAMMA: 0.1
52 | MOMENTUM: 0.9
53 | EPOCH_STEPS: [0, 150, 250]
54 | END_EPOCH: 250
55 | START_EPOCH: 0
56 |
57 | DATASETS:
58 | TRAIN_TYPE: [['0712', '0712_trainval']]
59 | VAL_TYPE: [['0712', '2007_test']]
60 | DATAROOT: 'data/datasets/VOCdevkit0712/'
61 | DATA_TYPE: 'VOC'
62 | SETS:
63 | VOC: [['0712', '0712_trainval']]
64 | VOC0712PLUS: [['0712', '0712_trainval_test']]
65 | VOC0712: [['2012', '2012_trainval']]
66 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
67 | VOC2007: [['0712', "2007_test"]]
68 | COCOval: [['2014', 'minival']]
69 | VOCROOT: 'data/datasets/VOCdevkit0712/'
70 | COCOROOT: 'data/datasets/coco2015'
71 |
72 | TEST:
73 | INPUT_WH: [300, 300]
74 | CONFIDENCE_THRESH: 0.01
75 | NMS_OVERLAP: 0.45
76 | BATCH_SIZE: 16
77 |
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
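
A note on the anchor sizes in these configs: the BIG (512-input) `MIN_SIZES`/`MAX_SIZES` follow the standard SSD scale rule, with a special-cased first layer at scale 0.07 and the remaining layers spaced evenly between 0.15 and 0.90. A small sketch reproduces the numbers (the repo presumably bakes the results into the YAML rather than computing them; this is just the derivation):

```python
img_size, s_min, s_max, m = 512, 0.15, 0.90, 6
scales = [0.07] + [s_min + (s_max - s_min) * k / (m - 1) for k in range(m)] + [1.05]
min_sizes = [round(img_size * s, 2) for s in scales[:-1]]
max_sizes = [round(img_size * s, 2) for s in scales[1:]]   # each layer's max = next layer's min
print(min_sizes)  # [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
print(max_sizes)  # [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
```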
/configs/drf_vgg_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: drf_vgg
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: drf_vgg.DRFVgg
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/vgg16_reducedfc.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 3
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
24 | NUM_ANCHORS: [6, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [320, 320]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
38 | NUM_ANCHORS: [6, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.004
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [320, 320]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/refine_drf_res101_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: drf_res101
3 | SIZE: '300'
4 | REFINE: True
5 | CONV_BODY: refine_drf_res.RefineDRFRes101
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/resnet101-5d3b4d8f.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 | SMALL:
21 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
22 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
23 | ODM_CHANNELS: [512, 1024, 512, 256, 256, 256]
24 | NUM_ANCHORS: [6, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [320, 320]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
38 | ODM_CHANNELS: [512, 1024, 512, 256, 256, 256]
39 | NUM_ANCHORS: [6, 6, 6, 6, 6, 4, 4]
40 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
41 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
42 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
43 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
44 |
45 | CLIP: True
46 | IMG_WH: [512, 512]
47 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
48 | USE_MAX_SIZE: True
49 |
50 | SOLVER:
51 | WEIGHT_DECAY: 0.0005
52 | BASE_LR: 0.001
53 | GAMMA: 0.1
54 | MOMENTUM: 0.9
55 | EPOCH_STEPS: [0, 150, 200]
56 | END_EPOCH: 250
57 | START_EPOCH: 0
58 |
59 | DATASETS:
60 | TRAIN_TYPE: [['0712', '0712_trainval']]
61 | VAL_TYPE: [['0712', '2007_test']]
62 | DATAROOT: 'data/datasets/VOCdevkit0712/'
63 | DATA_TYPE: 'VOC'
64 | SETS:
65 | VOC: [['0712', '0712_trainval']]
66 | VOC0712PLUS: [['0712', '0712_trainval_test']]
67 | VOC0712: [['2012', '2012_trainval']]
68 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
69 | VOC2007: [['0712', "2007_test"]]
70 | COCOval: [['2014', 'minival']]
71 | VOCROOT: 'data/datasets/VOCdevkit0712/'
72 | COCOROOT: 'data/datasets/coco2015'
73 |
74 | TEST:
75 | INPUT_WH: [300, 300]
76 | CONFIDENCE_THRESH: 0.01
77 | NMS_OVERLAP: 0.45
78 | BATCH_SIZE: 16
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/configs/refine_drf_vgg_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: refine_drf_vgg
3 | SIZE: '300'
4 | REFINE: True
5 | CONV_BODY: refine_drf_vgg.RefineDRFVgg
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/vgg16_reducedfc.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
24 | ODM_CHANNELS: [512, 1024, 512, 256, 256, 256]
25 | NUM_ANCHORS: [6, 6, 6, 6, 4, 4]
26 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
27 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
28 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
29 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
30 | VARIANCE : [0.1, 0.2]
31 | CLIP: True
32 | IMG_WH: [320, 320]
33 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
34 | USE_MAX_SIZE: True
35 |
36 | BIG:
37 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
38 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
39 | ODM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
40 | NUM_ANCHORS: [6, 6, 6, 6, 6, 4, 4]
41 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
42 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
43 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
44 | ASPECT_RATIOS : [[2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
45 |
46 | CLIP: True
47 | IMG_WH: [512, 512]
48 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
49 | USE_MAX_SIZE: True
50 |
51 | SOLVER:
52 | WEIGHT_DECAY: 0.0005
53 | BASE_LR: 0.004
54 | GAMMA: 0.1
55 | MOMENTUM: 0.9
56 | EPOCH_STEPS: [0, 150, 200]
57 | END_EPOCH: 250
58 | START_EPOCH: 0
59 |
60 | DATASETS:
61 | TRAIN_TYPE: [['0712', '0712_trainval']]
62 | VAL_TYPE: [['0712', '2007_test']]
63 | DATAROOT: 'data/datasets/VOCdevkit0712/'
64 | DATA_TYPE: 'VOC'
65 | SETS:
66 | VOC: [['0712', '0712_trainval']]
67 | VOC0712PLUS: [['0712', '0712_trainval_test']]
68 | VOC0712: [['2012', '2012_trainval']]
69 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
70 | VOC2007: [['0712', "2007_test"]]
71 | COCOval: [['2014', 'minival']]
72 | VOCROOT: 'data/datasets/VOCdevkit0712/'
73 | COCOROOT: 'data/datasets/coco2015'
74 |
75 | TEST:
76 | INPUT_WH: [300, 300]
77 | CONFIDENCE_THRESH: 0.01
78 | NMS_OVERLAP: 0.45
79 | BATCH_SIZE: 16
80 |
81 |
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/configs/refine_res101_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: refine_res
3 | SIZE: '300'
4 | REFINE: True
5 | CONV_BODY: refine_res.RefineResnet101
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/resnet101-5d3b4d8f.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5]]
23 | ARM_CHANNELS: [512, 1024, 512, 256]
24 | ODM_CHANNELS: [256, 256, 256, 256]
25 | NUM_ANCHORS: [3, 3, 3, 3]
26 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
27 | MIN_SIZES: [30, 64, 128, 256]
28 | MAX_SIZES: [64, 128, 256, 315]
29 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
30 | VARIANCE : [0.1, 0.2]
31 | CLIP: True
32 | IMG_WH: [320, 320]
33 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
34 | USE_MAX_SIZE: False
35 |
36 | BIG:
37 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8]]
38 | ARM_CHANNELS: [512, 1024, 512, 256]
39 | ODM_CHANNELS: [256, 256, 256, 256]
40 | NUM_ANCHORS: [3, 3, 3, 3]
41 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
42 | MIN_SIZES: [30, 64, 128, 256]
43 | MAX_SIZES: [64, 128, 256, 315]
44 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
45 | CLIP: True
46 | IMG_WH: [512, 512]
47 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
48 | USE_MAX_SIZE: False
49 |
50 | SOLVER:
51 | WEIGHT_DECAY: 0.0005
52 | BASE_LR: 0.001
53 | GAMMA: 0.1
54 | MOMENTUM: 0.9
55 | EPOCH_STEPS: [0, 150, 200]
56 | END_EPOCH: 250
57 | START_EPOCH: 0
58 |
59 | DATASETS:
60 | TRAIN_TYPE: [['0712', '0712_trainval']]
61 | VAL_TYPE: [['0712', '2007_test']]
62 | DATAROOT: 'data/datasets/VOCdevkit0712/'
63 | DATA_TYPE: 'VOC'
64 | SETS:
65 | VOC: [['0712', '0712_trainval']]
66 | VOC0712PLUS: [['0712', '0712_trainval_test']]
67 | VOC0712: [['2012', '2012_trainval']]
68 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
69 | VOC2007: [['0712', "2007_test"]]
70 | COCOval: [['2014', 'minival']]
71 | VOCROOT: 'data/datasets/VOCdevkit0712/'
72 | COCOROOT: 'data/datasets/coco2015'
73 |
74 | TEST:
75 | INPUT_WH: [320, 320]
76 | CONFIDENCE_THRESH: 0.01
77 | NMS_OVERLAP: 0.45
78 | BATCH_SIZE: 16
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/configs/refine_vgg_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: refine_vgg
3 | SIZE: '300'
4 | REFINE: True
5 | CONV_BODY: refine_vgg.refine_vgg
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/vgg16_reducedfc.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5]]
23 | ARM_CHANNELS: [512, 1024, 256, 256]
24 | ODM_CHANNELS: [256, 256, 256, 256]
25 | NUM_ANCHORS: [3, 3, 3, 3]
26 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
27 | MIN_SIZES: [30, 64, 128, 256]
28 | MAX_SIZES: [64, 128, 256, 315]
29 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
30 | VARIANCE : [0.1, 0.2]
31 | CLIP: True
32 | IMG_WH: [320, 320]
33 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
34 | USE_MAX_SIZE: False
35 |
36 | BIG:
37 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8]]
38 | ARM_CHANNELS: [512, 1024, 256, 256]
39 | ODM_CHANNELS: [256, 256, 256, 256]
40 | NUM_ANCHORS: [3, 3, 3, 3]
41 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
42 | MIN_SIZES: [30, 64, 128, 256]
43 | MAX_SIZES: [64, 128, 256, 315]
44 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
45 | CLIP: True
46 | IMG_WH: [512, 512]
47 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
48 | USE_MAX_SIZE: False
49 |
50 | SOLVER:
51 | WEIGHT_DECAY: 0.0005
52 | BASE_LR: 0.002
53 | GAMMA: 0.1
54 | MOMENTUM: 0.9
55 | EPOCH_STEPS: [0, 150, 200]
56 | END_EPOCH: 250
57 | START_EPOCH: 0
58 |
59 | DATASETS:
60 | TRAIN_TYPE: [['0712', '0712_trainval']]
61 | VAL_TYPE: [['0712', '2007_test']]
62 | DATAROOT: 'data/datasets/VOCdevkit0712/'
63 | DATA_TYPE: 'VOC'
64 | SETS:
65 | VOC: [['0712', '0712_trainval']]
66 | VOC0712PLUS: [['0712', '0712_trainval_test']]
67 | VOC0712: [['2012', '2012_trainval']]
68 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
69 | VOC2007: [['0712', "2007_test"]]
70 | COCOval: [['2014', 'minival']]
71 | VOCROOT: 'data/datasets/VOCdevkit0712/'
72 | COCOROOT: 'data/datasets/coco2015'
73 |
74 | TEST:
75 | INPUT_WH: [320, 320]
76 | CONFIDENCE_THRESH: 0.01
77 | NMS_OVERLAP: 0.45
78 | BATCH_SIZE: 16
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
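
The `VARIANCE: [0.1, 0.2]` pair in these configs scales the regression targets: center offsets are divided by 0.1 (times the prior size) and log width/height ratios by 0.2. A sketch of the standard SSD encoding (the repo's own version presumably lives in `utils/box_utils.py`):

```python
import torch

def encode(matched, priors, variances=(0.1, 0.2)):
    # matched: [N, 4] gt boxes (xmin, ymin, xmax, ymax); priors: [N, 4] (cx, cy, w, h)
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]   # center offset
    g_cxcy /= variances[0] * priors[:, 2:]                           # scale by variance * prior size
    g_wh = torch.log((matched[:, 2:] - matched[:, :2]) / priors[:, 2:]) / variances[1]
    return torch.cat([g_cxcy, g_wh], dim=1)                          # [N, 4] regression targets
```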
/configs/refine_vgg_voc_512.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: refine_vgg
3 | SIZE: '512'
4 | REFINE: True
5 | CONV_BODY: refine_vgg.refine_vgg
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/vgg16_reducedfc.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5]]
23 | ARM_CHANNELS: [512, 1024, 256, 256]
24 | ODM_CHANNELS: [256, 256, 256, 256]
25 | NUM_ANCHORS: [3, 3, 3, 3]
26 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
27 | MIN_SIZES: [30, 64, 128, 256]
28 | MAX_SIZES: [64, 128, 256, 315]
29 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
30 | VARIANCE : [0.1, 0.2]
31 | CLIP: True
32 | IMG_WH: [320, 320]
33 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
34 | USE_MAX_SIZE: False
35 |
36 | BIG:
37 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8]]
38 | ARM_CHANNELS: [512, 1024, 256, 256]
39 | ODM_CHANNELS: [256, 256, 256, 256]
40 | NUM_ANCHORS: [3, 3, 3, 3]
41 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
42 | MIN_SIZES: [30, 64, 128, 256]
43 | MAX_SIZES: [64, 128, 256, 315]
44 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
45 | CLIP: True
46 | IMG_WH: [512, 512]
47 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
48 | USE_MAX_SIZE: False
49 |
50 | SOLVER:
51 | WEIGHT_DECAY: 0.0005
52 | BASE_LR: 0.002
53 | GAMMA: 0.1
54 | MOMENTUM: 0.9
55 | EPOCH_STEPS: [0, 150, 200]
56 | END_EPOCH: 250
57 | START_EPOCH: 0
58 |
59 | DATASETS:
60 | TRAIN_TYPE: [['0712', '0712_trainval']]
61 | VAL_TYPE: [['0712', '2007_test']]
62 | DATAROOT: 'data/datasets/VOCdevkit0712/'
63 | DATA_TYPE: 'VOC'
64 | SETS:
65 | VOC: [['0712', '0712_trainval']]
66 | VOC0712PLUS: [['0712', '0712_trainval_test']]
67 | VOC0712: [['2012', '2012_trainval']]
68 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
69 | VOC2007: [['0712', "2007_test"]]
70 | COCOval: [['2014', 'minival']]
71 | VOCROOT: 'data/datasets/VOCdevkit0712/'
72 | COCOROOT: 'data/datasets/coco2015'
73 |
74 | TEST:
75 | INPUT_WH: [512, 512]
76 | CONFIDENCE_THRESH: 0.01
77 | NMS_OVERLAP: 0.45
78 | BATCH_SIZE: 16
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/configs/ssd_darknet19_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_darknet19
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: darknet.SSDarknet19
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/convert_darknet19.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [256, 512, 1024, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [256, 512, 1024, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/ssd_darknet53_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_darknet53
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: darknet.SSDarknet53
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/convert_darknet53.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [256, 512, 1024, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [256, 512, 1024, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/ssd_mobilenetv2_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_mobilenetv2
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: mobilenetv2.SSDMobilenetv2
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/mobilenetv2_feature.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [32, 96, 1280, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [32, 96, 1280, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/ssd_res101_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_res101
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: resnet.SSDResnet101
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/resnet101-5d3b4d8f.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/ssd_res18_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_res18
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: resnet.SSDResnet18
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/resnet18-5c106cde.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [128, 256, 512, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [128, 256, 512, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/ssd_res50_coco.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_res50
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: resnet.SSDResnet50
6 | NUM_CLASSES: 81
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/resnet50-19c8e357.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['2014', 'train'], ['2014', 'valminusminival']]
60 | VAL_TYPE: [['2014', 'minival']]
61 | DATAROOT: 'data/datasets/coco2015'
62 | DATA_TYPE: 'COCO'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/ssd_res50_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_res50
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: resnet.SSDResnet50
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/resnet50-19c8e357.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 16
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/ssd_vgg_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: ssd_vgg
3 | SIZE: '300'
4 | REFINE: False
5 | CONV_BODY: vgg.SSDVgg
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/vgg16_reducedfc.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[38, 38], [19, 19], [10, 10], [5, 5], [3, 3], [1, 1]]
23 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256]
24 | NUM_ANCHORS: [4, 6, 6, 6, 4, 4]
25 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
26 | MIN_SIZES: [30, 60, 111, 162, 213, 264]
27 | MAX_SIZES: [60, 111, 162, 213, 264, 315]
28 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
29 | VARIANCE : [0.1, 0.2]
30 | CLIP: True
31 | IMG_WH: [300, 300]
32 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
33 | USE_MAX_SIZE: True
34 |
35 | BIG:
36 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8], [4, 4], [2, 2], [1, 1]]
37 | ARM_CHANNELS: [512, 1024, 512, 256, 256, 256, 256]
38 | NUM_ANCHORS: [4, 6, 6, 6, 6, 4, 4]
39 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
40 | MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
41 | MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6]
42 | ASPECT_RATIOS : [[2, 0.5], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 3, 0.5, 0.333], [2, 0.5], [2, 0.5]]
43 |
44 | CLIP: True
45 | IMG_WH: [512, 512]
46 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
47 | USE_MAX_SIZE: True
48 |
49 | SOLVER:
50 | WEIGHT_DECAY: 0.0005
51 | BASE_LR: 0.001
52 | GAMMA: 0.1
53 | MOMENTUM: 0.9
54 | EPOCH_STEPS: [0, 150, 200]
55 | END_EPOCH: 250
56 | START_EPOCH: 0
57 |
58 | DATASETS:
59 | TRAIN_TYPE: [['0712', '0712_trainval']]
60 | VAL_TYPE: [['0712', '2007_test']]
61 | DATAROOT: 'data/datasets/VOCdevkit0712/'
62 | DATA_TYPE: 'VOC'
63 | SETS:
64 | VOC: [['0712', '0712_trainval']]
65 | VOC0712PLUS: [['0712', '0712_trainval_test']]
66 | VOC0712: [['2012', '2012_trainval']]
67 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
68 | VOC2007: [['0712', "2007_test"]]
69 | COCOval: [['2014', 'minival']]
70 | VOCROOT: 'data/datasets/VOCdevkit0712/'
71 | COCOROOT: 'data/datasets/coco2015'
72 |
73 | TEST:
74 | INPUT_WH: [300, 300]
75 | CONFIDENCE_THRESH: 0.01
76 | NMS_OVERLAP: 0.45
77 | BATCH_SIZE: 32
78 |
79 |
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/configs/weave_vgg_voc.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: weave_vgg
3 | SIZE: '300'
4 | REFINE: True
5 | CONV_BODY: weave_vgg.weave_vgg
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/vgg16_reducedfc.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5]]
23 | ARM_CHANNELS: [512, 1024, 256, 256]
24 | ODM_CHANNELS: [256, 256, 256, 256]
25 | NUM_ANCHORS: [3, 3, 3, 3]
26 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
27 | MIN_SIZES: [30, 64, 128, 256]
28 | MAX_SIZES: [64, 128, 256, 315]
29 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
30 | VARIANCE : [0.1, 0.2]
31 | CLIP: True
32 | IMG_WH: [320, 320]
33 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
34 | USE_MAX_SIZE: False
35 |
36 | BIG:
37 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8]]
38 | ARM_CHANNELS: [512, 1024, 256, 256]
39 | ODM_CHANNELS: [256, 256, 256, 256]
40 | NUM_ANCHORS: [3, 3, 3, 3]
41 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
42 | MIN_SIZES: [30, 64, 128, 256]
43 | MAX_SIZES: [64, 128, 256, 315]
44 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
45 | CLIP: True
46 | IMG_WH: [512, 512]
47 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
48 | USE_MAX_SIZE: False
49 |
50 | SOLVER:
51 | WEIGHT_DECAY: 0.0005
52 | BASE_LR: 0.004
53 | GAMMA: 0.1
54 | MOMENTUM: 0.9
55 | EPOCH_STEPS: [0, 150, 200]
56 | END_EPOCH: 250
57 | START_EPOCH: 0
58 |
59 | DATASETS:
60 | TRAIN_TYPE: [['0712', '0712_trainval']]
61 | VAL_TYPE: [['0712', '2007_test']]
62 | DATAROOT: 'data/datasets/VOCdevkit0712/'
63 | DATA_TYPE: 'VOC'
64 | SETS:
65 | VOC: [['0712', '0712_trainval']]
66 | VOC0712PLUS: [['0712', '0712_trainval_test']]
67 | VOC0712: [['2012', '2012_trainval']]
68 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
69 | VOC2007: [['0712', "2007_test"]]
70 | COCOval: [['2014', 'minival']]
71 | VOCROOT: 'data/datasets/VOCdevkit0712/'
72 | COCOROOT: 'data/datasets/coco2015'
73 |
74 | TEST:
75 | INPUT_WH: [320, 320]
76 | CONFIDENCE_THRESH: 0.01
77 | NMS_OVERLAP: 0.45
78 | BATCH_SIZE: 16
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/configs/weave_vgg_voc_512.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 | TYPE: weave_512_vgg
3 | SIZE: '512'
4 | REFINE: True
5 | CONV_BODY: weave_vgg.weave_vgg
6 | NUM_CLASSES: 21
7 | LOAD_PRETRAINED_WEIGHTS: True
8 | PRETRAIN_WEIGHTS: './weights/pretrained_models/vgg16_reducedfc.pth'
9 |
10 | TRAIN:
11 | OVERLAP: 0.5
12 | BGR_MEAN: [104, 117, 123]
13 | BATCH_SIZE: 32
14 | OHEM: True
15 | NEG_RATIO: 3
16 | WARMUP: True
17 | WARMUP_EPOCH: 2
18 | TRAIN_ON: True
19 |
20 |
21 | SMALL:
22 | FEATURE_MAPS: [[40, 40], [20, 20], [10, 10], [5, 5]]
23 | ARM_CHANNELS: [512, 1024, 256, 256]
24 | ODM_CHANNELS: [256, 256, 256, 256]
25 | NUM_ANCHORS: [3, 3, 3, 3]
26 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
27 | MIN_SIZES: [30, 64, 128, 256]
28 | MAX_SIZES: [64, 128, 256, 315]
29 | ASPECT_RATIOS : [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
30 | VARIANCE : [0.1, 0.2]
31 | CLIP: True
32 | IMG_WH: [320, 320]
33 | INPUT_FIXED: True # if you want to input different size, you need to set this False.
34 | USE_MAX_SIZE: False
35 |
36 | BIG:
37 | FEATURE_MAPS: [[64, 64], [32, 32], [16, 16], [8, 8]]
38 | ARM_CHANNELS: [512, 1024, 256, 256]
39 | ODM_CHANNELS: [256, 256, 256, 256]
40 | NUM_ANCHORS: [3, 3, 3, 3]
41 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64]]
42 | MIN_SIZES: [30, 64, 128, 256]
43 | MAX_SIZES: [64, 128, 256, 315]
44 |   ASPECT_RATIOS: [[2, 0.5], [2, 0.5], [2, 0.5], [2, 0.5]]
45 | CLIP: True
46 | IMG_WH: [512, 512]
47 |   INPUT_FIXED: True # set this to False if you want to feed inputs of a different size.
48 | USE_MAX_SIZE: False
49 |
50 | SOLVER:
51 | WEIGHT_DECAY: 0.0005
52 | BASE_LR: 0.002
53 | GAMMA: 0.1
54 | MOMENTUM: 0.9
55 | EPOCH_STEPS: [0, 150, 200]
56 | END_EPOCH: 250
57 | START_EPOCH: 0
58 |
59 | DATASETS:
60 | TRAIN_TYPE: [['0712', '0712_trainval']]
61 | VAL_TYPE: [['0712', '2007_test']]
62 | DATAROOT: 'data/datasets/VOCdevkit0712/'
63 | DATA_TYPE: 'VOC'
64 | SETS:
65 | VOC: [['0712', '0712_trainval']]
66 | VOC0712PLUS: [['0712', '0712_trainval_test']]
67 | VOC0712: [['2012', '2012_trainval']]
68 | COCO: [['2014', 'train'], ['2014', 'valminusminival']]
69 | VOC2007: [['0712', "2007_test"]]
70 | COCOval: [['2014', 'minival']]
71 | VOCROOT: 'data/datasets/VOCdevkit0712/'
72 | COCOROOT: 'data/datasets/coco2015'
73 |
74 | TEST:
75 | INPUT_WH: [512, 512]
76 | CONFIDENCE_THRESH: 0.01
77 | NMS_OVERLAP: 0.45
78 | BATCH_SIZE: 16
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .voc0712 import VOCDetection, detection_collate
3 | from .coco import *
4 | from .data_augment import *
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/data/data_augment.py:
--------------------------------------------------------------------------------
1 | """Data augmentation functionality. Passed as callable transformations to
2 | Dataset classes.
3 |
4 | The data augmentation procedures were adapted from @weiliu89's SSD paper
5 | http://arxiv.org/abs/1512.02325
6 | 
7 | (random crop, photometric distortion, expand and mirror for training)
8 |
9 | Ellis Brown, Max deGroot
10 | """
11 |
12 | import torch
13 | from torchvision import transforms
14 | import cv2
15 | import numpy as np
16 | import random
17 | import math
18 | from utils.box_utils import matrix_iou
19 |
20 |
21 | def _crop(image, boxes, labels):
22 | height, width, _ = image.shape
23 |
24 | if len(boxes) == 0:
25 | return image, boxes, labels
26 |
27 | while True:
28 | mode = random.choice((
29 | None,
30 | (0.1, None),
31 | (0.3, None),
32 | (0.5, None),
33 | (0.7, None),
34 | (0.9, None),
35 | (None, None),
36 | ))
37 |
38 | if mode is None:
39 | return image, boxes, labels
40 |
41 | min_iou, max_iou = mode
42 | if min_iou is None:
43 | min_iou = float('-inf')
44 | if max_iou is None:
45 | max_iou = float('inf')
46 |
47 | for _ in range(50):
48 | scale = random.uniform(0.3, 1.)
49 | min_ratio = max(0.5, scale * scale)
50 | max_ratio = min(2, 1. / scale / scale)
51 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio))
52 | w = int(scale * ratio * width)
53 | h = int((scale / ratio) * height)
54 |
55 | l = random.randrange(width - w)
56 | t = random.randrange(height - h)
57 | roi = np.array((l, t, l + w, t + h))
58 |
59 | iou = matrix_iou(boxes, roi[np.newaxis])
60 |
61 | if not (min_iou <= iou.min() and iou.max() <= max_iou):
62 | continue
63 |
64 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]]
65 |
66 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2
67 | mask = np.logical_and(roi[:2] < centers, centers < roi[2:]) \
68 | .all(axis=1)
69 | boxes_t = boxes[mask].copy()
70 | labels_t = labels[mask].copy()
71 | if len(boxes_t) == 0:
72 | continue
73 |
74 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
75 | boxes_t[:, :2] -= roi[:2]
76 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
77 | boxes_t[:, 2:] -= roi[:2]
78 |
79 | return image_t, boxes_t, labels_t
80 |
81 |
82 | def _distort(image):
83 | def _convert(image, alpha=1, beta=0):
84 | tmp = image.astype(float) * alpha + beta
85 | tmp[tmp < 0] = 0
86 | tmp[tmp > 255] = 255
87 | image[:] = tmp
88 |
89 | image = image.copy()
90 |
91 | if random.randrange(2):
92 | _convert(image, beta=random.uniform(-32, 32))
93 |
94 | if random.randrange(2):
95 | _convert(image, alpha=random.uniform(0.5, 1.5))
96 |
97 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
98 |
99 | if random.randrange(2):
100 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
101 | tmp %= 180
102 | image[:, :, 0] = tmp
103 |
104 | if random.randrange(2):
105 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
106 |
107 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
108 |
109 | return image
110 |
111 |
112 | def _expand(image, boxes, fill, p):
113 | if random.random() > p:
114 | return image, boxes
115 |
116 | height, width, depth = image.shape
117 | for _ in range(50):
118 | scale = random.uniform(1, 4)
119 |
120 | min_ratio = max(0.5, 1. / scale / scale)
121 | max_ratio = min(2, scale * scale)
122 | ratio = math.sqrt(random.uniform(min_ratio, max_ratio))
123 | ws = scale * ratio
124 | hs = scale / ratio
125 | if ws < 1 or hs < 1:
126 | continue
127 | w = int(ws * width)
128 | h = int(hs * height)
129 |
130 | left = random.randint(0, w - width)
131 | top = random.randint(0, h - height)
132 |
133 | boxes_t = boxes.copy()
134 | boxes_t[:, :2] += (left, top)
135 | boxes_t[:, 2:] += (left, top)
136 |
137 | expand_image = np.empty((h, w, depth), dtype=image.dtype)
138 | expand_image[:, :] = fill
139 | expand_image[top:top + height, left:left + width] = image
140 | image = expand_image
141 |
142 |         return image, boxes_t
143 |     return image, boxes  # no valid expansion found in 50 tries; return inputs unchanged
144 |
145 | def _mirror(image, boxes):
146 | _, width, _ = image.shape
147 | if random.randrange(2):
148 | image = image[:, ::-1]
149 | boxes = boxes.copy()
150 | boxes[:, 0::2] = width - boxes[:, 2::-2]
151 | return image, boxes
152 |
153 |
154 | def preproc_for_test(image, resize_wh, mean):
155 | interp_methods = [
156 | cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST,
157 | cv2.INTER_LANCZOS4
158 | ]
159 | interp_method = interp_methods[random.randrange(5)]
160 | # interp_method = interp_methods[0]
161 | image = cv2.resize(
162 | image, (resize_wh[0], resize_wh[1]), interpolation=interp_method)
163 | image = image.astype(np.float32)
164 | image -= mean
165 | # to rgb
166 | # image = image[:, :, (2, 1, 0)]
167 | return image.transpose(2, 0, 1)
168 |
169 |
170 | class preproc(object):
171 | def __init__(self, resize_wh, rgb_means, p):
172 | self.means = rgb_means
173 | self.resize_wh = resize_wh
174 | self.p = p
175 |
176 | def __call__(self, image, targets):
177 | boxes = targets[:, :-1].copy()
178 | labels = targets[:, -1].copy()
179 | if len(boxes) == 0:
180 | #boxes = np.empty((0, 4))
181 | targets = np.zeros((1, 5))
182 | image = preproc_for_test(image, self.resize_wh, self.means)
183 | return torch.from_numpy(image), targets
184 |
185 | image_o = image.copy()
186 | targets_o = targets.copy()
187 | height_o, width_o, _ = image_o.shape
188 | boxes_o = targets_o[:, :-1]
189 | labels_o = targets_o[:, -1]
190 | boxes_o[:, 0::2] /= width_o
191 | boxes_o[:, 1::2] /= height_o
192 | labels_o = np.expand_dims(labels_o, 1)
193 | targets_o = np.hstack((boxes_o, labels_o))
194 |
195 | image_t, boxes, labels = _crop(image, boxes, labels)
196 | image_t = _distort(image_t)
197 | image_t, boxes = _expand(image_t, boxes, self.means, self.p)
198 | image_t, boxes = _mirror(image_t, boxes)
199 | #image_t, boxes = _mirror(image, boxes)
200 |
201 | height, width, _ = image_t.shape
202 | image_t = preproc_for_test(image_t, self.resize_wh, self.means)
203 | boxes = boxes.copy()
204 | boxes[:, 0::2] /= width
205 | boxes[:, 1::2] /= height
206 | b_w = (boxes[:, 2] - boxes[:, 0]) * 1.
207 | b_h = (boxes[:, 3] - boxes[:, 1]) * 1.
208 | mask_b = np.minimum(b_w, b_h) > 0.01
209 | boxes_t = boxes[mask_b]
210 | labels_t = labels[mask_b].copy()
211 |
212 | if len(boxes_t) == 0:
213 | image = preproc_for_test(image_o, self.resize_wh, self.means)
214 | return torch.from_numpy(image), targets_o
215 |
216 | labels_t = np.expand_dims(labels_t, 1)
217 | targets_t = np.hstack((boxes_t, labels_t))
218 |
219 | return torch.from_numpy(image_t), targets_t
220 |
221 |
222 | class BaseTransform(object):
223 | """Defines the transformations that should be applied to test PIL image
224 | for input into the network
225 |
226 | dimension -> tensorize -> color adj
227 |
228 | Arguments:
229 | resize (int): input dimension to SSD
230 | rgb_means ((int,int,int)): average RGB of the dataset
231 | (104,117,123)
232 | swap ((int,int,int)): final order of channels
233 | Returns:
234 | transform (transform) : callable transform to be applied to test/val
235 | data
236 | """
237 |
238 | def __init__(self, resize_wh, rgb_means, swap=(2, 0, 1)):
239 | self.means = rgb_means
240 | self.resize_wh = resize_wh
241 | self.swap = swap
242 |
243 | # assume input is cv2 img for now
244 | def __call__(self, img, target=None):
245 |
246 | interp_methods = [
247 | cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA,
248 | cv2.INTER_NEAREST, cv2.INTER_LANCZOS4
249 | ]
250 | interp_method = interp_methods[0]
251 | img = cv2.resize(
252 | np.array(img), (self.resize_wh[0], self.resize_wh[1]),
253 | interpolation=interp_method).astype(np.float32)
254 | img -= self.means
255 | img = img.transpose(self.swap)
256 | return torch.from_numpy(img), target
257 |
--------------------------------------------------------------------------------
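A minimal usage sketch for the two transforms above: preproc takes a BGR image and an (N, 5) array of pixel-space boxes plus labels and returns a CHW float tensor with boxes normalized to [0, 1]; BaseTransform only resizes, subtracts the mean, and reorders channels. The sample path and the expand probability p=0.6 are assumptions for illustration:

import cv2
import numpy as np
from data import preproc, BaseTransform

img = cv2.imread('images/dog.jpg')                   # BGR, HWC, uint8
targets = np.array([[48., 240., 195., 371., 11.]])   # x1, y1, x2, y2, label
train_tf = preproc(resize_wh=[320, 320], rgb_means=(104, 117, 123), p=0.6)
img_t, targets_t = train_tf(img, targets)            # 3x320x320, boxes in [0, 1]
val_tf = BaseTransform([320, 320], (104, 117, 123), (2, 0, 1))
img_v, _ = val_tf(img)
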
/data/drf_net.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yqyao/SSD_Pytorch/6060bbb650e7a1df7c12d7c9650a38eaba4ab6a8/data/drf_net.jpg
--------------------------------------------------------------------------------
/data/scripts/VOC2007.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 | then
9 | # navigate to ~/data
10 | echo "navigating to ~/data/ ..."
11 | mkdir -p ~/data
12 | cd ~/data/
13 | else
14 | # check if is valid directory
15 |   if [ ! -d "$1" ]; then
16 |     echo $1 "is not a valid directory"
17 |     exit 1
18 |   fi
19 | echo "navigating to" $1 "..."
20 | cd $1
21 | fi
22 |
23 | echo "Downloading VOC2007 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
26 | echo "Downloading VOC2007 test data ..."
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
28 | echo "Done downloading."
29 |
30 | # Extract data
31 | echo "Extracting trainval ..."
32 | tar -xvf VOCtrainval_06-Nov-2007.tar
33 | echo "Extracting test ..."
34 | tar -xvf VOCtest_06-Nov-2007.tar
35 | echo "removing tars ..."
36 | rm VOCtrainval_06-Nov-2007.tar
37 | rm VOCtest_06-Nov-2007.tar
38 |
39 | end=`date +%s`
40 | runtime=$((end-start))
41 |
42 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/scripts/VOC2012.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Ellis Brown
3 |
4 | start=`date +%s`
5 |
6 | # handle optional download dir
7 | if [ -z "$1" ]
8 | then
9 | # navigate to ~/data
10 | echo "navigating to ~/data/ ..."
11 | mkdir -p ~/data
12 | cd ~/data/
13 | else
14 | # check if is valid directory
15 |   if [ ! -d "$1" ]; then
16 |     echo $1 "is not a valid directory"
17 |     exit 1
18 |   fi
19 | echo "navigating to" $1 "..."
20 | cd $1
21 | fi
22 |
23 | echo "Downloading VOC2012 trainval ..."
24 | # Download the data.
25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
26 | echo "Done downloading."
27 |
28 |
29 | # Extract data
30 | echo "Extracting trainval ..."
31 | tar -xvf VOCtrainval_11-May-2012.tar
32 | echo "removing tar ..."
33 | rm VOCtrainval_11-May-2012.tar
34 |
35 | end=`date +%s`
36 | runtime=$((end-start))
37 |
38 | echo "Completed in" $runtime "seconds"
--------------------------------------------------------------------------------
/data/voc_eval.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Bharath Hariharan
5 | # --------------------------------------------------------
6 |
7 | import xml.etree.ElementTree as ET
8 | import os
9 | import pickle
10 | import numpy as np
11 | import pdb
12 | import matplotlib
13 | matplotlib.use('Agg')
14 | import matplotlib.pyplot as plt
15 |
16 |
17 | def parse_rec(filename):
18 | """ Parse a PASCAL VOC xml file """
19 | tree = ET.parse(filename)
20 | objects = []
21 | for obj in tree.findall('object'):
22 | obj_struct = {}
23 | obj_struct['name'] = obj.find('name').text
24 | obj_struct['pose'] = obj.find('pose').text
25 | obj_struct['truncated'] = int(obj.find('truncated').text)
26 | obj_struct['difficult'] = int(obj.find('difficult').text)
27 | bbox = obj.find('bndbox')
28 | obj_struct['bbox'] = [
29 | int(bbox.find('xmin').text),
30 | int(bbox.find('ymin').text),
31 | int(bbox.find('xmax').text),
32 | int(bbox.find('ymax').text)
33 | ]
34 | objects.append(obj_struct)
35 |
36 | return objects
37 |
38 |
39 | def voc_ap(rec, prec, use_07_metric=False):
40 | """ ap = voc_ap(rec, prec, [use_07_metric])
41 | Compute VOC AP given precision and recall.
42 | If use_07_metric is true, uses the
43 | VOC 07 11 point method (default:False).
44 | """
45 | if use_07_metric:
46 | # 11 point metric
47 | ap = 0.
48 | for t in np.arange(0., 1.1, 0.1):
49 | if np.sum(rec >= t) == 0:
50 | p = 0
51 | else:
52 | p = np.max(prec[rec >= t])
53 | ap = ap + p / 11.
54 | else:
55 | # correct AP calculation
56 | # first append sentinel values at the end
57 | mrec = np.concatenate(([0.], rec, [1.]))
58 | mpre = np.concatenate(([0.], prec, [0.]))
59 |
60 | # compute the precision envelope
61 | for i in range(mpre.size - 1, 0, -1):
62 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
63 |
64 | # to calculate area under PR curve, look for points
65 | # where X axis (recall) changes value
66 | i = np.where(mrec[1:] != mrec[:-1])[0]
67 |
68 | # and sum (\Delta recall) * prec
69 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
70 | return ap
71 |
72 |
73 | def voc_eval(detpath,
74 | annopath,
75 | imagesetfile,
76 | classname,
77 | cachedir,
78 | ovthresh=0.5,
79 | use_07_metric=False):
80 | """rec, prec, ap = voc_eval(detpath,
81 | annopath,
82 | imagesetfile,
83 | classname,
84 | [ovthresh],
85 | [use_07_metric])
86 |
87 | Top level function that does the PASCAL VOC evaluation.
88 |
89 | detpath: Path to detections
90 | detpath.format(classname) should produce the detection results file.
91 | annopath: Path to annotations
92 | annopath.format(imagename) should be the xml annotations file.
93 | imagesetfile: Text file containing the list of images, one image per line.
94 |     classname: Category name
95 | cachedir: Directory for caching the annotations
96 | [ovthresh]: Overlap threshold (default = 0.5)
97 | [use_07_metric]: Whether to use VOC07's 11 point AP computation
98 | (default False)
99 | """
100 | # assumes detections are in detpath.format(classname)
101 | # assumes annotations are in annopath.format(imagename)
102 | # assumes imagesetfile is a text file with each line an image name
103 | # cachedir caches the annotations in a pickle file
104 |
105 | # first load gt
106 | if not os.path.isdir(cachedir):
107 | os.mkdir(cachedir)
108 | cachefile = os.path.join(cachedir, 'annots.pkl')
109 | # read list of images
110 | with open(imagesetfile, 'r') as f:
111 | lines = f.readlines()
112 | imagenames = [x.strip() for x in lines]
113 |
114 | if not os.path.isfile(cachefile):
115 | # load annots
116 | recs = {}
117 | for i, imagename in enumerate(imagenames):
118 | recs[imagename] = parse_rec(annopath.format(imagename))
119 | if i % 100 == 0:
120 | print('Reading annotation for {:d}/{:d}'.format(
121 | i + 1, len(imagenames)))
122 | # save
123 | print('Saving cached annotations to {:s}'.format(cachefile))
124 | with open(cachefile, 'wb') as f:
125 | pickle.dump(recs, f)
126 | else:
127 | # load
128 | with open(cachefile, 'rb') as f:
129 | recs = pickle.load(f)
130 |
131 | # extract gt objects for this class
132 | class_recs = {}
133 | npos = 0
134 | for imagename in imagenames:
135 | R = [obj for obj in recs[imagename] if obj['name'] == classname]
136 | bbox = np.array([x['bbox'] for x in R])
137 |         difficult = np.array([x['difficult'] for x in R]).astype(bool)
138 | det = [False] * len(R)
139 | npos = npos + sum(~difficult)
140 | class_recs[imagename] = {
141 | 'bbox': bbox,
142 | 'difficult': difficult,
143 | 'det': det
144 | }
145 |
146 | # read dets
147 | detfile = detpath.format(classname)
148 | with open(detfile, 'r') as f:
149 | lines = f.readlines()
150 |
151 | splitlines = [x.strip().split(' ') for x in lines]
152 | image_ids = [x[0] for x in splitlines]
153 | confidence = np.array([float(x[1]) for x in splitlines])
154 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
155 | # sort by confidence
156 | sorted_ind = np.argsort(-confidence)
157 | sorted_scores = np.sort(-confidence)
158 | BB = BB[sorted_ind, :]
159 | image_ids = [image_ids[x] for x in sorted_ind]
160 |
161 | # go down dets and mark TPs and FPs
162 | nd = len(image_ids)
163 | tp = np.zeros(nd)
164 | fp = np.zeros(nd)
165 | for d in range(nd):
166 | R = class_recs[image_ids[d]]
167 | bb = BB[d, :].astype(float)
168 | ovmax = -np.inf
169 | BBGT = R['bbox'].astype(float)
170 |
171 | if BBGT.size > 0:
172 | # compute overlaps
173 | # intersection
174 | ixmin = np.maximum(BBGT[:, 0], bb[0])
175 | iymin = np.maximum(BBGT[:, 1], bb[1])
176 | ixmax = np.minimum(BBGT[:, 2], bb[2])
177 | iymax = np.minimum(BBGT[:, 3], bb[3])
178 | iw = np.maximum(ixmax - ixmin + 1., 0.)
179 | ih = np.maximum(iymax - iymin + 1., 0.)
180 | inters = iw * ih
181 |
182 | # union
183 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
184 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
185 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
186 |
187 | overlaps = inters / uni
188 | ovmax = np.max(overlaps)
189 | jmax = np.argmax(overlaps)
190 |
191 | if ovmax > ovthresh:
192 | if not R['difficult'][jmax]:
193 | if not R['det'][jmax]:
194 | tp[d] = 1.
195 | R['det'][jmax] = 1
196 | else:
197 | fp[d] = 1.
198 | else:
199 | fp[d] = 1.
200 |
201 | # compute precision recall
202 | fp = np.cumsum(fp)
203 | tp = np.cumsum(tp)
204 | rec = tp / float(npos)
205 | # avoid divide by zero in case the first detection matches a difficult
206 | # ground truth
207 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
208 |     # plot and save the precision-recall curve for this class
209 | final_rec = round(rec[-1], 4)
210 | final_prec = round(prec[-1], 4)
211 | plt_save_path = os.path.join(".", "eval", "pr")
212 | if not os.path.exists(plt_save_path):
213 | os.makedirs(plt_save_path)
214 | plt.plot(rec, prec, 'r')
215 | pr_curl = os.path.join(
216 | plt_save_path, '{}_{}_{}pr.jpg'.format(classname, str(final_prec),
217 | str(final_rec)))
218 | plt.savefig(pr_curl)
219 | plt.close()
220 | ap = voc_ap(rec, prec, use_07_metric)
221 |
222 | return rec, prec, ap
223 |
--------------------------------------------------------------------------------
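A toy check of voc_ap on a two-point PR curve, showing how the VOC07 11-point average differs from the exact area under the interpolated curve (expected values worked out by hand from the definitions above; the import path assumes the repo root is on PYTHONPATH):

import numpy as np
from data.voc_eval import voc_ap

rec = np.array([0.5, 1.0])
prec = np.array([1.0, 0.5])
print(voc_ap(rec, prec, use_07_metric=True))   # (6 * 1.0 + 5 * 0.5) / 11 ~= 0.7727
print(voc_ap(rec, prec, use_07_metric=False))  # 0.5 * 1.0 + 0.5 * 0.5 = 0.75
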
/demo.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ["CUDA_VISIBLE_DEVICES"] = "1,0"
3 | import torch
4 | import torch.nn as nn
5 | import torch.optim as optim
6 | import torch.backends.cudnn as cudnn
7 | import torch.nn.init as init
8 | import argparse
9 | from torch.autograd import Variable
10 | import torch.utils.data as data
11 | from data import COCODetection, VOCDetection, detection_collate, BaseTransform, preproc
12 | from layers.modules import MultiBoxLoss, RefineMultiBoxLoss
13 | from layers.functions import Detect
14 | from utils.nms_wrapper import nms, soft_nms
15 | from configs.config import cfg, cfg_from_file, VOC_CLASSES, COCO_CLASSES
16 | from utils.box_utils import draw_rects
17 | import numpy as np
18 | import time
19 | import os
20 | import sys
21 | import pickle
22 | import datetime
23 | from models.model_builder import SSD
24 | import yaml
25 | import cv2
26 |
27 |
28 | def arg_parse():
29 | parser = argparse.ArgumentParser(
30 | description='Single Shot MultiBox Detection')
31 | parser.add_argument(
32 | "--images",
33 | dest='images',
34 | help="Image / Directory containing images to perform detection upon",
35 | default="images",
36 | type=str)
37 | parser.add_argument(
38 | '--weights',
39 | default='weights/ssd_darknet_300.pth',
40 | type=str,
41 | help='Trained state_dict file path to open')
42 | parser.add_argument(
43 | '--cfg',
44 | dest='cfg_file',
45 | required=True,
46 | help='Config file for training (and optionally testing)')
47 | parser.add_argument(
48 | '--save_folder',
49 | default='eval/',
50 | type=str,
51 | help='File path to save results')
52 | parser.add_argument(
53 | '--num_workers',
54 | default=8,
55 | type=int,
56 | help='Number of workers used in dataloading')
57 | parser.add_argument(
58 | '--retest', default=False, type=bool, help='test cache results')
59 | args = parser.parse_args()
60 | return args
61 |
62 |
63 | def im_detect(img, net, detector, transform, thresh=0.01):
64 | with torch.no_grad():
65 | t0 = time.time()
66 | w, h = img.shape[1], img.shape[0]
67 | x = transform(img)[0].unsqueeze(0)
68 | x = x.cuda()
69 | t1 = time.time()
70 | output = net(x)
71 | boxes, scores = detector.forward(output)
72 | t2 = time.time()
73 | max_conf, max_id = scores[0].topk(1, 1, True, True)
74 |         pos = max_id > 0
75 |         if pos.sum() == 0:  # no prior classified as foreground
76 |             return np.empty((0, 6))
77 | boxes = boxes[0][pos.view(-1, 1).expand(len(pos), 4)].view(-1, 4)
78 | scores = max_conf[pos].view(-1, 1)
79 | max_id = max_id[pos].view(-1, 1)
80 |         inds = scores > thresh
81 |         if inds.sum() == 0:  # nothing above the confidence threshold
82 |             return np.empty((0, 6))
83 | boxes = boxes[inds.view(-1, 1).expand(len(inds), 4)].view(-1, 4)
84 | scores = scores[inds].view(-1, 1)
85 | max_id = max_id[inds].view(-1, 1)
86 | c_dets = torch.cat((boxes, scores, max_id.float()), 1).cpu().numpy()
87 | img_classes = np.unique(c_dets[:, -1])
88 | output = None
89 | flag = False
90 | for cls in img_classes:
91 | cls_mask = np.where(c_dets[:, -1] == cls)[0]
92 | image_pred_class = c_dets[cls_mask, :]
93 | keep = nms(image_pred_class, cfg.TEST.NMS_OVERLAP, force_cpu=True)
94 | keep = keep[:50]
95 | image_pred_class = image_pred_class[keep, :]
96 | if not flag:
97 | output = image_pred_class
98 | flag = True
99 | else:
100 | output = np.concatenate((output, image_pred_class), axis=0)
101 | output[:, 0:2][output[:, 0:2] < 0] = 0
102 | output[:, 2:4][output[:, 2:4] > 1] = 1
103 | scale = np.array([w, h, w, h])
104 | output[:, :4] = output[:, :4] * scale
105 | t3 = time.time()
106 | print("transform_t:", round(t1 - t0, 3), "detect_time:",
107 | round(t2 - t1, 3), "nms_time:", round(t3 - t2, 3))
108 | return output
109 |
110 |
111 | def main():
112 | global args
113 | args = arg_parse()
114 | cfg_from_file(args.cfg_file)
115 | bgr_means = cfg.TRAIN.BGR_MEAN
116 | dataset_name = cfg.DATASETS.DATA_TYPE
117 | batch_size = cfg.TEST.BATCH_SIZE
118 | num_workers = args.num_workers
119 | if cfg.DATASETS.DATA_TYPE == 'VOC':
120 | trainvalDataset = VOCDetection
121 | classes = VOC_CLASSES
122 | top_k = 200
123 | else:
124 | trainvalDataset = COCODetection
125 | classes = COCO_CLASSES
126 | top_k = 300
127 | valSet = cfg.DATASETS.VAL_TYPE
128 | num_classes = cfg.MODEL.NUM_CLASSES
129 | save_folder = args.save_folder
130 | if not os.path.exists(save_folder):
131 | os.mkdir(save_folder)
132 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
133 | cfg.TRAIN.TRAIN_ON = False
134 | net = SSD(cfg)
135 |
136 | checkpoint = torch.load(args.weights)
137 | state_dict = checkpoint['model']
138 | from collections import OrderedDict
139 | new_state_dict = OrderedDict()
140 | for k, v in state_dict.items():
141 | head = k[:7]
142 | if head == 'module.':
143 | name = k[7:] # remove `module.`
144 | else:
145 | name = k
146 | new_state_dict[name] = v
147 | net.load_state_dict(new_state_dict)
148 |
149 | detector = Detect(cfg)
150 | img_wh = cfg.TEST.INPUT_WH
151 | ValTransform = BaseTransform(img_wh, bgr_means, (2, 0, 1))
152 | input_folder = args.images
153 | thresh = cfg.TEST.CONFIDENCE_THRESH
154 |     for item in os.listdir(input_folder):
155 | img_path = os.path.join(input_folder, item)
156 | print(img_path)
157 | img = cv2.imread(img_path)
158 | dets = im_detect(img, net, detector, ValTransform, thresh)
159 | draw_img = draw_rects(img, dets, classes)
160 | out_img_name = "output_" + item
161 | save_path = os.path.join(save_folder, out_img_name)
162 |         cv2.imwrite(save_path, draw_img)
163 |
164 |
165 | if __name__ == '__main__':
166 | st = time.time()
167 | main()
168 | print("final time", time.time() - st)
169 |
--------------------------------------------------------------------------------
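The checkpoint-loading loop in main() above (repeated in eval.py below) exists because nn.DataParallel checkpoints store parameters under a 'module.' prefix. The same idea as a reusable sketch; strip_module_prefix is a hypothetical helper, not part of this repo:

from collections import OrderedDict

def strip_module_prefix(state_dict):
    """Drop the 'module.' prefix that nn.DataParallel adds to parameter names."""
    return OrderedDict(
        (k[len('module.'):] if k.startswith('module.') else k, v)
        for k, v in state_dict.items())

# usage: net.load_state_dict(strip_module_prefix(torch.load(path)['model']))
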
/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | os.environ["CUDA_VISIBLE_DEVICES"] = "1,0"
3 | import torch
4 | import torch.nn as nn
5 | import torch.optim as optim
6 | import torch.backends.cudnn as cudnn
7 | import torch.nn.init as init
8 | import argparse
9 | from torch.autograd import Variable
10 | import torch.utils.data as data
11 | from data import COCODetection, VOCDetection, detection_collate, BaseTransform, preproc
12 | from layers.modules import MultiBoxLoss, RefineMultiBoxLoss
13 | from layers.functions import Detect
14 | from utils.nms_wrapper import nms, soft_nms
15 | from configs.config import cfg, cfg_from_file
16 | import numpy as np
17 | import time
18 | import os
19 | import sys
20 | import pickle
21 | import datetime
22 | from models.model_builder import SSD
23 | import yaml
24 |
25 |
26 | def arg_parse():
27 | parser = argparse.ArgumentParser(
28 | description='Single Shot MultiBox Detection')
29 | parser.add_argument(
30 | '--weights',
31 | default='weights/ssd_darknet_300.pth',
32 | type=str,
33 | help='Trained state_dict file path to open')
34 | parser.add_argument(
35 | '--cfg',
36 | dest='cfg_file',
37 | required=True,
38 | help='Config file for training (and optionally testing)')
39 | parser.add_argument(
40 | '--save_folder',
41 | default='eval/',
42 | type=str,
43 | help='File path to save results')
44 | parser.add_argument(
45 | '--num_workers',
46 | default=8,
47 | type=int,
48 | help='Number of workers used in dataloading')
49 | parser.add_argument(
50 | '--retest', default=False, type=bool, help='test cache results')
51 | args = parser.parse_args()
52 | return args
53 |
54 |
55 | def eval_net(val_dataset,
56 | val_loader,
57 | net,
58 | detector,
59 | cfg,
60 | transform,
61 | max_per_image=300,
62 | thresh=0.01,
63 | batch_size=1):
64 | net.eval()
65 | num_images = len(val_dataset)
66 | num_classes = cfg.MODEL.NUM_CLASSES
67 | eval_save_folder = "./eval/"
68 | if not os.path.exists(eval_save_folder):
69 | os.mkdir(eval_save_folder)
70 | all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
71 | det_file = os.path.join(eval_save_folder, 'detections.pkl')
72 |
73 | if args.retest:
74 | f = open(det_file, 'rb')
75 | all_boxes = pickle.load(f)
76 | print('Evaluating detections')
77 | val_dataset.evaluate_detections(all_boxes, eval_save_folder)
78 | return
79 |
80 | for idx, (imgs, _, img_info) in enumerate(val_loader):
81 | with torch.no_grad():
82 | t1 = time.time()
83 | x = imgs
84 | x = x.cuda()
85 | output = net(x)
86 | t4 = time.time()
87 | boxes, scores = detector.forward(output)
88 | t2 = time.time()
89 | for k in range(boxes.size(0)):
90 | i = idx * batch_size + k
91 | boxes_ = boxes[k]
92 | scores_ = scores[k]
93 | boxes_ = boxes_.cpu().numpy()
94 | scores_ = scores_.cpu().numpy()
95 | img_wh = img_info[k]
96 | scale = np.array([img_wh[0], img_wh[1], img_wh[0], img_wh[1]])
97 | boxes_ *= scale
98 | for j in range(1, num_classes):
99 | inds = np.where(scores_[:, j] > thresh)[0]
100 | if len(inds) == 0:
101 | all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
102 | continue
103 | c_bboxes = boxes_[inds]
104 | c_scores = scores_[inds, j]
105 | c_dets = np.hstack((c_bboxes,
106 | c_scores[:, np.newaxis])).astype(
107 | np.float32, copy=False)
108 | keep = nms(c_dets, cfg.TEST.NMS_OVERLAP, force_cpu=True)
109 | keep = keep[:50]
110 | c_dets = c_dets[keep, :]
111 | all_boxes[j][i] = c_dets
112 | t3 = time.time()
113 | detect_time = t2 - t1
114 | nms_time = t3 - t2
115 | forward_time = t4 - t1
116 | if idx % 10 == 0:
117 | print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s'.format(
118 | i + 1, num_images, forward_time, detect_time, nms_time))
119 |
120 | with open(det_file, 'wb') as f:
121 | pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
122 | print('Evaluating detections')
123 | val_dataset.evaluate_detections(all_boxes, eval_save_folder)
124 | print("detect time: ", time.time() - st)
125 |
126 |
127 | def main():
128 | global args
129 | args = arg_parse()
130 | cfg_from_file(args.cfg_file)
131 | bgr_means = cfg.TRAIN.BGR_MEAN
132 | dataset_name = cfg.DATASETS.DATA_TYPE
133 | batch_size = cfg.TEST.BATCH_SIZE
134 | num_workers = args.num_workers
135 | if cfg.DATASETS.DATA_TYPE == 'VOC':
136 | trainvalDataset = VOCDetection
137 | top_k = 200
138 | else:
139 | trainvalDataset = COCODetection
140 | top_k = 300
141 | dataroot = cfg.DATASETS.DATAROOT
142 | if cfg.MODEL.SIZE == '300':
143 | size_cfg = cfg.SMALL
144 | else:
145 | size_cfg = cfg.BIG
146 | valSet = cfg.DATASETS.VAL_TYPE
147 | num_classes = cfg.MODEL.NUM_CLASSES
148 | save_folder = args.save_folder
149 | if not os.path.exists(save_folder):
150 | os.mkdir(save_folder)
151 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
152 | cfg.TRAIN.TRAIN_ON = False
153 | net = SSD(cfg)
154 |
155 | checkpoint = torch.load(args.weights)
156 | state_dict = checkpoint['model']
157 | from collections import OrderedDict
158 | new_state_dict = OrderedDict()
159 | for k, v in state_dict.items():
160 | head = k[:7]
161 | if head == 'module.':
162 | name = k[7:] # remove `module.`
163 | else:
164 | name = k
165 | new_state_dict[name] = v
166 | net.load_state_dict(new_state_dict)
167 | detector = Detect(cfg)
168 | ValTransform = BaseTransform(size_cfg.IMG_WH, bgr_means, (2, 0, 1))
169 | val_dataset = trainvalDataset(dataroot, valSet, ValTransform, "val")
170 | val_loader = data.DataLoader(
171 | val_dataset,
172 | batch_size,
173 | shuffle=False,
174 | num_workers=num_workers,
175 | collate_fn=detection_collate)
176 |     # top_k was already chosen per dataset above (200 for VOC, 300 for COCO)
177 | thresh = cfg.TEST.CONFIDENCE_THRESH
178 | eval_net(
179 | val_dataset,
180 | val_loader,
181 | net,
182 | detector,
183 | cfg,
184 | ValTransform,
185 | top_k,
186 | thresh=thresh,
187 | batch_size=batch_size)
188 |
189 |
190 | if __name__ == '__main__':
191 | st = time.time()
192 | main()
193 | print("final time", time.time() - st)
194 |
--------------------------------------------------------------------------------
/images/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yqyao/SSD_Pytorch/6060bbb650e7a1df7c12d7c9650a38eaba4ab6a8/images/dog.jpg
--------------------------------------------------------------------------------
/images/eagle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yqyao/SSD_Pytorch/6060bbb650e7a1df7c12d7c9650a38eaba4ab6a8/images/eagle.jpg
--------------------------------------------------------------------------------
/images/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yqyao/SSD_Pytorch/6060bbb650e7a1df7c12d7c9650a38eaba4ab6a8/images/person.jpg
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 | # from .refine_prior_box import RefinePriorBox
4 |
5 |
6 | __all__ = ['Detect', 'PriorBox']
7 |
--------------------------------------------------------------------------------
/layers/functions/detection.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.backends.cudnn as cudnn
4 | from torch.autograd import Function
5 | from torch.autograd import Variable
6 | import torch.nn.functional as F
7 | from utils.box_utils import decode, center_size
8 |
9 |
10 | class Detect(Function):
11 | """At test time, Detect is the final layer of SSD. Decode location preds,
12 | apply non-maximum suppression to location predictions based on conf
13 | scores and threshold to a top_k number of output predictions for both
14 | confidence score and locations.
15 | """
16 |
17 | def __init__(self, cfg):
18 | self.cfg = cfg
19 | self.num_classes = cfg.MODEL.NUM_CLASSES
20 | #self.thresh = thresh
21 | self.size = cfg.MODEL.SIZE
22 | if self.size == '300':
23 | size_cfg = cfg.SMALL
24 | else:
25 | size_cfg = cfg.BIG
26 | # Parameters used in nms.
27 | self.variance = size_cfg.VARIANCE
28 | self.object_score = cfg.MODEL.OBJECT_SCORE
29 |
30 | def forward(self, predictions):
31 | """
32 | Args:
33 | loc_data: (tensor) Loc preds from loc layers
34 | Shape: [batch,num_priors*4]
35 | conf_data: (tensor) Shape: Conf preds from conf layers
36 | Shape: [batch*num_priors,num_classes]
37 | prior_data: (tensor) Prior boxes and variances from priorbox layers
38 | Shape: [1,num_priors,4]
39 | """
40 | # loc, conf, priors = predictions
41 | if self.cfg.MODEL.REFINE:
42 | arm_loc, arm_conf, loc, conf, priors = predictions
43 | arm_conf = F.softmax(arm_conf.view(-1, 2), 1)
44 | conf = F.softmax(conf.view(-1, self.num_classes), 1)
45 | arm_loc_data = arm_loc.data
46 | arm_conf_data = arm_conf.data
47 | arm_object_conf = arm_conf_data[:, 1:]
48 | no_object_index = arm_object_conf <= self.object_score
49 | conf.data[no_object_index.expand_as(conf.data)] = 0
50 | else:
51 | loc, conf, priors = predictions
52 | conf = F.softmax(conf.view(-1, self.num_classes), 1)
53 | loc_data = loc.data
54 | conf_data = conf.data
55 | # prior_data = priors.data
56 | prior_data = priors[:loc_data.size(1), :]
57 |
58 | num = loc_data.size(0) # batch size
59 |
60 | self.num_priors = prior_data.size(0)
61 |
62 | self.boxes = torch.zeros(num, self.num_priors, 4)
63 | self.scores = torch.zeros(num, self.num_priors, self.num_classes)
64 | conf_preds = conf_data.view(num, self.num_priors, self.num_classes)
65 | batch_prior = prior_data.view(-1, self.num_priors, 4).expand(
66 | (num, self.num_priors, 4))
67 | batch_prior = batch_prior.contiguous().view(-1, 4)
68 | if self.cfg.MODEL.REFINE:
69 | default = decode(
70 | arm_loc_data.view(-1, 4), batch_prior, self.variance)
71 | default = center_size(default)
72 | decoded_boxes = decode(
73 | loc_data.view(-1, 4), default, self.variance)
74 | else:
75 | decoded_boxes = decode(
76 | loc_data.view(-1, 4), batch_prior, self.variance)
77 |
78 | self.scores = conf_preds.view(num, self.num_priors, self.num_classes)
79 | self.boxes = decoded_boxes.view(num, self.num_priors, 4)
80 | return self.boxes, self.scores
--------------------------------------------------------------------------------
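Since Detect stops at decoded boxes and raw per-class scores, here is a minimal sketch of the thresholding and class-wise NMS its callers apply (mirroring eval.py; postprocess is a hypothetical name, and the default thresholds come from the TEST block of the configs):

import numpy as np
from utils.nms_wrapper import nms

def postprocess(boxes, scores, num_classes, thresh=0.01, nms_overlap=0.45):
    # boxes: (num_priors, 4) in [0, 1]; scores: (num_priors, num_classes)
    dets = []
    for j in range(1, num_classes):              # class 0 is background
        inds = np.where(scores[:, j] > thresh)[0]
        if len(inds) == 0:
            dets.append(np.empty((0, 5), dtype=np.float32))
            continue
        c_dets = np.hstack((boxes[inds],
                            scores[inds, j][:, np.newaxis])).astype(np.float32)
        keep = nms(c_dets, nms_overlap, force_cpu=True)
        dets.append(c_dets[keep])
    return dets                                  # one (k, 5) array per class
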
/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from math import sqrt as sqrt
3 | from itertools import product as product
4 |
5 |
6 | class PriorBox(object):
7 | """Compute priorbox coordinates in center-offset form for each source
8 | feature map.
9 | Note:
10 |         For each cell of every feature map this yields one square box of
11 |         relative size MIN_SIZES[k], an optional sqrt(s_k * s_k+1) box when
12 |         USE_MAX_SIZE is set, and one box per entry of ASPECT_RATIOS[k].
13 |
14 | """
15 |
16 | def __init__(self, cfg):
17 | super(PriorBox, self).__init__()
18 | self.size = cfg.MODEL.SIZE
19 | if self.size == '300':
20 | size_cfg = cfg.SMALL
21 | else:
22 | size_cfg = cfg.BIG
23 | self.img_wh = size_cfg.IMG_WH
24 | self.num_priors = len(size_cfg.ASPECT_RATIOS)
25 | self.feature_maps = size_cfg.FEATURE_MAPS
26 | self.variance = size_cfg.VARIANCE or [0.1]
27 | self.min_sizes = size_cfg.MIN_SIZES
28 | self.use_max_sizes = size_cfg.USE_MAX_SIZE
29 | if self.use_max_sizes:
30 | self.max_sizes = size_cfg.MAX_SIZES
31 | self.steps = size_cfg.STEPS
32 | self.aspect_ratios = size_cfg.ASPECT_RATIOS
33 | self.clip = size_cfg.CLIP
34 | for v in self.variance:
35 | if v <= 0:
36 | raise ValueError('Variances must be greater than 0')
37 |
38 | def forward(self):
39 | mean = []
40 | for k, f in enumerate(self.feature_maps):
41 | grid_h, grid_w = f[1], f[0]
42 | for i in range(grid_h):
43 | for j in range(grid_w):
44 | f_k_h = self.img_wh[1] / self.steps[k][1]
45 | f_k_w = self.img_wh[0] / self.steps[k][0]
46 | # unit center x,y
47 | cx = (j + 0.5) / f_k_w
48 | cy = (i + 0.5) / f_k_h
49 |
50 | # aspect_ratio: 1
51 | # rel size: min_size
52 | s_k_h = self.min_sizes[k] / self.img_wh[1]
53 | s_k_w = self.min_sizes[k] / self.img_wh[0]
54 | mean += [cx, cy, s_k_w, s_k_h]
55 |
56 | # aspect_ratio: 1
57 | # rel size: sqrt(s_k * s_(k+1))
58 | if self.use_max_sizes:
59 | s_k_prime_w = sqrt(
60 | s_k_w * (self.max_sizes[k] / self.img_wh[0]))
61 | s_k_prime_h = sqrt(
62 | s_k_h * (self.max_sizes[k] / self.img_wh[1]))
63 | mean += [cx, cy, s_k_prime_w, s_k_prime_h]
64 |
65 | for ar in self.aspect_ratios[k]:
66 | mean += [cx, cy, s_k_w * sqrt(ar), s_k_h / sqrt(ar)]
67 |
68 | # back to torch land
69 | output = torch.Tensor(mean).view(-1, 4)
70 | if self.clip:
71 | output.clamp_(max=1, min=0)
72 | # print(output.size())
73 | return output
74 |
--------------------------------------------------------------------------------
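A worked example of the loop above for the first feature map of a 320x320 model (step 8, min_size 30, aspect ratios [2, 0.5], USE_MAX_SIZE False): the top-left cell contributes exactly three priors, matching NUM_ANCHORS in the configs.

from math import sqrt

img_w = 320
cx = cy = (0 + 0.5) / (img_w / 8)   # 0.0125: center of the top-left cell
s = 30 / img_w                      # 0.09375: relative side, aspect ratio 1
priors = [[cx, cy, s, s]]
for ar in (2, 0.5):                 # wide and tall variants
    priors.append([cx, cy, s * sqrt(ar), s / sqrt(ar)])
print(len(priors))                  # 3
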
/layers/functions/prior_layer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from math import sqrt as sqrt
3 | from math import ceil
4 | import torch.nn as nn
5 | from itertools import product as product
6 |
7 |
8 | class PriorLayer(nn.Module):
9 | def __init__(self, cfg):
10 | super(PriorLayer, self).__init__()
11 | self.size = cfg.MODEL.SIZE
12 | if self.size == '300':
13 | size_cfg = cfg.SMALL
14 | else:
15 | size_cfg = cfg.BIG
16 | self.img_wh = size_cfg.IMG_WH
17 | self.num_priors = len(size_cfg.ASPECT_RATIOS)
18 | self.feature_maps = size_cfg.FEATURE_MAPS
19 | self.variance = size_cfg.VARIANCE or [0.1]
20 | self.min_sizes = size_cfg.MIN_SIZES
21 | self.use_max_sizes = size_cfg.USE_MAX_SIZE
22 | if self.use_max_sizes:
23 | self.max_sizes = size_cfg.MAX_SIZES
24 | self.steps = size_cfg.STEPS
25 | self.aspect_ratios = size_cfg.ASPECT_RATIOS
26 | self.clip = size_cfg.CLIP
27 | for v in self.variance:
28 | if v <= 0:
29 | raise ValueError('Variances must be greater than 0')
30 |
31 | def forward(self, img_wh, feature_maps_wh):
32 | self.img_wh = img_wh
33 | self.feature_maps_wh = feature_maps_wh
34 | mean = []
35 | for k, f in enumerate(self.feature_maps_wh):
36 | grid_h, grid_w = f[1], f[0]
37 | for i in range(grid_h):
38 | for j in range(grid_w):
39 | f_k_h = self.img_wh[1] / self.steps[k][1]
40 | f_k_w = self.img_wh[0] / self.steps[k][0]
41 | # unit center x,y
42 | cx = (j + 0.5) / f_k_w
43 | cy = (i + 0.5) / f_k_h
44 |
45 | # aspect_ratio: 1
46 | # rel size: min_size
47 | s_k_h = self.min_sizes[k] / self.img_wh[1]
48 | s_k_w = self.min_sizes[k] / self.img_wh[0]
49 | mean += [cx, cy, s_k_w, s_k_h]
50 |
51 | # aspect_ratio: 1
52 | # rel size: sqrt(s_k * s_(k+1))
53 | if self.use_max_sizes:
54 | s_k_prime_w = sqrt(
55 | s_k_w * (self.max_sizes[k] / self.img_wh[0]))
56 | s_k_prime_h = sqrt(
57 | s_k_h * (self.max_sizes[k] / self.img_wh[1]))
58 | mean += [cx, cy, s_k_prime_w, s_k_prime_h]
59 |
60 | for ar in self.aspect_ratios[k]:
61 | mean += [cx, cy, s_k_w * sqrt(ar), s_k_h / sqrt(ar)]
62 |
63 | output = torch.Tensor(mean).view(-1, 4)
64 | if self.clip:
65 | output.clamp_(max=1, min=0)
66 | return output
67 |
--------------------------------------------------------------------------------
/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .weight_smooth_l1_loss import WeightSmoothL1Loss
2 | from .weight_softmax_loss import WeightSoftmaxLoss
3 | from .multibox_loss import MultiBoxLoss
4 | from .refine_multibox_loss import RefineMultiBoxLoss
5 | from .focal_loss_sigmoid import FocalLossSigmoid
6 | from .focal_loss_softmax import FocalLossSoftmax
7 |
8 |
9 |
10 | __all__ = ['WeightSmoothL1Loss', 'WeightSoftmaxLoss', 'MultiBoxLoss', 'RefineMultiBoxLoss', 'FocalLossSigmoid', 'FocalLossSoftmax']
11 |
--------------------------------------------------------------------------------
/layers/modules/focal_loss_sigmoid.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 |
9 |
10 | class FocalLossSigmoid(nn.Module):
11 | '''
12 | sigmoid version focal loss
13 | '''
14 |
15 | def __init__(self, alpha=0.25, gamma=2, size_average=False):
16 | super(FocalLossSigmoid, self).__init__()
17 | self.alpha = alpha
18 | self.gamma = gamma
19 | self.size_average = size_average
20 |
21 | def forward(self, inputs, targets):
22 | N = inputs.size(0)
23 | C = inputs.size(1)
24 | P = torch.sigmoid(inputs)
25 |         # alpha-balanced: alpha weights positives, (1 - alpha) negatives
26 |         loss_pos = -self.alpha * torch.pow(
27 |             1 - P, self.gamma) * torch.log(P) * targets
28 |         loss_neg = -(1 - self.alpha) * torch.pow(
29 |             P, self.gamma) * torch.log(1 - P) * (1 - targets)
30 | batch_loss = loss_neg + loss_pos
31 | if self.size_average:
32 | loss = batch_loss.mean()
33 | else:
34 | loss = batch_loss.sum()
35 | return loss
36 |
--------------------------------------------------------------------------------
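A quick numeric check of FocalLossSigmoid against the alpha-balanced focal loss written out by hand (toy logits with one positive and one negative target; assumes the default sum reduction):

import torch
from layers.modules import FocalLossSigmoid

crit = FocalLossSigmoid(alpha=0.25, gamma=2)
logits = torch.tensor([[2.0, -1.0]])
targets = torch.tensor([[1.0, 0.0]])
p = torch.sigmoid(logits)
by_hand = (-0.25 * (1 - p[0, 0]) ** 2 * torch.log(p[0, 0])
           - 0.75 * p[0, 1] ** 2 * torch.log(1 - p[0, 1]))
print(crit(logits, targets).item(), by_hand.item())  # the two values should agree
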
/layers/modules/focal_loss_softmax.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 |
9 |
10 | class FocalLossSoftmax(nn.Module):
11 | '''
12 | softmax version focal loss
13 | '''
14 |
15 | def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
16 | super(FocalLossSoftmax, self).__init__()
17 | if alpha is None:
18 | self.alpha = Variable(torch.ones(class_num, 1))
19 | else:
20 | if isinstance(alpha, Variable):
21 | self.alpha = alpha
22 | else:
23 | self.alpha = Variable(alpha)
24 | self.gamma = gamma
25 | self.class_num = class_num
26 | self.size_average = size_average
27 |
28 | def forward(self, inputs, targets):
29 | N = inputs.size(0)
30 | C = inputs.size(1)
31 |         P = F.softmax(inputs, dim=1)
32 |
33 | class_mask = inputs.data.new(N, C).fill_(0)
34 | class_mask = Variable(class_mask)
35 | ids = targets.view(-1, 1)
36 | class_mask.scatter_(1, ids.data, 1.)
37 |
38 | if inputs.is_cuda and not self.alpha.is_cuda:
39 | self.alpha = self.alpha.cuda()
40 | alpha = self.alpha[ids.data.view(-1)]
41 | probs = (P * class_mask).sum(1).view(-1, 1)
42 | log_p = probs.log()
43 | batch_loss = -alpha * (torch.pow((1 - probs), self.gamma)) * log_p
44 |
45 | if self.size_average:
46 | loss = batch_loss.mean()
47 | else:
48 | loss = batch_loss.sum()
49 | return loss
--------------------------------------------------------------------------------
/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 | from torch.autograd import Variable
6 | from utils.box_utils import match, log_sum_exp
7 | from .focal_loss_softmax import FocalLossSoftmax
8 | from .focal_loss_sigmoid import FocalLossSigmoid
9 |
10 | GPU = False
11 | if torch.cuda.is_available():
12 | GPU = True
13 | torch.set_default_tensor_type('torch.cuda.FloatTensor')
14 |
15 |
16 | class MultiBoxLoss(nn.Module):
17 | """SSD Weighted Loss Function
18 | Compute Targets:
19 | 1) Produce Confidence Target Indices by matching ground truth boxes
20 | with (default) 'priorboxes' that have jaccard index > threshold parameter
21 | (default threshold: 0.5).
22 | 2) Produce localization target by 'encoding' variance into offsets of ground
23 | truth boxes and their matched 'priorboxes'.
24 | 3) Hard negative mining to filter the excessive number of negative examples
25 | that comes with using a large number of default bounding boxes.
26 | (default negative:positive ratio 3:1)
27 | Objective Loss:
28 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
29 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
30 | weighted by α which is set to 1 by cross val.
31 | Args:
32 | c: class confidences,
33 | l: predicted boxes,
34 | g: ground truth boxes
35 | N: number of matched default boxes
36 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
37 | """
38 |
39 | def __init__(self, cfg):
40 | super(MultiBoxLoss, self).__init__()
41 | self.cfg = cfg
42 | self.size = cfg.MODEL.SIZE
43 | if self.size == '300':
44 | size_cfg = cfg.SMALL
45 | else:
46 | size_cfg = cfg.BIG
47 | self.variance = size_cfg.VARIANCE
48 | self.num_classes = cfg.MODEL.NUM_CLASSES
49 | self.threshold = cfg.TRAIN.OVERLAP
50 | self.OHEM = cfg.TRAIN.OHEM
51 | self.negpos_ratio = cfg.TRAIN.NEG_RATIO
52 | self.variance = size_cfg.VARIANCE
53 | if cfg.TRAIN.FOCAL_LOSS:
54 | if cfg.TRAIN.FOCAL_LOSS_TYPE == 'SOFTMAX':
55 | self.focaloss = FocalLossSoftmax(
56 | self.num_classes, gamma=2, size_average=False)
57 | else:
58 | self.focaloss = FocalLossSigmoid()
59 |
60 | def forward(self, predictions, targets):
61 | """Multibox Loss
62 | Args:
63 | predictions (tuple): A tuple containing loc preds, conf preds,
64 | and prior boxes from SSD net.
65 | conf shape: torch.size(batch_size,num_priors,num_classes)
66 | loc shape: torch.size(batch_size,num_priors,4)
67 | priors shape: torch.size(num_priors,4)
68 |
69 | ground_truth (tensor): Ground truth boxes and labels for a batch,
70 | shape: [batch_size,num_objs,5] (last idx is the label).
71 | """
72 | loc_data, conf_data, priors = predictions
73 | num = loc_data.size(0)
74 | priors = priors[:loc_data.size(1), :]
75 | num_priors = (priors.size(0))
76 | num_classes = self.num_classes
77 | loc_t = torch.Tensor(num, num_priors, 4)
78 | conf_t = torch.LongTensor(num, num_priors)
79 | for idx in range(num):
80 | truths = targets[idx][:, :-1].data
81 | labels = targets[idx][:, -1].data
82 | if self.num_classes == 2:
83 | labels = labels > 0
84 | defaults = priors.data
85 | match(self.threshold, truths, defaults, self.variance, labels,
86 | loc_t, conf_t, idx)
87 | loc_t = loc_t.cuda()
88 | conf_t = conf_t.cuda()
89 |
90 | pos = conf_t > 0
91 | num_pos = pos.sum(1, keepdim=True)
92 |
93 | if self.OHEM:
94 | # Compute max conf across batch for hard negative mining
95 | batch_conf = conf_data.view(-1, self.num_classes)
96 |
97 | loss_hard = log_sum_exp(batch_conf) - batch_conf.gather(
98 | 1, conf_t.view(-1, 1))
99 | # Hard Negative Mining
100 | loss_hard[pos.view(-1, 1)] = 0 # filter out pos boxes for now
101 | loss_hard = loss_hard.view(num, -1)
102 | _, loss_idx = loss_hard.sort(1, descending=True)
103 | _, idx_rank = loss_idx.sort(1)
104 | num_pos = pos.long().sum(1, keepdim=True)
105 | if num_pos.data.sum() > 0:
106 | num_neg = torch.clamp(
107 | self.negpos_ratio * num_pos, max=pos.size(1) - 1)
108 | else:
109 |                 fake_num_pos = torch.ones(num, 1).long() * 15  # num = batch size
110 | num_neg = torch.clamp(
111 | self.negpos_ratio * fake_num_pos, max=pos.size(1) - 1)
112 | neg = idx_rank < num_neg.expand_as(idx_rank)
113 |
114 | # Confidence Loss Including Positive and Negative Examples
115 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
116 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
117 | conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
118 | -1, self.num_classes)
119 | targets_weighted = conf_t[(pos + neg).gt(0)]
120 | loss_c = F.cross_entropy(
121 | conf_p, targets_weighted, size_average=False)
122 | else:
123 |             loss_c = F.cross_entropy(conf_data.view(-1, self.num_classes), conf_t.view(-1), size_average=False)
124 | # Localization Loss (Smooth L1)
125 | # Shape: [batch,num_priors,4]
126 | if num_pos.data.sum() > 0:
127 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
128 | loc_p = loc_data[pos_idx].view(-1, 4)
129 | loc_t = loc_t[pos_idx].view(-1, 4)
130 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
131 | N = num_pos.data.sum()
132 | else:
133 | loss_l = torch.zeros(1)
134 | N = 1.0
135 | loss_l /= float(N)
136 | loss_c /= float(N)
137 | return loss_l, loss_c
138 |
--------------------------------------------------------------------------------
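Hard negative mining above ranks every prior by its loss with a double argsort; the idiom in isolation, on toy numbers (batch of one, four priors):

import torch

loss = torch.tensor([[0.2, 0.9, 0.1, 0.5]])
_, loss_idx = loss.sort(1, descending=True)  # prior indices, hardest first
_, idx_rank = loss_idx.sort(1)               # rank of each prior by its loss
print(idx_rank)                              # tensor([[2, 0, 3, 1]])
neg = idx_rank < 2                           # keep the 2 hardest negatives
print(neg)                                   # tensor([[False,  True, False,  True]])
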
/layers/modules/refine_multibox_loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import numpy as np
8 | from torch.autograd import Variable
9 | from utils.box_utils import match, log_sum_exp, refine_match
10 | from layers.modules import WeightSoftmaxLoss, WeightSmoothL1Loss
11 | from .focal_loss_softmax import FocalLossSoftmax
12 | from .focal_loss_sigmoid import FocalLossSigmoid
13 | GPU = torch.cuda.is_available()
14 | if GPU:
15 |     torch.set_default_tensor_type('torch.cuda.FloatTensor')
16 | 
17 | class RefineMultiBoxLoss(nn.Module):
18 | """SSD Weighted Loss Function
19 | Compute Targets:
20 | 1) Produce Confidence Target Indices by matching ground truth boxes
21 | with (default) 'priorboxes' that have jaccard index > threshold parameter
22 | (default threshold: 0.5).
23 | 2) Produce localization target by 'encoding' variance into offsets of ground
24 | truth boxes and their matched 'priorboxes'.
25 | 3) Hard negative mining to filter the excessive number of negative examples
26 | that comes with using a large number of default bounding boxes.
27 | (default negative:positive ratio 3:1)
28 | Objective Loss:
29 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
30 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
31 | weighted by α which is set to 1 by cross val.
32 | Args:
33 | c: class confidences,
34 | l: predicted boxes,
35 | g: ground truth boxes
36 | N: number of matched default boxes
37 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
38 | """
39 |
40 | def __init__(self, cfg, num_classes):
41 | super(RefineMultiBoxLoss, self).__init__()
42 | self.cfg = cfg
43 | self.size = cfg.MODEL.SIZE
44 | if self.size == '300':
45 | size_cfg = cfg.SMALL
46 | else:
47 | size_cfg = cfg.BIG
48 | self.variance = size_cfg.VARIANCE
49 | self.num_classes = num_classes
50 | self.threshold = cfg.TRAIN.OVERLAP
51 | self.OHEM = cfg.TRAIN.OHEM
52 | self.negpos_ratio = cfg.TRAIN.NEG_RATIO
53 | self.object_score = cfg.MODEL.OBJECT_SCORE
54 | self.variance = size_cfg.VARIANCE
55 | if cfg.TRAIN.FOCAL_LOSS:
56 | if cfg.TRAIN.FOCAL_LOSS_TYPE == 'SOFTMAX':
57 | self.focaloss = FocalLossSoftmax(
58 | self.num_classes, gamma=2, size_average=False)
59 | else:
60 | self.focaloss = FocalLossSigmoid()
61 |
62 | def forward(self,
63 | predictions,
64 | targets,
65 | use_arm=False,
66 | filter_object=False,
67 | debug=False):
68 | """Multibox Loss
69 | Args:
70 | predictions (tuple): A tuple containing loc preds, conf preds,
71 | and prior boxes from SSD net.
72 | conf shape: torch.size(batch_size,num_priors,num_classes)
73 | loc shape: torch.size(batch_size,num_priors,4)
74 | priors shape: torch.size(num_priors,4)
75 |
76 | ground_truth (tensor): Ground truth boxes and labels for a batch,
77 | shape: [batch_size,num_objs,5] (last idx is the label).
78 | """
79 | # arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
80 | if use_arm:
81 | arm_loc_data, arm_conf_data, loc_data, conf_data, priors = predictions
82 | else:
83 | loc_data, conf_data, _, _, priors = predictions
84 | num = loc_data.size(0)
85 | priors = priors[:loc_data.size(1), :]
86 | num_priors = (priors.size(0))
87 | num_classes = self.num_classes
88 |
89 | # match priors (default boxes) and ground truth boxes
90 | loc_t = torch.Tensor(num, num_priors, 4)
91 | conf_t = torch.LongTensor(num, num_priors)
92 | defaults = priors.data
93 | for idx in range(num):
94 | truths = targets[idx][:, :-1].data
95 | labels = targets[idx][:, -1].data
96 | if self.num_classes == 2:
97 | labels = labels > 0
98 | if use_arm:
99 | bbox_weight = refine_match(
100 | self.threshold,
101 | truths,
102 | defaults,
103 | self.variance,
104 | labels,
105 | loc_t,
106 | conf_t,
107 | idx,
108 | arm_loc_data[idx].data,
109 | use_weight=False)
110 | else:
111 | match(self.threshold, truths, defaults, self.variance, labels,
112 | loc_t, conf_t, idx)
113 |
114 | loc_t = loc_t.cuda()
115 | conf_t = conf_t.cuda()
116 | # wrap targets
117 | loc_t = Variable(loc_t, requires_grad=False)
118 | conf_t = Variable(conf_t, requires_grad=False)
119 |
120 | if use_arm and filter_object:
121 | P = F.softmax(arm_conf_data, 2)
122 | arm_conf_data_temp = P[:, :, 1]
123 | object_score_index = arm_conf_data_temp <= self.object_score
124 | pos = conf_t > 0
125 | pos[object_score_index.detach()] = 0
126 | else:
127 | pos = conf_t > 0
128 | num_pos = pos.sum(1, keepdim=True)
129 | if debug:
130 | if use_arm:
131 | print("odm pos num: ", str(loc_t.size(0)), str(loc_t.size(1)))
132 | else:
133 | print("arm pos num", str(loc_t.size(0)), str(loc_t.size(1)))
134 |
135 | if self.OHEM:
136 | # Compute max conf across batch for hard negative mining
137 | batch_conf = conf_data.view(-1, self.num_classes)
138 |
139 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
140 | 1, conf_t.view(-1, 1))
141 |
142 | # Hard Negative Mining
143 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
144 | loss_c = loss_c.view(num, -1)
145 | _, loss_idx = loss_c.sort(1, descending=True)
146 | _, idx_rank = loss_idx.sort(1)
147 | num_pos = pos.long().sum(1, keepdim=True)
148 |
149 | if num_pos.data.sum() > 0:
150 | num_neg = torch.clamp(
151 | self.negpos_ratio * num_pos, max=pos.size(1) - 1)
152 | else:
153 |                 fake_num_pos = torch.ones(num, 1).long() * 15  # num = batch size
154 | num_neg = torch.clamp(
155 | self.negpos_ratio * fake_num_pos, max=pos.size(1) - 1)
156 | neg = idx_rank < num_neg.expand_as(idx_rank)
157 |
158 | # Confidence Loss Including Positive and Negative Examples
159 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
160 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
161 | conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
162 | -1, self.num_classes)
163 |
164 | targets_weighted = conf_t[(pos + neg).gt(0)]
165 | loss_c = F.cross_entropy(
166 | conf_p, targets_weighted, size_average=False)
167 | else:
168 |             loss_c = F.cross_entropy(conf_data.view(-1, self.num_classes), conf_t.view(-1), size_average=False)
169 |
170 | # Localization Loss (Smooth L1)
171 | # Shape: [batch,num_priors,4]
172 | if num_pos.data.sum() > 0:
173 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
174 | loc_p = loc_data[pos_idx].view(-1, 4)
175 | loc_t = loc_t[pos_idx].view(-1, 4)
176 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
177 | N = num_pos.data.sum()
178 | else:
179 | loss_l = torch.zeros(1)
180 | N = 1.0
181 |
182 | loss_l /= float(N)
183 | loss_c /= float(N)
184 | return loss_l, loss_c
185 |
--------------------------------------------------------------------------------
/layers/modules/weight_smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 |
9 |
10 | class WeightSmoothL1Loss(nn.Module):
11 | def __init__(self, class_num, size_average=False):
12 | super(WeightSmoothL1Loss, self).__init__()
13 | self.class_num = class_num
14 | self.size_average = size_average
15 |
16 | def forward(self, inputs, targets, weights):
17 | N = inputs.size(0)
18 | loc_num = inputs.size(1)
19 | abs_out = torch.abs(inputs - targets)
20 |
21 | if inputs.is_cuda and not weights.is_cuda:
22 | weights = weights.cuda()
23 |
24 | weights = weights.view(-1, 1)
25 |
26 | weights = torch.cat((weights, weights, weights, weights), 1)
27 | mask_big = abs_out >= 1.
28 | mask_small = abs_out < 1.
29 | loss_big = weights[mask_big] * (abs_out[mask_big] - 0.5)
30 | loss_small = weights[mask_small] * 0.5 * torch.pow(
31 | abs_out[mask_small], 2)
32 | loss_sum = loss_big.sum() + loss_small.sum()
33 |
34 | if self.size_average:
35 |             loss = loss_sum / (N * loc_num)  # mean over all box coordinates
36 | else:
37 | loss = loss_sum
38 | return loss
39 |
--------------------------------------------------------------------------------
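With unit weights, WeightSmoothL1Loss reduces to a plain smooth-L1 sum, which makes a convenient sanity check (a sketch; the legacy size_average keyword matches the PyTorch era this repo targets):

import torch
import torch.nn.functional as F
from layers.modules import WeightSmoothL1Loss

x, y = torch.randn(8, 4), torch.randn(8, 4)
w = torch.ones(8)                        # unit weights
crit = WeightSmoothL1Loss(class_num=2)   # class_num is unused in forward
print(crit(x, y, w).item())
print(F.smooth_l1_loss(x, y, size_average=False).item())  # same value
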
/layers/modules/weight_softmax_loss.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 |
9 |
10 | class WeightSoftmaxLoss(nn.Module):
11 | def __init__(self, class_num, gamma=2, size_average=True):
12 | super(WeightSoftmaxLoss, self).__init__()
13 | # if isinstance(weights, Variable):
14 | # self.weights = weights
15 | # else:
16 | # self.weights = Variable(weights)
17 |
18 | self.class_num = class_num
19 |         self.gamma = gamma  # note: gamma is not used by this loss
20 | self.size_average = size_average
21 |
22 | def forward(self, inputs, targets, weights):
23 | N = inputs.size(0)
24 | C = inputs.size(1)
25 |         P = F.softmax(inputs, dim=1)
26 |
27 | class_mask = inputs.data.new(N, C).fill_(0)
28 | class_mask = Variable(class_mask)
29 | ids = targets.view(-1, 1)
30 | class_mask.scatter_(1, ids.data, 1.)
31 | if inputs.is_cuda and not weights.is_cuda:
32 | weights = weights.cuda()
33 | probs = (P * class_mask).sum(1).view(-1, 1)
34 |
35 | log_p = probs.log()
36 | weights = weights.view(-1, 1)
37 | batch_loss = -weights * log_p
38 |
39 | if self.size_average:
40 | loss = batch_loss.mean()
41 | else:
42 | loss = batch_loss.sum()
43 | return loss
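
The `scatter_` call above is a one-hot trick: `(P * class_mask).sum(1)` then picks out each sample's probability of its target class. A standalone illustration with toy values (and the equivalent, more direct `gather`):

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5, 0.1],
                       [0.2, 1.5, 0.3]])
targets = torch.tensor([0, 2])

P = F.softmax(logits, dim=1)
class_mask = torch.zeros_like(P)
class_mask.scatter_(1, targets.view(-1, 1), 1.)  # one-hot rows
probs = (P * class_mask).sum(1)                  # p_t per sample

probs_gather = P.gather(1, targets.view(-1, 1)).squeeze(1)
print(probs, probs_gather)  # identical
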
--------------------------------------------------------------------------------
/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd ./utils/
3 |
4 | CUDA_PATH=/usr/local/cuda/
5 |
6 | python build.py build_ext --inplace
7 | # If you build inside an Anaconda3 environment you may need the renames below;
8 | # see https://github.com/rbgirshick/py-faster-rcnn/issues/706 (adjust the cpython-36m suffix to your Python version)
9 | mv nms/cpu_nms.cpython-36m-x86_64-linux-gnu.so nms/cpu_nms.so
10 | mv nms/gpu_nms.cpython-36m-x86_64-linux-gnu.so nms/gpu_nms.so
11 | cd ..
12 |
--------------------------------------------------------------------------------
/models/darknet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 | #
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from models.model_helper import weights_init
9 |
10 |
11 | def add_extras(size, in_channel, batch_norm=False):
12 |     # Extra layers added to the darknet backbone for feature scaling
13 | layers = []
14 | layers += [nn.Conv2d(in_channel, 256, kernel_size=1, stride=1)]
15 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
16 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
17 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
18 | if size == '300':
19 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
20 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)]
21 | else:
22 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
23 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
24 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
25 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
26 |
27 | return layers
28 |
29 |
30 | class ConvBN(nn.Module):
31 | def __init__(self, ch_in, ch_out, kernel_size=3, stride=1, padding=0):
32 | super().__init__()
33 | self.conv = nn.Conv2d(
34 | ch_in,
35 | ch_out,
36 | kernel_size=kernel_size,
37 | stride=stride,
38 | padding=padding,
39 | bias=False)
40 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01, eps=1e-05, affine=True)
41 |
42 | def forward(self, x):
43 | return F.leaky_relu(
44 | self.bn(self.conv(x)), negative_slope=0.1, inplace=True)
45 |
46 |
47 | class DarknetBlock(nn.Module):
48 | def __init__(self, ch_in):
49 | super().__init__()
50 | ch_hid = ch_in // 2
51 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
52 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
53 |
54 | def forward(self, x):
55 | out = self.conv1(x)
56 | out = self.conv2(out)
57 | return out + x
58 |
59 |
60 | class Darknet19(nn.Module):
61 | def __init__(self, size):
62 | super().__init__()
63 | self.conv = ConvBN(3, 32, kernel_size=3, stride=1, padding=1)
64 | self.layer1 = self._make_layer1()
65 | self.layer2 = self._make_layer2()
66 | self.layer3 = self._make_layer3()
67 | self.layer4 = self._make_layer4()
68 | self.layer5 = self._make_layer5()
69 | self.extras = nn.ModuleList(add_extras(str(size), 1024))
70 |
71 | def _make_layer1(self):
72 | layers = [
73 | nn.MaxPool2d(kernel_size=2, stride=2),
74 | ConvBN(32, 64, kernel_size=3, stride=1, padding=1)
75 | ]
76 | return nn.Sequential(*layers)
77 |
78 | def _make_layer2(self):
79 | layers = [
80 | nn.MaxPool2d(kernel_size=2, stride=2),
81 | ConvBN(64, 128, kernel_size=3, stride=1, padding=1),
82 | ConvBN(128, 64, kernel_size=1, stride=1),
83 | ConvBN(64, 128, kernel_size=3, stride=1, padding=1)
84 | ]
85 | return nn.Sequential(*layers)
86 |
87 | def _make_layer3(self):
88 | layers = [
89 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
90 | ConvBN(128, 256, kernel_size=3, stride=1, padding=1),
91 | ConvBN(256, 128, kernel_size=1, stride=1),
92 | ConvBN(128, 256, kernel_size=3, stride=1, padding=1)
93 | ]
94 | return nn.Sequential(*layers)
95 |
96 | def _make_layer4(self):
97 | layers = [
98 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
99 | ConvBN(256, 512, kernel_size=3, stride=1, padding=1),
100 | ConvBN(512, 256, kernel_size=1, stride=1),
101 | ConvBN(256, 512, kernel_size=3, stride=1, padding=1),
102 | ConvBN(512, 256, kernel_size=1, stride=1),
103 | ConvBN(256, 512, kernel_size=3, stride=1, padding=1)
104 | ]
105 | return nn.Sequential(*layers)
106 |
107 | def _make_layer5(self):
108 | layers = [
109 | nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
110 | ConvBN(512, 1024, kernel_size=3, stride=1, padding=1),
111 | ConvBN(1024, 512, kernel_size=1, stride=1),
112 | ConvBN(512, 1024, kernel_size=3, stride=1, padding=1),
113 | ConvBN(1024, 512, kernel_size=1, stride=1),
114 | ConvBN(512, 1024, kernel_size=3, stride=1, padding=1)
115 | ]
116 | return nn.Sequential(*layers)
117 |
118 | def forward(self, x):
119 | out = self.conv(x)
120 | c1 = self.layer1(out)
121 | c2 = self.layer2(c1)
122 | c3 = self.layer3(c2)
123 | c4 = self.layer4(c3)
124 | c5 = self.layer5(c4)
125 | sources = [c3, c4, c5]
126 | x = c5
127 | for k, v in enumerate(self.extras):
128 | x = F.relu(v(x), inplace=True)
129 | if k % 2 == 1:
130 | sources.append(x)
131 | return sources
132 |
133 |
134 | class Darknet53(nn.Module):
135 | def __init__(self, num_blocks, size):
136 | super().__init__()
137 | self.conv = ConvBN(3, 32, kernel_size=3, stride=1, padding=1)
138 | self.layer1 = self._make_layer(32, num_blocks[0], stride=2)
139 | self.layer2 = self._make_layer(64, num_blocks[1], stride=2)
140 | self.layer3 = self._make_layer(128, num_blocks[2], stride=2)
141 | self.layer4 = self._make_layer(256, num_blocks[3], stride=2)
142 | self.layer5 = self._make_layer(512, num_blocks[4], stride=2)
143 | self.extras = nn.ModuleList(add_extras(str(size), 1024))
144 | self._init_modules()
145 |
146 | def _make_layer(self, ch_in, num_blocks, stride=1):
147 | layers = [ConvBN(ch_in, ch_in * 2, stride=stride, padding=1)]
148 | for i in range(num_blocks):
149 | layers.append(DarknetBlock(ch_in * 2))
150 | return nn.Sequential(*layers)
151 |
152 | def _init_modules(self):
153 | self.extras.apply(weights_init)
154 |
155 | def forward(self, x):
156 | out = self.conv(x)
157 | c1 = self.layer1(out)
158 | c2 = self.layer2(c1)
159 | c3 = self.layer3(c2)
160 | c4 = self.layer4(c3)
161 | c5 = self.layer5(c4)
162 | sources = [c3, c4, c5]
163 | x = c5
164 | for k, v in enumerate(self.extras):
165 | x = F.relu(v(x), inplace=True)
166 | if k % 2 == 1:
167 | sources.append(x)
168 | return sources
169 |
170 |
171 | def SSDarknet53(size, channel_size='48'):
172 | return Darknet53([1, 2, 8, 8, 4], size)
173 |
174 |
175 | def SSDarknet19(size, channel_size='48'):
176 | return Darknet19(size)
177 |
178 |
179 | if __name__ == "__main__":
180 | import os
181 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
182 | model3 = SSDarknet19(size=300)
183 | with torch.no_grad():
184 | model3.eval()
185 | x = torch.randn(16, 3, 300, 300)
186 | model3.cuda()
187 | model3(x.cuda())
188 | import time
189 | st = time.time()
190 | for i in range(100):
191 | model3(x.cuda())
192 | print(time.time() - st)
193 |
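
For a 300 x 300 input, Darknet19 above yields the usual six-scale SSD pyramid: layers 3-5 produce 38 x 38, 19 x 19 and 10 x 10 maps, and the extra layers append 5 x 5, 3 x 3 and 1 x 1. A quick CPU shape check, assuming the repo root is on PYTHONPATH:

import torch
from models.darknet import SSDarknet19

model = SSDarknet19(size=300)
model.eval()
with torch.no_grad():
    sources = model(torch.randn(1, 3, 300, 300))
print([tuple(s.shape)[2:] for s in sources])
# [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
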
--------------------------------------------------------------------------------
/models/drf_res.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | import models.dense_conv
8 | from torch.autograd import Variable
9 | from models.model_helper import weights_init
10 |
11 |
12 | def add_extras(size, in_channel, batch_norm=False):
13 | layers = []
14 | layers += [nn.Conv2d(in_channel, 256, kernel_size=1, stride=1)]
15 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
16 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
17 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
18 | if size == '300':
19 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
20 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)]
21 | else:
22 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
23 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
24 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
25 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
26 |
27 | return layers
28 |
29 |
30 | class Bottleneck(nn.Module):
31 | expansion = 4
32 |
33 | def __init__(self, in_planes, planes, stride=1):
34 | super(Bottleneck, self).__init__()
35 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
36 | self.bn1 = nn.BatchNorm2d(planes)
37 | self.conv2 = nn.Conv2d(
38 | planes,
39 | planes,
40 | kernel_size=3,
41 | stride=stride,
42 | padding=1,
43 | bias=False)
44 | self.bn2 = nn.BatchNorm2d(planes)
45 | self.conv3 = nn.Conv2d(
46 | planes, self.expansion * planes, kernel_size=1, bias=False)
47 | self.bn3 = nn.BatchNorm2d(self.expansion * planes)
48 |
49 | self.downsample = nn.Sequential()
50 | if stride != 1 or in_planes != self.expansion * planes:
51 | self.downsample = nn.Sequential(
52 | nn.Conv2d(
53 | in_planes,
54 | self.expansion * planes,
55 | kernel_size=1,
56 | stride=stride,
57 | bias=False), nn.BatchNorm2d(self.expansion * planes))
58 |
59 | def forward(self, x):
60 | out = F.relu(self.bn1(self.conv1(x)))
61 | out = F.relu(self.bn2(self.conv2(out)))
62 | out = self.bn3(self.conv3(out))
63 | out += self.downsample(x)
64 | out = F.relu(out)
65 | return out
66 |
67 |
68 | class DenseSSDResnet(nn.Module):
69 | def __init__(self, block, num_blocks, size='300', channel_size='48'):
70 | super(DenseSSDResnet, self).__init__()
71 | self.in_planes = 64
72 |
73 | self.conv1 = nn.Conv2d(
74 | 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
75 | self.bn1 = nn.BatchNorm2d(64)
76 |
77 | # Bottom-up layers
78 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
79 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
80 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
81 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
82 |
83 | self.extras = nn.ModuleList(add_extras(str(size), 2048))
84 |
85 | dense_list = models.dense_conv.dense_list_res(channel_size, size)
86 | self.dense_list0 = nn.ModuleList(dense_list[0])
87 | self.dense_list1 = nn.ModuleList(dense_list[1])
88 | self.dense_list2 = nn.ModuleList(dense_list[2])
89 | self.dense_list3 = nn.ModuleList(dense_list[3])
90 | self.dense_list4 = nn.ModuleList(dense_list[4])
91 | self.dense_list5 = nn.ModuleList(dense_list[5])
92 | self.smooth1 = nn.Conv2d(2048, 512, kernel_size=3, stride=1, padding=1)
93 | self._init_modules()
94 |
95 | def _make_layer(self, block, planes, num_blocks, stride):
96 | strides = [stride] + [1] * (num_blocks - 1)
97 | layers = []
98 | for stride in strides:
99 | layers.append(block(self.in_planes, planes, stride))
100 | self.in_planes = planes * block.expansion
101 | return nn.Sequential(*layers)
102 |
103 | def _init_modules(self):
104 | self.extras.apply(weights_init)
105 | self.dense_list0.apply(weights_init)
106 | self.dense_list1.apply(weights_init)
107 | self.dense_list2.apply(weights_init)
108 | self.dense_list3.apply(weights_init)
109 | self.dense_list4.apply(weights_init)
110 | self.dense_list5.apply(weights_init)
111 | self.smooth1.apply(weights_init)
112 |
113 | def forward(self, x):
114 | # Bottom-up
115 | c1 = F.relu(self.bn1(self.conv1(x)))
116 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
117 |
118 | c2 = self.layer1(c1)
119 | dense1_p1 = self.dense_list0[0](c2)
120 | dense1_p2 = self.dense_list0[1](dense1_p1)
121 | dense1_p3 = self.dense_list0[2](dense1_p2)
122 | dense1_p1_conv = self.dense_list0[3](dense1_p1)
123 | dense1_p2_conv = self.dense_list0[4](dense1_p2)
124 | dense1_p3_conv = self.dense_list0[5](dense1_p3)
125 |
126 | c3 = self.layer2(c2)
127 | dense2_p1 = self.dense_list1[0](c3)
128 | dense2_p2 = self.dense_list1[1](dense2_p1)
129 | dense2_p3 = self.dense_list1[2](dense2_p2)
130 | dense2_p1_conv = self.dense_list1[3](dense2_p1)
131 | dense2_p2_conv = self.dense_list1[4](dense2_p2)
132 | dense2_p3_conv = self.dense_list1[5](dense2_p3)
133 |
134 | c4 = self.layer3(c3)
135 | dense3_up_conv = self.dense_list2[0](c4)
136 | dense3_up = self.dense_list2[1](dense3_up_conv)
137 | dense3_p1 = self.dense_list2[2](c4)
138 | dense3_p2 = self.dense_list2[3](dense3_p1)
139 | dense3_p1_conv = self.dense_list2[4](dense3_p1)
140 | dense3_p2_conv = self.dense_list2[5](dense3_p2)
141 |
142 | c5 = self.layer4(c4)
143 | c5_ = self.smooth1(c5)
144 | dense4_up1_conv = self.dense_list3[0](c5)
145 | dense4_up2_conv = self.dense_list3[1](c5)
146 | dense4_up1 = self.dense_list3[2](dense4_up1_conv)
147 | dense4_up2 = self.dense_list3[3](dense4_up2_conv)
148 | dense4_p = self.dense_list3[4](c5)
149 | dense4_p_conv = self.dense_list3[5](dense4_p)
150 |
151 | p6 = F.relu(self.extras[0](c5), inplace=True)
152 | p6 = F.relu(self.extras[1](p6), inplace=True)
153 |
154 | x = p6
155 |
156 | dense5_up1_conv = self.dense_list4[0](p6)
157 | dense5_up2_conv = self.dense_list4[1](p6)
158 | dense5_up3_conv = self.dense_list4[2](p6)
159 | dense5_up1 = self.dense_list4[3](dense5_up1_conv)
160 | dense5_up2 = self.dense_list4[4](dense5_up2_conv)
161 | dense5_up3 = self.dense_list4[5](dense5_up3_conv)
162 |
163 | dense_out1 = torch.cat(
164 | (dense1_p1_conv, c3, dense3_up, dense4_up2, dense5_up3), 1)
165 | dense_out1 = F.relu(self.dense_list5[0](dense_out1))
166 |
167 | dense_out2 = torch.cat(
168 | (dense1_p2_conv, dense2_p1_conv, c4, dense4_up1, dense5_up2), 1)
169 | dense_out2 = F.relu(self.dense_list5[1](dense_out2))
170 |
171 | dense_out3 = torch.cat(
172 | (dense1_p3_conv, dense2_p2_conv, dense3_p1_conv, c5_, dense5_up1),
173 | 1)
174 | dense_out3 = F.relu(self.dense_list5[2](dense_out3))
175 |
176 | dense_out4 = torch.cat(
177 | (dense2_p3_conv, dense3_p2_conv, dense4_p_conv, p6), 1)
178 | dense_out4 = F.relu(self.dense_list5[3](dense_out4))
179 |
180 | sources = [dense_out1, dense_out2, dense_out3, dense_out4]
181 | # apply extra layers and cache source layer outputs
182 | for k, v in enumerate(self.extras):
183 | if k > 1:
184 | x = F.relu(v(x), inplace=True)
185 | if k % 2 == 1:
186 | sources.append(x)
187 |
188 | return sources
189 |
190 |
191 | def DRFSSDRes50(size, channel_size='48'):
192 | return DenseSSDResnet(Bottleneck, [3, 4, 6, 3], size, channel_size)
193 |
194 |
195 | def DRFSSDRes101(size, channel_size='48'):
196 | return DenseSSDResnet(Bottleneck, [3, 4, 23, 3], size, channel_size)
197 |
198 |
199 | def DRFSSDRes152(size, channel_size='48'):
200 | return DenseSSDResnet(Bottleneck, [3, 8, 36, 3], size, channel_size)
201 |
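
The extra layers above alternate 1 x 1 projections with 3 x 3 convolutions (stride 2, except the padding-0 final pair of the '300' configuration), shrinking the backbone's 10 x 10 map down to 1 x 1. A quick standalone check of just that progression, assuming the repo root is on PYTHONPATH:

import torch
import torch.nn as nn
import torch.nn.functional as F
from models.drf_res import add_extras

extras = nn.ModuleList(add_extras('300', 2048))
x = torch.randn(1, 2048, 10, 10)  # c5 of a ResNet backbone at 300 input
for k, layer in enumerate(extras):
    x = F.relu(layer(x), inplace=True)
    if k % 2 == 1:
        print(tuple(x.shape)[2:])  # (5, 5), then (3, 3), then (1, 1)
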
--------------------------------------------------------------------------------
/models/mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | from torch.nn import init
7 | from models.model_helper import weights_init
8 |
9 |
10 | def add_extras(size, in_channel, batch_norm=False):
11 |     # Extra layers added to the MobileNetV2 backbone for feature scaling
12 | layers = []
13 | layers += [nn.Conv2d(in_channel, 256, kernel_size=1, stride=1)]
14 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
15 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
16 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
17 | if size == '300':
18 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
19 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)]
20 | else:
21 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
22 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
23 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
24 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
25 |
26 | return layers
27 |
28 |
29 | def _make_divisible(v, divisor, min_value=None):
30 |     """
31 |     This function is taken from the original tf repo:
32 |     https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
33 |     It ensures that all layers have a channel number that is divisible by
34 |     `divisor` (8 throughout this codebase).
35 |     :param v: requested channel count (e.g. after width-multiplier scaling)
36 |     :param divisor: the value the result must be a multiple of
37 |     :param min_value: lower bound on the result; defaults to `divisor`
38 |     :return: the adjusted channel count
39 |     """
40 | if min_value is None:
41 | min_value = divisor
42 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
43 | # Make sure that round down does not go down by more than 10%.
44 | if new_v < 0.9 * v:
45 | new_v += divisor
46 | return new_v
47 |
48 |
49 | class LinearBottleneck(nn.Module):
50 | def __init__(self, inplanes, outplanes, stride=1, t=6,
51 | activation=nn.ReLU6):
52 | super(LinearBottleneck, self).__init__()
53 | self.conv1 = nn.Conv2d(
54 | inplanes, inplanes * t, kernel_size=1, bias=False)
55 | self.bn1 = nn.BatchNorm2d(inplanes * t)
56 | self.conv2 = nn.Conv2d(
57 | inplanes * t,
58 | inplanes * t,
59 | kernel_size=3,
60 | stride=stride,
61 | padding=1,
62 | bias=False,
63 | groups=inplanes * t)
64 | self.bn2 = nn.BatchNorm2d(inplanes * t)
65 | self.conv3 = nn.Conv2d(
66 | inplanes * t, outplanes, kernel_size=1, bias=False)
67 | self.bn3 = nn.BatchNorm2d(outplanes)
68 | self.activation = activation(inplace=True)
69 | self.stride = stride
70 | self.t = t
71 | self.inplanes = inplanes
72 | self.outplanes = outplanes
73 |
74 | def forward(self, x):
75 | residual = x
76 |
77 | out = self.conv1(x)
78 | out = self.bn1(out)
79 | out = self.activation(out)
80 |
81 | out = self.conv2(out)
82 | out = self.bn2(out)
83 | out = self.activation(out)
84 |
85 | out = self.conv3(out)
86 | out = self.bn3(out)
87 |
88 | if self.stride == 1 and self.inplanes == self.outplanes:
89 | out += residual
90 |
91 | return out
92 |
93 |
94 | class MobileNet2(nn.Module):
95 | """MobileNet2 implementation.
96 | """
97 |
98 | def __init__(self,
99 | scale=1.0,
100 | input_size=224,
101 | t=6,
102 | in_channels=3,
103 | size=300,
104 | activation=nn.ReLU6):
105 |         """
106 |         MobileNet2 constructor.
107 |         :param in_channels: (int, optional) number of channels in the input
108 |             tensor. Default is 3 for RGB image inputs.
109 |         :param input_size: nominal input resolution; currently unused
110 |         :param size: SSD input size (300 or 512), used to build the extra
111 |             layers
112 |         :param scale: width multiplier applied to every stage's channels
113 |         :param t: expansion factor of the linear bottlenecks
114 |         :param activation: activation class, ReLU6 by default
115 |         """
116 |
117 | super(MobileNet2, self).__init__()
118 |
119 | self.scale = scale
120 | self.t = t
121 | self.activation_type = activation
122 | self.activation = activation(inplace=True)
123 | self.size = size
124 |
125 | self.num_of_channels = [32, 16, 24, 32, 64, 96, 160, 320]
126 | # assert (input_size % 32 == 0)
127 |
128 | self.c = [
129 | _make_divisible(ch * self.scale, 8) for ch in self.num_of_channels
130 | ]
131 | self.n = [1, 1, 2, 3, 4, 3, 3, 1]
132 | self.s = [2, 1, 2, 2, 2, 1, 2, 1]
133 | self.conv1 = nn.Conv2d(
134 | in_channels,
135 | self.c[0],
136 | kernel_size=3,
137 | bias=False,
138 | stride=self.s[0],
139 | padding=1)
140 | self.bn1 = nn.BatchNorm2d(self.c[0])
141 | # self.bottlenecks = self._make_bottlenecks()
142 | self.bottlenecks = nn.ModuleList(self._make_bottlenecks())
143 |
144 | # Last convolution has 1280 output channels for scale <= 1
145 | self.last_conv_out_ch = 1280 if self.scale <= 1 else _make_divisible(
146 | 1280 * self.scale, 8)
147 | self.conv_last = nn.Conv2d(
148 | self.c[-1], self.last_conv_out_ch, kernel_size=1, bias=False)
149 | self.bn_last = nn.BatchNorm2d(self.last_conv_out_ch)
150 |
151 | self.extras = nn.ModuleList(
152 | add_extras(str(self.size), self.last_conv_out_ch))
153 | self._init_modules()
154 |
155 | def _init_modules(self):
156 | self.extras.apply(weights_init)
157 |
158 | def _make_stage(self, inplanes, outplanes, n, stride, t, stage):
159 | modules = OrderedDict()
160 | stage_name = "LinearBottleneck{}".format(stage)
161 |
162 | # First module is the only one utilizing stride
163 | first_module = LinearBottleneck(
164 | inplanes=inplanes,
165 | outplanes=outplanes,
166 | stride=stride,
167 | t=t,
168 | activation=self.activation_type)
169 | modules[stage_name + "_0"] = first_module
170 |
171 | # add more LinearBottleneck depending on number of repeats
172 | for i in range(n - 1):
173 | name = stage_name + "_{}".format(i + 1)
174 | module = LinearBottleneck(
175 | inplanes=outplanes,
176 | outplanes=outplanes,
177 | stride=1,
178 | t=6,
179 | activation=self.activation_type)
180 | modules[name] = module
181 | return nn.Sequential(modules)
182 |
183 | def _make_bottlenecks(self):
184 | modules = list()
185 | stage_name = "Bottlenecks"
186 |
187 | # First module is the only one with t=1
188 | bottleneck1 = self._make_stage(
189 | inplanes=self.c[0],
190 | outplanes=self.c[1],
191 | n=self.n[1],
192 | stride=self.s[1],
193 | t=1,
194 | stage=0)
195 | modules.append(bottleneck1)
196 |
197 | # add more LinearBottleneck depending on number of repeats
198 | for i in range(1, len(self.c) - 1):
199 | name = stage_name + "_{}".format(i)
200 | module = self._make_stage(
201 | inplanes=self.c[i],
202 | outplanes=self.c[i + 1],
203 | n=self.n[i + 1],
204 | stride=self.s[i + 1],
205 | t=self.t,
206 | stage=i)
207 |             modules += module  # list += Sequential unpacks the stage into its blocks
208 |
209 | return modules
210 |
211 | def forward(self, x):
212 | x = self.conv1(x)
213 | x = self.bn1(x)
214 | x = self.activation(x)
215 |
216 | sources = list()
217 | for i in range(6):
218 | x = self.bottlenecks[i](x)
219 | sources.append(x)
220 | for i in range(6, 13):
221 | x = self.bottlenecks[i](x)
222 | sources.append(x)
223 | for i in range(13, len(self.bottlenecks)):
224 | x = self.bottlenecks[i](x)
225 | x = self.conv_last(x)
226 | x = self.bn_last(x)
227 | x = self.activation(x)
228 | sources.append(x)
229 | for k, v in enumerate(self.extras):
230 | x = F.relu(v(x), inplace=True)
231 | if k % 2 == 1:
232 | sources.append(x)
233 | return sources
234 |
235 |
236 | def SSDMobilenetv2(size, channel_size='48'):
237 | return MobileNet2(size=size)
238 |
239 |
240 | if __name__ == "__main__":
241 | import os
242 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
243 | model3 = MobileNet2(size=300)
244 | with torch.no_grad():
245 | model3.eval()
246 | x = torch.randn(16, 3, 300, 300)
247 | model3.cuda()
248 | model3(x.cuda())
249 | import time
250 | st = time.time()
251 | for i in range(100):
252 | model3(x.cuda())
253 | print(time.time() - st)
254 | # print(model3(x))
255 |
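
The rounding rule in `_make_divisible` above is easiest to see on concrete numbers: the candidate is rounded to the nearest multiple of the divisor, then bumped up one step whenever rounding down would lose more than 10% of the requested width. A few worked values (assuming the repo root is on PYTHONPATH):

from models.mobilenetv2 import _make_divisible

for v in [16, 18.0, 30, 67, 91]:
    print(v, '->', _make_divisible(v, 8))
# 16 -> 16, 18.0 -> 24 (16 would lose > 10%), 30 -> 32,
# 67 -> 64 (within 10%), 91 -> 88
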
--------------------------------------------------------------------------------
/models/model_builder.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from layers import *
9 | import os
10 | from models.model_helper import weights_init
11 | import importlib
12 | from layers.functions.prior_layer import PriorLayer
13 |
14 |
15 | def get_func(func_name):
16 | """Helper to return a function object by name. func_name must identify a
17 | function in this module or the path to a function relative to the base
18 |     'models' package, e.g. 'vgg.SSDVgg'.
19 | """
20 | if func_name == '':
21 | return None
22 | try:
23 | parts = func_name.split('.')
24 | # Refers to a function in this module
25 | if len(parts) == 1:
26 | return globals()[parts[0]]
27 | # Otherwise, assume we're referencing a module under modeling
28 | module_name = 'models.' + '.'.join(parts[:-1])
29 | module = importlib.import_module(module_name)
30 | return getattr(module, parts[-1])
31 | except Exception:
32 |         print('Failed to find function: %s' % func_name)
33 | raise
34 |
35 |
36 | class SSD(nn.Module):
37 | """Single Shot Multibox Architecture
38 | The network is composed of a base VGG network followed by the
39 | added multibox conv layers. Each multibox layer branches into
40 | 1) conv2d for class conf scores
41 | 2) conv2d for localization predictions
42 | 3) associated priorbox layer to produce default bounding
43 | boxes specific to the layer's feature map size.
44 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
45 |
46 |     Args:
47 |         cfg: the configuration object. It selects the backbone
48 |             (MODEL.CONV_BODY), the input size (300 or 512), the number
49 |             of classes, and whether the RefineDet-style ARM/ODM heads
50 |             are built (MODEL.REFINE).
51 |     """
52 |
53 | def _init_modules(self):
54 | self.arm_loc.apply(weights_init)
55 | self.arm_conf.apply(weights_init)
56 | if self.cfg.MODEL.REFINE:
57 | self.odm_loc.apply(weights_init)
58 | self.odm_conf.apply(weights_init)
59 | if self.cfg.MODEL.LOAD_PRETRAINED_WEIGHTS:
60 | weights = torch.load(self.cfg.MODEL.PRETRAIN_WEIGHTS)
61 | print("load pretrain model {}".format(
62 | self.cfg.MODEL.PRETRAIN_WEIGHTS))
63 | if self.cfg.MODEL.TYPE.split('_')[-1] == 'vgg':
64 | self.extractor.vgg.load_state_dict(weights)
65 | else:
66 | self.extractor.load_state_dict(weights, strict=False)
67 |
68 | def __init__(self, cfg):
69 | super(SSD, self).__init__()
70 | self.cfg = cfg
71 | self.size = cfg.MODEL.SIZE
72 | if self.size == '300':
73 | size_cfg = cfg.SMALL
74 | else:
75 | size_cfg = cfg.BIG
76 | self.num_classes = cfg.MODEL.NUM_CLASSES
77 | self.prior_layer = PriorLayer(cfg)
78 | self.priorbox = PriorBox(cfg)
79 | self.priors = self.priorbox.forward()
80 | self.extractor = get_func(cfg.MODEL.CONV_BODY)(self.size,
81 | cfg.TRAIN.CHANNEL_SIZE)
82 | if cfg.MODEL.REFINE:
83 | self.odm_channels = size_cfg.ODM_CHANNELS
84 | self.arm_num_classes = 2
85 | self.odm_loc = nn.ModuleList()
86 | self.odm_conf = nn.ModuleList()
87 |             # arm_loc / arm_conf are created below for both branches;
88 |             # only the ODM heads are specific to the refine model
89 | self.arm_channels = size_cfg.ARM_CHANNELS
90 | self.num_anchors = size_cfg.NUM_ANCHORS
91 | self.input_fixed = size_cfg.INPUT_FIXED
92 | self.arm_loc = nn.ModuleList()
93 | self.arm_conf = nn.ModuleList()
94 | for i in range(len(self.arm_channels)):
95 | if cfg.MODEL.REFINE:
96 | self.arm_loc += [
97 | nn.Conv2d(
98 | self.arm_channels[i],
99 | self.num_anchors[i] * 4,
100 | kernel_size=3,
101 | padding=1)
102 | ]
103 | self.arm_conf += [
104 | nn.Conv2d(
105 | self.arm_channels[i],
106 | self.num_anchors[i] * self.arm_num_classes,
107 | kernel_size=3,
108 | padding=1)
109 | ]
110 | self.odm_loc += [
111 | nn.Conv2d(
112 | self.odm_channels[i],
113 | self.num_anchors[i] * 4,
114 | kernel_size=3,
115 | padding=1)
116 | ]
117 | self.odm_conf += [
118 | nn.Conv2d(
119 | self.odm_channels[i],
120 | self.num_anchors[i] * self.num_classes,
121 | kernel_size=3,
122 | padding=1)
123 | ]
124 | else:
125 | self.arm_loc += [
126 | nn.Conv2d(
127 | self.arm_channels[i],
128 | self.num_anchors[i] * 4,
129 | kernel_size=3,
130 | padding=1)
131 | ]
132 | self.arm_conf += [
133 | nn.Conv2d(
134 | self.arm_channels[i],
135 | self.num_anchors[i] * self.num_classes,
136 | kernel_size=3,
137 | padding=1)
138 | ]
139 | if cfg.TRAIN.TRAIN_ON:
140 | self._init_modules()
141 |
142 | def forward(self, x):
143 |
144 | arm_loc = list()
145 | arm_conf = list()
146 | if self.cfg.MODEL.REFINE:
147 | odm_loc = list()
148 | odm_conf = list()
149 | arm_xs, odm_xs = self.extractor(x)
150 | for (x, l, c) in zip(odm_xs, self.odm_loc, self.odm_conf):
151 | odm_loc.append(l(x).permute(0, 2, 3, 1).contiguous())
152 | odm_conf.append(c(x).permute(0, 2, 3, 1).contiguous())
153 | odm_loc = torch.cat([o.view(o.size(0), -1) for o in odm_loc], 1)
154 | odm_conf = torch.cat([o.view(o.size(0), -1) for o in odm_conf], 1)
155 | else:
156 | arm_xs = self.extractor(x)
157 | img_wh = (x.size(3), x.size(2))
158 | feature_maps_wh = [(t.size(3), t.size(2)) for t in arm_xs]
159 | for (x, l, c) in zip(arm_xs, self.arm_loc, self.arm_conf):
160 | arm_loc.append(l(x).permute(0, 2, 3, 1).contiguous())
161 | arm_conf.append(c(x).permute(0, 2, 3, 1).contiguous())
162 | arm_loc = torch.cat([o.view(o.size(0), -1) for o in arm_loc], 1)
163 | arm_conf = torch.cat([o.view(o.size(0), -1) for o in arm_conf], 1)
164 | if self.cfg.MODEL.REFINE:
165 | output = (arm_loc.view(arm_loc.size(0), -1, 4),
166 | arm_conf.view(
167 | arm_conf.size(0), -1, self.arm_num_classes),
168 | odm_loc.view(odm_loc.size(0), -1, 4),
169 | odm_conf.view(odm_conf.size(0), -1, self.num_classes),
170 | self.priors if self.input_fixed else self.prior_layer(
171 | img_wh, feature_maps_wh))
172 | else:
173 | output = (arm_loc.view(arm_loc.size(0), -1, 4),
174 | arm_conf.view(arm_conf.size(0), -1, self.num_classes),
175 | self.priors if self.input_fixed else self.prior_layer(
176 | img_wh, feature_maps_wh))
177 | return output
178 |
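
`get_func` is what turns the `MODEL.CONV_BODY` config string into a backbone constructor: a bare name is looked up in this module, while a dotted name is imported from under the `models` package. A minimal sketch, assuming the repo root is on PYTHONPATH (the config value here is illustrative):

from models.model_builder import get_func

build_backbone = get_func('vgg.SSDVgg')   # resolves models.vgg.SSDVgg
extractor = build_backbone('300', '48')   # (size, channel_size)
print(type(extractor).__name__)           # VGG16Extractor
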
--------------------------------------------------------------------------------
/models/refine_res.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from models.model_helper import FpnAdapter, weights_init
9 |
10 |
11 | def add_extras(size, in_channel, batch_norm=False):
12 | # Extra layers added to resnet for feature scaling
13 | layers = []
14 | layers += [nn.Conv2d(in_channel, 256, kernel_size=1, stride=1)]
15 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
16 | return layers
17 |
18 |
19 | def conv3x3(in_planes, out_planes, stride=1):
20 | "3x3 convolution with padding"
21 | return nn.Conv2d(
22 | in_planes,
23 | out_planes,
24 | kernel_size=3,
25 | stride=stride,
26 | padding=1,
27 | bias=False)
28 |
29 |
30 | class BasicBlock(nn.Module):
31 | expansion = 1
32 |
33 | def __init__(self, inplanes, planes, stride=1, downsample=None):
34 | super(BasicBlock, self).__init__()
35 | self.conv1 = conv3x3(inplanes, planes, stride)
36 | self.bn1 = nn.BatchNorm2d(planes)
37 | self.relu = nn.ReLU(inplace=True)
38 | self.conv2 = conv3x3(planes, planes)
39 | self.bn2 = nn.BatchNorm2d(planes)
40 | self.downsample = downsample
41 | self.stride = stride
42 |
43 | def forward(self, x):
44 | residual = x
45 |
46 | out = self.conv1(x)
47 | out = self.bn1(out)
48 | out = self.relu(out)
49 |
50 | out = self.conv2(out)
51 | out = self.bn2(out)
52 |
53 | if self.downsample is not None:
54 | residual = self.downsample(x)
55 | out += residual
56 | out = self.relu(out)
57 |
58 | return out
59 |
60 |
61 | class Bottleneck(nn.Module):
62 | expansion = 4
63 |
64 | def __init__(self, inplanes, planes, stride=1, downsample=None):
65 | super(Bottleneck, self).__init__()
66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
67 | self.bn1 = nn.BatchNorm2d(planes)
68 | self.conv2 = nn.Conv2d(
69 | planes,
70 | planes,
71 | kernel_size=3,
72 | stride=stride,
73 | padding=1,
74 | bias=False)
75 | self.bn2 = nn.BatchNorm2d(planes)
76 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
77 | self.bn3 = nn.BatchNorm2d(planes * 4)
78 | self.relu = nn.ReLU(inplace=True)
79 | self.downsample = downsample
80 | self.stride = stride
81 |
82 | def forward(self, x):
83 | residual = x
84 |
85 | out = self.conv1(x)
86 | out = self.bn1(out)
87 | out = self.relu(out)
88 |
89 | out = self.conv2(out)
90 | out = self.bn2(out)
91 | out = self.relu(out)
92 |
93 | out = self.conv3(out)
94 | out = self.bn3(out)
95 |
96 | if self.downsample is not None:
97 | residual = self.downsample(x)
98 |
99 | out += residual
100 | out = self.relu(out)
101 |
102 | return out
103 |
104 |
105 | class RefineResnet(nn.Module):
106 | def __init__(self, block, num_blocks, size):
107 | super(RefineResnet, self).__init__()
108 | self.inplanes = 64
109 |
110 | self.conv1 = nn.Conv2d(
111 | 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
112 | self.bn1 = nn.BatchNorm2d(64)
113 |
114 | # Bottom-up layers
115 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
116 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
117 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
118 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
119 | self.inchannel = block.expansion * 512
120 | self.extras = nn.ModuleList(add_extras(str(size), self.inchannel))
121 | self.smooth1 = nn.Conv2d(
122 | self.inchannel, 512, kernel_size=3, stride=1, padding=1)
123 | self.fpn = FpnAdapter([512, 1024, 512, 256], 4)
124 | self._init_modules()
125 |
126 | def _make_layer(self, block, planes, blocks, stride=1):
127 | downsample = None
128 | if stride != 1 or self.inplanes != planes * block.expansion:
129 | downsample = nn.Sequential(
130 | nn.Conv2d(
131 | self.inplanes,
132 | planes * block.expansion,
133 | kernel_size=1,
134 | stride=stride,
135 | bias=False),
136 | nn.BatchNorm2d(planes * block.expansion),
137 | )
138 |
139 | layers = []
140 | layers.append(block(self.inplanes, planes, stride, downsample))
141 | self.inplanes = planes * block.expansion
142 | for i in range(1, blocks):
143 | layers.append(block(self.inplanes, planes))
144 |
145 | return nn.Sequential(*layers)
146 |
147 | def _init_modules(self):
148 | self.extras.apply(weights_init)
149 | self.smooth1.apply(weights_init)
150 |
151 | def forward(self, x):
152 | # Bottom-up
153 | odm_sources = list()
154 | c1 = F.relu(self.bn1(self.conv1(x)))
155 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
156 | c2 = self.layer1(c1)
157 | c3 = self.layer2(c2)
158 | c4 = self.layer3(c3)
159 | c5 = self.layer4(c4)
160 | x = c5
161 | c5_ = self.smooth1(c5)
162 | arm_sources = [c3, c4, c5_]
163 | for k, v in enumerate(self.extras):
164 | x = F.relu(v(x), inplace=True)
165 | if k % 2 == 1:
166 | arm_sources.append(x)
167 | odm_sources = self.fpn(arm_sources)
168 | return arm_sources, odm_sources
169 |
170 |
171 | def RefineResnet50(size, channel_size='48'):
172 | return RefineResnet(Bottleneck, [3, 4, 6, 3], size)
173 |
174 |
175 | def RefineResnet101(size, channel_size='48'):
176 | return RefineResnet(Bottleneck, [3, 4, 23, 3], size)
177 |
178 |
179 | def RefineResnet152(size, channel_size='48'):
180 | return RefineResnet(Bottleneck, [3, 8, 36, 3], size)
181 |
182 |
183 | if __name__ == "__main__":
184 | import os
185 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
186 | model = RefineResnet50(size=300)
187 | print(model)
188 | with torch.no_grad():
189 | model.eval()
190 | x = torch.randn(1, 3, 320, 320)
191 | model.cuda()
192 | model(x.cuda())
193 |
--------------------------------------------------------------------------------
/models/refine_vgg.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | import torch.nn.init as init
9 | from models.model_helper import FpnAdapter, WeaveAdapter, weights_init
10 |
11 |
12 | class L2Norm(nn.Module):
13 | def __init__(self, n_channels, scale):
14 | super(L2Norm, self).__init__()
15 | self.n_channels = n_channels
16 |         self.gamma = scale
17 | self.eps = 1e-10
18 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
19 | self.reset_parameters()
20 |
21 | def reset_parameters(self):
22 | init.constant_(self.weight, self.gamma)
23 |
24 | def forward(self, x):
25 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
26 | x = x / norm
27 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(
28 | x) * x
29 | return out
30 |
31 |
32 | # This function is derived from torchvision VGG make_layers()
33 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
34 |
35 |
36 | def vgg(cfg, i, batch_norm=False):
37 | layers = []
38 | in_channels = i
39 | for v in cfg:
40 | if v == 'M':
41 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
42 | elif v == 'C':
43 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
44 | else:
45 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
46 | if batch_norm:
47 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
48 | else:
49 | layers += [conv2d, nn.ReLU(inplace=True)]
50 | in_channels = v
51 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
52 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
53 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
54 | layers += [
55 | pool5, conv6,
56 | nn.ReLU(inplace=True), conv7,
57 | nn.ReLU(inplace=True)
58 | ]
59 | return layers
60 |
61 |
62 | base = {
63 | '300': [
64 | 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
65 | 512, 512, 512
66 | ],
67 | '512': [
68 | 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
69 | 512, 512, 512
70 | ],
71 | }
72 |
73 |
74 | def add_extras(size):
75 | layers = []
76 | layers += [nn.Conv2d(1024, 256, kernel_size=1, stride=1)]
77 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
78 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
79 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
80 |
81 | return layers
82 |
83 |
84 | # def last_layer_trans():
85 | # return nn.Sequential(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
86 | # nn.ReLU(inplace=True),
87 | # nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
88 | # nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
89 |
90 | # def trans_layers(size):
91 | # layers = list()
92 | # layers += [nn.Sequential(nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
93 | # nn.ReLU(inplace=True),
94 | # nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))]
95 | # layers += [nn.Sequential(nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1),
96 | # nn.ReLU(inplace=True),
97 | # nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))]
98 | # layers += [nn.Sequential(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
99 | # nn.ReLU(inplace=True),
100 | # nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))]
101 |
102 | # return layers
103 |
104 | # def latent_layers(size):
105 | # layers = []
106 | # for i in range(3):
107 | # layers += [nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)]
108 | # return layers
109 |
110 | # def up_layers(size):
111 | # layers = []
112 | # for i in range(3):
113 | # layers += [nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0)]
114 | # return layers
115 |
116 |
117 | class VGG16Extractor(nn.Module):
118 | def __init__(self, size, channel_size='48'):
119 | super(VGG16Extractor, self).__init__()
120 | self.vgg = nn.ModuleList(vgg(base[str(size)], 3))
121 | self.extras = nn.ModuleList(add_extras(str(size)))
122 | self.L2Norm_4_3 = L2Norm(512, 10)
123 | self.L2Norm_5_3 = L2Norm(1024, 8)
124 | # self.last_layer_trans = last_layer_trans()
125 | # self.trans_layers = nn.ModuleList(trans_layers(str(size)))
126 | # self.latent_layers = nn.ModuleList(latent_layers((str(size))))
127 | # self.up_layers = nn.ModuleList(up_layers(str(size)))
128 | self.fpn = FpnAdapter([512, 1024, 256, 256], 4)
129 | self._init_modules()
130 |
131 | def _init_modules(self):
132 | self.extras.apply(weights_init)
133 | # self.last_layer_trans.apply(weights_init)
134 | # self.trans_layers.apply(weights_init)
135 | # self.latent_layers.apply(weights_init)
136 | # self.up_layers.apply(weights_init)
137 |
138 | def forward(self, x):
139 |         """Applies network layers and ops on input image(s) x.
140 | 
141 |         Args:
142 |             x: input image or batch of images.
143 |                 Shape: [batch, 3, 300, 300] (or [batch, 3, 512, 512]
144 |                 for the larger configuration).
145 | 
146 |         Return:
147 |             A tuple (arm_sources, odm_sources):
148 |             arm_sources:
149 |                 backbone feature maps that feed the anchor refinement
150 |                 module (ARM) heads.
151 |             odm_sources:
152 |                 the FPN-refined pyramid that feeds the ODM heads.
153 |         """
154 | arm_sources = list()
155 |
156 | for i in range(23):
157 | x = self.vgg[i](x)
158 | #38x38
159 | c2 = x
160 | c2 = self.L2Norm_4_3(c2)
161 | arm_sources.append(c2)
162 |
163 | for k in range(23, len(self.vgg)):
164 | x = self.vgg[k](x)
165 | #19x19
166 | c3 = x
167 | c3 = self.L2Norm_5_3(c3)
168 | arm_sources.append(c3)
169 |
170 | # 10x10
171 | x = F.relu(self.extras[0](x), inplace=True)
172 | x = F.relu(self.extras[1](x), inplace=True)
173 | c4 = x
174 | arm_sources.append(c4)
175 |
176 | # 5x5
177 | x = F.relu(self.extras[2](x), inplace=True)
178 | x = F.relu(self.extras[3](x), inplace=True)
179 | c5 = x
180 | arm_sources.append(c5)
181 |
182 | if len(self.extras) > 4:
183 | x = F.relu(self.extras[4](x), inplace=True)
184 | x = F.relu(self.extras[5](x), inplace=True)
185 | c6 = x
186 | arm_sources.append(c6)
187 |
188 | # x = self.last_layer_trans(x)
189 | # odm_sources.append(x)
190 |
191 | # trans_layer_list = list()
192 |
193 | # for(p, t) in zip(arm_sources, self.trans_layers):
194 | # trans_layer_list.append(t(p))
195 |
196 | # trans_layer_list.reverse()
197 | # for (t, u, l) in zip(trans_layer_list, self.up_layers, self.latent_layers):
198 | # x = F.relu(l(F.relu(u(x)+ t, inplace=True)), inplace=True)
199 | # odm_sources.append(x)
200 |
201 | # odm_sources.reverse()
202 | odm_sources = self.fpn(arm_sources)
203 | return arm_sources, odm_sources
204 |
205 |
206 | def refine_vgg(size, channel_size='48'):
207 | return VGG16Extractor(size)
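
A quick check of the L2Norm behaviour used above: each spatial position is scaled to unit channel-wise L2 norm and multiplied by the learned per-channel weight, so right after construction every position's norm equals the initial `scale`. A standalone sketch, assuming the repo root is on PYTHONPATH:

import torch
from models.refine_vgg import L2Norm

layer = L2Norm(512, 10)
x = torch.randn(2, 512, 5, 5)
with torch.no_grad():
    y = layer(x)
    norms = y.pow(2).sum(dim=1).sqrt()
print(norms.min().item(), norms.max().item())  # both ~10.0 at init
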
--------------------------------------------------------------------------------
/models/resnet.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from models.model_helper import weights_init
9 |
10 |
11 | def add_extras(size, in_channel, batch_norm=False):
12 | # Extra layers added to resnet for feature scaling
13 | layers = []
14 | layers += [nn.Conv2d(in_channel, 256, kernel_size=1, stride=1)]
15 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
16 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
17 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
18 | if size == '300':
19 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
20 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=0)]
21 | else:
22 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
23 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
24 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
25 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
26 |
27 | return layers
28 |
29 |
30 | def conv3x3(in_planes, out_planes, stride=1):
31 | "3x3 convolution with padding"
32 | return nn.Conv2d(
33 | in_planes,
34 | out_planes,
35 | kernel_size=3,
36 | stride=stride,
37 | padding=1,
38 | bias=False)
39 |
40 |
41 | class BasicBlock(nn.Module):
42 | expansion = 1
43 |
44 | def __init__(self, inplanes, planes, stride=1, downsample=None):
45 | super(BasicBlock, self).__init__()
46 | self.conv1 = conv3x3(inplanes, planes, stride)
47 | self.bn1 = nn.BatchNorm2d(planes)
48 | self.relu = nn.ReLU(inplace=True)
49 | self.conv2 = conv3x3(planes, planes)
50 | self.bn2 = nn.BatchNorm2d(planes)
51 | self.downsample = downsample
52 | self.stride = stride
53 |
54 | def forward(self, x):
55 | residual = x
56 |
57 | out = self.conv1(x)
58 | out = self.bn1(out)
59 | out = self.relu(out)
60 |
61 | out = self.conv2(out)
62 | out = self.bn2(out)
63 |
64 | if self.downsample is not None:
65 | residual = self.downsample(x)
66 | out += residual
67 | out = self.relu(out)
68 |
69 | return out
70 |
71 |
72 | class Bottleneck(nn.Module):
73 | expansion = 4
74 |
75 | def __init__(self, inplanes, planes, stride=1, downsample=None):
76 | super(Bottleneck, self).__init__()
77 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
78 | self.bn1 = nn.BatchNorm2d(planes)
79 | self.conv2 = nn.Conv2d(
80 | planes,
81 | planes,
82 | kernel_size=3,
83 | stride=stride,
84 | padding=1,
85 | bias=False)
86 | self.bn2 = nn.BatchNorm2d(planes)
87 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
88 | self.bn3 = nn.BatchNorm2d(planes * 4)
89 | self.relu = nn.ReLU(inplace=True)
90 | self.downsample = downsample
91 | self.stride = stride
92 |
93 | def forward(self, x):
94 | residual = x
95 |
96 | out = self.conv1(x)
97 | out = self.bn1(out)
98 | out = self.relu(out)
99 |
100 | out = self.conv2(out)
101 | out = self.bn2(out)
102 | out = self.relu(out)
103 |
104 | out = self.conv3(out)
105 | out = self.bn3(out)
106 |
107 | if self.downsample is not None:
108 | residual = self.downsample(x)
109 |
110 | out += residual
111 | out = self.relu(out)
112 |
113 | return out
114 |
115 |
116 | class SSDResnet(nn.Module):
117 | def __init__(self, block, num_blocks, size):
118 | super(SSDResnet, self).__init__()
119 | self.inplanes = 64
120 |
121 | self.conv1 = nn.Conv2d(
122 | 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
123 | self.bn1 = nn.BatchNorm2d(64)
124 |
125 | # Bottom-up layers
126 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
127 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
128 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
129 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
130 | self.inchannel = block.expansion * 512
131 | self.extras = nn.ModuleList(add_extras(str(size), self.inchannel))
132 | self.smooth1 = nn.Conv2d(
133 | self.inchannel, 512, kernel_size=3, stride=1, padding=1)
134 | self._init_modules()
135 |
136 | def _make_layer(self, block, planes, blocks, stride=1):
137 | downsample = None
138 | if stride != 1 or self.inplanes != planes * block.expansion:
139 | downsample = nn.Sequential(
140 | nn.Conv2d(
141 | self.inplanes,
142 | planes * block.expansion,
143 | kernel_size=1,
144 | stride=stride,
145 | bias=False),
146 | nn.BatchNorm2d(planes * block.expansion),
147 | )
148 |
149 | layers = []
150 | layers.append(block(self.inplanes, planes, stride, downsample))
151 | self.inplanes = planes * block.expansion
152 | for i in range(1, blocks):
153 | layers.append(block(self.inplanes, planes))
154 |
155 | return nn.Sequential(*layers)
156 |
157 | def _init_modules(self):
158 | self.extras.apply(weights_init)
159 | self.smooth1.apply(weights_init)
160 |
161 | def forward(self, x):
162 | # Bottom-up
163 | c1 = F.relu(self.bn1(self.conv1(x)))
164 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
165 | c2 = self.layer1(c1)
166 | c3 = self.layer2(c2)
167 | c4 = self.layer3(c3)
168 | c5 = self.layer4(c4)
169 | x = c5
170 | c5_ = self.smooth1(c5)
171 | sources = [c3, c4, c5_]
172 | for k, v in enumerate(self.extras):
173 | x = F.relu(v(x), inplace=True)
174 | if k % 2 == 1:
175 | sources.append(x)
176 | return sources
177 |
178 |
179 | def SSDResnet18(size, channel_size='48'):
180 | return SSDResnet(BasicBlock, [2, 2, 2, 2], size)
181 |
182 |
183 | def SSDResnet34(size, channel_size='48'):
184 | return SSDResnet(BasicBlock, [3, 4, 6, 3], size)
185 |
186 |
187 | def SSDResnet50(size, channel_size='48'):
188 | return SSDResnet(Bottleneck, [3, 4, 6, 3], size)
189 |
190 |
191 | def SSDResnet101(size, channel_size='48'):
192 | return SSDResnet(Bottleneck, [3, 4, 23, 3], size)
193 |
194 |
195 | def SSDResnet152(size, channel_size='48'):
196 | return SSDResnet(Bottleneck, [3, 8, 36, 3], size)
197 |
198 |
199 | if __name__ == "__main__":
200 | import os
201 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
202 | model3 = SSDResnet18(size=300)
203 | with torch.no_grad():
204 | model3.eval()
205 | x = torch.randn(1, 3, 300, 300)
206 | model3.cuda()
207 | model3(x.cuda())
208 | import time
209 | st = time.time()
210 | for i in range(1):
211 | model3(x.cuda())
212 | print(time.time() - st)
213 | # print(model3(x))
214 |
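
The `self.inplanes = planes * block.expansion` update in `_make_layer` is what chains the stage widths: with Bottleneck blocks (expansion 4) the input channels grow 64 -> 256 -> 512 -> 1024 -> 2048, which is why `self.inchannel` above ends up as 2048 for SSDResnet50/101/152. A tiny standalone trace of that bookkeeping:

expansion = 4                        # Bottleneck.expansion
inplanes = 64
for planes in [64, 128, 256, 512]:   # layer1..layer4
    print('stage in:', inplanes, '-> out:', planes * expansion)
    inplanes = planes * expansion
# stage in: 64 -> out: 256 ... stage in: 1024 -> out: 2048
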
--------------------------------------------------------------------------------
/models/vgg.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | import torch.nn.init as init
9 | from models.model_helper import weights_init
10 |
11 |
12 | class L2Norm(nn.Module):
13 | def __init__(self, n_channels, scale):
14 | super(L2Norm, self).__init__()
15 | self.n_channels = n_channels
16 |         self.gamma = scale
17 | self.eps = 1e-10
18 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
19 | self.reset_parameters()
20 |
21 | def reset_parameters(self):
22 | init.constant_(self.weight, self.gamma)
23 |
24 | def forward(self, x):
25 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
26 | x = x / norm
27 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(
28 | x) * x
29 | return out
30 |
31 |
32 | # This function is derived from torchvision VGG make_layers()
33 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
34 |
35 |
36 | def vgg(cfg, i, batch_norm=False):
37 | layers = []
38 | in_channels = i
39 | for v in cfg:
40 | if v == 'M':
41 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
42 | elif v == 'C':
43 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
44 | else:
45 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
46 | if batch_norm:
47 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
48 | else:
49 | layers += [conv2d, nn.ReLU(inplace=True)]
50 | in_channels = v
51 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
52 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
53 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
54 | layers += [
55 | pool5, conv6,
56 | nn.ReLU(inplace=True), conv7,
57 | nn.ReLU(inplace=True)
58 | ]
59 | return layers
60 |
61 |
62 | extras_cfg = {
63 | '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
64 | '512': [
65 | 256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128, 'S',
66 | 256
67 | ],
68 | }
69 |
70 | base = {
71 | '300': [
72 | 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
73 | 512, 512, 512
74 | ],
75 | '512': [
76 | 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
77 | 512, 512, 512
78 | ],
79 | }
80 |
81 |
82 | def add_extras(cfg, i, batch_norm=False):
83 | # Extra layers added to VGG for feature scaling
84 | layers = []
85 | in_channels = i
86 | flag = False
87 | for k, v in enumerate(cfg):
88 | if in_channels != 'S':
89 | if v == 'S':
90 | layers += [
91 | nn.Conv2d(
92 | in_channels,
93 | cfg[k + 1],
94 | kernel_size=(1, 3)[flag],
95 | stride=2,
96 | padding=1)
97 | ]
98 | else:
99 | layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
100 | flag = not flag
101 | in_channels = v
102 | return layers
103 |
104 |
105 | class VGG16Extractor(nn.Module):
106 | def __init__(self, size):
107 | super(VGG16Extractor, self).__init__()
108 | self.vgg = nn.ModuleList(vgg(base[str(size)], 3))
109 | self.L2Norm = L2Norm(512, 20)
110 | self.extras = nn.ModuleList(add_extras(extras_cfg[str(size)], 1024))
111 | self._init_modules()
112 |
113 | def _init_modules(self):
114 | self.extras.apply(weights_init)
115 | self.vgg.apply(weights_init)
116 |
117 | def forward(self, x):
118 |         """Applies network layers and ops on input image(s) x.
119 | 
120 |         Args:
121 |             x: input image or batch of images.
122 |                 Shape: [batch, 3, 300, 300] (or [batch, 3, 512, 512]
123 |                 for the larger configuration).
124 | 
125 |         Return:
126 |             A list of source feature maps consumed by the multibox
127 |             loc/conf heads:
128 | 
129 |                 1: conv4_3 output after L2Norm scaling
130 |                    (38 x 38 for a 300 x 300 input)
131 |                 2: fc7 output (19 x 19)
132 |                 3+: the output of every second extra layer
133 |                    (10 x 10, 5 x 5, 3 x 3, 1 x 1 for the 300
134 |                    configuration)
135 |         """
136 | sources = list()
137 |
138 | # apply vgg up to conv4_3 relu
139 | for k in range(23):
140 | x = self.vgg[k](x)
141 |
142 | s = self.L2Norm(x)
143 | sources.append(s)
144 |
145 | # apply vgg up to fc7
146 | for k in range(23, len(self.vgg)):
147 | x = self.vgg[k](x)
148 | sources.append(x)
149 |
150 | # apply extra layers and cache source layer outputs
151 | for k, v in enumerate(self.extras):
152 | x = F.relu(v(x), inplace=True)
153 | if k % 2 == 1:
154 | sources.append(x)
155 | return sources
156 |
157 |
158 | def SSDVgg(size, channel_size='48'):
159 | return VGG16Extractor(size)
160 |
161 |
162 | if __name__ == "__main__":
163 | import os
164 | os.environ["CUDA_VISIBLE_DEVICES"] = "3"
165 | with torch.no_grad():
166 | model3 = VGG16Extractor(300)
167 | model3.eval()
168 | x = torch.randn(16, 3, 300, 300)
169 | model3.cuda()
170 | model3(x.cuda())
171 | import time
172 | st = time.time()
173 | for i in range(1000):
174 | model3(x.cuda())
175 | print(time.time() - st)
176 |
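
In `add_extras`, the `flag` toggle alternates kernel sizes 1 and 3, while an 'S' entry marks a stride-2 convolution whose output width is the next config value. Enumerating the built layers for the '300' config makes the pattern explicit (a standalone sketch, assuming the repo root is on PYTHONPATH):

from models.vgg import add_extras, extras_cfg

for layer in add_extras(extras_cfg['300'], 1024):
    print(layer.in_channels, '->', layer.out_channels,
          'k=%d s=%d' % (layer.kernel_size[0], layer.stride[0]))
# 1024->256 k1 s1, 256->512 k3 s2, 512->128 k1 s1, 128->256 k3 s2,
# 256->128 k1 s1, 128->256 k3 s1, 256->128 k1 s1, 128->256 k3 s1
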
--------------------------------------------------------------------------------
/models/weave_res.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | from models.model_helper import FpnAdapter, WeaveAdapter, weights_init
9 |
10 |
11 | def add_extras(size, in_channel, batch_norm=False):
12 | # Extra layers added to resnet for feature scaling
13 | layers = []
14 | layers += [nn.Conv2d(in_channel, 256, kernel_size=1, stride=1)]
15 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
16 | return layers
17 |
18 |
19 | def conv3x3(in_planes, out_planes, stride=1):
20 | "3x3 convolution with padding"
21 | return nn.Conv2d(
22 | in_planes,
23 | out_planes,
24 | kernel_size=3,
25 | stride=stride,
26 | padding=1,
27 | bias=False)
28 |
29 |
30 | class BasicBlock(nn.Module):
31 | expansion = 1
32 |
33 | def __init__(self, inplanes, planes, stride=1, downsample=None):
34 | super(BasicBlock, self).__init__()
35 | self.conv1 = conv3x3(inplanes, planes, stride)
36 | self.bn1 = nn.BatchNorm2d(planes)
37 | self.relu = nn.ReLU(inplace=True)
38 | self.conv2 = conv3x3(planes, planes)
39 | self.bn2 = nn.BatchNorm2d(planes)
40 | self.downsample = downsample
41 | self.stride = stride
42 |
43 | def forward(self, x):
44 | residual = x
45 |
46 | out = self.conv1(x)
47 | out = self.bn1(out)
48 | out = self.relu(out)
49 |
50 | out = self.conv2(out)
51 | out = self.bn2(out)
52 |
53 | if self.downsample is not None:
54 | residual = self.downsample(x)
55 | out += residual
56 | out = self.relu(out)
57 |
58 | return out
59 |
60 |
61 | class Bottleneck(nn.Module):
62 | expansion = 4
63 |
64 | def __init__(self, inplanes, planes, stride=1, downsample=None):
65 | super(Bottleneck, self).__init__()
66 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
67 | self.bn1 = nn.BatchNorm2d(planes)
68 | self.conv2 = nn.Conv2d(
69 | planes,
70 | planes,
71 | kernel_size=3,
72 | stride=stride,
73 | padding=1,
74 | bias=False)
75 | self.bn2 = nn.BatchNorm2d(planes)
76 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
77 | self.bn3 = nn.BatchNorm2d(planes * 4)
78 | self.relu = nn.ReLU(inplace=True)
79 | self.downsample = downsample
80 | self.stride = stride
81 |
82 | def forward(self, x):
83 | residual = x
84 |
85 | out = self.conv1(x)
86 | out = self.bn1(out)
87 | out = self.relu(out)
88 |
89 | out = self.conv2(out)
90 | out = self.bn2(out)
91 | out = self.relu(out)
92 |
93 | out = self.conv3(out)
94 | out = self.bn3(out)
95 |
96 | if self.downsample is not None:
97 | residual = self.downsample(x)
98 |
99 | out += residual
100 | out = self.relu(out)
101 |
102 | return out
103 |
104 |
105 | class WeaveResnet(nn.Module):
106 | def __init__(self, block, num_blocks, size):
107 | super(WeaveResnet, self).__init__()
108 | self.inplanes = 64
109 |
110 | self.conv1 = nn.Conv2d(
111 | 3, 64, kernel_size=7, stride=2, padding=3, bias=False)
112 | self.bn1 = nn.BatchNorm2d(64)
113 |
114 | # Bottom-up layers
115 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
116 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
117 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
118 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
119 | self.inchannel = block.expansion * 512
120 | self.extras = nn.ModuleList(add_extras(str(size), self.inchannel))
121 | self.smooth1 = nn.Conv2d(
122 | self.inchannel, 512, kernel_size=3, stride=1, padding=1)
123 | self.weave = WeaveAdapter([512, 1024, 512, 256], 4)
124 | self._init_modules()
125 |
126 | def _make_layer(self, block, planes, blocks, stride=1):
127 | downsample = None
128 | if stride != 1 or self.inplanes != planes * block.expansion:
129 | downsample = nn.Sequential(
130 | nn.Conv2d(
131 | self.inplanes,
132 | planes * block.expansion,
133 | kernel_size=1,
134 | stride=stride,
135 | bias=False),
136 | nn.BatchNorm2d(planes * block.expansion),
137 | )
138 |
139 | layers = []
140 | layers.append(block(self.inplanes, planes, stride, downsample))
141 | self.inplanes = planes * block.expansion
142 | for i in range(1, blocks):
143 | layers.append(block(self.inplanes, planes))
144 |
145 | return nn.Sequential(*layers)
146 |
147 | def _init_modules(self):
148 | self.extras.apply(weights_init)
149 | self.smooth1.apply(weights_init)
150 |
151 | def forward(self, x):
152 | # Bottom-up
153 | odm_sources = list()
154 | c1 = F.relu(self.bn1(self.conv1(x)))
155 | c1 = F.max_pool2d(c1, kernel_size=3, stride=2, padding=1)
156 | c2 = self.layer1(c1)
157 | c3 = self.layer2(c2)
158 | c4 = self.layer3(c3)
159 | c5 = self.layer4(c4)
160 | x = c5
161 | c5_ = self.smooth1(c5)
162 | arm_sources = [c3, c4, c5_]
163 | for k, v in enumerate(self.extras):
164 | x = F.relu(v(x), inplace=True)
165 | if k % 2 == 1:
166 | arm_sources.append(x)
167 | odm_sources = self.weave(arm_sources)
168 | return arm_sources, odm_sources
169 |
170 |
171 | def WeaveResnet50(size, channel_size='48'):
172 | return WeaveResnet(Bottleneck, [3, 4, 6, 3], size)
173 |
174 |
175 | def WeaveResnet101(size, channel_size='48'):
176 | return WeaveResnet(Bottleneck, [3, 4, 23, 3], size)
177 |
178 |
179 | def WeaveResnet152(size, channel_size='48'):
180 | return WeaveResnet(Bottleneck, [3, 8, 36, 3], size)
181 |
182 |
183 | if __name__ == "__main__":
184 | import os
185 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
186 | model = WeaveResnet50(size=300)
187 | print(model)
188 | with torch.no_grad():
189 | model.eval()
190 |         x = torch.randn(1, 3, 300, 300)
191 | model.cuda()
192 | model(x.cuda())
193 |
--------------------------------------------------------------------------------
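A quick sanity sketch for the module above, assuming the repository root is on PYTHONPATH. The forward pass returns a pair of lists (arm_sources, odm_sources); the channel counts follow raw arm channels [512, 1024, 512, 256], and exact spatial sizes depend on the input resolution:

    import torch
    from models.weave_res import WeaveResnet50

    net = WeaveResnet50(size=300).eval()
    with torch.no_grad():
        arm_sources, odm_sources = net(torch.randn(1, 3, 300, 300))
    print([tuple(f.shape) for f in arm_sources])  # c3, c4, smoothed c5, extra
    print([tuple(f.shape) for f in odm_sources])  # woven counterparts, same count
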
/models/weave_vgg.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Written by yq_yao
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | import torch.nn.init as init
9 | from models.model_helper import FpnAdapter, WeaveAdapter, weights_init, WeaveAdapter2
10 | # from model_helper import FpnAdapter, WeaveAdapter, weights_init, WeaveAdapter2
11 |
12 | class L2Norm(nn.Module):
13 | def __init__(self, n_channels, scale):
14 | super(L2Norm, self).__init__()
15 | self.n_channels = n_channels
16 |         self.gamma = scale
17 | self.eps = 1e-10
18 | self.weight = nn.Parameter(torch.Tensor(self.n_channels))
19 | self.reset_parameters()
20 |
21 | def reset_parameters(self):
22 | init.constant_(self.weight, self.gamma)
23 |
24 | def forward(self, x):
25 | norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
26 | x = x / norm
27 | out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(
28 | x) * x
29 | return out
30 |
31 |
32 | # This function is derived from torchvision VGG make_layers()
33 | # https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
34 |
35 |
36 | def vgg(cfg, i, batch_norm=False):
37 | layers = []
38 | in_channels = i
39 | for v in cfg:
40 | if v == 'M':
41 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
42 | elif v == 'C':
43 | layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
44 | else:
45 | conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
46 | if batch_norm:
47 | layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
48 | else:
49 | layers += [conv2d, nn.ReLU(inplace=True)]
50 | in_channels = v
51 | pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
52 | conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
53 | conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
54 | layers += [
55 | pool5, conv6,
56 | nn.ReLU(inplace=True), conv7,
57 | nn.ReLU(inplace=True)
58 | ]
59 | return layers
60 |
61 |
62 | base = {
63 | '300': [
64 | 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
65 | 512, 512, 512
66 | ],
67 | '512': [
68 | 64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
69 | 512, 512, 512
70 | ],
71 | }
72 |
73 |
74 | def add_extras(size):
75 | layers = []
76 | layers += [nn.Conv2d(1024, 256, kernel_size=1, stride=1)]
77 | layers += [nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)]
78 | layers += [nn.Conv2d(256, 128, kernel_size=1, stride=1)]
79 | layers += [nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)]
80 |
81 | return layers
82 |
83 |
84 | class VGG16Extractor(nn.Module):
85 | def __init__(self, size, channel_size='48'):
86 | super(VGG16Extractor, self).__init__()
87 | self.vgg = nn.ModuleList(vgg(base[str(size)], 3))
88 | self.extras = nn.ModuleList(add_extras(str(size)))
89 | self.L2Norm_4_3 = L2Norm(512, 10)
90 | self.L2Norm_5_3 = L2Norm(1024, 8)
91 | self.raw_channels = [512, 1024, 256, 256]
92 | self.weave_add_channels = [(48, 48), (48, 48), (48, 48), (48, 48)]
93 | self.weave_channels = [256, 256, 256, 256]
94 | # self.weave = WeaveAdapter([512, 1024, 256, 256], 4)
95 | self.weave = WeaveAdapter2(self.raw_channels, self.weave_add_channels, self.weave_channels)
96 | self._init_modules()
97 |
98 | def _init_modules(self):
99 | self.extras.apply(weights_init)
100 |
101 | def forward(self, x):
102 |         """Applies network layers and ops on input image(s) x.
103 | 
104 |         Args:
105 |             x: input image or batch of images. Shape: [batch,3,300,300].
106 | 
107 |         Return:
108 |             A tuple (arm_sources, odm_sources):
109 |             arm_sources:
110 |                 list of raw feature maps taken from the VGG trunk
111 |                 (conv4_3, fc7) and the extra layers, used by the anchor
112 |                 refinement module (ARM).
113 |             odm_sources:
114 |                 list of feature maps produced by the weave adapter from
115 |                 arm_sources, used by the object detection module (ODM).
116 |         """
117 | arm_sources = list()
118 | odm_sources = list()
119 |
120 | for i in range(23):
121 | x = self.vgg[i](x)
122 | #38x38
123 | c2 = x
124 | c2 = self.L2Norm_4_3(c2)
125 | arm_sources.append(c2)
126 |
127 | for k in range(23, len(self.vgg)):
128 | x = self.vgg[k](x)
129 | #19x19
130 | c3 = x
131 | c3 = self.L2Norm_5_3(c3)
132 | arm_sources.append(c3)
133 |
134 | # 10x10
135 | x = F.relu(self.extras[0](x), inplace=True)
136 | x = F.relu(self.extras[1](x), inplace=True)
137 | c4 = x
138 | arm_sources.append(c4)
139 |
140 | # 5x5
141 | x = F.relu(self.extras[2](x), inplace=True)
142 | x = F.relu(self.extras[3](x), inplace=True)
143 | c5 = x
144 | arm_sources.append(c5)
145 |
146 | if len(self.extras) > 4:
147 | x = F.relu(self.extras[4](x), inplace=True)
148 | x = F.relu(self.extras[5](x), inplace=True)
149 | c6 = x
150 | arm_sources.append(c6)
151 | odm_sources = self.weave(arm_sources)
152 | return arm_sources, odm_sources
153 |
154 |
155 | def weave_vgg(size, channel_size='48'):
156 | return VGG16Extractor(size)
157 |
158 |
159 | if __name__ == "__main__":
160 | import os
161 | os.environ["CUDA_VISIBLE_DEVICES"] = "1"
162 | model = weave_vgg(size=300)
163 | print(model)
164 | with torch.no_grad():
165 | model.eval()
166 |         x = torch.randn(1, 3, 300, 300)
167 | model.cuda()
168 | model(x.cuda())
--------------------------------------------------------------------------------
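The L2Norm layer above rescales each spatial position to a fixed L2 norm across channels, which keeps the early VGG feature maps on a comparable scale to the later ones. A quick check at initialization, where every channel weight equals the constructor scale (a minimal sketch, assuming the repository root is on PYTHONPATH):

    import torch
    from models.weave_vgg import L2Norm

    l2n = L2Norm(512, 10)
    y = l2n(torch.randn(2, 512, 38, 38))
    # per-position channel norms should all be close to the scale (10.0)
    print(y.pow(2).sum(dim=1).sqrt().mean().item())
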
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yqyao/SSD_Pytorch/6060bbb650e7a1df7c12d7c9650a38eaba4ab6a8/utils/__init__.py
--------------------------------------------------------------------------------
/utils/averageMeter.py:
--------------------------------------------------------------------------------
1 | class AverageMeter(object):
2 | """Computes and stores the average and current value"""
3 |
4 | def __init__(self):
5 | self.reset()
6 |
7 | def reset(self):
8 | self.val = 0
9 | self.avg = 0
10 | self.sum = 0
11 | self.count = 0
12 |
13 | def update(self, val, n=1):
14 | self.val = val
15 | self.sum += val * n
16 | self.count += n
17 | self.avg = self.sum / self.count
--------------------------------------------------------------------------------
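Typical AverageMeter usage in a training loop: track the running, count-weighted average of a per-batch loss. The values below are illustrative only:

    from utils.averageMeter import AverageMeter

    losses = AverageMeter()
    for batch_loss, batch_size in [(0.9, 32), (0.7, 32), (0.5, 16)]:
        losses.update(batch_loss, n=batch_size)
    print(losses.val)            # 0.5, the last value seen
    print(round(losses.avg, 3))  # 0.74, average weighted by batch size
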
/utils/build.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 |
16 | def find_in_path(name, path):
17 | "Find a file in a search path"
18 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
19 | for dir in path.split(os.pathsep):
20 | binpath = pjoin(dir, name)
21 | if os.path.exists(binpath):
22 | return os.path.abspath(binpath)
23 | return None
24 |
25 |
26 | def locate_cuda():
27 | """Locate the CUDA environment on the system
28 |
29 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
30 | and values giving the absolute path to each directory.
31 |
32 | Starts by looking for the CUDAHOME env variable. If not found, everything
33 | is based on finding 'nvcc' in the PATH.
34 | """
35 |
36 | # first check if the CUDAHOME env variable is in use
37 | if 'CUDAHOME' in os.environ:
38 | home = os.environ['CUDAHOME']
39 | nvcc = pjoin(home, 'bin', 'nvcc')
40 | else:
41 | # otherwise, search the PATH for NVCC
42 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
43 | nvcc = find_in_path('nvcc',
44 | os.environ['PATH'] + os.pathsep + default_path)
45 | if nvcc is None:
46 | raise EnvironmentError(
47 | 'The nvcc binary could not be '
48 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME'
49 | )
50 | home = os.path.dirname(os.path.dirname(nvcc))
51 |
52 | cudaconfig = {
53 | 'home': home,
54 | 'nvcc': nvcc,
55 | 'include': pjoin(home, 'include'),
56 | 'lib64': pjoin(home, 'lib64')
57 | }
58 | for k, v in cudaconfig.items():
59 | if not os.path.exists(v):
60 | raise EnvironmentError(
61 | 'The CUDA %s path could not be located in %s' % (k, v))
62 |
63 | return cudaconfig
64 |
65 |
66 | CUDA = locate_cuda()
67 |
68 | # Obtain the numpy include directory. This logic works across numpy versions.
69 | try:
70 | numpy_include = np.get_include()
71 | except AttributeError:
72 | numpy_include = np.get_numpy_include()
73 |
74 |
75 | def customize_compiler_for_nvcc(self):
76 | """inject deep into distutils to customize how the dispatch
77 | to gcc/nvcc works.
78 |
79 | If you subclass UnixCCompiler, it's not trivial to get your subclass
80 | injected in, and still have the right customizations (i.e.
81 | distutils.sysconfig.customize_compiler) run on it. So instead of going
82 |     the OO route, I have this. Note, it's kind of like a weird functional
83 | subclassing going on."""
84 |
85 |     # tell the compiler it can process .cu files
86 | self.src_extensions.append('.cu')
87 |
88 |     # save references to the default compiler_so and _compile methods
89 | default_compiler_so = self.compiler_so
90 | super = self._compile
91 |
92 | # now redefine the _compile method. This gets executed for each
93 | # object but distutils doesn't have the ability to change compilers
94 | # based on source extension: we add it.
95 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
96 |         # print(extra_postargs)  # debug output
97 | if os.path.splitext(src)[1] == '.cu':
98 | # use the cuda for .cu files
99 | self.set_executable('compiler_so', CUDA['nvcc'])
100 | # use only a subset of the extra_postargs, which are 1-1 translated
101 | # from the extra_compile_args in the Extension class
102 | postargs = extra_postargs['nvcc']
103 | else:
104 | postargs = extra_postargs['gcc']
105 |
106 | super(obj, src, ext, cc_args, postargs, pp_opts)
107 | # reset the default compiler_so, which we might have changed for cuda
108 | self.compiler_so = default_compiler_so
109 |
110 | # inject our redefined _compile method into the class
111 | self._compile = _compile
112 |
113 |
114 | # run the customize_compiler
115 | class custom_build_ext(build_ext):
116 | def build_extensions(self):
117 | customize_compiler_for_nvcc(self.compiler)
118 | build_ext.build_extensions(self)
119 |
120 |
121 | ext_modules = [
122 | Extension(
123 | "nms.cpu_nms", ["nms/cpu_nms.pyx"],
124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
125 | include_dirs=[numpy_include]),
126 | Extension(
127 | 'nms.gpu_nms',
128 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
129 | library_dirs=[CUDA['lib64']],
130 | libraries=['cudart'],
131 | language='c++',
132 | runtime_library_dirs=[CUDA['lib64']],
133 | # this syntax is specific to this build system
134 | # we're only going to use certain compiler args with nvcc and not with gcc
135 |         # the implementation of this trick is in customize_compiler_for_nvcc() above
136 | extra_compile_args={
137 | 'gcc': ["-Wno-unused-function"],
138 | 'nvcc': [
139 | '-arch=sm_61', '--ptxas-options=-v', '-c',
140 | '--compiler-options', "'-fPIC'"
141 | ]
142 | },
143 | include_dirs=[numpy_include, CUDA['include']])
144 | ]
145 |
146 | setup(
147 | name='mot_utils',
148 | ext_modules=ext_modules,
149 | # inject our custom trigger
150 | cmdclass={'build_ext': custom_build_ext},
151 | )
152 |
--------------------------------------------------------------------------------
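This setup script compiles the CPU and GPU NMS extensions in place. Note the hard-coded -arch=sm_61 (Pascal, e.g. GTX 1080); adjust it to match the target GPU. A sketch of invoking the build from Python, equivalent to running it from a shell inside utils/ (assumes the working directory is the repository root):

    import subprocess

    # equivalent to: cd utils && python build.py build_ext --inplace
    subprocess.check_call(
        ["python", "build.py", "build_ext", "--inplace"], cwd="utils")
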
/utils/collections.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | """A simple attribute dictionary used for representing configuration options."""
16 |
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 | from __future__ import unicode_literals
21 |
22 |
23 | class AttrDict(dict):
24 |
25 | IMMUTABLE = '__immutable__'
26 |
27 | def __init__(self, *args, **kwargs):
28 | super(AttrDict, self).__init__(*args, **kwargs)
29 | self.__dict__[AttrDict.IMMUTABLE] = False
30 |
31 | def __getattr__(self, name):
32 | if name in self.__dict__:
33 | return self.__dict__[name]
34 | elif name in self:
35 | return self[name]
36 | else:
37 | raise AttributeError(name)
38 |
39 | def __setattr__(self, name, value):
40 | if not self.__dict__[AttrDict.IMMUTABLE]:
41 | if name in self.__dict__:
42 | self.__dict__[name] = value
43 | else:
44 | self[name] = value
45 | else:
46 | raise AttributeError(
47 | 'Attempted to set "{}" to "{}", but AttrDict is immutable'.
48 | format(name, value))
49 |
50 | def immutable(self, is_immutable):
51 | """Set immutability to is_immutable and recursively apply the setting
52 | to all nested AttrDicts.
53 | """
54 | self.__dict__[AttrDict.IMMUTABLE] = is_immutable
55 | # Recursively set immutable state
56 | for v in self.__dict__.values():
57 | if isinstance(v, AttrDict):
58 | v.immutable(is_immutable)
59 | for v in self.values():
60 | if isinstance(v, AttrDict):
61 | v.immutable(is_immutable)
62 |
63 | def is_immutable(self):
64 | return self.__dict__[AttrDict.IMMUTABLE]
65 |
--------------------------------------------------------------------------------
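A short example of AttrDict as a configuration container: keys are reachable as attributes, nested values can themselves be AttrDicts, and immutable(True) freezes the whole tree:

    from utils.collections import AttrDict

    cfg = AttrDict()
    cfg.TRAIN = AttrDict()
    cfg.TRAIN.BATCH_SIZE = 32
    print(cfg.TRAIN.BATCH_SIZE)    # 32; also cfg['TRAIN']['BATCH_SIZE']
    cfg.immutable(True)
    # cfg.TRAIN.BATCH_SIZE = 64    # would now raise AttributeError
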
/utils/get_class_map.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import argparse
4 | import os.path as osp
5 |
6 |
7 | def check_size(submit_file):
8 | max_size = 60 * 1024 * 1024
9 | if osp.getsize(submit_file) > max_size:
10 |         raise IOError(
11 |             "File size exceeds the specified maximum size, "
12 |             "which is 60M for the server."
13 |         )
14 |
15 |
16 | def parse_submission(submit_file):
17 | with open(submit_file, 'r') as f:
18 | lines = f.readlines()
19 | submit_dict = dict()
20 | final_dict = dict()
21 | splitlines = [x.strip().split(' ') for x in lines]
22 | for idx, val in enumerate(splitlines):
23 | cls = str(int(float(val[1])))
24 | if cls not in submit_dict:
25 | submit_dict[cls] = list()
26 | final_dict[cls] = dict()
27 | submit_dict[cls].append(
28 | [val[0], val[2], val[3], val[4], val[5], val[6]])
29 | for k, v in submit_dict.items():
30 | image_ids = [x[0] for x in v]
31 | confidence = np.array([float(x[1]) for x in v])
32 | BB = np.array([[float(z) for z in x[2:]] for x in v])
33 | sorted_ind = np.argsort(-confidence)
34 | sorted_scores = np.sort(-confidence)
35 | BB = BB[sorted_ind, :]
36 | image_ids = [image_ids[x] for x in sorted_ind]
37 | final_dict[k]["image_ids"] = image_ids
38 | final_dict[k]["BB"] = np.array(BB)
39 | return final_dict
40 |
41 |
42 | def parse_gt_annotation(gt_file):
43 | with open(gt_file, 'r') as f:
44 | lines = f.readlines()
45 | info = [x.strip().split() for x in lines]
46 | gt = {}
47 | for item in info:
48 | img_id = item[0]
49 | obj_struct = {}
50 | obj_struct['class'] = item[1]
51 | obj_struct['bbox'] = [
52 | int(item[2]),
53 | int(item[3]),
54 | int(item[4]),
55 | int(item[5])
56 | ]
57 | if img_id not in gt:
58 | gt[img_id] = list()
59 | gt[img_id].append(obj_struct)
60 | return gt
61 |
62 |
63 | def get_class_recs(recs, classname):
64 | npos = 0
65 | class_recs = {}
66 | for key in recs.keys():
67 | R = [obj for obj in recs[key] if obj['class'] == classname]
68 | bbox = np.array([x['bbox'] for x in R])
69 | det = [False] * len(R)
70 | npos += len(R)
71 | class_recs[key] = {'bbox': bbox, 'det': det}
72 | return class_recs, npos
73 |
74 |
75 | def compute_ap(rec, prec):
76 | mrec = np.concatenate(([0.], rec, [1.]))
77 | mpre = np.concatenate(([0.], prec, [0.]))
78 | for i in range(mpre.size - 1, 0, -1):
79 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
80 | i = np.where(mrec[1:] != mrec[:-1])[0]
81 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
82 | return ap
83 |
84 |
85 | def eval(submit_file, gt_file, ovthresh, classname):
86 | recs = parse_gt_annotation(gt_file)
87 | submit_result = parse_submission(submit_file)
88 | # get one class result
89 | class_recs, npos = get_class_recs(recs, classname)
90 | image_ids = submit_result[classname]["image_ids"]
91 | BB = submit_result[classname]["BB"]
92 | nd = len(image_ids)
93 | tp = np.zeros(nd)
94 | fp = np.zeros(nd)
95 | for d in range(nd):
96 | if image_ids[d] not in recs.keys():
97 | raise KeyError(
98 | "Can not find image {} in the groundtruth file, did you submit the result file for the right dataset?"
99 | .format(image_ids[d]))
100 | for d in range(nd):
101 | R = class_recs[image_ids[d]]
102 | bb = BB[d, :].astype(float)
103 | ovmax = -np.inf
104 | BBGT = R['bbox'].astype(float)
105 | if BBGT.size > 0:
106 | ixmin = np.maximum(BBGT[:, 0], bb[0])
107 | iymin = np.maximum(BBGT[:, 1], bb[1])
108 | ixmax = np.minimum(BBGT[:, 2], bb[2])
109 | iymax = np.minimum(BBGT[:, 3], bb[3])
110 | iw = np.maximum(ixmax - ixmin + 1., 0.)
111 | ih = np.maximum(iymax - iymin + 1., 0.)
112 | inters = iw * ih
113 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
114 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
115 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
116 | overlaps = inters / uni
117 | ovmax = np.max(overlaps)
118 | jmax = np.argmax(overlaps)
119 | if ovmax > ovthresh:
120 | if not R['det'][jmax]:
121 | tp[d] = 1.
122 | R['det'][jmax] = 1
123 | else:
124 | fp[d] = 1.
125 | else:
126 | fp[d] = 1.
127 | fp = np.cumsum(fp)
128 | tp = np.cumsum(tp)
129 | rec = tp / float(npos)
130 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
131 | ap = compute_ap(rec, prec)
132 | return ap
133 |
134 |
135 | def result_eval(submit_file, gt, class_list):
136 | ove_aap = []
137 | for ove in np.arange(0.5, 1.0, 0.05):
138 | cls_aap = []
139 | for cls in class_list:
140 | ap = eval(submit_file, gt, ove, cls)
141 | cls_aap.append(ap)
142 | cls_mAP = np.average(cls_aap)
143 | print("thresh", round(ove, 3), "map", round(cls_mAP * 100, 3))
144 | ove_aap.append(cls_mAP)
145 | mAP = np.average(ove_aap) * 100
146 | return round(mAP, 3)
147 |
148 |
149 | if __name__ == '__main__':
150 | '''
151 | submit_file: image_id, class, score, xmin, ymin, xmax, ymax
152 | gt_file: image_id, class, xmin, ymin, xmax, ymax
153 | '''
154 | class_list = []
155 | for i in range(1, 61):
156 | class_list.append(str(i))
157 | submit_file = "./results/fpn_dcn_result.csv"
158 | gt_file = "./results/val_label.txt"
159 | check_size(submit_file)
160 | mAP = result_eval(submit_file, gt_file, class_list)
161 | out = {'Average AP': str(round(mAP, 3))}
162 | print(out)
--------------------------------------------------------------------------------
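A toy check of compute_ap above: one ground-truth box and two ranked detections, where only the top-ranked one is a true positive. Cumulative tp = [1, 1] and fp = [0, 1], so rec = [1.0, 1.0] and prec = [1.0, 0.5], and the interpolated AP is 1.0 (run from the repository root):

    import numpy as np
    from utils.get_class_map import compute_ap

    rec = np.array([1.0, 1.0])
    prec = np.array([1.0, 0.5])
    print(compute_ap(rec, prec))   # -> 1.0
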
/utils/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yqyao/SSD_Pytorch/6060bbb650e7a1df7c12d7c9650a38eaba4ab6a8/utils/nms/__init__.py
--------------------------------------------------------------------------------
/utils/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | cdef inline np.float32_t abs(np.float32_t a, np.float32_t b):
18 | return a - b if a >= b else b - a
19 |
20 | def get_iou_weights(np.ndarray[np.float32_t, ndim=1] ious, float threshold, float init_weight):
21 |
22 | cdef:
23 | int num = ious.shape[0]
24 | # np.ndarray[np.float32_t, ndim=1] out = np.zeros(num, dtype=np.float)
25 | int idx
26 | float iou
27 | float weight
28 |
29 | for idx, iou in enumerate(ious):
30 | weight = init_weight
31 | if iou > 0.0:
32 | if iou > threshold + 0.1:
33 | weight += 1.0
34 | elif iou < threshold - 0.1:
35 | weight += 1.0
36 | else:
37 | weight += 0.0
38 | ious[idx] = weight
39 | return ious
40 |
41 | def get_mask(np.ndarray[np.float32_t, ndim=1] ious, float threshold):
42 | cdef:
43 | int num = ious.shape[0]
44 | int idx = 0
45 | float distance
46 | float iou
47 | np.ndarray[np.int64_t, ndim=1] out = np.zeros((num), dtype=np.int64)
48 | for idx, iou in enumerate(ious):
49 | # if iou >= threshold:
50 | # distance = iou - threshold
51 | # if distance < 0.1:
52 | # out[idx] = 0
53 | # elif distance < 0.2:
54 | # out[idx] = 1
55 | # else:
56 | # out[idx] = 2
57 | # else:
58 | # distance = threshold - iou
59 | # if distance < 0.1:
60 | # out[idx] = 2
61 | # elif distance < 0.2:
62 | # out[idx] = 1
63 | # else:
64 | # out[idx] = 0
65 | distance = abs(iou, threshold)
66 | if distance < 0.1:
67 | # out[:,2] = 1
68 | out[idx] = 2
69 | elif distance < 0.2:
70 | # out[:,1] = 1
71 | out[idx] = 1
72 | else:
73 | # out[:,0] = 0
74 | out[idx] = 0
75 | return out
76 |
77 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh):
78 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
79 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
80 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
81 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
82 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
83 |
84 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
85 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
86 |
87 | cdef int ndets = dets.shape[0]
88 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
89 |         np.zeros((ndets), dtype=np.int_)
90 |
91 | # nominal indices
92 | cdef int _i, _j
93 | # sorted indices
94 | cdef int i, j
95 | # temp variables for box i's (the box currently under consideration)
96 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
97 | # variables for computing overlap with box j (lower scoring box)
98 | cdef np.float32_t xx1, yy1, xx2, yy2
99 | cdef np.float32_t w, h
100 | cdef np.float32_t inter, ovr
101 |
102 | keep = []
103 | for _i in range(ndets):
104 | i = order[_i]
105 | if suppressed[i] == 1:
106 | continue
107 | keep.append(i)
108 | ix1 = x1[i]
109 | iy1 = y1[i]
110 | ix2 = x2[i]
111 | iy2 = y2[i]
112 | iarea = areas[i]
113 | for _j in range(_i + 1, ndets):
114 | j = order[_j]
115 | if suppressed[j] == 1:
116 | continue
117 | xx1 = max(ix1, x1[j])
118 | yy1 = max(iy1, y1[j])
119 | xx2 = min(ix2, x2[j])
120 | yy2 = min(iy2, y2[j])
121 | w = max(0.0, xx2 - xx1 + 1)
122 | h = max(0.0, yy2 - yy1 + 1)
123 | inter = w * h
124 | ovr = inter / (iarea + areas[j] - inter)
125 | if ovr >= thresh:
126 | suppressed[j] = 1
127 |
128 | return keep
129 |
130 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
131 | cdef unsigned int N = boxes.shape[0]
132 | cdef float iw, ih, box_area
133 | cdef float ua
134 | cdef int pos = 0
135 | cdef float maxscore = 0
136 | cdef int maxpos = 0
137 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
138 |
139 | for i in range(N):
140 | maxscore = boxes[i, 4]
141 | maxpos = i
142 |
143 | tx1 = boxes[i,0]
144 | ty1 = boxes[i,1]
145 | tx2 = boxes[i,2]
146 | ty2 = boxes[i,3]
147 | ts = boxes[i,4]
148 |
149 | pos = i + 1
150 | # get max box
151 | while pos < N:
152 | if maxscore < boxes[pos, 4]:
153 | maxscore = boxes[pos, 4]
154 | maxpos = pos
155 | pos = pos + 1
156 |
157 | # add max box as a detection
158 | boxes[i,0] = boxes[maxpos,0]
159 | boxes[i,1] = boxes[maxpos,1]
160 | boxes[i,2] = boxes[maxpos,2]
161 | boxes[i,3] = boxes[maxpos,3]
162 | boxes[i,4] = boxes[maxpos,4]
163 |
164 | # swap ith box with position of max box
165 | boxes[maxpos,0] = tx1
166 | boxes[maxpos,1] = ty1
167 | boxes[maxpos,2] = tx2
168 | boxes[maxpos,3] = ty2
169 | boxes[maxpos,4] = ts
170 |
171 | tx1 = boxes[i,0]
172 | ty1 = boxes[i,1]
173 | tx2 = boxes[i,2]
174 | ty2 = boxes[i,3]
175 | ts = boxes[i,4]
176 |
177 | pos = i + 1
178 | # NMS iterations, note that N changes if detection boxes fall below threshold
179 | while pos < N:
180 | x1 = boxes[pos, 0]
181 | y1 = boxes[pos, 1]
182 | x2 = boxes[pos, 2]
183 | y2 = boxes[pos, 3]
184 | s = boxes[pos, 4]
185 |
186 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
187 | iw = (min(tx2, x2) - max(tx1, x1) + 1)
188 | if iw > 0:
189 | ih = (min(ty2, y2) - max(ty1, y1) + 1)
190 | if ih > 0:
191 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
192 | ov = iw * ih / ua #iou between max box and detection box
193 |
194 | if method == 1: # linear
195 | if ov > Nt:
196 | weight = 1 - ov
197 | else:
198 | weight = 1
199 | elif method == 2: # gaussian
200 | weight = np.exp(-(ov * ov)/sigma)
201 | else: # original NMS
202 | if ov > Nt:
203 | weight = 0
204 | else:
205 | weight = 1
206 |
207 | boxes[pos, 4] = weight*boxes[pos, 4]
208 |
209 | # if box score falls below threshold, discard the box by swapping with last box
210 | # update N
211 | if boxes[pos, 4] < threshold:
212 | boxes[pos,0] = boxes[N-1, 0]
213 | boxes[pos,1] = boxes[N-1, 1]
214 | boxes[pos,2] = boxes[N-1, 2]
215 | boxes[pos,3] = boxes[N-1, 3]
216 | boxes[pos,4] = boxes[N-1, 4]
217 | N = N - 1
218 | pos = pos - 1
219 |
220 | pos = pos + 1
221 |
222 | keep = [i for i in range(N)]
223 | return keep
224 |
--------------------------------------------------------------------------------
/utils/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/utils/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh,
17 | np.int32_t device_id=0):
18 | cdef int boxes_num = dets.shape[0]
19 | cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 |
--------------------------------------------------------------------------------
/utils/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
--------------------------------------------------------------------------------
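A tiny demonstration of py_cpu_nms: two heavily overlapping boxes plus one distant box; with thresh=0.5 the lower-scoring duplicate is suppressed (pure NumPy, no compiled extensions needed):

    import numpy as np
    from utils.nms.py_cpu_nms import py_cpu_nms

    dets = np.array([
        [10, 10, 50, 50, 0.9],      # kept (highest score)
        [12, 12, 52, 52, 0.8],      # IoU with box 0 ~ 0.83 > 0.5, suppressed
        [100, 100, 140, 140, 0.7],  # kept, no overlap
    ], dtype=np.float32)
    print(py_cpu_nms(dets, 0.5))    # -> [0, 2]
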
/utils/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | from .nms.cpu_nms import cpu_nms, cpu_soft_nms
9 | from .nms.gpu_nms import gpu_nms
10 |
11 | # def nms(dets, thresh, force_cpu=False):
12 | # """Dispatch to either CPU or GPU NMS implementations."""
13 |
14 | # if dets.shape[0] == 0:
15 | # return []
16 | # if cfg.USE_GPU_NMS and not force_cpu:
17 | # return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
18 | # else:
19 | # return cpu_nms(dets, thresh)
20 |
21 |
22 | def nms(dets, thresh, force_cpu=False):
23 | """Dispatch to either CPU or GPU NMS implementations."""
24 |
25 | if dets.shape[0] == 0:
26 | return []
27 | if force_cpu:
28 | #return cpu_soft_nms(dets, thresh, method = 0)
29 | return cpu_nms(dets, thresh)
30 | return gpu_nms(dets, thresh)
31 |
32 |
33 | def soft_nms(dets, Nt=0.3, sigma=0.5, thresh=0.001, method=1):
34 | """Dispatch to either CPU or GPU NMS implementations."""
35 |
36 | if dets.shape[0] == 0:
37 | return []
38 | return cpu_soft_nms(dets, sigma, Nt, thresh, method)
--------------------------------------------------------------------------------
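Usage sketch for the dispatchers above (requires the Cython extensions compiled via utils/build.py); dets is an (N, 5) float32 array of [x1, y1, x2, y2, score] rows:

    import numpy as np
    from utils.nms_wrapper import nms, soft_nms

    dets = np.array([[10, 10, 50, 50, 0.9],
                     [12, 12, 52, 52, 0.8]], dtype=np.float32)
    print(nms(dets, 0.45, force_cpu=True))           # hard NMS -> [0]
    print(soft_nms(dets.copy(), Nt=0.45, method=2))  # gaussian soft-NMS; rescores in place
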
/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 | """A simple timer."""
13 |
14 | def __init__(self):
15 | self.total_time = 0.
16 | self.calls = 0
17 | self.start_time = 0.
18 | self.diff = 0.
19 | self.average_time = 0.
20 |
21 | def tic(self):
22 |         # using time.time instead of time.clock because time.clock
23 | # does not normalize for multithreading
24 | self.start_time = time.time()
25 |
26 | def toc(self, average=True):
27 | self.diff = time.time() - self.start_time
28 | self.total_time += self.diff
29 | self.calls += 1
30 | self.average_time = self.total_time / self.calls
31 | if average:
32 | return self.average_time
33 | else:
34 | return self.diff
35 |
36 | def clear(self):
37 | self.total_time = 0.
38 | self.calls = 0
39 | self.start_time = 0.
40 | self.diff = 0.
41 | self.average_time = 0.
42 |
--------------------------------------------------------------------------------
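Typical tic/toc usage of the Timer above, in the style of the training and evaluation loops:

    import time
    from utils.timer import Timer

    timer = Timer()
    for _ in range(5):
        timer.tic()
        time.sleep(0.01)   # stand-in for a forward pass
        timer.toc()
    print(round(timer.average_time, 4))   # ~0.01 seconds per call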