├── .gitattributes
├── .gitignore
├── .idea
    ├── vcs.xml
    └── workspace.xml
├── README.md
├── demo.py
├── demo_simpler.py
├── experiments
    ├── 1.mp4
    ├── cfgs
    │   ├── fssd_lite_mobilenetv1_train_coco.yml
    │   ├── fssd_lite_mobilenetv1_train_voc.yml
    │   ├── fssd_lite_mobilenetv2_train_coco.yml
    │   ├── fssd_lite_mobilenetv2_train_voc.yml
    │   ├── fssd_resnet50_train_coco.yml
    │   ├── fssd_vgg16_train_coco.yml
    │   ├── fssd_vgg16_train_voc.yml
    │   ├── rfb_lite_mobilenetv1_train_coco.yml
    │   ├── rfb_lite_mobilenetv1_train_voc.yml
    │   ├── rfb_lite_mobilenetv2_train_coco.yml
    │   ├── rfb_lite_mobilenetv2_train_voc.yml
    │   ├── rfb_resnet50_train_coco.yml
    │   ├── rfb_resnet50_train_voc.yml
    │   ├── rfb_vgg16_train_coco.yml
    │   ├── rfb_vgg16_train_voc.yml
    │   ├── ssd_lite_mobilenetv1_train_coco.yml
    │   ├── ssd_lite_mobilenetv1_train_voc.yml
    │   ├── ssd_lite_mobilenetv2_train_coco.yml
    │   ├── ssd_lite_mobilenetv2_train_voc.yml
    │   ├── ssd_resnet50_train_coco.yml
    │   ├── ssd_resnet50_train_voc.yml
    │   ├── ssd_vgg16_train_coco.yml
    │   ├── ssd_vgg16_train_voc.yml
    │   ├── tests
    │   │   ├── fssd_darknet19_coco.yml
    │   │   ├── fssd_darknet53_coco.yml
    │   │   ├── fssd_darknet53_voc.yml
    │   │   ├── fssd_resnet50_train_voc.yml
    │   │   ├── rfb_darknet19_coco.yml
    │   │   ├── rfb_darknet53_coco.yml
    │   │   ├── rfb_darknet53_voc.yml
    │   │   ├── ssd_darknet19_coco.yml
    │   │   ├── ssd_darknet53_coco.yml
    │   │   ├── ssd_darknet53_voc.yml
    │   │   ├── ssd_resnet101_train_coco.yml
    │   │   ├── test.yml
    │   │   ├── yolo_v2_mobilenetv1_coco.yml
    │   │   ├── yolo_v2_mobilenetv1_voc.yml
    │   │   ├── yolo_v2_mobilenetv2_coco.yml
    │   │   ├── yolo_v2_mobilenetv2_voc.yml
    │   │   ├── yolo_v3_darknet53_coco.yml
    │   │   ├── yolo_v3_darknet53_voc.yml
    │   │   ├── yolo_v3_mobilenetv1_coco.yml
    │   │   ├── yolo_v3_mobilenetv1_voc.yml
    │   │   ├── yolo_v3_mobilenetv2_coco.yml
    │   │   └── yolo_v3_mobilenetv2_voc.yml
    │   ├── yolo_v2_darknet19_coco.yml
    │   ├── yolo_v2_darknet19_voc.yml
    │   ├── yolo_v2_mobilenetv1_coco.yml
    │   ├── yolo_v2_mobilenetv1_voc.yml
    │   ├── yolo_v2_mobilenetv2_coco.yml
    │   ├── yolo_v2_mobilenetv2_voc.yml
    │   ├── yolo_v3_darknet53_coco.yml
    │   ├── yolo_v3_darknet53_voc.yml
    │   ├── yolo_v3_mobilenetv1_coco.yml
    │   ├── yolo_v3_mobilenetv1_voc.yml
    │   ├── yolo_v3_mobilenetv2_coco.yml
    │   └── yolo_v3_mobilenetv2_voc.yml
    └── person.jpg
├── lib
    ├── __init__.py
    ├── dataset
    │   ├── __init__.py
    │   ├── coco.py
    │   ├── dataset_factory.py
    │   ├── voc.py
    │   └── voc_eval.py
    ├── layers
    │   ├── __init__.py
    │   ├── functions
    │   │   ├── __init__.py
    │   │   ├── detection.py
    │   │   └── prior_box.py
    │   └── modules
    │   │   ├── __init__.py
    │   │   ├── focal_loss.py
    │   │   ├── l2norm.py
    │   │   └── multibox_loss.py
    ├── modeling
    │   ├── __init__.py
    │   ├── model_builder.py
    │   ├── nets
    │   │   ├── __init__.py
    │   │   ├── darknet.py
    │   │   ├── mobilenet.py
    │   │   ├── resnet.py
    │   │   └── vgg.py
    │   └── ssds
    │   │   ├── __init__.py
    │   │   ├── fssd.py
    │   │   ├── fssd_lite.py
    │   │   ├── retina.py
    │   │   ├── rfb.py
    │   │   ├── rfb_lite.py
    │   │   ├── ssd.py
    │   │   ├── ssd_lite.py
    │   │   └── yolo.py
    ├── ssds.py
    ├── ssds_train.py
    └── utils
    │   ├── __init__.py
    │   ├── box_utils.py
    │   ├── build
    │       └── temp.linux-x86_64-3.6
    │       │   ├── nms
    │       │       ├── cpu_nms.o
    │       │       ├── gpu_nms.o
    │       │       └── nms_kernel.o
    │       │   └── pycocotools
    │       │       ├── _mask.o
    │       │       └── maskApi.o
    │   ├── config_parse.py
    │   ├── dark2pth.py
    │   ├── data_augment.py
    │   ├── data_augment_test.py
    │   ├── eval_utils.py
    │   ├── fp16_utils.py
    │   ├── nms
    │       ├── .gitignore
    │       ├── __init__.py
    │       ├── _ext
    │       │   ├── __init__.py
    │       │   └── nms
    │       │   │   └── __init__.py
    │       ├── build.py
    │       ├── make.sh
    │       ├── nms_gpu.py
    │       ├── nms_kernel.cu
    │       ├── nms_wrapper.py
    │       └── src
    │       │   ├── nms_cuda.c
    │       │   ├── nms_cuda.h
    │       │   ├── nms_cuda_kernel.cu
    │       │   ├── nms_cuda_kernel.cu.o
    │       │   └── nms_cuda_kernel.h
    │   ├── pycocotools
    │       ├── __init__.py
    │       ├── _mask.c
    │       ├── _mask.cpython-36m-x86_64-linux-gnu.so
    │       ├── _mask.pyx
    │       ├── coco.py
    │       ├── cocoeval.py
    │       └── mask.py
    │   ├── timer.py
    │   └── visualize_utils.py
├── requirements.txt
├── setup.py
├── test.py
├── time_benchmark.sh
└── train.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .vscode/
 2 | weights/
 3 | data/
 4 | data
 5 | experiments/models/
 6 | run.sh
 7 | __pycache__
 8 | *.pyc
 9 | log*
10 | .idea/
11 | saved_model/
12 | 
13 | vendor/
14 | 


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SSDs
 2 | 
 3 | This repo contains many object detection methods that aim at **single shot and real time** detection, so **speed** is the only thing we talk about. Currently we have several base networks that support the object detection task, such as MobileNet V2, ResNet, VGG, etc. Some SSD variants such as FSSD, RFBNet, Retina, and even YOLO are also included.
 4 | 
 5 | If you have any faster object detection methods, you are welcome to discuss them with me so we can merge them into the master branch.
 6 | 
 7 | 
 8 | 
 9 | 
10 | # Note
11 | 
12 | Work is still in progress. I will publish results and pretrained models after training on some datasets, and of course an out-of-the-box inference demo.
13 | 
14 | [updates]:
15 | 
16 | 2018.11.06: After training `fssd_mobilenetv2`, the inference code currently returns no results; I am still debugging where this logic error comes from.
17 | 
18 | 
19 | 
20 | # Train
21 | 
22 | All settings for the base nets and SSD variants are under `./experiments/cfgs/*.yml`; just edit one to match your environment and kick it off.
23 | 
24 | ```
25 | python3 train.py --cfg=./experiments/cfgs/rfb_lite_mobilenetv2_train_voc.yml
26 | ```
27 | 
28 | You can try training on COCO first and then switch to your custom dataset. If your COCO data lives in `/path/to/coco`, just link it into `./data/` so that COCO can be found inside `./data` (the provided configs expect `./data/COCO`). The same applies to VOC data (`./data/VOCdevkit`).
29 | 
30 | ![](https://s1.ax1x.com/2018/11/06/iTKMkV.png)
31 | 
32 | 
33 | 
34 | That is what training looks like. Afterwards I will upload some trained models.
35 | 
36 | 
37 | 
38 | ## Predict
39 | 
40 | To predict on a single image, you can find some useful code in `demo_simpler.py`. It is still under testing; I will upload some example images once prediction works.
41 | 
42 | 
43 | 
44 | 
45 | 
46 | ## Copyright
47 | 
48 | This version is maintained by myself and ported to the newest PyTorch version, along with some pretrained models and a speed-test benchmark. If you have any questions, or want to ask *Computer Vision* questions, you can contact me via **wechat**: `jintianiloveu`.
49 | 
50 | Some useful links and other repo:
51 | 
52 | 1. https://github.com/ShuangXieIrene/ssds.pytorch


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import sys
  3 | import os
  4 | import argparse
  5 | import numpy as np
  6 | import cv2
  7 | 
  8 | from lib.ssds import ObjectDetector
  9 | from lib.utils.config_parse import cfg_from_file
 10 | 
 11 | VOC_CLASSES = ( 'aeroplane', 'bicycle', 'bird', 'boat',
 12 |     'bottle', 'bus', 'car', 'cat', 'chair',
 13 |     'cow', 'diningtable', 'dog', 'horse',
 14 |     'motorbike', 'person', 'pottedplant',
 15 |     'sheep', 'sofa', 'train', 'tvmonitor')
 16 | 
 17 | def parse_args():
 18 |     """
 19 |     Parse input arguments
 20 |     """
 21 |     parser = argparse.ArgumentParser(description='Demo a ssds.pytorch network')
 22 |     parser.add_argument('--cfg', dest='confg_file',
 23 |             help='the address of optional config file', default=None, type=str, required=True)
 24 |     parser.add_argument('--demo', dest='demo_file',
 25 |             help='the address of the demo file', default=None, type=str, required=True)
 26 |     parser.add_argument('-t', '--type', dest='type',
 27 |             help='the type of the demo file, could be "image", "video", "camera" or "time", default is "image"', default='image', type=str)
 28 |     parser.add_argument('-d', '--display', dest='display',
 29 |             help='whether display the detection result, default is True', default=True, type=bool)
 30 |     parser.add_argument('-s', '--save', dest='save',
 31 |             help='whether write the detection result, default is False', default=False, type=bool)  
 32 | 
 33 |     if len(sys.argv) == 1:
 34 |         parser.print_help()
 35 |         sys.exit(1)
 36 | 
 37 |     args = parser.parse_args()
 38 |     return args
 39 | 
 40 | 
 41 | COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
 42 | FONT = cv2.FONT_HERSHEY_SIMPLEX
 43 | 
 44 | def demo(args, image_path):
 45 |     # 1. load the configure file
 46 |     cfg_from_file(args.confg_file)
 47 | 
 48 |     # 2. load detector based on the configure file
 49 |     object_detector = ObjectDetector()
 50 | 
 51 |     # 3. load image
 52 |     image = cv2.imread(image_path)
 53 | 
 54 |     # 4. detect
 55 |     _labels, _scores, _coords = object_detector.predict(image)
 56 | 
 57 |     # 5. draw bounding box on the image
 58 |     for labels, scores, coords in zip(_labels, _scores, _coords):
 59 |         cv2.rectangle(image, (int(coords[0]), int(coords[1])), (int(coords[2]), int(coords[3])), COLORS[labels % 3], 2)
 60 |         cv2.putText(image, '{label}: {score:.3f}'.format(label=VOC_CLASSES[labels], score=scores), (int(coords[0]), int(coords[1])), FONT, 0.5, COLORS[labels % 3], 2)
 61 |     
 62 |     # 6. visualize result
 63 |     if args.display is True:
 64 |         cv2.imshow('result', image)
 65 |         cv2.waitKey(0)
 66 | 
 67 |     # 7. write result
 68 |     if args.save is True:
 69 |         path, _ = os.path.splitext(image_path)
 70 |         cv2.imwrite(path + '_result.jpg', image)
 71 |     
 72 | 
 73 | def demo_live(args, video_path):
 74 |     # 1. load the configure file
 75 |     cfg_from_file(args.confg_file)
 76 | 
 77 |     # 2. load detector based on the configure file
 78 |     object_detector = ObjectDetector()
 79 | 
 80 |     # 3. load video
 81 |     video = cv2.VideoCapture(video_path)
 82 | 
 83 |     index = -1
 84 |     while(video.isOpened()):
 85 |         index = index + 1
 86 |         sys.stdout.write('Process image: {} \r'.format(index))
 87 |         sys.stdout.flush()
 88 | 
 89 |         # 4. read image
 90 |         flag, image = video.read()
 91 |         if flag == False:
 92 |             print("Can not read image in Frame : {}".format(index))
 93 |             break
 94 | 
 95 |         # 5. detect
 96 |         _labels, _scores, _coords = object_detector.predict(image)
 97 | 
 98 |         # 6. draw bounding box on the image
 99 |         for labels, scores, coords in zip(_labels, _scores, _coords):
100 |             cv2.rectangle(image, (int(coords[0]), int(coords[1])), (int(coords[2]), int(coords[3])), COLORS[labels % 3], 2)
101 |             cv2.putText(image, '{label}: {score:.3f}'.format(label=VOC_CLASSES[labels], score=scores), (int(coords[0]), int(coords[1])), FONT, 0.5, COLORS[labels % 3], 2)
102 |     
103 |         # 7. visualize result
104 |         if args.display is True:
105 |             cv2.imshow('result', image)
106 |             cv2.waitKey(33)
107 | 
108 |         # 8. write result
109 |         if args.save is True:
110 |             path, _ = os.path.splitext(video_path)
111 |             path = path + '_result'
112 |             if not os.path.exists(path):
113 |                 os.mkdir(path)
114 |             cv2.imwrite(path + '/{}.jpg'.format(index), image)        
115 | 
116 | 
def time_benchmark(args, image_path):
    """Measure per-stage detector latency on a single image.

    Runs ``warmup`` untimed predictions followed by ``time_iter`` recorded
    ones, prints intermediate timings every 20 iterations and finally the
    average of each stage.

    Args:
        args: parsed CLI namespace; only ``confg_file`` is read.
        image_path: path of the image fed to the detector on every iteration.

    Raises:
        IOError: if the image cannot be read.
    """
    # 1. load the configure file
    cfg_from_file(args.confg_file)

    # 2. load detector based on the configure file
    object_detector = ObjectDetector()

    # 3. load image (cv2.imread returns None on failure)
    image = cv2.imread(image_path)
    if image is None:
        raise IOError('Can not read image: {}'.format(image_path))

    # 4. time test
    warmup = 20
    time_iter = 100
    print('Warmup the detector...')
    for _ in range(warmup):
        object_detector.predict(image, check_time=True)

    # Exactly time_iter samples are recorded. The previous `i > warmup` test
    # kept only time_iter - 1 samples while still dividing by time_iter,
    # which understated every average.
    _t = list()
    for i in range(1, time_iter + 1):
        _, _, _, (total_time, preprocess_time, net_forward_time, detect_time, output_time) \
            = object_detector.predict(image, check_time=True)
        _t.append([total_time, preprocess_time, net_forward_time, detect_time, output_time])
        if i % 20 == 0:
            # '/' replaces the former '\{' which is an invalid escape sequence.
            print('In {}/{}, total time: {} \n preprocess: {} \n net_forward: {} \n detect: {} \n output: {}'.format(
                i, time_iter, total_time, preprocess_time, net_forward_time, detect_time, output_time
            ))
    total_time, preprocess_time, net_forward_time, detect_time, output_time = np.mean(_t, axis=0)
    print('In average, total time: {} \n preprocess: {} \n net_forward: {} \n detect: {} \n output: {}'.format(
        total_time, preprocess_time, net_forward_time, detect_time, output_time
    ))
145 | 
146 |     
# Script entry point: dispatch on the requested demo type.
if __name__ == '__main__':
    args = parse_args()
    if args.type == 'image':
        demo(args, args.demo_file)
    elif args.type == 'video':
        demo_live(args, args.demo_file)
    elif args.type == 'camera':
        # a camera source is addressed by its integer device index
        demo_live(args, int(args.demo_file))
    elif args.type == 'time':
        time_benchmark(args, args.demo_file)
    else:
        # The exception used to be constructed but never raised, so an
        # unknown --type made the script exit silently with status 0.
        raise AssertionError('type is not correct')
159 | 


--------------------------------------------------------------------------------
/demo_simpler.py:
--------------------------------------------------------------------------------
 1 | """
 2 | inference on trained models
 3 | 
 4 | only a simple config file needs to be provided
 5 | 
 6 | """
 7 | import sys
 8 | import os
 9 | import numpy as np
10 | import cv2
11 | 
12 | from lib.ssds import ObjectDetector
13 | from lib.utils.config_parse import cfg_from_file
14 | import argparse
15 | 
16 | img_f = 'experiments/person.jpg'
17 | 
18 | 
def parse_args():
    """Parse the command line of the simplified demo.

    Returns:
        argparse.Namespace whose ``cfg`` attribute holds the config file path
        (a bundled FSSD-lite MobileNetV2 VOC config by default).
    """
    default_cfg = 'experiments/cfgs/fssd_lite_mobilenetv2_train_voc.yml'
    cli = argparse.ArgumentParser(description='Demo a ssds.pytorch network')
    cli.add_argument(
        '--cfg',
        default=default_cfg,
        help='the address of optional config file',
    )
    return cli.parse_args()
25 | 
26 | 
def predict():
    """Detect objects in the bundled sample image and print the raw output.

    The config named on the command line is loaded first, then a detector is
    built, the image at ``img_f`` is read and the predicted labels, scores
    and coordinates are printed.
    """
    cli_args = parse_args()
    cfg_from_file(cli_args.cfg)

    # the detector is created only after the config has been loaded
    detector = ObjectDetector()
    frame = cv2.imread(img_f)

    result = detector.predict(frame)
    found_labels, found_scores, found_coords = result
    report = 'labels: {}\nscores: {}\ncoords: {}'.format(
        found_labels, found_scores, found_coords)
    print(report)


if __name__ == '__main__':
    predict()


--------------------------------------------------------------------------------
/experiments/1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/experiments/1.mp4


--------------------------------------------------------------------------------
/experiments/cfgs/fssd_lite_mobilenetv1_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd_lite
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[[5, 11, 13], [256, 512, 1024]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 200
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 32
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 97
26 | 
27 | TEST:
28 |   BATCH_SIZE: 48
29 |   TEST_SCOPE: [196, 200]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'coco'
43 |   DATASET_DIR: './data/COCO'
44 |   TRAIN_SETS: [['2017', 'train']]
45 |   TEST_SETS:  [['2017', 'val']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_mobilenet_v1_coco'
49 | LOG_DIR: './experiments/models/fssd_mobilenet_v1_coco'
50 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v1_fssd_lite_coco_24.2.pth'
51 | PHASE: ['test']
52 | 


--------------------------------------------------------------------------------
/experiments/cfgs/fssd_lite_mobilenetv1_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd_lite
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[[5, 11, 13], [256, 512, 1024]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 300
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 32
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 100
26 | 
27 | TEST:
28 |   BATCH_SIZE: 48
29 |   TEST_SCOPE: [285, 300]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'voc'
43 |   DATASET_DIR: './data/VOCdevkit'
44 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
45 |   TEST_SETS:  [['2007', 'test']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_mobilenet_v1_voc'
49 | LOG_DIR: './experiments/models/fssd_mobilenet_v1_voc'
50 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v1_fssd_lite_voc_78.4.pth'
51 | PHASE: ['test']
52 | 


--------------------------------------------------------------------------------
/experiments/cfgs/fssd_lite_mobilenetv2_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd_lite
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[[6, 13, 17], [32, 96, 320]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [256, 256, 256, 256, 128, 128]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 200
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 4
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 100
26 | 
27 | TEST:
28 |   BATCH_SIZE: 48
29 |   TEST_SCOPE: [188, 200]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'coco'
43 |   DATASET_DIR: './data/COCO'
44 |   TRAIN_SETS: [['2017', 'train']]
45 |   TEST_SETS:  [['2017', 'val']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_mobilenet_v2_coco'
49 | LOG_DIR: './experiments/models/fssd_mobilenet_v2_coco'
50 | RESUME_CHECKPOINT: './saved_model/fssd_mobilenet_v2_coco/fssd_lite_mobilenet_v2_voc_epoch_290.pth'
51 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/fssd_lite_mobilenetv2_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd_lite
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[[6, 13, 17], [32, 96, 320]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [256, 256, 256, 256, 128, 128]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 300
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 32
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 100
26 | 
27 | TEST:
28 |   BATCH_SIZE: 48
29 |   TEST_SCOPE: [288, 300]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'voc'
43 |   DATASET_DIR: './data/VOCdevkit'
44 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
45 |   TEST_SETS:  [['2007', 'val']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_mobilenet_v2_voc'
49 | LOG_DIR: './experiments/models/fssd_mobilenet_v2_voc'
50 | 
51 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v2_fssd_lite_voc_76.7.pth'
52 | PHASE: ['train']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/fssd_resnet50_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd
 3 |   NETS: resnet_50
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[[10, 16, 'S'], [512, 1024, 512]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 100
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 28
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 10
26 | 
27 | TEST:
28 |   BATCH_SIZE: 64
29 |   TEST_SCOPE: [90, 100]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'coco'
43 |   DATASET_DIR: './data/COCO'
44 |   TRAIN_SETS: [['2017', 'train']]
45 |   TEST_SETS:  [['2017', 'val']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_resnet50_coco'
49 | LOG_DIR: './experiments/models/fssd_resnet50_coco'
50 | RESUME_CHECKPOINT: './weights/fssd/resnet50_fssd_coco_27.2.pth'
51 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/fssd_vgg16_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd
 3 |   NETS: vgg16
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[[22, 34, 'S'], [512, 1024, 512]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 100
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 28
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 30
26 | 
27 | TEST:
28 |   BATCH_SIZE: 64
29 |   TEST_SCOPE: [90, 100]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'coco'
43 |   DATASET_DIR: './data/COCO'
44 |   TRAIN_SETS: [['2017', 'train']]
45 |   TEST_SETS:  [['2017', 'val']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_vgg16_coco'
49 | LOG_DIR: './experiments/models/fssd_vgg16_coco'
50 | RESUME_CHECKPOINT: './weights/fssd/vgg16_fssd_coco_24.5.pth'
51 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/fssd_vgg16_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd
 3 |   NETS: vgg16
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[[22, 34, 'S'], [512, 1024, 512]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 30
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 32
16 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf,transforms,pyramids'
17 |   RESUME_SCOPE: 'base,norm,extras,loc,conf,transforms,pyramids'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.004
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [27, 30]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
44 |   TEST_SETS:  [['2007', 'test']]
45 | 
46 | EXP_DIR: './experiments/models/fssd_vgg16_voc'
47 | LOG_DIR: './experiments/models/fssd_vgg16_voc'
48 | RESUME_CHECKPOINT: './weights/fssd/vgg16_fssd_voc_77.8.pth'
49 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_lite_mobilenetv1_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb_lite
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[11, 13, 'RBF', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 51
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.004
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 25
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   # TEST_SCOPE: [45, 50]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_mobilenet_v1_coco'
48 | LOG_DIR: './experiments/models/rfb_mobilenet_v1_coco'
49 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v1_rfb_lite_coco_19.1.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_lite_mobilenetv1_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb_lite
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[11, 13, 'RBF', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 51
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.004
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 25
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'voc'
41 |   DATASET_DIR: './data/VOCdevkit'
42 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 | 
45 | EXP_DIR: './experiments/models/rfb_mobilenet_v1_voc'
46 | LOG_DIR: './experiments/models/rfb_mobilenet_v1_voc'
47 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v1_rfb_lite_voc_73.7.pth'
48 | PHASE: ['test']
49 | 


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_lite_mobilenetv2_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb_lite
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[13, 17, 'RBF', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 50
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.002
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 0
25 | 
26 | TEST:
27 |   BATCH_SIZE: 48
28 |   TEST_SCOPE: [45, 50]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_mobilenet_v2_coco'
48 | LOG_DIR: './experiments/models/rfb_mobilenet_v2_coco'
49 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v2_rfb_lite_coco_18.5.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_lite_mobilenetv2_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb_lite
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[13, 17, 'RBF', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 300
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 0
25 | TEST:
26 |   BATCH_SIZE: 48
27 |   TEST_SCOPE: [270, 300]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'voc'
41 |   DATASET_DIR: './data/VOCdevkit'
42 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 | 
45 | EXP_DIR: './experiments/models/rfb_mobilenet_v2_voc'
46 | LOG_DIR: './experiments/models/rfb_mobilenet_v2_voc'
47 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v2_rfb_lite_voc_73.4.pth'
48 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_resnet50_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb
 3 |   NETS: resnet_50
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[10, 16, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 28
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 30
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [93, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_resnet50_coco'
48 | LOG_DIR: './experiments/models/rfb_resnet50_coco'
49 | RESUME_CHECKPOINT: './weights/rfb/resnet50_rfb_coco_26.5.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_resnet50_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb
 3 |   NETS: resnet_50
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[10, 16, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 50
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [90, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
44 |   TEST_SETS:  [['2007', 'test']]
45 | 
46 | EXP_DIR: './experiments/models/rfb_resnet50_voc'
47 | LOG_DIR: './experiments/models/rfb_resnet50_voc'
48 | RESUME_CHECKPOINT: './weights/rfb/resnet50_rfb_voc_81.2.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_vgg16_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb
 3 |   NETS: vgg16
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[22, 34, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 24
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 60
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   # TEST_SCOPE: [95, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_vgg16_coco'
48 | LOG_DIR: './experiments/models/rfb_vgg16_coco'
49 | RESUME_CHECKPOINT: './weights/rfb/vgg16_rfb_coco_25.5.pth'
50 | PHASE: ['test']
51 | 


--------------------------------------------------------------------------------
/experiments/cfgs/rfb_vgg16_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb
 3 |   NETS: vgg16
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[22, 34, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 24
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 60
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   # TEST_SCOPE: [95, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
44 |   TEST_SETS:  [['2007', 'test']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_vgg16_voc'
48 | LOG_DIR: './experiments/models/rfb_vgg16_voc'
49 | RESUME_CHECKPOINT: './weights/rfb/vgg16_rfb_voc_80.5.pth'
50 | PHASE: ['test']
51 | 


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_lite_mobilenetv1_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd_lite
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[11, 13, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 0
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [90, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_mobilenet_v1_coco'
48 | LOG_DIR: './experiments/models/ssd_mobilenet_v1_coco'
49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v1_ssd_lite_coco_18.8.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_lite_mobilenetv1_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd_lite
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[11, 13, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 300
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.004
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 100
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [285, 300]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
44 |   TEST_SETS:  [['2007', 'test']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_mobilenet_v1_voc'
48 | LOG_DIR: './experiments/models/ssd_mobilenet_v1_voc'
49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v1_ssd_lite_voc_72.7.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_lite_mobilenetv2_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd_lite
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[13, 17, 'S', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 200
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 95
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [196, 200]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_mobilenet_v2_coco'
48 | LOG_DIR: './experiments/models/ssd_mobilenet_v2_coco'
49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v2_ssd_lite_coco_18.5.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_lite_mobilenetv2_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd_lite
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[13, 17, 'S', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 300
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 100
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [285, 300]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
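44 |   TEST_SETS:  [['2007', 'val']]  # NOTE(review): the other VOC configs evaluate on ['2007', 'test']; 'val' overlaps the 2007 'trainval' set listed in TRAIN_SETS - confirm this is intended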
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_mobilenet_v2_voc'
48 | LOG_DIR: './experiments/models/ssd_mobilenet_v2_voc'
49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v2_ssd_lite_voc_73.2.pth'
50 | PHASE: ['train']


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_resnet50_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: resnet_50
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[10, 16, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 200
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.004
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 10
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [190, 200]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_resnet50_coco'
48 | LOG_DIR: './experiments/models/ssd_resnet50_coco'
49 | RESUME_CHECKPOINT: './weights/ssd/resnet50_ssd_coco_25.1.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_resnet50_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: resnet_50
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[10, 16, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 200
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 50
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [190, 200]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
44 |   TEST_SETS:  [['2007', 'test']]
45 | 
46 | EXP_DIR: './experiments/models/ssd_resnet50_voc'
47 | LOG_DIR: './experiments/models/ssd_resnet50_voc'
48 | RESUME_CHECKPOINT: './weights/ssd/resnet50_ssd_voc_79.7.pth'
49 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_vgg16_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: vgg16
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[22, 34, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 60
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.004
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 5
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [55, 60]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_vgg16_coco'
48 | LOG_DIR: './experiments/models/ssd_vgg16_coco'
49 | RESUME_CHECKPOINT: './weights/ssd/vgg16_ssd_coco_24.4.pth'
50 | PHASE: ['test']
51 | 


--------------------------------------------------------------------------------
/experiments/cfgs/ssd_vgg16_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: vgg16
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[22, 34, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 2
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 4
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.004
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 | 
28 | MATCHER:
29 |   MATCHED_THRESHOLD: 0.5
30 |   UNMATCHED_THRESHOLD: 0.5
31 |   NEGPOS_RATIO: 3
32 | 
33 | POST_PROCESS:
34 |   SCORE_THRESHOLD: 0.01
35 |   IOU_THRESHOLD: 0.6
36 |   MAX_DETECTIONS: 100
37 | 
38 | DATASET:
39 |   DATASET: 'voc'
40 |   DATASET_DIR: './data/VOCdevkit'
41 |   # TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
42 |   TRAIN_SETS: [['2007', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 | 
45 | EXP_DIR: './experiments/models/ssd_vgg16_voc'
46 | LOG_DIR: './experiments/models/ssd_vgg16_voc'
47 | RESUME_CHECKPOINT: './weights/ssd/vgg16_reducedfc.pth'
48 | PHASE: ['train']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/fssd_darknet19_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd
 3 |   NETS: darknet_19
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[[8, 12, 16], [256, 512, 1024]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 60
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 32
16 |   TRAINABLE_SCOPE: 'norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 0
26 | 
27 | TEST:
28 |   BATCH_SIZE: 64
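29 |   TEST_SCOPE: [91, 100]  # NOTE(review): this range lies beyond TRAIN.MAX_EPOCHS (60) in this file - confirm which epochs are meant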
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'coco'
43 |   DATASET_DIR: './data/COCO'
44 |   TRAIN_SETS: [['2017', 'train']]
45 |   TEST_SETS:  [['2017', 'val']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_darknet_19_coco'
49 | LOG_DIR: './experiments/models/fssd_darknet_19_coco'
50 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_21.6.pth'
51 | PHASE: ['train']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/fssd_darknet53_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[[14, 23, 28], [256, 512, 1024]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 100
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 16
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 60
26 | 
27 | TEST:
28 |   BATCH_SIZE: 64
29 |   TEST_SCOPE: [100, 100]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'coco'
43 |   DATASET_DIR: './data/COCO'
44 |   TRAIN_SETS: [['2017', 'train']]
45 |   TEST_SETS:  [['2017', 'val']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_darknet_53_coco'
49 | LOG_DIR: './experiments/models/fssd_darknet_53_coco'
50 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth'
51 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/fssd_darknet53_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[[14, 23, 28], [256, 512, 1024]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 100
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 16
16 |   TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.001
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 60
26 | 
27 | TEST:
28 |   BATCH_SIZE: 64
29 |   TEST_SCOPE: [91, 100]
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'voc'
43 |   DATASET_DIR: './data/VOCdevkit'
44 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
45 |   TEST_SETS:  [['2007', 'test']]
46 |   PROB: 0.6
47 | 
48 | EXP_DIR: './experiments/models/fssd_darknet_53_voc'
49 | LOG_DIR: './experiments/models/fssd_darknet_53_voc'
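50 | RESUME_CHECKPOINT: './experiments/models/fssd_darknet_53_coco/fssd_darknet_53_coco_epoch_98.pth'  # NOTE(review): path points into the COCO experiment dir while this config is VOC (NUM_CLASSES: 21, PHASE: test) - confirm the checkpoint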
51 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/fssd_resnet50_train_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: fssd
 3 |   NETS: resnet_50
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[[10, 16, 'S'], [512, 1024, 512]],
 7 |                   [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]]
 8 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 9 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
10 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
11 | 
12 | TRAIN:
13 |   MAX_EPOCHS: 50
14 |   CHECKPOINTS_EPOCHS: 1
15 |   BATCH_SIZE: 32
16 |   TRAINABLE_SCOPE: 'norm,extras,transforms,pyramids,loc,conf'
17 |   RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf'
18 |   OPTIMIZER:
19 |     OPTIMIZER: sgd
20 |     LEARNING_RATE: 0.004
21 |     MOMENTUM: 0.9
22 |     WEIGHT_DECAY: 0.0001
23 |   LR_SCHEDULER:
24 |     SCHEDULER: SGDR
25 |     WARM_UP_EPOCHS: 20
26 | 
27 | TEST:
28 |   BATCH_SIZE: 64
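29 |   TEST_SCOPE: [90, 100]  # NOTE(review): this range lies beyond TRAIN.MAX_EPOCHS (50) in this file - confirm which epochs are meant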
30 | 
31 | MATCHER:
32 |   MATCHED_THRESHOLD: 0.5
33 |   UNMATCHED_THRESHOLD: 0.5
34 |   NEGPOS_RATIO: 3
35 | 
36 | POST_PROCESS:
37 |   SCORE_THRESHOLD: 0.01
38 |   IOU_THRESHOLD: 0.6
39 |   MAX_DETECTIONS: 100
40 | 
41 | DATASET:
42 |   DATASET: 'voc'
43 |   DATASET_DIR: './data/VOCdevkit'
44 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
45 |   TEST_SETS:  [['2007', 'test']]
46 | 
47 | EXP_DIR: './experiments/models/fssd_resnet50_voc'
48 | LOG_DIR: './experiments/models/fssd_resnet50_voc'
49 | RESUME_CHECKPOINT: './weights/fssd/resnet50_fssd_coco_27.2.pth'
50 | PHASE: ['train']
51 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/rfb_darknet19_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb
 3 |   NETS: darknet_19
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[12, 16, 'RBF', 'RBF', 'S', 'S'], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 60
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [91, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_darknet_19_coco'
48 | LOG_DIR: './experiments/models/rfb_darknet_19_coco'
49 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_21.6.pth'
50 | PHASE: ['train']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/rfb_darknet53_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[23, 28, 'RBF', 'RBF', 'S', 'S'], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 16
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 55
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [96, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_darknet_53_coco'
48 | LOG_DIR: './experiments/models/rfb_darknet_53_coco'
49 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/rfb_darknet53_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: rfb
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[23, 28, 'RBF', 'RBF', 'S', 'S'], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 55
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [91, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
44 |   TEST_SETS:  [['2007', 'test']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/rfb_darknet_53_voc'
48 | LOG_DIR: './experiments/models/rfb_darknet_53_voc'
49 | RESUME_CHECKPOINT: './experiments/models/rfb_darknet_53_coco/rfb_darknet_53_coco_epoch_100.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/ssd_darknet19_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: darknet_19
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[12, 16, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 60
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 0
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [91, 100]  # NOTE(review): range lies entirely above MAX_EPOCHS (60) in this file - confirm this is intended
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_darknet_19_coco'
48 | LOG_DIR: './experiments/models/ssd_darknet_19_coco'
49 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_21.6.pth'
50 | PHASE: ['train']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/ssd_darknet53_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[23, 28, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 60
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [91, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_darknet_53_coco'
48 | LOG_DIR: './experiments/models/ssd_darknet_53_coco'
49 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/ssd_darknet53_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[23, 28, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 60
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [91, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'voc'
42 |   DATASET_DIR: './data/VOCdevkit'
43 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
44 |   TEST_SETS:  [['2007', 'test']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/ssd_darknet_53_voc'
48 | LOG_DIR: './experiments/models/ssd_darknet_53_voc'
49 | RESUME_CHECKPOINT: './experiments/models/ssd_darknet_53_coco/ssd_darknet_53_coco_epoch_100.pth'
50 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/ssd_resnet101_train_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd
 3 |   NETS: resnet_101
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[10, 33, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]]
 7 |   STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]]
 8 |   SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 50
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.004
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 10
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'coco'
41 |   DATASET_DIR: './data/COCO'
42 |   TRAIN_SETS: [['2017', 'train']]
43 |   TEST_SETS:  [['2017', 'val']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/ssd_resnet101_coco'
47 | LOG_DIR: './experiments/models/ssd_resnet101_coco'
48 | RESUME_CHECKPOINT: './weights/resnet101-5d3b4d8f.pth'
49 | PHASE: ['train']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/test.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: ssd_lite
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [300, 300]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[11, 13, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]]
 7 |   STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]]
 8 |   SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]]
 9 |   ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
10 | 
11 | TRAIN:
12 |   MAX_EPOCHS: 100
13 |   CHECKPOINTS_EPOCHS: 1
14 |   BATCH_SIZE: 32
15 |   TRAINABLE_SCOPE: 'base,norm,extras,loc,conf'
16 |   RESUME_SCOPE: 'base,norm,extras,loc,conf'
17 |   OPTIMIZER:
18 |     OPTIMIZER: sgd
19 |     LEARNING_RATE: 0.001
20 |     MOMENTUM: 0.9
21 |     WEIGHT_DECAY: 0.0001
22 |   LR_SCHEDULER:
23 |     SCHEDULER: SGDR
24 |     WARM_UP_EPOCHS: 0
25 | 
26 | TEST:
27 |   BATCH_SIZE: 64
28 |   TEST_SCOPE: [90, 100]
29 | 
30 | MATCHER:
31 |   MATCHED_THRESHOLD: 0.5
32 |   UNMATCHED_THRESHOLD: 0.5
33 |   NEGPOS_RATIO: 3
34 | 
35 | POST_PROCESS:
36 |   SCORE_THRESHOLD: 0.01
37 |   IOU_THRESHOLD: 0.6
38 |   MAX_DETECTIONS: 100
39 | 
40 | DATASET:
41 |   DATASET: 'coco'
42 |   DATASET_DIR: './data/COCO'
43 |   TRAIN_SETS: [['2017', 'train']]
44 |   TEST_SETS:  [['2017', 'val']]
45 |   PROB: 0.6
46 | 
47 | EXP_DIR: './experiments/models/test'
48 | LOG_DIR: './experiments/models/test'
49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v1_ssd_lite_coco_18.8.pth'
50 | PHASE: ['visualize']
51 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v2_mobilenetv1_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 32
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 60
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [92, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'coco'
41 |   DATASET_DIR: './data/COCO'
42 |   TRAIN_SETS: [['2017', 'train']]
43 |   TEST_SETS:  [['2017', 'val']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_coco'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_coco'
48 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v1_rfb_lite_coco_19.1.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v2_mobilenetv1_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 32
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 60
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [91, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'voc'
41 |   DATASET_DIR: './data/VOCdevkit'
42 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_voc'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_voc'
48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v2_coco_21.5.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v2_mobilenetv2_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['', '',13, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 28
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 55
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [91, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'coco'
41 |   DATASET_DIR: './data/COCO'
42 |   TRAIN_SETS: [['2017', 'train']]
43 |   TEST_SETS:  [['2017', 'val']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv2_coco'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_coco'
48 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v2_ssd_lite_coco_18.5.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v2_mobilenetv2_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['', '',13, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 16
14 |   TRAINABLE_SCOPE: 'extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 40
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [91, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'voc'
41 |   DATASET_DIR: './data/VOCdevkit'
42 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv2_voc'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_voc'
48 | RESUME_CHECKPOINT: './weights/yolo/yolo_v2_mobilenet_v2_coco_epoch_100.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v3_darknet53_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 200
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 12
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 80
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [191, 200]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'coco'
44 |   DATASET_DIR: './data/COCO'
45 |   TRAIN_SETS: [['2017', 'train']]
46 |   TEST_SETS:  [['2017', 'val']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_coco'
50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_coco'
51 | RESUME_CHECKPOINT: './weights/dark/yolov3.pth'
52 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v3_darknet53_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 16
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 45
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'voc'
44 |   DATASET_DIR: './data/VOCdevkit'
45 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
46 |   TEST_SETS:  [['2007', 'test']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_voc'
50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_voc'
51 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth'
52 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v3_mobilenetv1_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 16
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.004
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 60
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'coco'
44 |   DATASET_DIR: './data/COCO'
45 |   TRAIN_SETS: [['2017', 'train']]
46 |   TEST_SETS:  [['2017', 'val']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_coco'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv1_coco'
51 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v1_fssd_lite_coco_24.2.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v3_mobilenetv1_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 32
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 50
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'voc'
44 |   DATASET_DIR: './data/VOCdevkit'
45 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
46 |   TEST_SETS:  [['2007', 'test']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_voc'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv1_voc'
51 | RESUME_CHECKPOINT: './experiments/models/yolo_v3_mobilenetv1_coco/yolo_v3_mobilenet_v1_coco_epoch_100.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v3_mobilenetv2_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 16
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.004
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 50
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'coco'
44 |   DATASET_DIR: './data/COCO'
45 |   TRAIN_SETS: [['2017', 'train']]
46 |   TEST_SETS:  [['2017', 'val']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_coco'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_coco'
51 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v2_fssd_lite_coco_22.2.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/tests/yolo_v3_mobilenetv2_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 32
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 20
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'voc'
44 |   DATASET_DIR: './data/VOCdevkit'
45 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
46 |   TEST_SETS:  [['2007', 'test']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_voc'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_voc'
51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v3_coco_24.0.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v2_darknet19_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: darknet_19
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['', '',12, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 200
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 32
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 120
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [198, 200]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'coco'
41 |   DATASET_DIR: './data/COCO'
42 |   TRAIN_SETS: [['2017', 'train']]
43 |   TEST_SETS:  [['2017', 'val']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_darknet_19_coco'
47 | LOG_DIR: './experiments/models/yolo_v2_darknet_19_coco'
48 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_26.1.pth'
49 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v2_darknet19_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: darknet_19
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['', '',12, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 32
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 0
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [90, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'voc'
41 |   DATASET_DIR: './data/VOCdevkit'
42 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_darknet_19_voc'
47 | LOG_DIR: './experiments/models/yolo_v2_darknet_19_voc'
48 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_voc_78.4.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v2_mobilenetv1_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 32
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 60
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [92, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'coco'
41 |   DATASET_DIR: './data/COCO'
42 |   TRAIN_SETS: [['2017', 'train']]
43 |   TEST_SETS:  [['2017', 'val']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_coco'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_coco'
48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v2_coco_21.5.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v2_mobilenetv1_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 32
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 60
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [91, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'voc'
41 |   DATASET_DIR: './data/VOCdevkit'
42 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_voc'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_voc'
48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v2_voc_74.7.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v2_mobilenetv2_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['', '',13, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 28
14 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 55
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [91, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'coco'
41 |   DATASET_DIR: './data/COCO'
42 |   TRAIN_SETS: [['2017', 'train']]
43 |   TEST_SETS:  [['2017', 'val']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv2_coco'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_coco'
48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v2_coco_20.4.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v2_mobilenetv2_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v2
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['', '',13, '']], [[1024, 1024, 64, 1024]]]
 7 |   SIZES: [[416, 416]]
 8 |   ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]]
 9 | 
10 | TRAIN:
11 |   MAX_EPOCHS: 100
12 |   CHECKPOINTS_EPOCHS: 1
13 |   BATCH_SIZE: 16
14 |   TRAINABLE_SCOPE: 'extras,loc,conf'
15 |   RESUME_SCOPE: 'base,extras,loc,conf'
16 |   OPTIMIZER:
17 |     OPTIMIZER: sgd
18 |     LEARNING_RATE: 0.001
19 |     MOMENTUM: 0.9
20 |     WEIGHT_DECAY: 0.0001
21 |   LR_SCHEDULER:
22 |     SCHEDULER: SGDR
23 |     WARM_UP_EPOCHS: 40
24 | 
25 | TEST:
26 |   BATCH_SIZE: 64
27 |   TEST_SCOPE: [91, 100]
28 | 
29 | MATCHER:
30 |   MATCHED_THRESHOLD: 0.5
31 |   UNMATCHED_THRESHOLD: 0.5
32 |   NEGPOS_RATIO: 3
33 | 
34 | POST_PROCESS:
35 |   SCORE_THRESHOLD: 0.01
36 |   IOU_THRESHOLD: 0.6
37 |   MAX_DETECTIONS: 100
38 | 
39 | DATASET:
40 |   DATASET: 'voc'
41 |   DATASET_DIR: './data/VOCdevkit'
42 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
43 |   TEST_SETS:  [['2007', 'test']]
44 |   PROB: 0.6
45 | 
46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv2_voc'
47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_voc'
48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v2_voc_72.0.pth'
49 | PHASE: ['test']
50 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v3_darknet53_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 200
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 12
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 80
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [191, 200]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'coco'
44 |   DATASET_DIR: './data/COCO'
45 |   TRAIN_SETS: [['2017', 'train']]
46 |   TEST_SETS:  [['2017', 'val']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_coco'
50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_coco'
51 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth'
52 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v3_darknet53_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: darknet_53
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 16
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 45
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'voc'
44 |   DATASET_DIR: './data/VOCdevkit'
45 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
46 |   TEST_SETS:  [['2007', 'test']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_voc'
50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_voc'
51 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_voc_79.3.pth'
52 | PHASE: ['test']


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v3_mobilenetv1_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 16
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.004
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 60
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'coco'
44 |   DATASET_DIR: './data/COCO'
45 |   TRAIN_SETS: [['2017', 'train']]
46 |   TEST_SETS:  [['2017', 'val']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_coco'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv1_coco'
51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v3_coco_25.7.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v3_mobilenetv1_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v1
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 32
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 50
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'voc'
44 |   DATASET_DIR: './data/VOCdevkit'
45 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
46 |   TEST_SETS:  [['2007', 'test']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_voc'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv1_voc'
51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v3_voc_78.2.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v3_mobilenetv2_coco.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 81
 6 |   FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 16
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.004
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 50
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'coco'
44 |   DATASET_DIR: './data/COCO'
45 |   TRAIN_SETS: [['2017', 'train']]
46 |   TEST_SETS:  [['2017', 'val']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_coco'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_coco'
51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v3_coco_24.0.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/cfgs/yolo_v3_mobilenetv2_voc.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   SSDS: yolo_v3
 3 |   NETS: mobilenet_v2
 4 |   IMAGE_SIZE: [416, 416]
 5 |   NUM_CLASSES: 21
 6 |   FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']],
 7 |                   [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]]
 8 |   SIZES: [[416, 416], [416, 416], [416, 416]]
 9 |   ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]],
10 |                   [[0.072,0.146], [0.146,0.108], [0.141,0.286]],
11 |                   [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ]
12 | 
13 | TRAIN:
14 |   MAX_EPOCHS: 100
15 |   CHECKPOINTS_EPOCHS: 1
16 |   BATCH_SIZE: 32
17 |   TRAINABLE_SCOPE: 'base,extras,loc,conf'
18 |   RESUME_SCOPE: 'base,extras,loc,conf'
19 |   OPTIMIZER:
20 |     OPTIMIZER: sgd
21 |     LEARNING_RATE: 0.001
22 |     MOMENTUM: 0.9
23 |     WEIGHT_DECAY: 0.0001
24 |   LR_SCHEDULER:
25 |     SCHEDULER: SGDR
26 |     WARM_UP_EPOCHS: 20
27 | 
28 | TEST:
29 |   BATCH_SIZE: 64
30 |   TEST_SCOPE: [90, 100]
31 | 
32 | MATCHER:
33 |   MATCHED_THRESHOLD: 0.5
34 |   UNMATCHED_THRESHOLD: 0.5
35 |   NEGPOS_RATIO: 3
36 | 
37 | POST_PROCESS:
38 |   SCORE_THRESHOLD: 0.01
39 |   IOU_THRESHOLD: 0.6
40 |   MAX_DETECTIONS: 100
41 | 
42 | DATASET:
43 |   DATASET: 'voc'
44 |   DATASET_DIR: './data/VOCdevkit'
45 |   TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']]
46 |   TEST_SETS:  [['2007', 'test']]
47 |   PROB: 0.6
48 | 
49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_voc'
50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_voc'
51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v3_voc_75.8.pth'
52 | PHASE: ['test']
53 | 


--------------------------------------------------------------------------------
/experiments/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/experiments/person.jpg


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/__init__.py


--------------------------------------------------------------------------------
/lib/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/dataset/__init__.py


--------------------------------------------------------------------------------
/lib/dataset/dataset_factory.py:
--------------------------------------------------------------------------------
 1 | from lib.dataset import voc
 2 | from lib.dataset import coco
 3 | 
# Registry of supported datasets: maps the dataset name (the value looked up
# via cfg.DATASET in load_data below) to the class used to construct it.
dataset_map = {
                'voc': voc.VOCDetection,
                'coco': coco.COCODetection,
            }
 8 | 
 9 | def gen_dataset_fn(name):
10 |     """Returns a dataset func.
11 | 
12 |     Args:
13 |     name: The name of the dataset.
14 | 
15 |     Returns:
16 |     func: dataset_fn
17 | 
18 |     Raises:
19 |     ValueError: If network `name` is not recognized.
20 |     """
21 |     if name not in dataset_map:
22 |         raise ValueError('The dataset unknown %s' % name)
23 |     func = dataset_map[name]
24 |     return func
25 | 
26 | 
27 | import torch
28 | import numpy as np
29 | 
def detection_collate(batch):
    """Collate samples whose images carry a varying number of annotations.

    Every element of every sample is inspected: torch tensors are collected
    as images, numpy arrays are converted to float tensors and collected as
    annotations, and anything else is ignored.

    Arguments:
        batch: (iterable) samples, each an iterable of tensors / numpy arrays

    Return:
        A tuple containing:
            1) (tensor) the collected images stacked along a new dim 0
            2) (list of tensors) one float tensor per collected annotation
               array, in the order encountered
    """
    images = []
    annotations = []
    for sample in batch:
        for item in sample:
            if torch.is_tensor(item):
                images.append(item)
                continue
            if isinstance(item, np.ndarray):
                annotations.append(torch.from_numpy(item).float())

    return torch.stack(images, 0), annotations
53 | 
54 | from lib.utils.data_augment import preproc
55 | import torch.utils.data as data
56 | 
def load_data(cfg, phase):
    """Build the DataLoader for one phase of the pipeline.

    Args:
        cfg: Config object. Reads DATASET, DATASET_DIR, IMAGE_SIZE,
            PIXEL_MEANS and NUM_WORKERS for every phase; TRAIN_SETS, PROB and
            TRAIN_BATCH_SIZE for 'train'; TEST_SETS and TEST_BATCH_SIZE for
            the other phases.
        phase: One of 'train', 'eval', 'test' or 'visualize'.

    Returns:
        A torch DataLoader over the selected dataset. Only the 'train'
        loader shuffles; all loaders use ``detection_collate`` and pinned
        memory.

    Raises:
        ValueError: If ``phase`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError on return).
    """
    # Third argument handed to preproc for the phases that read TEST_SETS.
    # NOTE(review): the meaning of -1 / -2 / 1 is defined by
    # lib.utils.data_augment.preproc, which is not visible here.
    test_phase_prob = {'eval': -1, 'test': -2, 'visualize': 1}

    if phase == 'train':
        image_sets, prob = cfg.TRAIN_SETS, cfg.PROB
        batch_size, shuffle = cfg.TRAIN_BATCH_SIZE, True
    elif isinstance(phase, str) and phase in test_phase_prob:
        image_sets, prob = cfg.TEST_SETS, test_phase_prob[phase]
        batch_size, shuffle = cfg.TEST_BATCH_SIZE, False
    else:
        raise ValueError(
            "Unknown phase %r; expected 'train', 'eval', 'test' or 'visualize'"
            % (phase,))

    dataset = dataset_map[cfg.DATASET](
        cfg.DATASET_DIR, image_sets,
        preproc(cfg.IMAGE_SIZE, cfg.PIXEL_MEANS, prob))
    return data.DataLoader(dataset, batch_size, num_workers=cfg.NUM_WORKERS,
                           shuffle=shuffle, collate_fn=detection_collate,
                           pin_memory=True)
75 | 


--------------------------------------------------------------------------------
/lib/dataset/voc_eval.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | 
  7 | import xml.etree.ElementTree as ET
  8 | import os
  9 | import pickle
 10 | import numpy as np
 11 | import pdb
 12 | 
 13 | 
 14 | def parse_rec(filename):
 15 |     """ Parse a PASCAL VOC xml file """
 16 |     tree = ET.parse(filename)
 17 |     objects = []
 18 |     for obj in tree.findall('object'):
 19 |         obj_struct = {}
 20 |         obj_struct['name'] = obj.find('name').text
 21 |         obj_struct['pose'] = obj.find('pose').text
 22 |         obj_struct['truncated'] = int(obj.find('truncated').text)
 23 |         obj_struct['difficult'] = int(obj.find('difficult').text)
 24 |         bbox = obj.find('bndbox')
 25 |         obj_struct['bbox'] = [int(bbox.find('xmin').text),
 26 |                               int(bbox.find('ymin').text),
 27 |                               int(bbox.find('xmax').text),
 28 |                               int(bbox.find('ymax').text)]
 29 |         objects.append(obj_struct)
 30 | 
 31 |     return objects
 32 | 
 33 | 
 34 | 
 35 | def voc_ap(rec, prec, use_07_metric=False):
 36 |     """ ap = voc_ap(rec, prec, [use_07_metric])
 37 |     Compute VOC AP given precision and recall.
 38 |     If use_07_metric is true, uses the
 39 |     VOC 07 11 point method (default:False).
 40 |     """
 41 |     if use_07_metric:
 42 |         # 11 point metric
 43 |         ap = 0.
 44 |         for t in np.arange(0., 1.1, 0.1):
 45 |             if np.sum(rec >= t) == 0:
 46 |                 p = 0
 47 |             else:
 48 |                 p = np.max(prec[rec >= t])
 49 |             ap = ap + p / 11.
 50 |     else:
 51 |         # correct AP calculation
 52 |         # first append sentinel values at the end
 53 |         mrec = np.concatenate(([0.], rec, [1.]))
 54 |         mpre = np.concatenate(([0.], prec, [0.]))
 55 | 
 56 |         # compute the precision envelope
 57 |         for i in range(mpre.size - 1, 0, -1):
 58 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 59 | 
 60 |         # to calculate area under PR curve, look for points
 61 |         # where X axis (recall) changes value
 62 |         i = np.where(mrec[1:] != mrec[:-1])[0]
 63 | 
 64 |         # and sum (\Delta recall) * prec
 65 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 66 |     return ap
 67 | 
 68 | def voc_eval(detpath,
 69 |              annopath,
 70 |              imagesetfile,
 71 |              classname,
 72 |              cachedir,
 73 |              ovthresh=0.5,
 74 |              use_07_metric=False):
 75 |     """rec, prec, ap = voc_eval(detpath,
 76 |                                 annopath,
 77 |                                 imagesetfile,
 78 |                                 classname,
 79 |                                 [ovthresh],
 80 |                                 [use_07_metric])
 81 | 
 82 |     Top level function that does the PASCAL VOC evaluation.
 83 | 
 84 |     detpath: Path to detections
 85 |         detpath.format(classname) should produce the detection results file.
 86 |     annopath: Path to annotations
 87 |         annopath.format(imagename) should be the xml annotations file.
 88 |     imagesetfile: Text file containing the list of images, one image per line.
 89 |     classname: Category name (duh)
 90 |     cachedir: Directory for caching the annotations
 91 |     [ovthresh]: Overlap threshold (default = 0.5)
 92 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
 93 |         (default False)
 94 |     """
 95 |     # assumes detections are in detpath.format(classname)
 96 |     # assumes annotations are in annopath.format(imagename)
 97 |     # assumes imagesetfile is a text file with each line an image name
 98 |     # cachedir caches the annotations in a pickle file
 99 | 
100 |     # first load gt
101 |     if not os.path.isdir(cachedir):
102 |         os.mkdir(cachedir)
103 |     cachefile = os.path.join(cachedir, 'annots.pkl')
104 |     # read list of images
105 |     with open(imagesetfile, 'r') as f:
106 |         lines = f.readlines()
107 |     imagenames = [x.strip() for x in lines]
108 | 
109 |     if not os.path.isfile(cachefile):
110 |         # load annots
111 |         recs = {}
112 |         for i, imagename in enumerate(imagenames):
113 |             recs[imagename] = parse_rec(annopath.format(imagename))
114 |             if i % 100 == 0:
115 |                 print('Reading annotation for {:d}/{:d}'.format(
116 |                     i + 1, len(imagenames)))
117 |         # save
118 |         print('Saving cached annotations to {:s}'.format(cachefile))
119 |         with open(cachefile, 'wb') as f:
120 |             pickle.dump(recs, f)
121 |     else:
122 |         # load
123 |         with open(cachefile, 'rb') as f:
124 |             recs = pickle.load(f)
125 | 
126 |     # extract gt objects for this class
127 |     class_recs = {}
128 |     npos = 0
129 |     for imagename in imagenames:
130 |         R = [obj for obj in recs[imagename] if obj['name'] == classname]
131 |         bbox = np.array([x['bbox'] for x in R])
132 |         difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
133 |         det = [False] * len(R)
134 |         npos = npos + sum(~difficult)
135 |         class_recs[imagename] = {'bbox': bbox,
136 |                                  'difficult': difficult,
137 |                                  'det': det}
138 | 
139 |     # read dets
140 |     detfile = detpath.format(classname)
141 |     with open(detfile, 'r') as f:
142 |         lines = f.readlines()
143 | 
144 |     splitlines = [x.strip().split(' ') for x in lines]
145 |     image_ids = [x[0] for x in splitlines]
146 |     confidence = np.array([float(x[1]) for x in splitlines])
147 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
148 | 
149 |         # sort by confidence
150 |     sorted_ind = np.argsort(-confidence)
151 |     sorted_scores = np.sort(-confidence)
152 |     BB = BB[sorted_ind, :]
153 |     image_ids = [image_ids[x] for x in sorted_ind]
154 | 
155 |         # go down dets and mark TPs and FPs
156 |     nd = len(image_ids)
157 |     tp = np.zeros(nd)
158 |     fp = np.zeros(nd)
159 |     for d in range(nd):
160 |         R = class_recs[image_ids[d]]
161 |         bb = BB[d, :].astype(float)
162 |         ovmax = -np.inf
163 |         BBGT = R['bbox'].astype(float)
164 | 
165 |         if BBGT.size > 0:
166 |             # compute overlaps
167 |             # intersection
168 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
169 |             iymin = np.maximum(BBGT[:, 1], bb[1])
170 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
171 |             iymax = np.minimum(BBGT[:, 3], bb[3])
172 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
173 |             ih = np.maximum(iymax - iymin + 1., 0.)
174 |             inters = iw * ih
175 | 
176 |                 # union
177 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
178 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
179 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
180 | 
181 |             overlaps = inters / uni
182 |             ovmax = np.max(overlaps)
183 |             jmax = np.argmax(overlaps)
184 | 
185 |         if ovmax > ovthresh:
186 |             if not R['difficult'][jmax]:
187 |                 if not R['det'][jmax]:
188 |                     tp[d] = 1.
189 |                     R['det'][jmax] = 1
190 |                 else:
191 |                     fp[d] = 1.
192 |         else:
193 |             fp[d] = 1.
194 | 
195 |         # compute precision recall
196 |     fp = np.cumsum(fp)
197 |     tp = np.cumsum(tp)
198 |     rec = tp / float(npos)
199 |         # avoid divide by zero in case the first detection matches a difficult
200 |         # ground truth
201 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
202 |     ap = voc_ap(rec, prec, use_07_metric)
203 | 
204 |     return rec, prec, ap
205 | 


--------------------------------------------------------------------------------
/lib/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 | 


--------------------------------------------------------------------------------
/lib/layers/functions/__init__.py:
--------------------------------------------------------------------------------
1 | from .detection import Detect
2 | from .prior_box import PriorBox
3 | 
4 | 
5 | __all__ = ['Detect', 'PriorBox']
6 | 


--------------------------------------------------------------------------------
/lib/layers/functions/detection.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.backends.cudnn as cudnn
  4 | from torch.autograd import Function
  5 | from torch.autograd import Variable
  6 | from lib.utils.box_utils import decode, nms
  7 | # from lib.utils.nms.nms_wrapper import nms
  8 | from lib.utils.timer import Timer
  9 | 
 10 | 
 11 | class Detect(Function):
 12 |     """At test time, Detect is the final layer of SSD.  Decode location preds,
 13 |     apply non-maximum suppression to location predictions based on conf
 14 |     scores and threshold to a top_k number of output predictions for both
 15 |     confidence score and locations.
 16 |     """
 17 | 
 18 |     def __init__(self, cfg, priors):
 19 |         self.num_classes = cfg.NUM_CLASSES
 20 |         self.background_label = cfg.BACKGROUND_LABEL
 21 |         self.conf_thresh = cfg.SCORE_THRESHOLD
 22 |         self.nms_thresh = cfg.IOU_THRESHOLD
 23 |         self.top_k = cfg.MAX_DETECTIONS
 24 |         self.variance = cfg.VARIANCE
 25 |         self.priors = priors
 26 | 
 27 |     # def forward(self, predictions, prior):
 28 |     #     """
 29 |     #     Args:
 30 |     #         loc_data: (tensor) Loc preds from loc layers
 31 |     #             Shape: [batch,num_priors*4]
 32 |     #         conf_data: (tensor) Shape: Conf preds from conf layers
 33 |     #             Shape: [batch*num_priors,num_classes]
 34 |     #         prior_data: (tensor) Prior boxes and variances from priorbox layers
 35 |     #             Shape: [1,num_priors,4]
 36 |     #     """
 37 |     #     loc, conf = predictions
 38 | 
 39 |     #     loc_data = loc.data
 40 |     #     conf_data = conf.data
 41 |     #     prior_data = prior.data
 42 | 
 43 |     #     num = loc_data.size(0)  # batch size
 44 |     #     num_priors = prior_data.size(0)
 45 |     #     self.boxes = torch.zeros(1, num_priors, 4)
 46 |     #     self.scores = torch.zeros(1, num_priors, self.num_classes)
 47 | 
 48 |     #     if num == 1:
 49 |     #         # size batch x num_classes x num_priors
 50 |     #         conf_preds = conf_data.unsqueeze(0)
 51 | 
 52 |     #     else:
 53 |     #         conf_preds = conf_data.view(num, num_priors,
 54 |     #                                     self.num_classes)
 55 |     #         self.boxes.expand_(num, num_priors, 4)
 56 |     #         self.scores.expand_(num, num_priors, self.num_classes)
 57 | 
 58 |     #     # Decode predictions into bboxes.
 59 |     #     for i in range(num):
 60 |     #         decoded_boxes = decode(loc_data[i], prior_data, self.variance)
 61 |     #         # For each class, perform nms
 62 |     #         conf_scores = conf_preds[i].clone()
 63 |     #         '''
 64 |     #         c_mask = conf_scores.gt(self.thresh)
 65 |     #         decoded_boxes = decoded_boxes[c_mask]
 66 |     #         conf_scores = conf_scores[c_mask]
 67 |     #         '''
 68 | 
 69 |     #         conf_scores = conf_preds[i].clone()
 70 |     #         num_det = 0
 71 |     #         for cl in range(1, self.num_classes):
 72 |     #             c_mask = conf_scores[cl].gt(self.conf_thresh)
 73 |     #             scores = conf_scores[cl][c_mask]
 74 |     #             if scores.dim() == 0:
 75 |     #                 continue
 76 |     #             l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
 77 |     #             boxes = decoded_boxes[l_mask].view(-1, 4)
 78 |     #             ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
 79 |     #             self.output[i, cl, :count] = \
 80 |     #                 torch.cat((scores[ids[:count]].unsqueeze(1),
 81 |     #                            boxes[ids[:count]]), 1)
 82 | 
 83 |     #     return self.output
 84 | 
 85 |     def forward(self, predictions):
 86 |         """
 87 |         Args:
 88 |             loc_data: (tensor) Loc preds from loc layers
 89 |                 Shape: [batch,num_priors*4]
 90 |             conf_data: (tensor) Shape: Conf preds from conf layers
 91 |                 Shape: [batch*num_priors,num_classes]
 92 |             prior_data: (tensor) Prior boxes and variances from priorbox layers
 93 |                 Shape: [1,num_priors,4]
 94 |         """
 95 |         loc, conf = predictions
 96 | 
 97 |         loc_data = loc.data
 98 |         conf_data = conf.data
 99 |         prior_data = self.priors.data
100 | 
101 |         num = loc_data.size(0)  # batch size
102 |         num_priors = prior_data.size(0)
103 |         # self.output.zero_()
104 |         if num == 1:
105 |             # size batch x num_classes x num_priors
106 |             conf_preds = conf_data.t().contiguous().unsqueeze(0)
107 |         else:
108 |             conf_preds = conf_data.view(num, num_priors,
109 |                                         self.num_classes).transpose(2, 1)
110 |             # self.output.expand_(num, self.num_classes, self.top_k, 5)
111 |         output = torch.zeros(num, self.num_classes, self.top_k, 5)
112 | 
113 |         # Decode predictions into bboxes.
114 |         for i in range(num):
115 |             decoded_boxes = decode(loc_data[i], prior_data, self.variance)
116 | 
117 |             # print('decoded_boxes: {}'.format(decoded_boxes))
118 | 
119 |             # For each class, perform nms
120 |             conf_scores = conf_preds[i].clone()
121 |             # print('conf_scores: {}'.format(conf_scores))
122 |             # print(conf_scores.size())
123 | 
124 |             for cl in range(1, self.num_classes):
125 |                 # print(conf_scores[cl])
126 |                 # print(conf_scores[cl].size())
127 |                 c_mask = conf_scores[cl].gt(self.conf_thresh).nonzero().view(-1)
128 |                 # print('cmask: ', c_mask)
129 |                 if c_mask.dim() == 0:
130 |                     continue
131 |                 scores = conf_scores[cl][c_mask]
132 |                 if scores.dim() == 0:
133 |                     continue
134 |                 # l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
135 |                 # boxes = decoded_boxes[l_mask].view(-1, 4)
136 |                 boxes = decoded_boxes[c_mask, :]
137 |                 # print(scores, boxes)
138 |                 # idx of highest scoring and non-overlapping boxes per class
139 |                 # cls_dets = torch.cat((boxes, scores), 1)
140 |                 # _, order = torch.sort(scores, 0, True)
141 |                 # cls_dets = cls_dets[order]
142 |                 # keep = nms(cls_dets, self.nms_thresh)
143 |                 # cls_dets = cls_dets[keep.view(-1).long()]
144 | 
145 |                 # print('before nms:')
146 |                 # print('boxes: {}'.format(boxes))
147 |                 # print('scores: {}'.format(scores))
148 |                 # why it is empty?
149 |                 ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
150 |                 output[i, cl, :count] = \
151 |                     torch.cat((scores[ids[:count]].unsqueeze(1),
152 |                                boxes[ids[:count]]), 1)
153 |         # print(nms_time, cpu_tims, scores_time,box_time,gpunms_time)
154 |         # flt = self.output.view(-1, 5)
155 |         # _, idx = flt[:, 0].sort(0)
156 |         # _, rank = idx.sort(0)
157 |         # flt[(rank >= self.top_k).unsqueeze(1).expand_as(flt)].fill_(0)
158 |         return output
159 | 


--------------------------------------------------------------------------------
/lib/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | import torch
 3 | from math import sqrt as sqrt
 4 | from itertools import product as product
 5 | 
 6 | class PriorBox(object):
 7 |     """Compute priorbox coordinates in center-offset form for each source
 8 |     feature map.
 9 |     """
10 |     def __init__(self, image_size, feature_maps, aspect_ratios, scale, archor_stride=None, archor_offest=None, clip=True):
11 |         super(PriorBox, self).__init__()
12 |         self.image_size = image_size #[height, width]
13 |         self.feature_maps = feature_maps #[(height, width), ...]
14 |         self.aspect_ratios = aspect_ratios
15 |         # number of priors for feature map location (either 4 or 6)
16 |         self.num_priors = len(aspect_ratios)
17 |         self.clip = clip
18 |         # scale value
19 |         if isinstance(scale[0], list):
20 |             # get min of the result
21 |             self.scales = [min(s[0] / self.image_size[0], s[1] / self.image_size[1]) for s in scale]
22 |         elif isinstance(scale[0], float) and len(scale) == 2:
23 |             num_layers = len(feature_maps)
24 |             min_scale, max_scale = scale
25 |             self.scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) for i in range(num_layers)] + [1.0]
26 |         
27 |         if archor_stride:
28 |             self.steps = [(steps[0] / self.image_size[0], steps[1] / self.image_size[1]) for steps in archor_stride] 
29 |         else:
30 |             self.steps = [(1/f_h, 1/f_w) for f_h, f_w in feature_maps]
31 | 
32 |         if archor_offest:
33 |             self.offset = [[offset[0] / self.image_size[0], offset[1] * self.image_size[1]] for offset in archor_offest] 
34 |         else:
35 |             self.offset = [[steps[0] * 0.5, steps[1] * 0.5] for steps in self.steps] 
36 | 
37 |     def forward(self):
38 |         mean = []
39 |         # l = 0
40 |         for k, f in enumerate(self.feature_maps):
41 |             for i, j in product(range(f[0]), range(f[1])):
42 |                 cx = j * self.steps[k][1] + self.offset[k][1]
43 |                 cy = i * self.steps[k][0] + self.offset[k][0]
44 |                 s_k = self.scales[k]
45 | 
46 |                 # rest of aspect ratios
47 |                 for ar in self.aspect_ratios[k]:
48 |                     if isinstance(ar, int):
49 |                         if ar == 1:
50 |                             # aspect_ratio: 1 Min size
51 |                             mean += [cx, cy, s_k, s_k]
52 | 
53 |                             # aspect_ratio: 1 Max size
54 |                             # rel size: sqrt(s_k * s_(k+1))
55 |                             s_k_prime = sqrt(s_k * self.scales[k+1])
56 |                             mean += [cx, cy, s_k_prime, s_k_prime]
57 |                         else:
58 |                             ar_sqrt = sqrt(ar)
59 |                             mean += [cx, cy, s_k*ar_sqrt, s_k/ar_sqrt]
60 |                             mean += [cx, cy, s_k/ar_sqrt, s_k*ar_sqrt]
61 |                     elif isinstance(ar, list):
62 |                         mean += [cx, cy, s_k*ar[0], s_k*ar[1]]
63 |         #     print(f, self.aspect_ratios[k])
64 |         # assert False
65 |         # back to torch land
66 |         output = torch.Tensor(mean).view(-1, 4)
67 |         if self.clip:
68 |             output.clamp_(max=1, min=0)
69 |         return output


--------------------------------------------------------------------------------
/lib/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2norm import L2Norm
2 | from .multibox_loss import MultiBoxLoss
3 | 
4 | __all__ = ['L2Norm', 'MultiBoxLoss']
5 | 


--------------------------------------------------------------------------------
/lib/layers/modules/focal_loss.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | from lib.utils.box_utils import match, log_sum_exp, one_hot_embedding
  7 | 
  8 | # I do not fully understand this part, It completely based on https://github.com/kuangliu/pytorch-retinanet/blob/master/loss.py
  9 | 
 10 | class FocalLoss(nn.Module):
 11 |     """SSD Weighted Loss Function
 12 |     Focal Loss for Dense Object Detection.
 13 |         
 14 |         Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])
 15 | 
 16 |     The losses are averaged across observations for each minibatch.
 17 |     Args:
 18 |         alpha(1D Tensor, Variable) : the scalar factor for this criterion
 19 |         gamma(float, double) : gamma > 0; reduces the relative loss for well-classiﬁed examples (p > .5), 
 20 |                                 putting more focus on hard, misclassiﬁed examples
 21 |         size_average(bool): size_average(bool): By default, the losses are averaged over observations for each minibatch.
 22 |                             However, if the field size_average is set to False, the losses are
 23 |                             instead summed for each minibatch.
 24 |     """
 25 | 
 26 |     def __init__(self, cfg, priors, use_gpu=True):
 27 |         super(FocalLoss, self).__init__()
 28 |         self.use_gpu = use_gpu
 29 |         self.num_classes = cfg.NUM_CLASSES
 30 |         self.background_label = cfg.BACKGROUND_LABEL
 31 |         self.negpos_ratio = cfg.NEGPOS_RATIO
 32 |         self.threshold = cfg.MATCHED_THRESHOLD
 33 |         self.unmatched_threshold = cfg.UNMATCHED_THRESHOLD
 34 |         self.variance = cfg.VARIANCE
 35 |         self.priors = priors
 36 | 
 37 |         self.alpha = Variable(torch.ones(self.num_classes, 1) * cfg.alpha)
 38 |         self.gamma = cfg.gamma
 39 | 
 40 | 
 41 |     def forward(self, predictions, targets):
 42 |         """Multibox Loss
 43 |         Args:
 44 |             predictions (tuple): A tuple containing loc preds, conf preds,
 45 |             and prior boxes from SSD net.
 46 |                 conf shape: torch.size(batch_size,num_priors,num_classes)
 47 |                 loc shape: torch.size(batch_size,num_priors,4)
 48 |                 priors shape: torch.size(num_priors,4)
 49 |             ground_truth (tensor): Ground truth boxes and labels for a batch,
 50 |                 shape: [batch_size,num_objs,5] (last idx is the label).
 51 |         """
 52 |         loc_data, conf_data = predictions
 53 |         num = loc_data.size(0)
 54 |         priors = self.priors
 55 |         # priors = priors[:loc_data.size(1), :]
 56 |         num_priors = (priors.size(0))
 57 |         
 58 |         # match priors (default boxes) and ground truth boxes
 59 |         loc_t = torch.Tensor(num, num_priors, 4)
 60 |         conf_t = torch.LongTensor(num, num_priors)
 61 |         for idx in range(num):
 62 |             truths = targets[idx][:,:-1].data
 63 |             labels = targets[idx][:,-1].data
 64 |             defaults = priors.data
 65 |             match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx)
 66 |         if self.use_gpu:
 67 |             loc_t = loc_t.cuda()
 68 |             conf_t = conf_t.cuda()
 69 |         # wrap targets
 70 |         loc_t = Variable(loc_t, requires_grad=False)
 71 |         conf_t = Variable(conf_t,requires_grad=False)
 72 | 
 73 |         pos = conf_t > 0
 74 |         num_pos = pos.sum()
 75 | 
 76 |         # Localization Loss (Smooth L1)
 77 |         # Shape: [batch,num_priors,4]
 78 |         pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
 79 |         loc_p = loc_data[pos_idx].view(-1,4)
 80 |         loc_t = loc_t[pos_idx].view(-1,4)
 81 |         loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
 82 |         loss_l/=num_pos.data.sum()
 83 | 
 84 |         # Confidence Loss (Focal loss)
 85 |         # Shape: [batch,num_priors,1]
 86 |         loss_c = self.focal_loss(conf_data.view(-1, self.num_classes), conf_t.view(-1,1))
 87 | 
 88 |         return loss_l,loss_c
 89 | 
 90 |     def focal_loss(self, inputs, targets):
 91 |         '''Focal loss.
 92 |         mean of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
 93 |         '''
 94 |         N = inputs.size(0)
 95 |         C = inputs.size(1)
 96 |         P = F.softmax(inputs)
 97 |         
 98 |         class_mask = inputs.data.new(N, C).fill_(0)
 99 |         class_mask = Variable(class_mask)
100 |         ids = targets.view(-1, 1)
101 |         class_mask.scatter_(1, ids.data, 1.)
102 | 
103 |         if inputs.is_cuda and not self.alpha.is_cuda:
104 |             self.alpha = self.alpha.cuda()
105 |         alpha = self.alpha[ids.data.view(-1)]
106 |         probs = (P*class_mask).sum(1).view(-1,1)
107 |         log_p = probs.log()
108 | 
109 |         batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p 
110 | 
111 |         loss = batch_loss.mean()
112 |         return loss


--------------------------------------------------------------------------------
/lib/layers/modules/l2norm.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | from torch.autograd import Variable
 5 | import torch.nn.init as init
 6 | 
 7 | class L2Norm(nn.Module):
 8 |     def __init__(self,n_channels, scale):
 9 |         super(L2Norm,self).__init__()
10 |         self.n_channels = n_channels
11 |         self.gamma = scale or None
12 |         self.eps = 1e-10
13 |         self.weight = nn.Parameter(torch.Tensor(self.n_channels))
14 |         self.reset_parameters()
15 | 
16 |     def reset_parameters(self):
17 |         init.constant_(self.weight,self.gamma)
18 | 
19 |     def forward(self, x):
20 |         norm = x.pow(2).sum(dim=1, keepdim=True).sqrt()+self.eps
21 |         #x /= norm
22 |         x = torch.div(x,norm)
23 |         out = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3).expand_as(x) * x
24 |         return out
25 | 


--------------------------------------------------------------------------------
/lib/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | from lib.utils.box_utils import match, log_sum_exp
  7 | 
  8 | 
  9 | class MultiBoxLoss(nn.Module):
 10 |     """SSD Weighted Loss Function
 11 |     Compute Targets:
 12 |         1) Produce Confidence Target Indices by matching  ground truth boxes
 13 |            with (default) 'priorboxes' that have jaccard index > threshold parameter
 14 |            (default threshold: 0.5).
 15 |         2) Produce localization target by 'encoding' variance into offsets of ground
 16 |            truth boxes and their matched  'priorboxes'.
 17 |         3) Hard negative mining to filter the excessive number of negative examples
 18 |            that comes with using a large number of default bounding boxes.
 19 |            (default negative:positive ratio 3:1)
 20 |     Objective Loss:
 21 |         L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
 22 |         Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
 23 |         weighted by α which is set to 1 by cross val.
 24 |         Args:
 25 |             c: class confidences,
 26 |             l: predicted boxes,
 27 |             g: ground truth boxes
 28 |             N: number of matched default boxes
 29 |         See: https://arxiv.org/pdf/1512.02325.pdf for more details.
 30 |     """
 31 | 
 32 |     def __init__(self, cfg, priors, use_gpu=True):
 33 |         super(MultiBoxLoss, self).__init__()
 34 |         # self.use_gpu = use_gpu
 35 |         self.device = torch.device("cuda:0" if use_gpu else "cpu")
 36 |         self.num_classes = cfg.NUM_CLASSES
 37 |         self.background_label = cfg.BACKGROUND_LABEL
 38 |         self.negpos_ratio = cfg.NEGPOS_RATIO
 39 |         self.threshold = cfg.MATCHED_THRESHOLD
 40 |         self.unmatched_threshold = cfg.UNMATCHED_THRESHOLD
 41 |         self.variance = cfg.VARIANCE
 42 |         self.priors = priors
 43 | 
 44 |     def forward(self, predictions, targets):
 45 |         """Multibox Loss
 46 |         Args:
 47 |             predictions (tuple): A tuple containing loc preds, conf preds,
 48 |             and prior boxes from SSD net.
 49 |                 conf shape: torch.size(batch_size,num_priors,num_classes)
 50 |                 loc shape: torch.size(batch_size,num_priors,4)
 51 |                 priors shape: torch.size(num_priors,4)
 52 |             ground_truth (tensor): Ground truth boxes and labels for a batch,
 53 |                 shape: [batch_size,num_objs,5] (last idx is the label).
 54 |         """
 55 |         loc_data, conf_data = predictions
 56 |         num = loc_data.size(0) # batch size
 57 |         priors = self.priors
 58 |         # priors = priors[:loc_data.size(1), :]
 59 |         num_priors = (priors.size(0)) # number of priors
 60 |         num_classes = self.num_classes
 61 | 
 62 |         # match priors (default boxes) and ground truth boxes
 63 |         loc_t = torch.Tensor(num, num_priors, 4).to(self.device)
 64 |         conf_t = torch.LongTensor(num, num_priors).to(self.device)
 65 |         for idx in range(num):
 66 |             truths = targets[idx][:,:-1].data
 67 |             labels = targets[idx][:,-1].data
 68 |             defaults = priors.data
 69 |             match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx)
 70 |         # if self.use_gpu:
 71 |             # loc_t = loc_t.cuda()
 72 |             # conf_t = conf_t.cuda()
 73 |         # wrap targets
 74 |         # loc_t = Variable(loc_t, requires_grad=False)
 75 |         # conf_t = Variable(conf_t,requires_grad=False)
 76 |         loc_t = loc_t.detach()
 77 |         conf_t = conf_t.detach()
 78 | 
 79 |         pos = conf_t > 0
 80 |         # num_pos = pos.sum()
 81 | 
 82 |         # Localization Loss (Smooth L1)
 83 |         # Shape: [batch,num_priors,4]
 84 |         pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
 85 |         loc_p = loc_data[pos_idx].view(-1,4)
 86 |         loc_t = loc_t[pos_idx].view(-1,4)
 87 |         loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)
 88 | 
 89 |         # Compute max conf across batch for hard negative mining
 90 |         batch_conf = conf_data.view(-1, self.num_classes)
 91 |         loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1,1))
 92 | 
 93 |         # Hard Negative Mining
 94 |         loss_c = loss_c.view(num, -1)
 95 |         loss_c[pos] = 0 # filter out pos boxes for now
 96 |         _,loss_idx = loss_c.sort(1, descending=True)
 97 |         _,idx_rank = loss_idx.sort(1)
 98 |         num_pos = pos.long().sum(1,keepdim=True) #new sum needs to keep the same dim
 99 |         num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1)
100 |         neg = idx_rank < num_neg.expand_as(idx_rank)
101 | 
102 |         # Confidence Loss Including Positive and Negative Examples
103 |         pos_idx = pos.unsqueeze(2).expand_as(conf_data)
104 |         neg_idx = neg.unsqueeze(2).expand_as(conf_data)
105 |         conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes)
106 |         targets_weighted = conf_t[(pos+neg).gt(0)]
107 |         loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)
108 | 
109 |         # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
110 | 
111 |         N = num_pos.data.sum().to(dtype=torch.float)
112 |         loss_l/=N
113 |         loss_c/=N
114 |         return loss_l,loss_c
115 | 


--------------------------------------------------------------------------------
/lib/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/modeling/__init__.py


--------------------------------------------------------------------------------
/lib/modeling/model_builder.py:
--------------------------------------------------------------------------------
 1 | # ssds part
 2 | from lib.modeling.ssds import ssd
 3 | from lib.modeling.ssds import ssd_lite
 4 | from lib.modeling.ssds import rfb
 5 | from lib.modeling.ssds import rfb_lite
 6 | from lib.modeling.ssds import fssd
 7 | from lib.modeling.ssds import fssd_lite
 8 | from lib.modeling.ssds import yolo
 9 | 
# Registry of detector builders, keyed by the architecture name that
# create_model() reads from cfg.SSDS.
ssds_map = {
    'ssd': ssd.build_ssd,
    'ssd_lite': ssd_lite.build_ssd_lite,
    'rfb': rfb.build_rfb,
    'rfb_lite': rfb_lite.build_rfb_lite,
    'fssd': fssd.build_fssd,
    'fssd_lite': fssd_lite.build_fssd_lite,
    'yolo_v2': yolo.build_yolo_v2,
    'yolo_v3': yolo.build_yolo_v3,
}
20 | 
21 | # nets part
22 | from lib.modeling.nets import vgg
23 | from lib.modeling.nets import resnet
24 | from lib.modeling.nets import mobilenet
25 | from lib.modeling.nets import darknet
26 | 
# Registry of backbone constructors, keyed by the network name that
# create_model() reads from cfg.NETS; the selected entry is passed to the
# detector builder as `base`.
networks_map = {
    'vgg16': vgg.vgg16,
    'resnet_18': resnet.resnet_18,
    'resnet_34': resnet.resnet_34,
    'resnet_50': resnet.resnet_50,
    'resnet_101': resnet.resnet_101,
    'mobilenet_v1': mobilenet.mobilenet_v1,
    'mobilenet_v1_075': mobilenet.mobilenet_v1_075,
    'mobilenet_v1_050': mobilenet.mobilenet_v1_050,
    'mobilenet_v1_025': mobilenet.mobilenet_v1_025,
    'mobilenet_v2': mobilenet.mobilenet_v2,
    'mobilenet_v2_075': mobilenet.mobilenet_v2_075,
    'mobilenet_v2_050': mobilenet.mobilenet_v2_050,
    'mobilenet_v2_025': mobilenet.mobilenet_v2_025,
    'darknet_19': darknet.darknet_19,
    'darknet_53': darknet.darknet_53,
}
44 | 
45 | from lib.layers.functions.prior_box import PriorBox
46 | import torch
47 | 
48 | 
49 | def _forward_features_size(model, img_size):
50 |     model.eval()
51 |     x = torch.rand(1, 3, img_size[0], img_size[1])
52 |     with torch.no_grad():
53 |         x = torch.Tensor(x)
54 |     feature_maps = model(x, phase='feature')
55 |     return [(o.size()[2], o.size()[3]) for o in feature_maps]
56 | 
57 | 
def create_model(cfg):
    """Build the detector selected by ``cfg`` plus its prior-box generator.

    The backbone is looked up in ``networks_map`` (cfg.NETS) and the detector
    builder in ``ssds_map`` (cfg.SSDS). The feature-map sizes are measured
    with a dummy forward pass and printed, then used to configure PriorBox.

    Returns:
        ``(model, priorbox)``.
    """
    backbone = networks_map[cfg.NETS]

    # Boxes per feature-map location: a group whose first entry is an int
    # contributes two boxes per entry, any other group one box per entry.
    boxes_per_location = []
    for ratios in cfg.ASPECT_RATIOS:
        if isinstance(ratios[0], int):
            boxes_per_location.append(2 * len(ratios))
        else:
            boxes_per_location.append(len(ratios))

    build = ssds_map[cfg.SSDS]
    model = build(
        base=backbone,
        feature_layer=cfg.FEATURE_LAYER,
        mbox=boxes_per_location,
        num_classes=cfg.NUM_CLASSES,
    )

    feature_maps = _forward_features_size(model, cfg.IMAGE_SIZE)
    print('==>Feature map size:')
    print(feature_maps)

    priorbox = PriorBox(
        image_size=cfg.IMAGE_SIZE,
        feature_maps=feature_maps,
        aspect_ratios=cfg.ASPECT_RATIOS,
        scale=cfg.SIZES,
        archor_stride=cfg.STEPS,
        clip=cfg.CLIP,
    )
    return model, priorbox
77 | 


--------------------------------------------------------------------------------
/lib/modeling/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/modeling/nets/__init__.py


--------------------------------------------------------------------------------
/lib/modeling/nets/darknet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | from collections import namedtuple
  5 | import functools
  6 | 
# Layer specifications consumed by darknet().
# Conv: one conv-bn-LeakyReLU layer with the given stride and output depth.
Conv = namedtuple('Conv', ['stride', 'depth'])
# ConvBlock / ResidualBlock: `num` consecutive blocks of output `depth`; the
# stride is applied to the first repetition only (later ones use stride 1).
ConvBlock = namedtuple('ConvBlock', ['stride', 'depth', 'num', 't']) # t is the expansion factor
ResidualBlock = namedtuple('ResidualBlock', ['stride', 'depth', 'num', 't']) # t is the expansion factor


# Layer list used to build darknet_19; 'M' entries become 2x2 max-pool layers
# with stride 2.
CONV_DEFS_19 = [
    Conv(stride=1, depth=32),
    'M',
    Conv(stride=1, depth=64),
    'M',
    ConvBlock(stride=1, depth=128, num=2, t=0.5),
    'M',
    ConvBlock(stride=1, depth=256, num=2, t=0.5),
    'M',
    ConvBlock(stride=1, depth=512, num=3, t=0.5),
    'M',
    ConvBlock(stride=1, depth=1024, num=3, t=0.5),
]

# Layer list for the 53-layer variant (presumably consumed by darknet_53 —
# confirm against its definition); downsampling is done by the stride-2 first
# repetition of each residual group instead of max pooling.
CONV_DEFS_53 = [
    Conv(stride=1, depth=32),
    ResidualBlock(stride=2, depth=64, num=2, t=0.5),
    ResidualBlock(stride=2, depth=128, num=3, t=0.5),
    ResidualBlock(stride=2, depth=256, num=9, t=0.5),
    ResidualBlock(stride=2, depth=512, num=9, t=0.5),
    ResidualBlock(stride=2, depth=1024, num=5, t=0.5),
]
 34 | 
 35 | class _conv_bn(nn.Module):
 36 |     def __init__(self, inp, oup, stride):
 37 |         super(_conv_bn, self).__init__()
 38 |         self.conv = nn.Sequential(
 39 |             nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
 40 |             nn.BatchNorm2d(oup),
 41 |             nn.LeakyReLU(0.1, inplace=True),
 42 |         )
 43 |         self.depth = oup
 44 | 
 45 |     def forward(self, x):
 46 |         return self.conv(x)
 47 | 
 48 | class _conv_block(nn.Module):
 49 |     def __init__(self, inp, oup, stride, expand_ratio=0.5):
 50 |         super(_conv_block, self).__init__()
 51 |         if stride == 1 and inp == oup:
 52 |             depth = int(oup*expand_ratio)
 53 |             self.conv = nn.Sequential(
 54 |                 nn.Conv2d(inp, depth, 1, 1, bias=False),
 55 |                 nn.BatchNorm2d(depth),
 56 |                 nn.LeakyReLU(0.1, inplace=True),
 57 |                 nn.Conv2d(depth, oup, 3, stride, 1, bias=False),
 58 |                 nn.BatchNorm2d(oup),
 59 |                 nn.LeakyReLU(0.1, inplace=True),
 60 |             )
 61 |         else:
 62 |             self.conv = nn.Sequential(
 63 |                 nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
 64 |                 nn.BatchNorm2d(oup),
 65 |                 nn.LeakyReLU(0.1, inplace=True),
 66 |             )
 67 |         self.depth = oup
 68 | 
 69 |     def forward(self, x):
 70 |         return self.conv(x)
 71 | 
 72 | 
 73 | class _residual_block(nn.Module):
 74 |     def __init__(self, inp, oup, stride, expand_ratio=0.5):
 75 |         super(_residual_block, self).__init__()
 76 |         self.use_res_connect = stride == 1 and inp == oup
 77 |         if self.use_res_connect:
 78 |             depth = int(oup*expand_ratio)
 79 |             self.conv = nn.Sequential(
 80 |                 nn.Conv2d(inp, depth, 1, 1, bias=False),
 81 |                 nn.BatchNorm2d(depth),
 82 |                 nn.LeakyReLU(0.1, inplace=True),
 83 |                 nn.Conv2d(depth, oup, 3, stride, 1, bias=False),
 84 |                 nn.BatchNorm2d(oup),
 85 |                 nn.LeakyReLU(0.1, inplace=True),
 86 |             )
 87 |         else:
 88 |             self.conv = nn.Sequential(
 89 |                 nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
 90 |                 nn.BatchNorm2d(oup),
 91 |                 nn.LeakyReLU(0.1, inplace=True),
 92 |             )
 93 |         self.depth = oup
 94 | 
 95 |     def forward(self, x):
 96 |         if self.use_res_connect:
 97 |             return x + self.conv(x)
 98 |         else:
 99 |             return self.conv(x)
100 | 
101 | 
def darknet(conv_defs, depth_multiplier=1.0, min_depth=8):
    """Translate a Darknet layout table into a flat list of modules.

    Args:
        conv_defs: iterable of ``Conv``, ``ConvBlock`` and ``ResidualBlock``
            specs, or the string ``'M'`` for a 2x2 max-pool.  Any other entry
            is skipped.
        depth_multiplier: factor applied to every spec's ``depth``.
        min_depth: lower bound for the scaled channel count.

    Returns:
        list of ``nn.Module`` objects in layout order; the input is assumed
        to have 3 channels.
    """
    def scaled(channels):
        return max(int(channels * depth_multiplier), min_depth)

    modules = []
    prev = 3
    for spec in conv_defs:
        if isinstance(spec, Conv):
            width = scaled(spec.depth)
            modules.append(_conv_bn(prev, width, spec.stride))
            prev = width
        elif isinstance(spec, (ConvBlock, ResidualBlock)):
            unit = _conv_block if isinstance(spec, ConvBlock) else _residual_block
            width = scaled(spec.depth)
            for idx in range(spec.num):
                # Only the first repeat of a group may change the resolution.
                step = spec.stride if idx == 0 else 1
                modules.append(unit(prev, width, step, spec.t))
                prev = width
        elif spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return modules
123 | 
def wrapped_partial(func, *args, **kwargs):
    """Return ``functools.partial(func, ...)`` carrying ``func``'s metadata.

    The partial object receives ``func``'s ``__name__``, ``__doc__`` and the
    other attributes copied by ``functools.update_wrapper``.
    """
    return functools.update_wrapper(functools.partial(func, *args, **kwargs), func)
128 | 
# Backbone factories: calling one builds the module list for the matching
# layout table at full width (depth_multiplier=1.0).
darknet_19 = wrapped_partial(darknet, conv_defs=CONV_DEFS_19, depth_multiplier=1.0)
darknet_53 = wrapped_partial(darknet, conv_defs=CONV_DEFS_53, depth_multiplier=1.0)
131 | 


--------------------------------------------------------------------------------
/lib/modeling/nets/mobilenet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | from collections import namedtuple
  5 | import functools
  6 | 
# Layer specs consumed by ``mobilenet()``.  ``depth`` is the nominal output
# channel count, before ``depth_multiplier`` scaling is applied.
Conv = namedtuple('Conv', ['stride', 'depth'])
DepthSepConv = namedtuple('DepthSepConv', ['stride', 'depth'])
InvertedResidual = namedtuple('InvertedResidual', ['stride', 'depth', 'num', 't']) # t is the expansion factor, num the repeat count

# MobileNet v1 layout: one stem convolution followed by depthwise-separable
# convolutions.
V1_CONV_DEFS = [
    Conv(stride=2, depth=32),
    DepthSepConv(stride=1, depth=64),
    DepthSepConv(stride=2, depth=128),
    DepthSepConv(stride=1, depth=128),
    DepthSepConv(stride=2, depth=256),
    DepthSepConv(stride=1, depth=256),
    DepthSepConv(stride=2, depth=512),
    DepthSepConv(stride=1, depth=512),
    DepthSepConv(stride=1, depth=512),
    DepthSepConv(stride=1, depth=512),
    DepthSepConv(stride=1, depth=512),
    DepthSepConv(stride=1, depth=512),
    DepthSepConv(stride=2, depth=1024),
    DepthSepConv(stride=1, depth=1024)
]

# MobileNet v2 layout: one stem convolution followed by groups of inverted
# residual bottlenecks; within a group only the first repeat uses ``stride``.
V2_CONV_DEFS = [
    Conv(stride=2, depth=32),
    InvertedResidual(stride=1, depth=16, num=1, t=1),
    InvertedResidual(stride=2, depth=24, num=2, t=6),
    InvertedResidual(stride=2, depth=32, num=3, t=6),
    InvertedResidual(stride=2, depth=64, num=4, t=6),
    InvertedResidual(stride=1, depth=96, num=3, t=6),
    InvertedResidual(stride=2, depth=160, num=3, t=6),
    InvertedResidual(stride=1, depth=320, num=1, t=6),
]
 38 | 
 39 | class _conv_bn(nn.Module):
 40 |     def __init__(self, inp, oup, stride):
 41 |         super(_conv_bn, self).__init__()
 42 |         self.conv = nn.Sequential(
 43 |             nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
 44 |             nn.BatchNorm2d(oup),
 45 |             nn.ReLU(inplace=True),
 46 |         )
 47 |         self.depth = oup
 48 | 
 49 |     def forward(self, x):
 50 |         return self.conv(x)
 51 | 
 52 | 
 53 | class _conv_dw(nn.Module):
 54 |     def __init__(self, inp, oup, stride):
 55 |         super(_conv_dw, self).__init__()
 56 |         self.conv = nn.Sequential(
 57 |             # dw
 58 |             nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
 59 |             nn.BatchNorm2d(inp),
 60 |             nn.ReLU(inplace=True),
 61 |             # pw
 62 |             nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
 63 |             nn.BatchNorm2d(oup),
 64 |             nn.ReLU(inplace=True),
 65 |         )
 66 |         self.depth = oup
 67 | 
 68 |     def forward(self, x):
 69 |         return self.conv(x)
 70 | 
 71 | 
 72 | class _inverted_residual_bottleneck(nn.Module):
 73 |     def __init__(self, inp, oup, stride, expand_ratio):
 74 |         super(_inverted_residual_bottleneck, self).__init__()
 75 |         self.use_res_connect = stride == 1 and inp == oup
 76 |         self.conv = nn.Sequential(
 77 |             # pw
 78 |             nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False),
 79 |             nn.BatchNorm2d(inp * expand_ratio),
 80 |             nn.ReLU6(inplace=True),
 81 |             # dw
 82 |             nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, stride, 1, groups=inp * expand_ratio, bias=False),
 83 |             nn.BatchNorm2d(inp * expand_ratio),
 84 |             nn.ReLU6(inplace=True),
 85 |             # pw-linear
 86 |             nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False),
 87 |             nn.BatchNorm2d(oup),
 88 |         )
 89 |         self.depth = oup
 90 |         
 91 |     def forward(self, x):
 92 |         if self.use_res_connect:
 93 |             return x + self.conv(x)
 94 |         else:
 95 |             return self.conv(x)
 96 | 
 97 | 
 98 | def mobilenet(conv_defs, depth_multiplier=1.0, min_depth=8):
 99 |     depth = lambda d: max(int(d * depth_multiplier), min_depth)
100 |     layers = []
101 |     in_channels = 3
102 |     for conv_def in conv_defs:
103 |         if isinstance(conv_def, Conv):
104 |             layers += [_conv_bn(in_channels, depth(conv_def.depth), conv_def.stride)]
105 |             in_channels = depth(conv_def.depth)
106 |         elif isinstance(conv_def, DepthSepConv):
107 |             layers += [_conv_dw(in_channels, depth(conv_def.depth), conv_def.stride)]
108 |             in_channels = depth(conv_def.depth)
109 |         elif isinstance(conv_def, InvertedResidual):
110 |           for n in range(conv_def.num):
111 |             stride = conv_def.stride if n == 0 else 1
112 |             layers += [_inverted_residual_bottleneck(in_channels, depth(conv_def.depth), stride, conv_def.t)]
113 |             in_channels = depth(conv_def.depth)
114 |     return layers
115 | 
def wrapped_partial(func, *args, **kwargs):
    """Bind ``args``/``kwargs`` to ``func`` and keep ``func``'s metadata.

    Returns a ``functools.partial`` object updated with
    ``functools.update_wrapper`` so that it exposes ``func``'s ``__name__``
    and ``__doc__``.
    """
    bound = functools.partial(func, *args, **kwargs)
    return functools.update_wrapper(wrapper=bound, wrapped=func)
120 | 
# MobileNet v1 backbone factories at width multipliers 1.0 / 0.75 / 0.50 / 0.25.
mobilenet_v1 = wrapped_partial(mobilenet, conv_defs=V1_CONV_DEFS, depth_multiplier=1.0)
mobilenet_v1_075 = wrapped_partial(mobilenet, conv_defs=V1_CONV_DEFS, depth_multiplier=0.75)
mobilenet_v1_050 = wrapped_partial(mobilenet, conv_defs=V1_CONV_DEFS, depth_multiplier=0.50)
mobilenet_v1_025 = wrapped_partial(mobilenet, conv_defs=V1_CONV_DEFS, depth_multiplier=0.25)

# MobileNet v2 backbone factories at the same four width multipliers.
mobilenet_v2 = wrapped_partial(mobilenet, conv_defs=V2_CONV_DEFS, depth_multiplier=1.0)
mobilenet_v2_075 = wrapped_partial(mobilenet, conv_defs=V2_CONV_DEFS, depth_multiplier=0.75)
mobilenet_v2_050 = wrapped_partial(mobilenet, conv_defs=V2_CONV_DEFS, depth_multiplier=0.50)
mobilenet_v2_025 = wrapped_partial(mobilenet, conv_defs=V2_CONV_DEFS, depth_multiplier=0.25)


--------------------------------------------------------------------------------
/lib/modeling/nets/resnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | 
  4 | from collections import namedtuple
  5 | import functools
  6 | 
# Stage specs consumed by ``resnet()``: ``num`` blocks per stage, the first of
# which uses ``stride``; ``t`` is passed to the block as its ``expansion``.
BasicBlock = namedtuple('BasicBlock', ['stride', 'depth', 'num', 't'])
Bottleneck = namedtuple('Bottleneck', ['stride', 'depth', 'num', 't']) # t is the expansion factor

# In every table below the final 512-wide stage is left commented out.
V18_CONV_DEFS = [
    BasicBlock(stride=1, depth=64, num=2, t=1),
    BasicBlock(stride=2, depth=128, num=2, t=1),
    BasicBlock(stride=2, depth=256, num=2, t=1),
    # BasicBlock(stride=2, depth=512, num=2, t=1),
]

V34_CONV_DEFS = [
    BasicBlock(stride=1, depth=64, num=3, t=1),
    BasicBlock(stride=2, depth=128, num=4, t=1),
    BasicBlock(stride=2, depth=256, num=6, t=1),
    # BasicBlock(stride=2, depth=512, num=3, t=1),
]

V50_CONV_DEFS = [
    Bottleneck(stride=1, depth=64, num=3, t=4),
    Bottleneck(stride=2, depth=128, num=4, t=4),
    Bottleneck(stride=2, depth=256, num=6, t=4),
    # Bottleneck(stride=2, depth=512, num=3, t=4),
]

V101_CONV_DEFS = [
    Bottleneck(stride=1, depth=64, num=3, t=4),
    Bottleneck(stride=2, depth=128, num=4, t=4),
    Bottleneck(stride=2, depth=256, num=23, t=4),
    # Bottleneck(stride=2, depth=512, num=3, t=4),
]
 37 | 
 38 | class _basicblock(nn.Module):
 39 |     def __init__(self, inplanes, planes, stride=1, expansion=1, downsample=None):
 40 |         super(_basicblock, self).__init__()
 41 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
 42 |                      padding=1, bias=False)
 43 |         self.bn1 = nn.BatchNorm2d(planes)
 44 |         self.relu = nn.ReLU(inplace=True)
 45 |         self.conv2 = nn.Conv2d(planes, planes * expansion, kernel_size=3, stride=1,
 46 |                      padding=1, bias=False)
 47 |         self.bn2 = nn.BatchNorm2d(planes * expansion)
 48 |         self.downsample = downsample
 49 |         self.stride = stride
 50 | 
 51 |     def forward(self, x):
 52 |         residual = x
 53 | 
 54 |         out = self.conv1(x)
 55 |         out = self.bn1(out)
 56 |         out = self.relu(out)
 57 | 
 58 |         out = self.conv2(out)
 59 |         out = self.bn2(out)
 60 | 
 61 |         if self.downsample is not None:
 62 |             residual = self.downsample(x)
 63 | 
 64 |         out += residual
 65 |         out = self.relu(out)
 66 | 
 67 |         return out
 68 | 
 69 | 
 70 | class _bottleneck(nn.Module):
 71 |     def __init__(self, inplanes, planes, stride=1, expansion=4, downsample=None):
 72 |         super(_bottleneck, self).__init__()
 73 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
 74 |         self.bn1 = nn.BatchNorm2d(planes)
 75 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
 76 |                                padding=1, bias=False)
 77 |         self.bn2 = nn.BatchNorm2d(planes)
 78 |         self.conv3 = nn.Conv2d(planes, planes * expansion, kernel_size=1, bias=False)
 79 |         self.bn3 = nn.BatchNorm2d(planes * expansion)
 80 |         self.relu = nn.ReLU(inplace=True)
 81 |         self.downsample = downsample
 82 |         self.stride = stride
 83 | 
 84 |     def forward(self, x):
 85 |         residual = x
 86 | 
 87 |         out = self.conv1(x)
 88 |         out = self.bn1(out)
 89 |         out = self.relu(out)
 90 | 
 91 |         out = self.conv2(out)
 92 |         out = self.bn2(out)
 93 |         out = self.relu(out)
 94 | 
 95 |         out = self.conv3(out)
 96 |         out = self.bn3(out)
 97 | 
 98 |         if self.downsample is not None:
 99 |             residual = self.downsample(x)
100 | 
101 |         out += residual
102 |         out = self.relu(out)
103 | 
104 |         return out
105 | 
106 | 
def resnet(conv_defs, depth_multiplier=1.0, min_depth=8):
    """Translate a ResNet stage table into a flat list of modules.

    The list starts with a fixed stem (7x7 stride-2 convolution to 64
    channels, batch norm, ReLU, 3x3 stride-2 max-pool) and continues with
    ``num`` blocks per stage.  Only the first block of a stage uses the
    stage's ``stride`` and, when the shape changes, a 1x1 projection shortcut.

    Args:
        conv_defs: iterable of ``BasicBlock`` / ``Bottleneck`` specs; entries
            of any other type add no blocks.
        depth_multiplier: factor applied to every spec's ``depth``.
        min_depth: lower bound for the scaled channel count.

    Returns:
        list of ``nn.Module`` objects in network order.
    """
    depth = lambda d: max(int(d * depth_multiplier), min_depth)
    layers = [
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
    in_channels = 64
    for conv_def in conv_defs:
        out_channels = depth(conv_def.depth * conv_def.t)
        if conv_def.stride != 1 or in_channels != out_channels:
            _downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=conv_def.stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            # Shape is unchanged, so the identity shortcut is enough.  The
            # explicit reset keeps the name bound for a first stage such as
            # BasicBlock(stride=1, depth=64, t=1) and stops a later stage
            # from reusing the previous stage's projection.
            _downsample = None
        if isinstance(conv_def, BasicBlock):
            block_cls = _basicblock
        elif isinstance(conv_def, Bottleneck):
            block_cls = _bottleneck
        else:
            continue
        for n in range(conv_def.num):
            (stride, downsample) = (conv_def.stride, _downsample) if n == 0 else (1, None)
            layers += [block_cls(in_channels, depth(conv_def.depth), stride, conv_def.t, downsample)]
            in_channels = out_channels
    return layers
134 | 
def wrapped_partial(func, *args, **kwargs):
    """Create a partial application of ``func`` that keeps its metadata.

    ``functools.update_wrapper`` copies ``func``'s ``__name__``, ``__doc__``
    and related attributes onto the returned ``functools.partial`` object.
    """
    preset = functools.partial(func, *args, **kwargs)
    functools.update_wrapper(preset, func)
    return preset
139 | 
# Backbone factories built from BasicBlock stage tables.
resnet_18 = wrapped_partial(resnet, conv_defs=V18_CONV_DEFS, depth_multiplier=1.0)
resnet_34 = wrapped_partial(resnet, conv_defs=V34_CONV_DEFS, depth_multiplier=1.0)

# Backbone factories built from Bottleneck stage tables.
resnet_50 = wrapped_partial(resnet, conv_defs=V50_CONV_DEFS, depth_multiplier=1.0)
resnet_101 = wrapped_partial(resnet, conv_defs=V101_CONV_DEFS, depth_multiplier=1.0)
145 | 


--------------------------------------------------------------------------------
/lib/modeling/nets/vgg.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
# Layer layouts for ``vgg()``: an integer adds a 3x3 convolution with that many
# output channels, 'M' adds a 2x2 max-pool and 'C' a 2x2 max-pool with
# ceil_mode=True.
base = {
    'vgg16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
            512, 512, 512],
}
 8 | 
 9 | # CONV_DEFS_16 = [
10 | #     Conv(stride=1, depth=64),
11 | #     Conv(stride=1, depth=64),
12 | #     'M',
13 | #     Conv(stride=1, depth=128),
14 | #     Conv(stride=1, depth=128),
15 | #     'M'
16 | #     Conv(stride=1, depth=256),
17 | #     Conv(stride=1, depth=256),
18 | #     Conv(stride=1, depth=256),
19 | #     'M'
20 | #     Conv(stride=1, depth=512),
21 | #     Conv(stride=1, depth=512),
22 | #     Conv(stride=1, depth=512),
23 | #     'M'
24 | #     Conv(stride=1, depth=512),
25 | #     Conv(stride=1, depth=512),
26 | #     Conv(stride=1, depth=512),
27 | # ]
28 | 
29 | # Conv = namedtuple('Conv', ['stride', 'depth'])
30 | 
31 | # class _conv_bn(nn.Module):
32 | #     def __init__(self, inp, oup, stride):
33 | #         super(_conv_bn, self).__init__()
34 | #         self.conv = nn.Sequential(
35 | #             nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
36 | #             nn.BatchNorm2d(oup),
37 | #             nn.ReLU(inplace=True),
38 | #         )
39 | #         self.depth = oup
40 | 
41 | #     def forward(self, x):
42 | #         return self.conv(x)
43 | 
44 | 
def vgg(cfg, i, batch_norm=False):
    """Build the VGG layer list followed by a dilated two-convolution tail.

    Args:
        cfg: layout sequence.  An integer adds a 3x3 convolution (padding 1)
            with that many output channels followed by ReLU; ``'M'`` adds a
            2x2 max-pool and ``'C'`` a 2x2 max-pool with ``ceil_mode=True``.
        i: number of channels of the network input.
        batch_norm: when true, insert ``BatchNorm2d`` between each
            convolution and its ReLU.

    Returns:
        list of ``nn.Module`` objects: the layers described by ``cfg``, then a
        3x3 stride-1 max-pool, a 3x3 convolution with dilation 6 to 1024
        channels, and a 1x1 convolution with 1024 channels, each convolution
        followed by ReLU.
    """
    layers = []
    in_channels = i
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif v == 'C':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    # The tail consumes whatever width the last cfg convolution produced
    # (512 for the vgg16 layout) instead of assuming a fixed 512.
    layers += [
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        nn.Conv2d(in_channels, 1024, kernel_size=3, padding=6, dilation=6),
        nn.ReLU(inplace=True),
        nn.Conv2d(1024, 1024, kernel_size=1),
        nn.ReLU(inplace=True)]
    return layers
67 | 
def vgg16():
    """Return the layer list for the 'vgg16' layout on a 3-channel input."""
    layout = base['vgg16']
    return vgg(layout, 3)


# The builder carries its own identifier as a function attribute.
vgg16.name = 'vgg16'


--------------------------------------------------------------------------------
/lib/modeling/ssds/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/modeling/ssds/__init__.py


--------------------------------------------------------------------------------
/lib/modeling/ssds/fssd_lite.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | 
  6 | import os
  7 | 
  8 | from lib.layers import *
  9 | 
 10 | class FSSDLite(nn.Module):
 11 |     """FSSD: Feature Fusion Single Shot Multibox Detector for embeded system
 12 |     See: https://arxiv.org/pdf/1712.00960.pdf for more details.
 13 | 
 14 |     Args:
 15 |         phase: (string) Can be "eval" or "train" or "feature"
 16 |         base: base layers for input
 17 |         extras: extra layers that feed to multibox loc and conf layers
 18 |         head: "multibox head" consists of loc and conf conv layers
 19 |         features： include to feature layers to fusion feature and build pyramids
 20 |         feature_layer: the feature layers for head to loc and conf
 21 |         num_classes: num of classes 
 22 |     """
 23 | 
 24 |     def __init__(self, base, extras, head, features, feature_layer, num_classes):
 25 |         super(FSSDLite, self).__init__()
 26 |         self.num_classes = num_classes
 27 |         # SSD network
 28 |         self.base = nn.ModuleList(base)
 29 |         self.extras = nn.ModuleList(extras)
 30 |         self.feature_layer = feature_layer[0][0]
 31 |         self.transforms = nn.ModuleList(features[0])
 32 |         self.pyramids = nn.ModuleList(features[1])
 33 |         # print(self.base)
 34 |         self.norm = nn.BatchNorm2d(int(feature_layer[0][1][-1]/2)*len(self.transforms),affine=True)
 35 |         # print(self.extras)
 36 | 
 37 |         self.loc = nn.ModuleList(head[0])
 38 |         self.conf = nn.ModuleList(head[1])
 39 |         # print(self.loc)
 40 | 
 41 |         self.softmax = nn.Softmax(dim=-1)
 42 | 
 43 |     def forward(self, x, phase='eval'):
 44 |         """Applies network layers and ops on input image(s) x.
 45 | 
 46 |         Args:
 47 |             x: input image or batch of images. Shape: [batch,3,300,300].
 48 | 
 49 |         Return:
 50 |             Depending on phase:
 51 |             test:
 52 |                 Variable(tensor) of output class label predictions,
 53 |                 confidence score, and corresponding location predictions for
 54 |                 each object detected. Shape: [batch,topk,7]
 55 | 
 56 |             train:
 57 |                 list of concat outputs from:
 58 |                     1: confidence layers, Shape: [batch*num_priors,num_classes]
 59 |                     2: localization layers, Shape: [batch,num_priors*4]
 60 | 
 61 |             feature:
 62 |                 the features maps of the feature extractor
 63 |         """
 64 |         sources, transformed, pyramids, loc, conf = [list() for _ in range(5)]
 65 | 
 66 |         # apply bases layers and cache source layer outputs
 67 |         for k in range(len(self.base)):
 68 |             x = self.base[k](x)
 69 |             if k in self.feature_layer:
 70 |                 sources.append(x)
 71 | 
 72 |         # apply extra layers and cache source layer outputs
 73 |         for k, v in enumerate(self.extras):
 74 |             x = v(x)
 75 |             sources.append(x)
 76 |             # if k % 2 == 1:
 77 |             #     sources.append(x)
 78 |         assert len(self.transforms) == len(sources)
 79 |         upsize = (sources[0].size()[2], sources[0].size()[3])
 80 |         
 81 |         for k, v in enumerate(self.transforms):
 82 |             size = None if k == 0 else upsize
 83 |             transformed.append(v(sources[k], size))
 84 |         x = torch.cat(transformed, 1)
 85 |         x = self.norm(x)
 86 |         for k, v in enumerate(self.pyramids):
 87 |             x = v(x)
 88 |             pyramids.append(x)
 89 | 
 90 |         if phase == 'feature':
 91 |             return pyramids
 92 | 
 93 |         # apply multibox head to pyramids layers
 94 |         for (x, l, c) in zip(pyramids, self.loc, self.conf):
 95 |             loc.append(l(x).permute(0, 2, 3, 1).contiguous())
 96 |             conf.append(c(x).permute(0, 2, 3, 1).contiguous())
 97 |         loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
 98 |         conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
 99 | 
100 |         if phase == 'eval':
101 |             output = (
102 |                 loc.view(loc.size(0), -1, 4),                   # loc preds
103 |                 self.softmax(conf.view(-1, self.num_classes)),  # conf preds
104 |             )
105 |         else:
106 |             output = (
107 |                 loc.view(loc.size(0), -1, 4),
108 |                 conf.view(conf.size(0), -1, self.num_classes),
109 |             )
110 |         return output
111 | 
class BasicConv(nn.Module):
    """Convolution with optional batch norm, ReLU and bilinear resizing.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels; stored as ``self.out_channels``.
        kernel_size, stride, padding, dilation, groups, bias: forwarded to
            ``nn.Conv2d``.
        relu: append an in-place ReLU when true.
        bn: append ``BatchNorm2d`` (eps=1e-5, momentum=0.01) when true.
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=False, bias=True):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        if bn:
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
        else:
            self.bn = None
        if relu:
            self.relu = nn.ReLU(inplace=True)
        else:
            self.relu = None

    def forward(self, x, up_size=None):
        """Apply conv, then bn and relu if present, then resize to ``up_size`` if given."""
        x = self.conv(x)
        for stage in (self.bn, self.relu):
            if stage is not None:
                x = stage(x)
        if up_size is None:
            return x
        return F.upsample(x, size=up_size, mode='bilinear')
132 | 
def _conv_dw(inp, oup, stride=1, padding=0, expand_ratio=1):
    """Return a pointwise -> depthwise -> linear pointwise ``nn.Sequential``.

    The hidden width is ``oup * expand_ratio``.  ``stride`` and ``padding``
    apply to the 3x3 depthwise convolution only.  All convolutions are
    bias-free and followed by batch norm; the first two are also followed by
    ReLU6, the last one has no activation.
    """
    hidden = oup * expand_ratio
    expand = [
        nn.Conv2d(inp, hidden, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(hidden),
        nn.ReLU6(inplace=True),
    ]
    depthwise = [
        nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride, padding=padding, groups=hidden, bias=False),
        nn.BatchNorm2d(hidden),
        nn.ReLU6(inplace=True),
    ]
    project = [
        nn.Conv2d(hidden, oup, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(oup),
    ]
    return nn.Sequential(*(expand + depthwise + project))
147 | 
148 | 
def add_extras(base, feature_layer, mbox, num_classes):
    """Create the extra, fusion, pyramid and head layers of FSSD-lite.

    Args:
        base: base layers; returned unchanged.
        feature_layer: two-part configuration.  ``feature_layer[0]`` is
            ``(layers, depths)`` for the fusion sources: ``'S'`` adds a
            stride-2 extra layer, ``''`` a stride-1 extra layer, and any other
            entry refers to an existing base layer whose output has ``depth``
            channels.  ``feature_layer[1]`` is ``(layers, depths)`` for the
            pyramid, where only ``'S'`` (stride 2) and ``''`` (stride 1) are
            valid.
        mbox: number of boxes per location for each pyramid level.
        num_classes: number of classes.

    Returns:
        tuple ``(base, extra_layers, (transform_layers, pyramid_layers),
        (loc_layers, conf_layers))``.

    Raises:
        AssertionError: if a pyramid entry is neither ``'S'`` nor ``''``.
    """
    extra_layers = []
    feature_transform_layers = []
    pyramid_feature_layers = []
    loc_layers = []
    conf_layers = []
    in_channels = None
    # Every source is reduced to half of the last configured source depth.
    feature_transform_channel = int(feature_layer[0][1][-1]/2)
    for layer, depth in zip(feature_layer[0][0], feature_layer[0][1]):
        if layer == 'S':
            extra_layers += [ _conv_dw(in_channels, depth, stride=2, padding=1, expand_ratio=1) ]
        elif layer == '':
            extra_layers += [ _conv_dw(in_channels, depth, stride=1, expand_ratio=1) ]
        in_channels = depth
        feature_transform_layers += [BasicConv(in_channels, feature_transform_channel, kernel_size=1, padding=0)]

    in_channels = len(feature_transform_layers) * feature_transform_channel
    for layer, depth, box in zip(feature_layer[1][0], feature_layer[1][1], mbox):
        if layer == 'S':
            pyramid_feature_layers += [BasicConv(in_channels, depth, kernel_size=3, stride=2, padding=1)]
            in_channels = depth
        elif layer == '':
            # Only the first pyramid level is padded.
            pad = 1 if not pyramid_feature_layers else 0
            pyramid_feature_layers += [BasicConv(in_channels, depth, kernel_size=3, stride=1, padding=pad)]
            in_channels = depth
        else:
            # The exception used to be constructed without being raised, so
            # unknown markers were silently accepted.
            raise AssertionError('Undefined layer')
        loc_layers += [nn.Conv2d(in_channels, box * 4, kernel_size=3, padding=1)]
        conf_layers += [nn.Conv2d(in_channels, box * num_classes, kernel_size=3, padding=1)]
    return base, extra_layers, (feature_transform_layers, pyramid_feature_layers), (loc_layers, conf_layers)
182 | 
def build_fssd_lite(base, feature_layer, mbox, num_classes):
    """Assemble an ``FSSDLite`` detector.

    ``base`` is called with no arguments to obtain the base layers; the
    remaining layers come from ``add_extras``.
    """
    backbone, extras, features, head = add_extras(base(), feature_layer, mbox, num_classes)
    return FSSDLite(backbone, extras, head, features, feature_layer, num_classes)


--------------------------------------------------------------------------------
/lib/modeling/ssds/retina.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | 
  6 | import os
  7 | 
  8 | from lib.layers import *
  9 | 
 10 | class Retina(nn.Module):
 11 |     def __init__(self, base, extras, head, feature_layer, num_classes):
 12 |         super(Retina, self).__init__()
 13 |         self.num_classes = num_classes
 14 |         # SSD network
 15 |         self.base = nn.ModuleList(base)
 16 |         self.extras = nn.ModuleList(extras[1])
 17 |         self.transforms = nn.ModuleList(extras[0])
 18 |         self.loc = nn.ModuleList(head[0])
 19 |         self.conf = nn.ModuleList(head[1])
 20 |         self.softmax = nn.Softmax(dim=-1)
 21 | 
 22 |         self.feature_layer = feature_layer[0]
 23 |     
 24 |     def _upsample_add(self, x, y):
 25 |         '''Upsample and add two feature maps.
 26 |         Args:
 27 |           x: (Variable) top feature map to be upsampled.
 28 |           y: (Variable) lateral feature map.
 29 |         Returns:
 30 |           (Variable) added feature map.
 31 |         Note in PyTorch, when input size is odd, the upsampled feature map
 32 |         with `F.upsample(..., scale_factor=2, mode='nearest')`
 33 |         maybe not equal to the lateral feature map size.
 34 |         e.g.
 35 |         original input size: [N,_,15,15] ->
 36 |         conv2d feature map size: [N,_,8,8] ->
 37 |         upsampled feature map size: [N,_,16,16]
 38 |         So we choose bilinear upsample which supports arbitrary output sizes.
 39 |         '''
 40 |         _,_,H,W = y.size()
 41 |         return F.upsample(x, size=(H,W), mode='bilinear') + y    
 42 | 
 43 |     def forward(self, x, phase='eval'):
 44 |         sources, loc, conf = [list() for _ in range(3)]
 45 | 
 46 |         # apply bases layers and cache source layer outputs
 47 |         for k in range(len(self.base)):
 48 |             x = self.base[k](x)
 49 |             if k in self.feature_layer:
 50 |                 sources.append(x)
 51 | 
 52 |         for i in range(len(sources))[::-1]:
 53 |             if i != len(sources) -1:
 54 |                 xx = self.extras[i](self._upsample_add(xx, self.transforms[i](sources[i])))
 55 |             else:
 56 |                 xx = self.transforms[i](sources[i])
 57 |             sources[i] = xx
 58 | 
 59 |         # apply extra layers and cache source layer outputs
 60 |         for i, v in enumerate(self.extras):
 61 |             if i >= len(sources):
 62 |                 x = v(x)
 63 |                 sources.append(x)
 64 | 
 65 |         if phase == 'feature':
 66 |             return sources
 67 | 
 68 |         # apply multibox head to source layers
 69 |         for x in sources:
 70 |             loc.append(self.loc(x).permute(0, 2, 3, 1).contiguous())
 71 |             conf.append(self.conf(x).permute(0, 2, 3, 1).contiguous())
 72 |         loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
 73 |         conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
 74 | 
 75 |         if phase == 'eval':
 76 |             output = (
 77 |                 loc.view(loc.size(0), -1, 4),                   # loc preds
 78 |                 self.softmax(conf.view(-1, self.num_classes)),  # conf preds
 79 |             )
 80 |         else:
 81 |             output = (
 82 |                 loc.view(loc.size(0), -1, 4),
 83 |                 conf.view(conf.size(0), -1, self.num_classes),
 84 |             )
 85 |         return output
 86 |         
 87 | 
 88 | 
 89 | def add_extras(base, feature_layer, mbox, num_classes, version):
 90 |     extra_layers = []
 91 |     transform_layers = []
 92 |     loc_layers = [Retina_head(box * 4)]
 93 |     conf_layers = [Retina_head(box * num_classes)]
 94 | 
 95 |     for layer, in_channels, box in zip(feature_layer[0], feature_layer[1], mbox):
 96 |         if 'lite' in version:
 97 |             if layer == 'S':
 98 |                 extra_layers += [ _conv_dw(in_channels, 256, stride=2, padding=1, expand_ratio=1) ]
 99 |             elif layer == '':
100 |                 extra_layers += [ _conv_dw(in_channels, 256, stride=1, expand_ratio=1) ]
101 |             else:
102 |                 extra_layers += [ _conv_dw(256, 256, stride=1, padding=1, expand_ratio=1) ]
103 |                 transform_layers += [ _conv_pw(in_channels, 256) ]
104 |         else:    
105 |             if layer == 'S':
106 |                 extra_layers += [ _conv(in_channels, 256, stride=2, padding=1) ]
107 |             elif layer == '':
108 |                 extra_layers += [ _conv(in_channels, 256, stride=1) ]
109 |             else:
110 |                 extra_layers += [ _conv(256, 256, stride=1, padding=1) ]
111 |                 transform_layers += [ _conv_pw(in_channels, 256) ]
112 |     return base, (transform_layers, extra_layers), (loc_layers, conf_layers)
113 | 
114 | def Retina_head(self, out_planes):
115 |     layers = []
116 |     for _ in range(4):
117 |         layers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
118 |         layers.append(nn.ReLU(True))
119 |     layers.append(nn.Conv2d(256, out_planes, kernel_size=3, stride=1, padding=1))
120 |     return nn.Sequential(*layers)
121 | 
# Based on the implementation in https://github.com/tensorflow/models/blob/master/research/object_detection/models/feature_map_generators.py#L213
# When expand_ratio is 1 the implementation is nearly the same. Because the shape always changes, no shortcut is added, unlike in MobileNetV2.
def _conv_dw(inp, oup, stride=1, padding=0, expand_ratio=1):
    """Return a pointwise -> depthwise -> linear pointwise ``nn.Sequential``.

    The hidden width is ``oup * expand_ratio``.  ``stride`` and ``padding``
    apply to the 3x3 depthwise convolution only.  All convolutions are
    bias-free and followed by batch norm; the first two are also followed by
    ReLU6, the last one has no activation.
    """
    hidden = oup * expand_ratio
    stages = [
        # pointwise expansion
        nn.Conv2d(inp, hidden, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(hidden),
        nn.ReLU6(inplace=True),
        # depthwise filtering
        nn.Conv2d(hidden, hidden, kernel_size=3, stride=stride, padding=padding, groups=hidden, bias=False),
        nn.BatchNorm2d(hidden),
        nn.ReLU6(inplace=True),
        # linear pointwise projection
        nn.Conv2d(hidden, oup, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(oup),
    ]
    return nn.Sequential(*stages)
138 | 
139 | def _conv_pw(inp, oup, stride=1, padding=0):
140 |     return nn.Sequential(
141 |         nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
142 |         nn.BatchNorm2d(oup),
143 |     )
144 | 
145 | 
146 | def _conv(inp, oup, stride=1, padding=0):
147 |     return nn.Sequential(
148 |         nn.Conv2d(inp, oup, 3, stride, padding, bias=False),
149 |         nn.BatchNorm2d(oup),
150 |         nn.ReLU(inplace=True),
151 |     )
152 | 
153 | 
def build_retina(base, feature_layer, mbox, num_classes):
    """Assemble a RetinaNet detector.

    See "Focal Loss for Dense Object Detection",
    https://arxiv.org/pdf/1708.02002.pdf, for more details.

    Args:
        base: callable returning the backbone layers.
        feature_layer: feature-layer specification forwarded to ``add_extras``.
        mbox: per-source box counts forwarded to ``add_extras``.
        num_classes: number of classes.

    Returns:
        A ``Retina`` model built from the generated layers.
    """
    backbone = base()
    parts = add_extras(backbone, feature_layer, mbox, num_classes, version='retinanet')
    base_, extras_, head_ = parts
    return Retina(base_, extras_, head_, feature_layer, num_classes)
160 | 
def build_retina_lite(base, feature_layer, mbox, num_classes):
    """Assemble the lite (depthwise-separable) RetinaNet detector.

    See "Focal Loss for Dense Object Detection",
    https://arxiv.org/pdf/1708.02002.pdf, for more details.

    Args:
        base: callable returning the backbone layers.
        feature_layer: feature-layer specification forwarded to ``add_extras``.
        mbox: per-source box counts forwarded to ``add_extras``.
        num_classes: number of classes.

    Returns:
        A ``Retina`` model built from the generated layers.
    """
    base_, extras_, head_ = add_extras(base(), feature_layer, mbox, num_classes, version='retinanet_lite')
    # add_extras returns extras as a (transform_layers, extra_layers) pair,
    # which is the layout consumed by Retina in build_retina. The SSD
    # constructor wraps extras directly in nn.ModuleList and cannot take a
    # tuple of lists, so the model class here must be Retina, not SSD.
    return Retina(base_, extras_, head_, feature_layer, num_classes)


--------------------------------------------------------------------------------
/lib/modeling/ssds/ssd.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | 
  6 | import os
  7 | 
  8 | from lib.layers import *
  9 | 
 10 | 
 11 | class SSD(nn.Module):
 12 |     """Single Shot Multibox Architecture
 13 |     See: https://arxiv.org/pdf/1512.02325.pdf for more details.
 14 | 
 15 |     Args:
 16 |         phase: (string) Can be "eval" or "train" or "feature"
 17 |         base: base layers for input
 18 |         extras: extra layers that feed to multibox loc and conf layers
 19 |         head: "multibox head" consists of loc and conf conv layers
 20 |         feature_layer: the feature layers for head to loc and conf
 21 |         num_classes: num of classes 
 22 |     """
 23 | 
 24 |     def __init__(self, base, extras, head, feature_layer, num_classes):
 25 |         super(SSD, self).__init__()
 26 |         self.num_classes = num_classes
 27 |         # SSD network
 28 |         self.base = nn.ModuleList(base)
 29 |         self.norm = L2Norm(feature_layer[1][0], 20)
 30 |         self.extras = nn.ModuleList(extras)
 31 | 
 32 |         self.loc = nn.ModuleList(head[0])
 33 |         self.conf = nn.ModuleList(head[1])
 34 |         self.softmax = nn.Softmax(dim=-1)
 35 | 
 36 |         self.feature_layer = feature_layer[0]
 37 | 
 38 |     def forward(self, x, phase='eval'):
 39 |         """Applies network layers and ops on input image(s) x.
 40 | 
 41 |         Args:
 42 |             x: input image or batch of images. Shape: [batch,3,300,300].
 43 | 
 44 |         Return:
 45 |             Depending on phase:
 46 |             test:
 47 |                 Variable(tensor) of output class label predictions,
 48 |                 confidence score, and corresponding location predictions for
 49 |                 each object detected. Shape: [batch,topk,7]
 50 | 
 51 |             train:
 52 |                 list of concat outputs from:
 53 |                     1: confidence layers, Shape: [batch*num_priors,num_classes]
 54 |                     2: localization layers, Shape: [batch,num_priors*4]
 55 | 
 56 |             feature:
 57 |                 the features maps of the feature extractor
 58 |         """
 59 |         sources, loc, conf = [list() for _ in range(3)]
 60 | 
 61 |         # apply bases layers and cache source layer outputs
 62 |         for k in range(len(self.base)):
 63 |             x = self.base[k](x)
 64 |             if k in self.feature_layer:
 65 |                 if len(sources) == 0:
 66 |                     s = self.norm(x)
 67 |                     sources.append(s)
 68 |                 else:
 69 |                     sources.append(x)
 70 | 
 71 |         # apply extra layers and cache source layer outputs
 72 |         for k, v in enumerate(self.extras):
 73 |             # TODO:maybe donot needs the relu here
 74 |             x = F.relu(v(x), inplace=True)
 75 |             # TODO:lite is different in here, should be changed
 76 |             if k % 2 == 1:
 77 |                 sources.append(x)
 78 | 
 79 |         if phase == 'feature':
 80 |             return sources
 81 | 
 82 |         # apply multibox head to source layers
 83 |         for (x, l, c) in zip(sources, self.loc, self.conf):
 84 |             loc.append(l(x).permute(0, 2, 3, 1).contiguous())
 85 |             conf.append(c(x).permute(0, 2, 3, 1).contiguous())
 86 |         loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
 87 |         conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
 88 | 
 89 |         if phase == 'eval':
 90 |             output = (
 91 |                 loc.view(loc.size(0), -1, 4),  # loc preds
 92 |                 self.softmax(conf.view(-1, self.num_classes)),  # conf preds
 93 |             )
 94 |         else:
 95 |             output = (
 96 |                 loc.view(loc.size(0), -1, 4),
 97 |                 conf.view(conf.size(0), -1, self.num_classes),
 98 |             )
 99 |         return output
100 | 
101 | 
def add_extras(base, feature_layer, mbox, num_classes, version):
    """Create the extra layers and prediction heads for SSD.

    Args:
        base: backbone, returned unchanged.
        feature_layer: pair ``(markers, depths)``. A marker of ``'S'`` adds
            a stride-2 extra stage, ``''`` adds a stride-1 extra stage, and
            any other value adds no extra stage.
        mbox: number of boxes per location for each source.
        num_classes: number of classes.
        version: when it contains ``'lite'``, extra stages are built with
            ``_conv_dw``; otherwise with a 1x1 conv followed by a 3x3 conv.

    Returns:
        ``(base, extra_layers, (loc_layers, conf_layers))``.
    """
    use_lite = 'lite' in version
    extras, loc_heads, conf_heads = [], [], []
    channels = None

    for marker, depth, box in zip(feature_layer[0], feature_layer[1], mbox):
        if marker in ('S', ''):
            downsample = marker == 'S'
            if use_lite:
                if downsample:
                    extras.append(_conv_dw(channels, depth, stride=2, padding=1, expand_ratio=1))
                else:
                    extras.append(_conv_dw(channels, depth, stride=1, expand_ratio=1))
            else:
                half = int(depth / 2)
                squeeze = nn.Conv2d(channels, half, kernel_size=1)
                if downsample:
                    expand = nn.Conv2d(half, depth, kernel_size=3, stride=2, padding=1)
                else:
                    expand = nn.Conv2d(half, depth, kernel_size=3)
                extras.extend([squeeze, expand])
        # Every source, extra or not, defines the width seen by its heads
        # and by the next extra stage.
        channels = depth

        loc_heads.append(nn.Conv2d(channels, box * 4, kernel_size=3, padding=1))
        conf_heads.append(nn.Conv2d(channels, box * num_classes, kernel_size=3, padding=1))

    return base, extras, (loc_heads, conf_heads)
134 | 
135 | 
# based on the implementation in https://github.com/tensorflow/models/blob/master/research/object_detection/models/feature_map_generators.py#L213
# When expand_ratio is 1, the implementation is nearly the same. Since the output shape always changes, the shortcut connection used in MobileNetV2 is not added.
138 | def _conv_dw(inp, oup, stride=1, padding=0, expand_ratio=1):
139 |     return nn.Sequential(
140 |         # pw
141 |         nn.Conv2d(inp, oup * expand_ratio, 1, 1, 0, bias=False),
142 |         nn.BatchNorm2d(oup * expand_ratio),
143 |         nn.ReLU6(inplace=True),
144 |         # dw
145 |         nn.Conv2d(oup * expand_ratio, oup * expand_ratio, 3, stride, padding, groups=oup * expand_ratio, bias=False),
146 |         nn.BatchNorm2d(oup * expand_ratio),
147 |         nn.ReLU6(inplace=True),
148 |         # pw-linear
149 |         nn.Conv2d(oup * expand_ratio, oup, 1, 1, 0, bias=False),
150 |         nn.BatchNorm2d(oup),
151 |     )
152 | 
153 | 
154 | def _conv(inp, oup, stride=1, padding=0):
155 |     return nn.Sequential(
156 |         nn.Conv2d(inp, oup, 3, stride, padding, bias=False),
157 |         nn.BatchNorm2d(oup),
158 |         nn.ReLU(inplace=True),
159 |     )
160 | 
161 | 
def build_ssd(base, feature_layer, mbox, num_classes):
    """Assemble a standard SSD detector.

    See: https://arxiv.org/pdf/1512.02325.pdf for more details.

    Args:
        base: callable returning the backbone layers.
        feature_layer: feature-layer specification forwarded to ``add_extras``.
        mbox: per-source box counts forwarded to ``add_extras``.
        num_classes: number of classes.

    Returns:
        An ``SSD`` model built from the generated layers.
    """
    backbone = base()
    parts = add_extras(backbone, feature_layer, mbox, num_classes, version='ssd')
    base_, extras_, head_ = parts
    return SSD(base_, extras_, head_, feature_layer, num_classes)
168 | 
169 | 
def build_ssd_lite(base, feature_layer, mbox, num_classes):
    """Assemble the lite SSD detector intended for embedded systems.

    See: https://arxiv.org/pdf/1512.02325.pdf &
    https://arxiv.org/pdf/1801.04381.pdf for more details.

    Args:
        base: callable returning the backbone layers.
        feature_layer: feature-layer specification forwarded to ``add_extras``.
        mbox: per-source box counts forwarded to ``add_extras``.
        num_classes: number of classes.

    Returns:
        An ``SSD`` model whose extra layers were built with ``version='ssd_lite'``.
    """
    backbone = base()
    parts = add_extras(backbone, feature_layer, mbox, num_classes, version='ssd_lite')
    base_, extras_, head_ = parts
    return SSD(base_, extras_, head_, feature_layer, num_classes)
177 | 


--------------------------------------------------------------------------------
/lib/modeling/ssds/ssd_lite.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | 
  6 | import os
  7 | 
  8 | from lib.layers import *
  9 | 
 10 | class SSDLite(nn.Module):
 11 |     """Single Shot Multibox Architecture for embeded system
 12 |     See: https://arxiv.org/pdf/1512.02325.pdf & 
 13 |     https://arxiv.org/pdf/1801.04381.pdf for more details.
 14 | 
 15 |     Args:
 16 |         phase: (string) Can be "eval" or "train" or "feature"
 17 |         base: base layers for input
 18 |         extras: extra layers that feed to multibox loc and conf layers
 19 |         head: "multibox head" consists of loc and conf conv layers
 20 |         feature_layer: the feature layers for head to loc and conf
 21 |         num_classes: num of classes 
 22 |     """
 23 | 
 24 |     def __init__(self, base, extras, head, feature_layer, num_classes):
 25 |         super(SSDLite, self).__init__()
 26 |         self.num_classes = num_classes
 27 |         # SSD network
 28 |         self.base = nn.ModuleList(base)
 29 |         self.norm = L2Norm(feature_layer[1][0], 20)
 30 |         self.extras = nn.ModuleList(extras)
 31 | 
 32 |         self.loc = nn.ModuleList(head[0])
 33 |         self.conf = nn.ModuleList(head[1])
 34 |         self.softmax = nn.Softmax(dim=-1)
 35 | 
 36 |         self.feature_layer = feature_layer[0]
 37 |         
 38 | 
 39 |     def forward(self, x, phase='eval'):
 40 |         """Applies network layers and ops on input image(s) x.
 41 | 
 42 |         Args:
 43 |             x: input image or batch of images. Shape: [batch,3,300,300].
 44 | 
 45 |         Return:
 46 |             Depending on phase:
 47 |             test:
 48 |                 Variable(tensor) of output class label predictions,
 49 |                 confidence score, and corresponding location predictions for
 50 |                 each object detected. Shape: [batch,topk,7]
 51 | 
 52 |             train:
 53 |                 list of concat outputs from:
 54 |                     1: confidence layers, Shape: [batch*num_priors,num_classes]
 55 |                     2: localization layers, Shape: [batch,num_priors*4]
 56 | 
 57 |             feature:
 58 |                 the features maps of the feature extractor
 59 |         """
 60 |         sources = list()
 61 |         loc = list()
 62 |         conf = list()
 63 | 
 64 |         # apply bases layers and cache source layer outputs
 65 |         for k in range(len(self.base)):
 66 |             x = self.base[k](x)
 67 |             if k in self.feature_layer:
 68 |                 if len(sources) == 0:
 69 |                     s = self.norm(x)
 70 |                     sources.append(s)
 71 |                 else:
 72 |                     sources.append(x)
 73 | 
 74 |         # apply extra layers and cache source layer outputs
 75 |         for k, v in enumerate(self.extras):
 76 |             x = F.relu(v(x), inplace=True)
 77 |             sources.append(x)
 78 |             # if k % 2 == 1:
 79 |             #     sources.append(x)
 80 | 
 81 |         if phase == 'feature':
 82 |             return sources
 83 | 
 84 |         # apply multibox head to source layers
 85 |         for (x, l, c) in zip(sources, self.loc, self.conf):
 86 |             loc.append(l(x).permute(0, 2, 3, 1).contiguous())
 87 |             conf.append(c(x).permute(0, 2, 3, 1).contiguous())
 88 |         loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
 89 |         conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
 90 | 
 91 |         if phase == 'eval':
 92 |             output = (
 93 |                 loc.view(loc.size(0), -1, 4),                   # loc preds
 94 |                 self.softmax(conf.view(-1, self.num_classes)),  # conf preds
 95 |             )
 96 |         else:
 97 |             output = (
 98 |                 loc.view(loc.size(0), -1, 4),
 99 |                 conf.view(conf.size(0), -1, self.num_classes),
100 |             )
101 |         return output
102 | 
def add_extras(base, feature_layer, mbox, num_classes):
    """Create the depthwise extra layers and prediction heads for SSDLite.

    Args:
        base: backbone, returned unchanged.
        feature_layer: pair ``(markers, depths)``. A marker of ``'S'`` adds
            a stride-2 ``_conv_dw`` stage, ``''`` adds a stride-1 stage, and
            any other value adds no extra stage.
        mbox: number of boxes per location for each source.
        num_classes: number of classes.

    Returns:
        ``(base, extra_layers, (loc_layers, conf_layers))``.
    """
    extras, loc_heads, conf_heads = [], [], []
    channels = None

    for marker, depth, box in zip(feature_layer[0], feature_layer[1], mbox):
        if marker in ('S', ''):
            if marker == 'S':
                stage = _conv_dw(channels, depth, stride=2, padding=1, expand_ratio=1)
            else:
                stage = _conv_dw(channels, depth, stride=1, expand_ratio=1)
            extras.append(stage)
        # Every source defines the width seen by its heads and by the next
        # extra stage.
        channels = depth

        loc_heads.append(nn.Conv2d(channels, box * 4, kernel_size=3, padding=1))
        conf_heads.append(nn.Conv2d(channels, box * num_classes, kernel_size=3, padding=1))

    return base, extras, (loc_heads, conf_heads)
120 | 
# based on the implementation in https://github.com/tensorflow/models/blob/master/research/object_detection/models/feature_map_generators.py#L213
# When expand_ratio is 1, the implementation is nearly the same. Since the output shape always changes, the shortcut connection used in MobileNetV2 is not added.
123 | def _conv_dw(inp, oup, stride=1, padding=0, expand_ratio=1):
124 |     return nn.Sequential(
125 |         # pw
126 |         nn.Conv2d(inp, oup * expand_ratio, 1, 1, 0, bias=False),
127 |         nn.BatchNorm2d(oup * expand_ratio),
128 |         nn.ReLU6(inplace=True),
129 |         # dw
130 |         nn.Conv2d(oup * expand_ratio, oup * expand_ratio, 3, stride, padding, groups=oup * expand_ratio, bias=False),
131 |         nn.BatchNorm2d(oup * expand_ratio),
132 |         nn.ReLU6(inplace=True),
133 |         # pw-linear
134 |         nn.Conv2d(oup * expand_ratio, oup, 1, 1, 0, bias=False),
135 |         nn.BatchNorm2d(oup),
136 |     )
137 | 
def build_ssd_lite(base, feature_layer, mbox, num_classes):
    """Assemble an ``SSDLite`` detector.

    Args:
        base: callable returning the backbone layers.
        feature_layer: feature-layer specification forwarded to ``add_extras``.
        mbox: per-source box counts forwarded to ``add_extras``.
        num_classes: number of classes.

    Returns:
        An ``SSDLite`` model built from the generated layers.
    """
    backbone = base()
    parts = add_extras(backbone, feature_layer, mbox, num_classes)
    base_, extras_, head_ = parts
    return SSDLite(base_, extras_, head_, feature_layer, num_classes)


--------------------------------------------------------------------------------
/lib/ssds.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import numpy as np
 3 | 
 4 | import torch
 5 | from torch.autograd import Variable
 6 | import torch.backends.cudnn as cudnn
 7 | 
 8 | from lib.layers import *
 9 | from lib.utils.timer import Timer
10 | from lib.utils.data_augment import preproc
11 | from lib.modeling.model_builder import create_model
12 | from lib.utils.config_parse import cfg
13 | 
14 | 
class ObjectDetector:
    """Inference wrapper: builds the configured model, loads its checkpoint
    and turns an image into thresholded detections.

    Args:
        viz_arch: when ``True``, print the model architecture after it is built.

    Raises:
        AssertionError: if ``cfg.RESUME_CHECKPOINT`` is empty.
    """

    def __init__(self, viz_arch=False):
        self.cfg = cfg

        # Build model
        print('===> Building model')
        self.model, self.priorbox = create_model(cfg.MODEL)
        # Prior boxes are constants, so build them without autograd tracking
        # (replaces the removed ``volatile=True`` flag).
        with torch.no_grad():
            self.priors = self.priorbox.forward()

        # Print the model architecture and parameters
        if viz_arch is True:
            print('Model architectures:\n{}\n'.format(self.model))

        # Utilize GPUs for computation
        self.use_gpu = torch.cuda.is_available()
        # 'cuda' is the valid device type; 'gpu' is rejected by torch.device.
        self.device = torch.device('cuda') if self.use_gpu else torch.device('cpu')
        self.half = False
        if self.use_gpu:
            print('Utilize GPUs for computation')
            print('Number of GPU available', torch.cuda.device_count())
            self.model.cuda()
            # Tensor.cuda() returns a copy; the result must be kept.
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
            # self.model = torch.nn.DataParallel(self.model).module
            # Utilize half precision
            self.half = cfg.MODEL.HALF_PRECISION
            if self.half:
                self.model = self.model.half()
                self.priors = self.priors.half()

        # Build preprocessor and detector
        self.preprocessor = preproc(cfg.MODEL.IMAGE_SIZE, cfg.DATASET.PIXEL_MEANS, -2)
        self.detector = Detect(cfg.POST_PROCESS, self.priors)

        # Load weight:
        if cfg.RESUME_CHECKPOINT == '':
            # The exception has to be raised, not just constructed.
            raise AssertionError('RESUME_CHECKPOINT can not be empty')
        print('=> loading checkpoint {:s}'.format(cfg.RESUME_CHECKPOINT))
        checkpoint = torch.load(cfg.RESUME_CHECKPOINT, map_location=self.device)
        self.model.load_state_dict(checkpoint)
        # test only
        self.model.eval()

    def predict(self, img, threshold=0.6):
        """Detect objects in a single image.

        Args:
            img: image array whose shape is ``(height, width, 3)``.
            threshold: minimum score for a detection to be returned.

        Returns:
            ``(labels, scores, coords)`` lists of equal length. ``labels``
            holds ``class_index - 1``, ``scores`` holds the detection
            scores and ``coords`` holds the four box values multiplied by
            ``(width, height, width, height)``.
        """
        assert img.shape[2] == 3
        # Flat (w, h, w, h) so it multiplies a 4-element box element-wise.
        scale = torch.Tensor(list(img.shape[1::-1]) * 2)

        x = self.preprocessor(img)[0].unsqueeze(0).to(self.device)
        if self.half:
            # The model weights are half precision, so the input must match.
            x = x.half()

        # forward (inference only, no autograd bookkeeping needed)
        with torch.no_grad():
            out = self.model(x)  # forward pass

            print('before nms: ', out[0].size())
            print(out[1].size())
            detections = self.detector.forward(out)
        print('detections: ', detections)

        # Keep the scale on the same device/dtype as the detections.
        scale = scale.to(device=detections.device, dtype=detections.dtype)

        # output
        labels, scores, coords = [list() for _ in range(3)]
        batch = 0
        max_dets = detections.size(2)
        for classes in range(detections.size(1)):
            num = 0
            # Bound the scan: every slot of a class may exceed the threshold.
            while num < max_dets and detections[batch, classes, num, 0] >= threshold:
                scores.append(detections[batch, classes, num, 0])
                labels.append(classes - 1)
                coords.append(detections[batch, classes, num, 1:] * scale)
                num += 1
        return labels, scores, coords
86 | 


--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/__init__.py


--------------------------------------------------------------------------------
/lib/utils/build/temp.linux-x86_64-3.6/nms/cpu_nms.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/build/temp.linux-x86_64-3.6/nms/cpu_nms.o


--------------------------------------------------------------------------------
/lib/utils/build/temp.linux-x86_64-3.6/nms/gpu_nms.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/build/temp.linux-x86_64-3.6/nms/gpu_nms.o


--------------------------------------------------------------------------------
/lib/utils/build/temp.linux-x86_64-3.6/nms/nms_kernel.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/build/temp.linux-x86_64-3.6/nms/nms_kernel.o


--------------------------------------------------------------------------------
/lib/utils/build/temp.linux-x86_64-3.6/pycocotools/_mask.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/build/temp.linux-x86_64-3.6/pycocotools/_mask.o


--------------------------------------------------------------------------------
/lib/utils/build/temp.linux-x86_64-3.6/pycocotools/maskApi.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/build/temp.linux-x86_64-3.6/pycocotools/maskApi.o


--------------------------------------------------------------------------------
/lib/utils/data_augment_test.py:
--------------------------------------------------------------------------------
 1 | """Data augmentation functionality. Passed as callable transformations to
 2 | Dataset classes.
 3 | 
 4 | The data augmentation procedures were interpreted from @weiliu89's SSD paper
 5 | http://arxiv.org/abs/1512.02325
 6 | 
 7 | Ellis Brown, Max deGroot
 8 | """
 9 | 
10 | import cv2
11 | import numpy as np
12 | from data_augment import draw_bbox,_crop,_distort,_elastic,_expand,_mirror
13 | 
if __name__ == '__main__':
    # Visual smoke test: run each augmentation step in sequence and show the
    # image with its boxes after every step.
    def _show(title, img, bboxes):
        """Draw the boxes on the image and display it in a named window."""
        cv2.imshow(title, draw_bbox(img, bboxes))

    image = cv2.imread('./experiments/2011_001100.jpg')
    boxes = np.array([np.array([124, 150, 322, 351])]) # ymin, xmin, ymax, xmax
    labels = np.array([[1]])
    p = 1

    _show('input_image', image, boxes)

    image_t, boxes, labels = _crop(image, boxes, labels)
    _show('crop_image', image_t, boxes)

    image_t = _distort(image_t)
    _show('distort_image', image_t, boxes)

    image_t = _elastic(image_t, p)
    _show('elastic_image', image_t, boxes)

    image_t, boxes = _expand(image_t, boxes, (103.94, 116.78, 123.68), p)
    _show('expand_image', image_t, boxes)

    image_t, boxes = _mirror(image_t, boxes)
    _show('mirror_image', image_t, boxes)

    # Block until a key is pressed so the windows stay open.
    cv2.waitKey(0)


--------------------------------------------------------------------------------
/lib/utils/fp16_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class tofp16(nn.Module):
 6 |     def __init__(self):
 7 |         super(tofp16, self).__init__()
 8 | 
 9 |     def forward(self, input):
10 |         return input.half()
11 | 
12 | 
def copy_in_params(net, params):
    """Copy the data of ``params`` into the parameters of ``net``, by position.

    Args:
        net: module whose parameters are overwritten in place.
        params: sequence of tensors/parameters; the i-th entry is copied
            into the i-th parameter of ``net``.

    Raises:
        IndexError: if ``params`` has more entries than ``net`` has parameters.
    """
    targets = list(net.parameters())
    for position, source in enumerate(params):
        targets[position].data.copy_(source.data)
17 | 
18 | 
def set_grad(params, params_with_grad):
    """Copy gradients from ``params_with_grad`` onto ``params``, pairwise.

    A parameter that has no gradient yet first receives a freshly allocated
    gradient buffer of its own size; the source gradient values are then
    copied into it.

    Args:
        params: parameters that receive the gradients.
        params_with_grad: parameters whose ``.grad`` is the source.
    """
    for target, source in zip(params, params_with_grad):
        if target.grad is None:
            shape = target.data.size()
            buffer = target.data.new().resize_(*shape)
            target.grad = torch.nn.Parameter(buffer)
        target.grad.data.copy_(source.grad.data)
25 | 
26 | 
def BN_convert_float(module):
    """Convert every batch-norm layer inside ``module`` back to float.

    The module itself and all of its descendants are visited; only
    instances of ``torch.nn.modules.batchnorm._BatchNorm`` are converted,
    so the cast is guarded by module type.

    Args:
        module: the network to process (modified in place).

    Returns:
        The same ``module`` object.
    """
    batchnorm_type = torch.nn.modules.batchnorm._BatchNorm
    for submodule in module.modules():
        if isinstance(submodule, batchnorm_type):
            submodule.float()
    return module
40 | 
41 | 
def network_to_half(network):
    """Wrap ``network`` for half-precision execution.

    The network is converted with ``.half()``, its batch-norm layers are
    converted back to float by ``BN_convert_float``, and a ``tofp16`` layer
    is placed in front so the input is cast to half.

    Returns:
        nn.Sequential of the input cast followed by the converted network.
    """
    input_cast = tofp16()
    converted = BN_convert_float(network.half())
    return nn.Sequential(input_cast, converted)
44 | 


--------------------------------------------------------------------------------
/lib/utils/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.so
2 | 


--------------------------------------------------------------------------------
/lib/utils/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/nms/__init__.py


--------------------------------------------------------------------------------
/lib/utils/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/nms/_ext/__init__.py


--------------------------------------------------------------------------------
/lib/utils/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._nms import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/utils/nms/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | #this_file = os.path.dirname(__file__)
 7 | 
# Build configuration for the '_ext.nms' extension. These lists stay empty
# unless CUDA is available.
sources = []
headers = []
defines = []
with_cuda = False

# Add the CUDA wrapper source/header and the WITH_CUDA macro only when a CUDA
# device is visible to torch at build time.
if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/nms_cuda.c']
    headers += ['src/nms_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

# Absolute path of the directory containing this script; the pre-compiled
# kernel object is located relative to it.
this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
# NOTE(review): the kernel object is listed even when CUDA is unavailable and
# sources/headers are empty; confirm a CPU-only build is not expected to work.
extra_objects = ['src/nms_cuda_kernel.cu.o']
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
print(extra_objects)

# Extension description; created at import time, built only when run as a script.
ffi = create_extension(
    '_ext.nms',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()
38 | 


--------------------------------------------------------------------------------
/lib/utils/nms/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling stnm kernels by nvcc..."
 7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/utils/nms/nms_gpu.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import torch
 3 | import numpy as np
 4 | from ._ext import nms
 5 | import pdb
 6 | 
 7 | def nms_gpu(dets, thresh):
 8 | 	keep = dets.new(dets.size(0), 1).zero_().int()
 9 | 	num_out = dets.new(1).zero_().int()
10 | 	nms.nms_cuda(keep, dets, num_out, thresh)
11 | 	keep = keep[:num_out[0]]
12 | 	return keep
13 | 


--------------------------------------------------------------------------------
/lib/utils/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include "gpu_nms.hpp"
  9 | #include <vector>
 10 | #include <iostream>
 11 | 
// Evaluate a CUDA runtime call and, if it did not return cudaSuccess, print
// the error string to std::cout. Execution continues after the message; the
// macro neither aborts nor returns an error.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division of m by n, plus one when there is a positive remainder.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in an unsigned long long, so one mask word holds one bit
// per thread of a block.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
 23 | 
 24 | __device__ inline float devIoU(float const * const a, float const * const b) {
 25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 28 |   float interS = width * height;
 29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 31 |   return interS / (Sa + Sb - interS);
 32 | }
 33 | 
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 36 |   const int row_start = blockIdx.y;
 37 |   const int col_start = blockIdx.x;
 38 | 
 39 |   // if (row_start > col_start) return;
 40 | 
 41 |   const int row_size =
 42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 43 |   const int col_size =
 44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 45 | 
 46 |   __shared__ float block_boxes[threadsPerBlock * 5];
 47 |   if (threadIdx.x < col_size) {
 48 |     block_boxes[threadIdx.x * 5 + 0] =
 49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 50 |     block_boxes[threadIdx.x * 5 + 1] =
 51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 52 |     block_boxes[threadIdx.x * 5 + 2] =
 53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 54 |     block_boxes[threadIdx.x * 5 + 3] =
 55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 56 |     block_boxes[threadIdx.x * 5 + 4] =
 57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 58 |   }
 59 |   __syncthreads();
 60 | 
 61 |   if (threadIdx.x < row_size) {
 62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 64 |     int i = 0;
 65 |     unsigned long long t = 0;
 66 |     int start = 0;
 67 |     if (row_start == col_start) {
 68 |       start = threadIdx.x + 1;
 69 |     }
 70 |     for (i = start; i < col_size; i++) {
 71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 72 |         t |= 1ULL << i;
 73 |       }
 74 |     }
 75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 77 |   }
 78 | }
 79 | 
 80 | void _set_device(int device_id) {
 81 |   int current_device;
 82 |   CUDA_CHECK(cudaGetDevice(&current_device));
 83 |   if (current_device == device_id) {
 84 |     return;
 85 |   }
 86 |   // The call to cudaSetDevice must come before any calls to Get, which
 87 |   // may perform initialization using the GPU.
 88 |   CUDA_CHECK(cudaSetDevice(device_id));
 89 | }
 90 | 
 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 93 |   _set_device(device_id);
 94 | 
 95 |   float* boxes_dev = NULL;
 96 |   unsigned long long* mask_dev = NULL;
 97 | 
 98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 


--------------------------------------------------------------------------------
/lib/utils/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | import torch
 8 | from lib.utils.nms.nms_gpu import nms_gpu
 9 | 
def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression on ``dets`` through the GPU backend.

    Returns an empty list when ``dets`` has no rows; otherwise returns
    whatever ``nms_gpu(dets, thresh)`` returns. ``force_cpu`` is accepted
    for signature compatibility but is not consulted: every non-empty input
    is sent to ``nms_gpu``.
    """
    num_dets = dets.shape[0]
    if num_dets != 0:
        # PyTorch path; the earlier numpy-based variant called
        # gpu_nms(dets, thresh, device_id=cfg.GPU_ID) here instead.
        return nms_gpu(dets, thresh)
    return []
18 | 


--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <stdio.h>
 3 | #include "nms_cuda_kernel.h"
 4 | 
 5 | // this symbol will be resolved automatically from PyTorch libs
 6 | extern THCState *state;
 7 | 
 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
 9 | 		     THCudaIntTensor *num_out, float nms_overlap_thresh) {
10 | 
11 | 	nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 
12 | 		         THCudaIntTensor_data(state, num_out), 
13 |       	                 THCudaTensor_data(state, boxes_host), 
14 | 		         boxes_host->size[0], 
15 | 		         boxes_host->size[1],
16 | 		         nms_overlap_thresh);
17 | 
18 | 	return 1;
19 | }
20 | 


--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
2 | //             THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);
3 | 
// Runs NMS over the boxes in `boxes_host` using `nms_overlap_thresh`; the
// results are written into `keep_out` and `num_out`. The implementation in
// nms_cuda.c forwards to nms_cuda_compute() and always returns 1.
int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
             THCudaIntTensor *num_out, float nms_overlap_thresh);
6 | 


--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include <stdbool.h>
  9 | #include <stdio.h>
 10 | #include <vector>
 11 | #include <iostream>
 12 | #include "nms_cuda_kernel.h"
 13 | 
 14 | #define CUDA_WARN(XXX) \
 15 |     do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \
 16 |         cudaGetErrorString(XXX) << ", at line " << __LINE__ \
 17 | << std::endl; cudaDeviceSynchronize(); } while (0)
 18 | 
// Evaluates a CUDA runtime call once and prints its error string to stdout if
// it failed. Execution continues afterwards; the macro does not abort.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Ceiling division for positive integers: how many n-sized chunks are needed
// to hold m items. Note that both macro arguments are evaluated twice.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One thread per bit of an unsigned long long, so the overlap results a thread
// computes against one column tile fit in a single mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
 30 | 
 31 | __device__ inline float devIoU(float const * const a, float const * const b) {
 32 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 33 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 34 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 35 |   float interS = width * height;
 36 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 37 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 38 |   return interS / (Sa + Sb - interS);
 39 | }
 40 | 
 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
 42 |                            float *dev_boxes, unsigned long long *dev_mask) {
 43 |   const int row_start = blockIdx.y;
 44 |   const int col_start = blockIdx.x;
 45 | 
 46 |   // if (row_start > col_start) return;
 47 | 
 48 |   const int row_size =
 49 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 50 |   const int col_size =
 51 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 52 | 
 53 |   __shared__ float block_boxes[threadsPerBlock * 5];
 54 |   if (threadIdx.x < col_size) {
 55 |     block_boxes[threadIdx.x * 5 + 0] =
 56 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 57 |     block_boxes[threadIdx.x * 5 + 1] =
 58 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 59 |     block_boxes[threadIdx.x * 5 + 2] =
 60 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 61 |     block_boxes[threadIdx.x * 5 + 3] =
 62 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 63 |     block_boxes[threadIdx.x * 5 + 4] =
 64 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 65 |   }
 66 |   __syncthreads();
 67 | 
 68 |   if (threadIdx.x < row_size) {
 69 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 70 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 71 |     int i = 0;
 72 |     unsigned long long t = 0;
 73 |     int start = 0;
 74 |     if (row_start == col_start) {
 75 |       start = threadIdx.x + 1;
 76 |     }
 77 |     for (i = start; i < col_size; i++) {
 78 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 79 |         t |= 1ULL << i;
 80 |       }
 81 |     }
 82 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 83 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 84 |   }
 85 | }
 86 | 
 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
 88 |           int boxes_dim, float nms_overlap_thresh) {
 89 | 
 90 |   float* boxes_dev = NULL;
 91 |   unsigned long long* mask_dev = NULL;
 92 | 
 93 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 94 | 
 95 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
 96 |                         boxes_num * boxes_dim * sizeof(float)));
 97 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
 98 |                         boxes_host,
 99 |                         boxes_num * boxes_dim * sizeof(float),
100 |                         cudaMemcpyHostToDevice));
101 | 
102 |   CUDA_CHECK(cudaMalloc(&mask_dev,
103 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
104 | 
105 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
106 |               DIVUP(boxes_num, threadsPerBlock));
107 |   dim3 threads(threadsPerBlock);
108 | 
109 |   // printf("i am at line %d\n", boxes_num);
110 |   // printf("i am at line %d\n", boxes_dim);  
111 | 
112 |   nms_kernel<<<blocks, threads>>>(boxes_num,
113 |                                   nms_overlap_thresh,
114 |                                   boxes_dev,
115 |                                   mask_dev);
116 | 
117 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
118 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
119 |                         mask_dev,
120 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
121 |                         cudaMemcpyDeviceToHost));
122 | 
123 |   std::vector<unsigned long long> remv(col_blocks);
124 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
125 | 
126 |   // we need to create a memory for keep_out on cpu
127 |   // otherwise, the following code cannot run
128 | 
129 |   int* keep_out_cpu = new int[boxes_num];
130 | 
131 |   int num_to_keep = 0;
132 |   for (int i = 0; i < boxes_num; i++) {
133 |     int nblock = i / threadsPerBlock;
134 |     int inblock = i % threadsPerBlock;
135 | 
136 |     if (!(remv[nblock] & (1ULL << inblock))) {
137 |       // orignal: keep_out[num_to_keep++] = i;
138 |       keep_out_cpu[num_to_keep++] = i;
139 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
140 |       for (int j = nblock; j < col_blocks; j++) {
141 |         remv[j] |= p[j];
142 |       }
143 |     }
144 |   }
145 | 
146 |   // copy keep_out_cpu to keep_out on gpu
147 |   CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice));  
148 | 
149 |   // *num_out = num_to_keep;
150 | 
151 |   // original: *num_out = num_to_keep;
152 |   // copy num_to_keep to num_out on gpu
153 | 
154 |   CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice));  
155 | 
156 |   // release cuda memory
157 |   CUDA_CHECK(cudaFree(boxes_dev));
158 |   CUDA_CHECK(cudaFree(mask_dev));
159 |   // release cpu memory
160 |   delete []keep_out_cpu;
161 | }
162 | 


--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda_kernel.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/nms/src/nms_cuda_kernel.cu.o


--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda_kernel.h:
--------------------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif

// NMS entry point defined in nms_cuda_kernel.cu, declared with C linkage so
// it can be called from nms_cuda.c. The implementation fills `keep_out` and
// `num_out` with cudaMemcpy(..., cudaMemcpyHostToDevice), so both are
// expected to be device pointers. `boxes_host` holds boxes_num * boxes_dim
// floats.
void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh);

#ifdef __cplusplus
}
#endif
11 | 


--------------------------------------------------------------------------------
/lib/utils/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/lib/utils/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/lib/utils/pycocotools/mask.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'tsungyi'
  2 | 
  3 | import lib.utils.pycocotools._mask as _mask
  4 | 
  5 | # Interface for manipulating masks stored in RLE format.
  6 | #
  7 | # RLE is a simple yet efficient format for storing binary masks. RLE
  8 | # first divides a vector (or vectorized image) into a series of piecewise
  9 | # constant regions and then for each piece simply stores the length of
 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
 12 | # (note that the odd counts are always the numbers of zeros). Instead of
 13 | # storing the counts directly, additional compression is achieved with a
 14 | # variable bitrate representation based on a common scheme called LEB128.
 15 | #
 16 | # Compression is greatest given large piecewise constant regions.
 17 | # Specifically, the size of the RLE is proportional to the number of
 18 | # *boundaries* in M (or for an image the number of boundaries in the y
 19 | # direction). Assuming fairly simple shapes, the RLE representation is
 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
 21 | # is substantially lower, especially for large simple objects (large n).
 22 | #
 23 | # Many common operations on masks can be computed directly using the RLE
 24 | # (without need for decoding). This includes computations such as area,
 25 | # union, intersection, etc. All of these operations are linear in the
 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
 27 | # of the object. Computing these operations on the original mask is O(n).
 28 | # Thus, using the RLE can result in substantial computational savings.
 29 | #
 30 | # The following API functions are defined:
 31 | #  encode         - Encode binary masks using RLE.
 32 | #  decode         - Decode binary masks encoded via RLE.
 33 | #  merge          - Compute union or intersection of encoded masks.
 34 | #  iou            - Compute intersection over union between masks.
 35 | #  area           - Compute area of encoded masks.
 36 | #  toBbox         - Get bounding boxes surrounding encoded masks.
 37 | #  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
 38 | #
 39 | # Usage:
 40 | #  Rs     = encode( masks )
 41 | #  masks  = decode( Rs )
 42 | #  R      = merge( Rs, intersect=false )
 43 | #  o      = iou( dt, gt, iscrowd )
 44 | #  a      = area( Rs )
 45 | #  bbs    = toBbox( Rs )
 46 | #  Rs     = frPyObjects( [pyObjects], h, w )
 47 | #
 48 | # In the API the following formats are used:
 49 | #  Rs      - [dict] Run-length encoding of binary masks
 50 | #  R       - dict Run-length encoding of binary mask
 51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
 52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
 53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
 54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
 55 | #  dt,gt   - May be either bounding boxes or encoded masks
 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
 57 | #
 58 | # Finally, a note about the intersection over union (iou) computation.
 59 | # The standard iou of a ground truth (gt) and detected (dt) object is
 60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
 61 | # For "crowd" regions, we use a modified criterion. If a gt object is
 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
 65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
 66 | # For crowd gt regions we use the modified criterion above for the iou.
 67 | #
 68 | # To compile run "python setup.py build_ext --inplace"
 69 | # Please do not contact us for help with compiling.
 70 | #
 71 | # Microsoft COCO Toolbox.      version 2.0
 72 | # Data, paper, and tutorials available at:  http://mscoco.org/
 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 74 | # Licensed under the Simplified BSD License [see coco/license.txt]
 75 | 
# Re-exported unchanged from the compiled ``_mask`` extension module.
iou         = _mask.iou
merge       = _mask.merge
frPyObjects = _mask.frPyObjects
 79 | 
 80 | def encode(bimask):
 81 |     if len(bimask.shape) == 3:
 82 |         return _mask.encode(bimask)
 83 |     elif len(bimask.shape) == 2:
 84 |         h, w = bimask.shape
 85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
 86 | 
 87 | def decode(rleObjs):
 88 |     if type(rleObjs) == list:
 89 |         return _mask.decode(rleObjs)
 90 |     else:
 91 |         return _mask.decode([rleObjs])[:,:,0]
 92 | 
 93 | def area(rleObjs):
 94 |     if type(rleObjs) == list:
 95 |         return _mask.area(rleObjs)
 96 |     else:
 97 |         return _mask.area([rleObjs])[0]
 98 | 
 99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]


--------------------------------------------------------------------------------
/lib/utils/timer.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | import time
 9 | 
10 | 
class Timer(object):
    """Accumulating wall-clock stopwatch driven by tic()/toc() pairs."""

    def __init__(self):
        self.total_time = self.start_time = self.diff = self.average_time = 0.
        self.calls = 0

    def tic(self):
        """Record the current wall-clock time as the start of an interval."""
        # time.time() is used on purpose: time.clock does not normalize for
        # multithreading.
        self.start_time = time.time()

    def toc(self, average=True):
        """Close the interval opened by tic() and update the statistics.

        Returns the running average over all intervals when ``average`` is
        true, otherwise the duration of the interval just closed.
        """
        elapsed = time.time() - self.start_time
        self.diff = elapsed
        self.total_time += elapsed
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff

    def clear(self):
        """Reset every counter to its initial value."""
        self.total_time = self.start_time = self.diff = self.average_time = 0.
        self.calls = 0
41 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | matplotlib
3 | tensorboardX
4 | torchvision


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/setup.py


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import sys
 4 | import os
 5 | import argparse
 6 | import numpy as np
 7 | if '/data/software/opencv-3.4.0/lib/python2.7/dist-packages' in sys.path:
 8 |     sys.path.remove('/data/software/opencv-3.4.0/lib/python2.7/dist-packages')
 9 | if '/data/software/opencv-3.3.1/lib/python2.7/dist-packages' in sys.path:
10 |     sys.path.remove('/data/software/opencv-3.3.1/lib/python2.7/dist-packages')
11 | import cv2
12 | from datetime import datetime
13 | 
14 | import torch
15 | import torch.nn as nn
16 | import torch.backends.cudnn as cudnn
17 | from torch.autograd import Variable
18 | 
19 | from lib.utils.config_parse import cfg_from_file
20 | from lib.ssds_train import test_model
21 | 
def parse_args():
    """Parse the command-line arguments of the test script.

    Recognises ``--cfg`` (stored as ``config_file``, default ``None``).
    When the script is started without any arguments, the help text is
    printed and the process exits with status 1.

    Returns the populated ``argparse.Namespace``.
    """
    # This is the evaluation entry point, so the help text says "Test"
    # (it previously carried train.py's "Train" description).
    parser = argparse.ArgumentParser(description='Test a ssds.pytorch network')
    parser.add_argument('--cfg', dest='config_file',
            help='optional config file', default=None, type=str)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()
36 | 
def test():
    """Script entry point: load the optional config file, then run test_model()."""
    config_path = parse_args().config_file
    if config_path is not None:
        cfg_from_file(config_path)
    test_model()


if __name__ == '__main__':
    test()
45 | 


--------------------------------------------------------------------------------
/time_benchmark.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # test gpu speed
 4 | for file in ./experiments/cfgs/*.yml
 5 | do
 6 |   echo $file
 7 |   python demo.py --cfg=$file --demo=./experiments/person.jpg -t=time
 8 | done
 9 | 
10 | # test cpu speed
11 | # export CUDA_VISIBLE_DEVICES=''
12 | # for file in ./experiments/cfgs/*.yml
13 | # do
14 | #   echo $file
15 | #   python demo.py --cfg=$file --demo=./experiments/person.jpg -t=time
16 | # done


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import sys
 4 | import os
 5 | import argparse
 6 | import numpy as np
 7 | import cv2
 8 | from datetime import datetime
 9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torch.backends.cudnn as cudnn
13 | 
14 | from lib.utils.config_parse import cfg_from_file
15 | from lib.ssds_train import train_model
16 | 
17 | 
def parse_args():
    """Parse the command-line arguments of the training script.

    Recognises ``--cfg`` (stored as ``config_file``, default ``None``).
    When the script is started without any arguments, the help text is
    printed and the process exits with status 1.

    Returns the populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description='Train a ssds.pytorch network')
    parser.add_argument(
        '--cfg',
        type=str,
        default=None,
        dest='config_file',
        help='optional config file',
    )

    # No CLI arguments at all: show usage and bail out.
    started_without_args = len(sys.argv) == 1
    if started_without_args:
        parser.print_help()
        sys.exit(1)

    return parser.parse_args()
32 | 
33 | 
def train():
    """Script entry point: load the optional config file, then run train_model()."""
    config_path = parse_args().config_file
    if config_path is not None:
        cfg_from_file(config_path)
    # Debugging aid (disabled): os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
    train_model()


if __name__ == '__main__':
    train()
44 | 


--------------------------------------------------------------------------------