├── .gitattributes ├── .gitignore ├── .idea ├── vcs.xml └── workspace.xml ├── README.md ├── demo.py ├── demo_simpler.py ├── experiments ├── 1.mp4 ├── cfgs │ ├── fssd_lite_mobilenetv1_train_coco.yml │ ├── fssd_lite_mobilenetv1_train_voc.yml │ ├── fssd_lite_mobilenetv2_train_coco.yml │ ├── fssd_lite_mobilenetv2_train_voc.yml │ ├── fssd_resnet50_train_coco.yml │ ├── fssd_vgg16_train_coco.yml │ ├── fssd_vgg16_train_voc.yml │ ├── rfb_lite_mobilenetv1_train_coco.yml │ ├── rfb_lite_mobilenetv1_train_voc.yml │ ├── rfb_lite_mobilenetv2_train_coco.yml │ ├── rfb_lite_mobilenetv2_train_voc.yml │ ├── rfb_resnet50_train_coco.yml │ ├── rfb_resnet50_train_voc.yml │ ├── rfb_vgg16_train_coco.yml │ ├── rfb_vgg16_train_voc.yml │ ├── ssd_lite_mobilenetv1_train_coco.yml │ ├── ssd_lite_mobilenetv1_train_voc.yml │ ├── ssd_lite_mobilenetv2_train_coco.yml │ ├── ssd_lite_mobilenetv2_train_voc.yml │ ├── ssd_resnet50_train_coco.yml │ ├── ssd_resnet50_train_voc.yml │ ├── ssd_vgg16_train_coco.yml │ ├── ssd_vgg16_train_voc.yml │ ├── tests │ │ ├── fssd_darknet19_coco.yml │ │ ├── fssd_darknet53_coco.yml │ │ ├── fssd_darknet53_voc.yml │ │ ├── fssd_resnet50_train_voc.yml │ │ ├── rfb_darknet19_coco.yml │ │ ├── rfb_darknet53_coco.yml │ │ ├── rfb_darknet53_voc.yml │ │ ├── ssd_darknet19_coco.yml │ │ ├── ssd_darknet53_coco.yml │ │ ├── ssd_darknet53_voc.yml │ │ ├── ssd_resnet101_train_coco.yml │ │ ├── test.yml │ │ ├── yolo_v2_mobilenetv1_coco.yml │ │ ├── yolo_v2_mobilenetv1_voc.yml │ │ ├── yolo_v2_mobilenetv2_coco.yml │ │ ├── yolo_v2_mobilenetv2_voc.yml │ │ ├── yolo_v3_darknet53_coco.yml │ │ ├── yolo_v3_darknet53_voc.yml │ │ ├── yolo_v3_mobilenetv1_coco.yml │ │ ├── yolo_v3_mobilenetv1_voc.yml │ │ ├── yolo_v3_mobilenetv2_coco.yml │ │ └── yolo_v3_mobilenetv2_voc.yml │ ├── yolo_v2_darknet19_coco.yml │ ├── yolo_v2_darknet19_voc.yml │ ├── yolo_v2_mobilenetv1_coco.yml │ ├── yolo_v2_mobilenetv1_voc.yml │ ├── yolo_v2_mobilenetv2_coco.yml │ ├── yolo_v2_mobilenetv2_voc.yml │ ├── yolo_v3_darknet53_coco.yml │ ├── 
yolo_v3_darknet53_voc.yml │ ├── yolo_v3_mobilenetv1_coco.yml │ ├── yolo_v3_mobilenetv1_voc.yml │ ├── yolo_v3_mobilenetv2_coco.yml │ └── yolo_v3_mobilenetv2_voc.yml └── person.jpg ├── lib ├── __init__.py ├── dataset │ ├── __init__.py │ ├── coco.py │ ├── dataset_factory.py │ ├── voc.py │ └── voc_eval.py ├── layers │ ├── __init__.py │ ├── functions │ │ ├── __init__.py │ │ ├── detection.py │ │ └── prior_box.py │ └── modules │ │ ├── __init__.py │ │ ├── focal_loss.py │ │ ├── l2norm.py │ │ └── multibox_loss.py ├── modeling │ ├── __init__.py │ ├── model_builder.py │ ├── nets │ │ ├── __init__.py │ │ ├── darknet.py │ │ ├── mobilenet.py │ │ ├── resnet.py │ │ └── vgg.py │ └── ssds │ │ ├── __init__.py │ │ ├── fssd.py │ │ ├── fssd_lite.py │ │ ├── retina.py │ │ ├── rfb.py │ │ ├── rfb_lite.py │ │ ├── ssd.py │ │ ├── ssd_lite.py │ │ └── yolo.py ├── ssds.py ├── ssds_train.py └── utils │ ├── __init__.py │ ├── box_utils.py │ ├── build │ └── temp.linux-x86_64-3.6 │ │ ├── nms │ │ ├── cpu_nms.o │ │ ├── gpu_nms.o │ │ └── nms_kernel.o │ │ └── pycocotools │ │ ├── _mask.o │ │ └── maskApi.o │ ├── config_parse.py │ ├── dark2pth.py │ ├── data_augment.py │ ├── data_augment_test.py │ ├── eval_utils.py │ ├── fp16_utils.py │ ├── nms │ ├── .gitignore │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── nms │ │ │ └── __init__.py │ ├── build.py │ ├── make.sh │ ├── nms_gpu.py │ ├── nms_kernel.cu │ ├── nms_wrapper.py │ └── src │ │ ├── nms_cuda.c │ │ ├── nms_cuda.h │ │ ├── nms_cuda_kernel.cu │ │ ├── nms_cuda_kernel.cu.o │ │ └── nms_cuda_kernel.h │ ├── pycocotools │ ├── __init__.py │ ├── _mask.c │ ├── _mask.cpython-36m-x86_64-linux-gnu.so │ ├── _mask.pyx │ ├── coco.py │ ├── cocoeval.py │ └── mask.py │ ├── timer.py │ └── visualize_utils.py ├── requirements.txt ├── setup.py ├── test.py ├── time_benchmark.sh └── train.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | weights/ 3 | data/ 4 | data 5 | experiments/models/ 6 | run.sh 7 | __pycache__ 8 | *.pyc 9 | log* 10 | .idea/ 11 | saved_model/ 12 | 13 | vendor/ 14 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SSDs 2 | 3 | This repo contains many object detection methods that aim at **single shot and real time**, so the **speed** is the only thing we talk about. Currently we have some base networks that support object detection task such as MobileNet V2, ResNet, VGG etc. And some SSD variants such as FSSD, RFBNet, Retina, and even Yolo are contained. 4 | 5 | If you have any faster object detection methods, you are welcome to discuss with me about merging them into our master branch. 6 | 7 | 8 | 9 | 10 | # Note 11 | 12 | Work is still in progress. Will update some result and pretrained model after trained on some datasets. And of course, some out-of-box inference demo. 13 | 14 | [updates]: 15 | 16 | 2018.11.06: As you know, after training `fssd_mobilenetv2` the inference code actually returns no results; I am still debugging where this logic error comes from. 17 | 18 | 19 | 20 | # Train 21 | 22 | All settings about base net and ssd variants are under `./experiments/cfgs/*.yml`, just edit it to your environment and kick it off. 23 | 24 | ``` 25 | python3 train.py --cfg=./experiments/cfgs/rfb_lite_mobilenetv2_train_voc.yml 26 | ``` 27 | 28 | You can try training on COCO first and then use your custom dataset. 
def _str2bool(value):
    """Convert a command-line token to a bool.

    ``argparse`` calls ``type(value)`` on the raw string, and ``bool('False')``
    is ``True`` because any non-empty string is truthy.  This converter maps
    the usual spellings explicitly instead.

    Args:
        value: the raw command-line string (a bool is passed through as is).

    Returns:
        True or False.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string is kept falsy, matching what bool('') returned before.
    if token in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(
        'boolean value expected, got {!r}'.format(value))


def parse_args():
    """
    Parse input arguments

    Prints the help text and exits with status 1 when the script is started
    without any command-line argument.

    Returns:
        argparse.Namespace with the attributes ``confg_file`` (--cfg,
        required), ``demo_file`` (--demo, required), ``type`` (-t/--type,
        default 'image'), ``display`` (-d/--display, default True) and
        ``save`` (-s/--save, default False).
    """
    parser = argparse.ArgumentParser(description='Demo a ssds.pytorch network')
    parser.add_argument('--cfg', dest='confg_file',
                        help='the address of optional config file', default=None, type=str, required=True)
    parser.add_argument('--demo', dest='demo_file',
                        help='the address of the demo file', default=None, type=str, required=True)
    parser.add_argument('-t', '--type', dest='type',
                        help='the type of the demo file, could be "image", "video", "camera" or "time", default is "image"', default='image', type=str)
    # type=bool would turn "--display False" into True; parse the text instead.
    parser.add_argument('-d', '--display', dest='display',
                        help='whether display the detection result, default is True', default=True, type=_str2bool)
    parser.add_argument('-s', '--save', dest='save',
                        help='whether write the detection result, default is False', default=False, type=_str2bool)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args
def time_benchmark(args, image_path, warmup=20, time_iter=100):
    """Time the detector on one image and print per-stage timings.

    Loads the configuration named by ``args.confg_file``, builds an
    ``ObjectDetector`` and calls ``predict(image, check_time=True)``
    ``warmup`` times without recording anything, then ``time_iter`` more
    times while recording the five timing values it returns.  Progress is
    printed every 20 timed iterations and the per-stage mean at the end.

    Args:
        args: parsed command-line namespace; only ``confg_file`` is read.
        image_path: path of the image handed to ``cv2.imread``.
        warmup: number of untimed calls made before measuring.
        time_iter: number of timed calls that enter the average.

    Raises:
        ValueError: if ``time_iter`` is smaller than 1.
        IOError: if ``cv2.imread`` cannot read ``image_path``.
    """
    if time_iter < 1:
        raise ValueError('time_iter must be at least 1, got {}'.format(time_iter))

    # 1. load the configure file
    cfg_from_file(args.confg_file)

    # 2. load detector based on the configure file
    object_detector = ObjectDetector()

    # 3. load image
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('Can not read image: {}'.format(image_path))

    # 4. warm up: results are discarded so one-off start-up cost is not timed
    print('Warmup the detector...')
    for _ in range(warmup):
        object_detector.predict(image, check_time=True)

    # 5. time test: exactly time_iter samples are recorded, so the mean below
    #    is taken over the same count that is reported
    timings = []
    for done in range(1, time_iter + 1):
        _, _, _, (total_time, preprocess_time, net_forward_time, detect_time, output_time) \
            = object_detector.predict(image, check_time=True)
        timings.append([total_time, preprocess_time, net_forward_time, detect_time, output_time])
        if done % 20 == 0:
            print('In {}\\{}, total time: {} \n preprocess: {} \n net_forward: {} \n detect: {} \n output: {}'.format(
                done, time_iter, total_time, preprocess_time, net_forward_time, detect_time, output_time
            ))

    total_time, preprocess_time, net_forward_time, detect_time, output_time = np.mean(timings, axis=0)
    print('In average, total time: {} \n preprocess: {} \n net_forward: {} \n detect: {} \n output: {}'.format(
        total_time, preprocess_time, net_forward_time, detect_time, output_time
    ))
def predict():
    """Run one detection on the sample image and print the raw result.

    Reads the config path from the command line via ``parse_args``, loads it
    with ``cfg_from_file``, builds an ``ObjectDetector``, feeds it the image
    at the module-level path ``img_f`` and prints the returned labels,
    scores and coordinates.
    """
    cli_args = parse_args()
    cfg_from_file(cli_args.cfg)

    # The detector is built before the image is read, as in the original flow.
    found_labels, found_scores, found_coords = ObjectDetector().predict(cv2.imread(img_f))

    report = 'labels: {}\nscores: {}\ncoords: {}'
    print(report.format(found_labels, found_scores, found_coords))
[196, 200] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'coco' 43 | DATASET_DIR: './data/COCO' 44 | TRAIN_SETS: [['2017', 'train']] 45 | TEST_SETS: [['2017', 'val']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_mobilenet_v1_coco' 49 | LOG_DIR: './experiments/models/fssd_mobilenet_v1_coco' 50 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v1_fssd_lite_coco_24.2.pth' 51 | PHASE: ['test'] 52 | -------------------------------------------------------------------------------- /experiments/cfgs/fssd_lite_mobilenetv1_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd_lite 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[[5, 11, 13], [256, 512, 1024]], 7 | [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 300 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 32 16 | TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 100 26 | 27 | TEST: 28 | BATCH_SIZE: 48 29 | TEST_SCOPE: [285, 300] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'voc' 43 | DATASET_DIR: 
'./data/VOCdevkit' 44 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 45 | TEST_SETS: [['2007', 'test']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_mobilenet_v1_voc' 49 | LOG_DIR: './experiments/models/fssd_mobilenet_v1_voc' 50 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v1_fssd_lite_voc_78.4.pth' 51 | PHASE: ['test'] 52 | -------------------------------------------------------------------------------- /experiments/cfgs/fssd_lite_mobilenetv2_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd_lite 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[[6, 13, 17], [32, 96, 320]], 7 | [['', 'S', 'S', 'S', '', ''], [256, 256, 256, 256, 128, 128]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 200 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 4 16 | TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 100 26 | 27 | TEST: 28 | BATCH_SIZE: 48 29 | TEST_SCOPE: [188, 200] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'coco' 43 | DATASET_DIR: './data/COCO' 44 | TRAIN_SETS: [['2017', 'train']] 45 | TEST_SETS: [['2017', 'val']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_mobilenet_v2_coco' 49 | LOG_DIR: './experiments/models/fssd_mobilenet_v2_coco' 50 | 
RESUME_CHECKPOINT: './saved_model/fssd_mobilenet_v2_coco/fssd_lite_mobilenet_v2_voc_epoch_290.pth' 51 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/fssd_lite_mobilenetv2_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd_lite 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[[6, 13, 17], [32, 96, 320]], 7 | [['', 'S', 'S', 'S', '', ''], [256, 256, 256, 256, 128, 128]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 300 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 32 16 | TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 100 26 | 27 | TEST: 28 | BATCH_SIZE: 48 29 | TEST_SCOPE: [288, 300] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'voc' 43 | DATASET_DIR: './data/VOCdevkit' 44 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 45 | TEST_SETS: [['2007', 'val']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_mobilenet_v2_voc' 49 | LOG_DIR: './experiments/models/fssd_mobilenet_v2_voc' 50 | 51 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v2_fssd_lite_voc_76.7.pth' 52 | PHASE: ['train'] 53 | -------------------------------------------------------------------------------- 
/experiments/cfgs/fssd_resnet50_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd 3 | NETS: resnet_50 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[[10, 16, 'S'], [512, 1024, 512]], 7 | [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 100 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 28 16 | TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 10 26 | 27 | TEST: 28 | BATCH_SIZE: 64 29 | TEST_SCOPE: [90, 100] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'coco' 43 | DATASET_DIR: './data/COCO' 44 | TRAIN_SETS: [['2017', 'train']] 45 | TEST_SETS: [['2017', 'val']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_resnet50_coco' 49 | LOG_DIR: './experiments/models/fssd_resnet50_coco' 50 | RESUME_CHECKPOINT: './weights/fssd/resnet50_fssd_coco_27.2.pth' 51 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/fssd_vgg16_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd 3 | NETS: vgg16 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[[22, 34, 'S'], [512, 1024, 512]], 7 | [['', 'S', 'S', 'S', '', ''], 
[512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 100 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 28 16 | TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 30 26 | 27 | TEST: 28 | BATCH_SIZE: 64 29 | TEST_SCOPE: [90, 100] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'coco' 43 | DATASET_DIR: './data/COCO' 44 | TRAIN_SETS: [['2017', 'train']] 45 | TEST_SETS: [['2017', 'val']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_vgg16_coco' 49 | LOG_DIR: './experiments/models/fssd_vgg16_coco' 50 | RESUME_CHECKPOINT: './weights/fssd/vgg16_fssd_coco_24.5.pth' 51 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/fssd_vgg16_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd 3 | NETS: vgg16 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[[22, 34, 'S'], [512, 1024, 512]], 7 | [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 30 14 | 
CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 32 16 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf,transforms,pyramids' 17 | RESUME_SCOPE: 'base,norm,extras,loc,conf,transforms,pyramids' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.004 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [27, 30] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'test']] 45 | 46 | EXP_DIR: './experiments/models/fssd_vgg16_voc' 47 | LOG_DIR: './experiments/models/fssd_vgg16_voc' 48 | RESUME_CHECKPOINT: './weights/fssd/vgg16_fssd_voc_77.8.pth' 49 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/rfb_lite_mobilenetv1_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb_lite 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[11, 13, 'RBF', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 51 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.004 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 25 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | 
# TEST_SCOPE: [45, 50] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_mobilenet_v1_coco' 48 | LOG_DIR: './experiments/models/rfb_mobilenet_v1_coco' 49 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v1_rfb_lite_coco_19.1.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/rfb_lite_mobilenetv1_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb_lite 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[11, 13, 'RBF', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 51 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.004 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 25 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'voc' 41 | DATASET_DIR: './data/VOCdevkit' 42 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 43 | TEST_SETS: 
[['2007', 'test']] 44 | 45 | EXP_DIR: './experiments/models/rfb_mobilenet_v1_voc' 46 | LOG_DIR: './experiments/models/rfb_mobilenet_v1_voc' 47 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v1_rfb_lite_voc_73.7.pth' 48 | PHASE: ['test'] 49 | -------------------------------------------------------------------------------- /experiments/cfgs/rfb_lite_mobilenetv2_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb_lite 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[13, 17, 'RBF', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 50 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.002 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 0 25 | 26 | TEST: 27 | BATCH_SIZE: 48 28 | TEST_SCOPE: [45, 50] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_mobilenet_v2_coco' 48 | LOG_DIR: './experiments/models/rfb_mobilenet_v2_coco' 49 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v2_rfb_lite_coco_18.5.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- 
/experiments/cfgs/rfb_lite_mobilenetv2_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb_lite 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[13, 17, 'RBF', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 300 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 0 25 | TEST: 26 | BATCH_SIZE: 48 27 | TEST_SCOPE: [270, 300] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'voc' 41 | DATASET_DIR: './data/VOCdevkit' 42 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 43 | TEST_SETS: [['2007', 'test']] 44 | 45 | EXP_DIR: './experiments/models/rfb_mobilenet_v2_voc' 46 | LOG_DIR: './experiments/models/rfb_mobilenet_v2_voc' 47 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v2_rfb_lite_voc_73.4.pth' 48 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/rfb_resnet50_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb 3 | NETS: resnet_50 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[10, 16, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], 
[64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 28 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 30 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [93, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_resnet50_coco' 48 | LOG_DIR: './experiments/models/rfb_resnet50_coco' 49 | RESUME_CHECKPOINT: './weights/rfb/resnet50_rfb_coco_26.5.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/rfb_resnet50_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb 3 | NETS: resnet_50 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[10, 16, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | 
OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 50 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [90, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'test']] 45 | 46 | EXP_DIR: './experiments/models/rfb_resnet50_voc' 47 | LOG_DIR: './experiments/models/rfb_resnet50_voc' 48 | RESUME_CHECKPOINT: './weights/rfb/resnet50_rfb_voc_81.2.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/rfb_vgg16_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb 3 | NETS: vgg16 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[22, 34, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 24 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 60 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | # TEST_SCOPE: [95, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 
| IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_vgg16_coco' 48 | LOG_DIR: './experiments/models/rfb_vgg16_coco' 49 | RESUME_CHECKPOINT: './weights/rfb/vgg16_rfb_coco_25.5.pth' 50 | PHASE: ['test'] 51 | -------------------------------------------------------------------------------- /experiments/cfgs/rfb_vgg16_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb 3 | NETS: vgg16 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[22, 34, 'RBF', 'RBF', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 24 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 60 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | # TEST_SCOPE: [95, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'test']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_vgg16_voc' 48 | LOG_DIR: './experiments/models/rfb_vgg16_voc' 49 | RESUME_CHECKPOINT: 
'./weights/rfb/vgg16_rfb_voc_80.5.pth' 50 | PHASE: ['test'] 51 | -------------------------------------------------------------------------------- /experiments/cfgs/ssd_lite_mobilenetv1_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd_lite 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[11, 13, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 0 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [90, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_mobilenet_v1_coco' 48 | LOG_DIR: './experiments/models/ssd_mobilenet_v1_coco' 49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v1_ssd_lite_coco_18.8.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/ssd_lite_mobilenetv1_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd_lite 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: 
[300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[11, 13, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 300 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.004 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 100 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [285, 300] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'test']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_mobilenet_v1_voc' 48 | LOG_DIR: './experiments/models/ssd_mobilenet_v1_voc' 49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v1_ssd_lite_voc_72.7.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/ssd_lite_mobilenetv2_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd_lite 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[13, 17, 'S', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], 
[1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 200 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 95 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [196, 200] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_mobilenet_v2_coco' 48 | LOG_DIR: './experiments/models/ssd_mobilenet_v2_coco' 49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v2_ssd_lite_coco_18.5.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/ssd_lite_mobilenetv2_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd_lite 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[13, 17, 'S', 'S', 'S', 'S'], [96, 320, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 300 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | 
SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 100 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [285, 300] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_mobilenet_v2_voc' 48 | LOG_DIR: './experiments/models/ssd_mobilenet_v2_voc' 49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v2_ssd_lite_voc_73.2.pth' 50 | PHASE: ['train'] -------------------------------------------------------------------------------- /experiments/cfgs/ssd_resnet50_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: resnet_50 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[10, 16, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 200 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.004 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 10 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [190, 200] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 
| DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_resnet50_coco' 48 | LOG_DIR: './experiments/models/ssd_resnet50_coco' 49 | RESUME_CHECKPOINT: './weights/ssd/resnet50_ssd_coco_25.1.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/ssd_resnet50_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: resnet_50 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[10, 16, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 200 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 50 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [190, 200] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'test']] 45 | 46 | EXP_DIR: './experiments/models/ssd_resnet50_voc' 47 | LOG_DIR: './experiments/models/ssd_resnet50_voc' 48 | RESUME_CHECKPOINT: './weights/ssd/resnet50_ssd_voc_79.7.pth' 49 | PHASE: ['test'] 
-------------------------------------------------------------------------------- /experiments/cfgs/ssd_vgg16_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: vgg16 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[22, 34, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 60 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.004 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 5 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [55, 60] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_vgg16_coco' 48 | LOG_DIR: './experiments/models/ssd_vgg16_coco' 49 | RESUME_CHECKPOINT: './weights/ssd/vgg16_ssd_coco_24.4.pth' 50 | PHASE: ['test'] 51 | -------------------------------------------------------------------------------- /experiments/cfgs/ssd_vgg16_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: vgg16 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[22, 34, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 
32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 2 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 4 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.004 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | 28 | MATCHER: 29 | MATCHED_THRESHOLD: 0.5 30 | UNMATCHED_THRESHOLD: 0.5 31 | NEGPOS_RATIO: 3 32 | 33 | POST_PROCESS: 34 | SCORE_THRESHOLD: 0.01 35 | IOU_THRESHOLD: 0.6 36 | MAX_DETECTIONS: 100 37 | 38 | DATASET: 39 | DATASET: 'voc' 40 | DATASET_DIR: './data/VOCdevkit' 41 | # TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 42 | TRAIN_SETS: [['2007', 'trainval']] 43 | TEST_SETS: [['2007', 'test']] 44 | 45 | EXP_DIR: './experiments/models/ssd_vgg16_voc' 46 | LOG_DIR: './experiments/models/ssd_vgg16_voc' 47 | RESUME_CHECKPOINT: './weights/ssd/vgg16_reducedfc.pth' 48 | PHASE: ['train'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/fssd_darknet19_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd 3 | NETS: darknet_19 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[[8, 12, 16], [256, 512, 1024]], 7 | [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 60 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 32 16 | TRAINABLE_SCOPE: 'norm,extras,transforms,pyramids,loc,conf' 17 
| RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 0 26 | 27 | TEST: 28 | BATCH_SIZE: 64 29 | TEST_SCOPE: [91, 100] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'coco' 43 | DATASET_DIR: './data/COCO' 44 | TRAIN_SETS: [['2017', 'train']] 45 | TEST_SETS: [['2017', 'val']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_darknet_19_coco' 49 | LOG_DIR: './experiments/models/fssd_darknet_19_coco' 50 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_21.6.pth' 51 | PHASE: ['train'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/fssd_darknet53_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd 3 | NETS: darknet_53 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[[14, 23, 28], [256, 512, 1024]], 7 | [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 100 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 16 16 | TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 60 26 | 27 | TEST: 28 | BATCH_SIZE: 64 29 | TEST_SCOPE: [100, 100] 
30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'coco' 43 | DATASET_DIR: './data/COCO' 44 | TRAIN_SETS: [['2017', 'train']] 45 | TEST_SETS: [['2017', 'val']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_darknet_53_coco' 49 | LOG_DIR: './experiments/models/fssd_darknet_53_coco' 50 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth' 51 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/fssd_darknet53_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd 3 | NETS: darknet_53 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[[14, 23, 28], [256, 512, 1024]], 7 | [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 100 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 16 16 | TRAINABLE_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 60 26 | 27 | TEST: 28 | BATCH_SIZE: 64 29 | TEST_SCOPE: [91, 100] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'voc' 43 | DATASET_DIR: './data/VOCdevkit' 44 | TRAIN_SETS: [['2007', 
'trainval'], ['2012', 'trainval']] 45 | TEST_SETS: [['2007', 'test']] 46 | PROB: 0.6 47 | 48 | EXP_DIR: './experiments/models/fssd_darknet_53_voc' 49 | LOG_DIR: './experiments/models/fssd_darknet_53_voc' 50 | RESUME_CHECKPOINT: './experiments/models/fssd_darknet_53_coco/fssd_darknet_53_coco_epoch_98.pth' 51 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/fssd_resnet50_train_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: fssd 3 | NETS: resnet_50 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[[10, 16, 'S'], [512, 1024, 512]], 7 | [['', 'S', 'S', 'S', '', ''], [512, 512, 256, 256, 256, 256]]] 8 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 9 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 10 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 11 | 12 | TRAIN: 13 | MAX_EPOCHS: 50 14 | CHECKPOINTS_EPOCHS: 1 15 | BATCH_SIZE: 32 16 | TRAINABLE_SCOPE: 'norm,extras,transforms,pyramids,loc,conf' 17 | RESUME_SCOPE: 'base,norm,extras,transforms,pyramids,loc,conf' 18 | OPTIMIZER: 19 | OPTIMIZER: sgd 20 | LEARNING_RATE: 0.004 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | LR_SCHEDULER: 24 | SCHEDULER: SGDR 25 | WARM_UP_EPOCHS: 20 26 | 27 | TEST: 28 | BATCH_SIZE: 64 29 | TEST_SCOPE: [90, 100] 30 | 31 | MATCHER: 32 | MATCHED_THRESHOLD: 0.5 33 | UNMATCHED_THRESHOLD: 0.5 34 | NEGPOS_RATIO: 3 35 | 36 | POST_PROCESS: 37 | SCORE_THRESHOLD: 0.01 38 | IOU_THRESHOLD: 0.6 39 | MAX_DETECTIONS: 100 40 | 41 | DATASET: 42 | DATASET: 'voc' 43 | DATASET_DIR: './data/VOCdevkit' 44 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 45 | TEST_SETS: [['2007', 'test']] 46 | 47 | EXP_DIR: './experiments/models/fssd_resnet50_voc' 48 | LOG_DIR: './experiments/models/fssd_resnet50_voc' 49 | RESUME_CHECKPOINT: 
'./weights/fssd/resnet50_fssd_coco_27.2.pth' 50 | PHASE: ['train'] 51 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/rfb_darknet19_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb 3 | NETS: darknet_19 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[12, 16, 'RBF', 'RBF', 'S', 'S'], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 60 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [91, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_darknet_19_coco' 48 | LOG_DIR: './experiments/models/rfb_darknet_19_coco' 49 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_21.6.pth' 50 | PHASE: ['train'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/rfb_darknet53_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb 3 | NETS: darknet_53 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 
81 6 | FEATURE_LAYER: [[23, 28, 'RBF', 'RBF', 'S', 'S'], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 16 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 55 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [96, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_darknet_53_coco' 48 | LOG_DIR: './experiments/models/rfb_darknet_53_coco' 49 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/rfb_darknet53_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: rfb 3 | NETS: darknet_53 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[23, 28, 'RBF', 'RBF', 'S', 'S'], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 
100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 55 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [91, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'test']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/rfb_darknet_53_voc' 48 | LOG_DIR: './experiments/models/rfb_darknet_53_voc' 49 | RESUME_CHECKPOINT: './experiments/models/rfb_darknet_53_coco/rfb_darknet_53_coco_epoch_100.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/ssd_darknet19_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: darknet_19 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[12, 16, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 60 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 0 25 | 26 | 
TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [91, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_darknet_19_coco' 48 | LOG_DIR: './experiments/models/ssd_darknet_19_coco' 49 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_21.6.pth' 50 | PHASE: ['train'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/ssd_darknet53_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: darknet_53 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[23, 28, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 60 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [91, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: 
[['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_darknet_53_coco' 48 | LOG_DIR: './experiments/models/ssd_darknet_53_coco' 49 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth' 50 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/ssd_darknet53_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: darknet_53 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[23, 28, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], [100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 60 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [91, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'voc' 42 | DATASET_DIR: './data/VOCdevkit' 43 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 44 | TEST_SETS: [['2007', 'test']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/ssd_darknet_53_voc' 48 | LOG_DIR: './experiments/models/ssd_darknet_53_voc' 49 | RESUME_CHECKPOINT: './experiments/models/ssd_darknet_53_coco/ssd_darknet_53_coco_epoch_100.pth' 50 | PHASE: ['test'] 
-------------------------------------------------------------------------------- /experiments/cfgs/tests/ssd_resnet101_train_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd 3 | NETS: resnet_101 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[10, 33, 'S', 'S', '', ''], [512, 1024, 512, 256, 256, 256]] 7 | STEPS: [[8, 8], [16, 16], [32, 32], [64, 64], [100, 100], [300, 300]] 8 | SIZES: [[30, 30], [60, 60], [111, 111], [162, 162], [213, 213], [264, 264], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 50 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.004 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 10 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'coco' 41 | DATASET_DIR: './data/COCO' 42 | TRAIN_SETS: [['2017', 'train']] 43 | TEST_SETS: [['2017', 'val']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/ssd_resnet101_coco' 47 | LOG_DIR: './experiments/models/ssd_resnet101_coco' 48 | RESUME_CHECKPOINT: './weights/resnet101-5d3b4d8f.pth' 49 | PHASE: ['train'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/test.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: ssd_lite 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [300, 300] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[11, 13, 'S', 'S', 'S', 'S'], [512, 1024, 512, 256, 256, 128]] 7 | STEPS: [[16, 16], [32, 32], [64, 64], 
[100, 100], [150, 150], [300, 300]] 8 | SIZES: [[45, 45], [90, 90], [135, 135], [180, 180], [225, 225], [270, 270], [315, 315]] 9 | ASPECT_RATIOS: [[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]] 10 | 11 | TRAIN: 12 | MAX_EPOCHS: 100 13 | CHECKPOINTS_EPOCHS: 1 14 | BATCH_SIZE: 32 15 | TRAINABLE_SCOPE: 'base,norm,extras,loc,conf' 16 | RESUME_SCOPE: 'base,norm,extras,loc,conf' 17 | OPTIMIZER: 18 | OPTIMIZER: sgd 19 | LEARNING_RATE: 0.001 20 | MOMENTUM: 0.9 21 | WEIGHT_DECAY: 0.0001 22 | LR_SCHEDULER: 23 | SCHEDULER: SGDR 24 | WARM_UP_EPOCHS: 0 25 | 26 | TEST: 27 | BATCH_SIZE: 64 28 | TEST_SCOPE: [90, 100] 29 | 30 | MATCHER: 31 | MATCHED_THRESHOLD: 0.5 32 | UNMATCHED_THRESHOLD: 0.5 33 | NEGPOS_RATIO: 3 34 | 35 | POST_PROCESS: 36 | SCORE_THRESHOLD: 0.01 37 | IOU_THRESHOLD: 0.6 38 | MAX_DETECTIONS: 100 39 | 40 | DATASET: 41 | DATASET: 'coco' 42 | DATASET_DIR: './data/COCO' 43 | TRAIN_SETS: [['2017', 'train']] 44 | TEST_SETS: [['2017', 'val']] 45 | PROB: 0.6 46 | 47 | EXP_DIR: './experiments/models/test' 48 | LOG_DIR: './experiments/models/test' 49 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v1_ssd_lite_coco_18.8.pth' 50 | PHASE: ['visualize'] 51 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v2_mobilenetv1_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 32 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 
60 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [92, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'coco' 41 | DATASET_DIR: './data/COCO' 42 | TRAIN_SETS: [['2017', 'train']] 43 | TEST_SETS: [['2017', 'val']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_coco' 47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_coco' 48 | RESUME_CHECKPOINT: './weights/rfb_lite/mobilenet_v1_rfb_lite_coco_19.1.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v2_mobilenetv1_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 32 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 60 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [91, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'voc' 41 | DATASET_DIR: './data/VOCdevkit' 42 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 43 | TEST_SETS: [['2007', 'test']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_voc' 47 | 
LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_voc' 48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v2_coco_21.5.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v2_mobilenetv2_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['', '',13, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 28 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 55 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [91, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'coco' 41 | DATASET_DIR: './data/COCO' 42 | TRAIN_SETS: [['2017', 'train']] 43 | TEST_SETS: [['2017', 'val']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv2_coco' 47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_coco' 48 | RESUME_CHECKPOINT: './weights/ssd_lite/mobilenet_v2_ssd_lite_coco_18.5.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v2_mobilenetv2_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['', '',13, '']], [[1024, 
1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 16 14 | TRAINABLE_SCOPE: 'extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 40 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [91, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'voc' 41 | DATASET_DIR: './data/VOCdevkit' 42 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 43 | TEST_SETS: [['2007', 'test']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv2_voc' 47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_voc' 48 | RESUME_CHECKPOINT: './weights/yolo/yolo_v2_mobilenet_v2_coco_epoch_100.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v3_darknet53_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: darknet_53 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 200 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 12 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 
20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 80 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [191, 200] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'coco' 44 | DATASET_DIR: './data/COCO' 45 | TRAIN_SETS: [['2017', 'train']] 46 | TEST_SETS: [['2017', 'val']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_coco' 50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_coco' 51 | RESUME_CHECKPOINT: './weights/dark/yolov3.pth' 52 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v3_darknet53_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: darknet_53 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 16 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 45 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 
0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'voc' 44 | DATASET_DIR: './data/VOCdevkit' 45 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 46 | TEST_SETS: [['2007', 'test']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_voc' 50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_voc' 51 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth' 52 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v3_mobilenetv1_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 16 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.004 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 60 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'coco' 44 | DATASET_DIR: './data/COCO' 45 | TRAIN_SETS: [['2017', 'train']] 46 | TEST_SETS: [['2017', 'val']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_coco' 50 | LOG_DIR: 
'./experiments/models/yolo_v3_mobilenetv1_coco' 51 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v1_fssd_lite_coco_24.2.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v3_mobilenetv1_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 32 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 50 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'voc' 44 | DATASET_DIR: './data/VOCdevkit' 45 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 46 | TEST_SETS: [['2007', 'test']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_voc' 50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv1_voc' 51 | RESUME_CHECKPOINT: './experiments/models/yolo_v3_mobilenetv1_coco/yolo_v3_mobilenet_v1_coco_epoch_100.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- 
/experiments/cfgs/tests/yolo_v3_mobilenetv2_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 16 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.004 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 50 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'coco' 44 | DATASET_DIR: './data/COCO' 45 | TRAIN_SETS: [['2017', 'train']] 46 | TEST_SETS: [['2017', 'val']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_coco' 50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_coco' 51 | RESUME_CHECKPOINT: './weights/fssd_lite/mobilenet_v2_fssd_lite_coco_22.2.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- /experiments/cfgs/tests/yolo_v3_mobilenetv2_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']], 7 | 
[[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 32 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 20 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'voc' 44 | DATASET_DIR: './data/VOCdevkit' 45 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 46 | TEST_SETS: [['2007', 'test']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_voc' 50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_voc' 51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v3_coco_24.0.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v2_darknet19_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: darknet_19 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['', '',12, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 200 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 32 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | 
LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 120 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [198, 200] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'coco' 41 | DATASET_DIR: './data/COCO' 42 | TRAIN_SETS: [['2017', 'train']] 43 | TEST_SETS: [['2017', 'val']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_darknet_19_coco' 47 | LOG_DIR: './experiments/models/yolo_v2_darknet_19_coco' 48 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_coco_26.1.pth' 49 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v2_darknet19_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: darknet_19 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['', '',12, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 32 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 0 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [90, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'voc' 41 | DATASET_DIR: './data/VOCdevkit' 42 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 43 | TEST_SETS: 
[['2007', 'test']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_darknet_19_voc' 47 | LOG_DIR: './experiments/models/yolo_v2_darknet_19_voc' 48 | RESUME_CHECKPOINT: './weights/yolo/darknet19_yolo_v2_voc_78.4.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v2_mobilenetv1_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 32 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 60 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [92, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'coco' 41 | DATASET_DIR: './data/COCO' 42 | TRAIN_SETS: [['2017', 'train']] 43 | TEST_SETS: [['2017', 'val']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_coco' 47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_coco' 48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v2_coco_21.5.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v2_mobilenetv1_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v1 4 | 
IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['', '',11, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 32 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 60 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [91, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'voc' 41 | DATASET_DIR: './data/VOCdevkit' 42 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 43 | TEST_SETS: [['2007', 'test']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv1_voc' 47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv1_voc' 48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v2_voc_74.7.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v2_mobilenetv2_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['', '',13, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 28 14 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 
22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 55 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [91, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'coco' 41 | DATASET_DIR: './data/COCO' 42 | TRAIN_SETS: [['2017', 'train']] 43 | TEST_SETS: [['2017', 'val']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: './experiments/models/yolo_v2_mobilenetv2_coco' 47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_coco' 48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v2_coco_20.4.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v2_mobilenetv2_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v2 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['', '',13, '']], [[1024, 1024, 64, 1024]]] 7 | SIZES: [[416, 416]] 8 | ASPECT_RATIOS: [[[0.1,0.1], [0.2,0.2], [0.3,0.3], [0.5,0.5], [0.9,0.9]]] 9 | 10 | TRAIN: 11 | MAX_EPOCHS: 100 12 | CHECKPOINTS_EPOCHS: 1 13 | BATCH_SIZE: 16 14 | TRAINABLE_SCOPE: 'extras,loc,conf' 15 | RESUME_SCOPE: 'base,extras,loc,conf' 16 | OPTIMIZER: 17 | OPTIMIZER: sgd 18 | LEARNING_RATE: 0.001 19 | MOMENTUM: 0.9 20 | WEIGHT_DECAY: 0.0001 21 | LR_SCHEDULER: 22 | SCHEDULER: SGDR 23 | WARM_UP_EPOCHS: 40 24 | 25 | TEST: 26 | BATCH_SIZE: 64 27 | TEST_SCOPE: [91, 100] 28 | 29 | MATCHER: 30 | MATCHED_THRESHOLD: 0.5 31 | UNMATCHED_THRESHOLD: 0.5 32 | NEGPOS_RATIO: 3 33 | 34 | POST_PROCESS: 35 | SCORE_THRESHOLD: 0.01 36 | IOU_THRESHOLD: 0.6 37 | MAX_DETECTIONS: 100 38 | 39 | DATASET: 40 | DATASET: 'voc' 41 | DATASET_DIR: './data/VOCdevkit' 42 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 43 | TEST_SETS: [['2007', 'test']] 44 | PROB: 0.6 45 | 46 | EXP_DIR: 
'./experiments/models/yolo_v2_mobilenetv2_voc' 47 | LOG_DIR: './experiments/models/yolo_v2_mobilenetv2_voc' 48 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v2_voc_72.0.pth' 49 | PHASE: ['test'] 50 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v3_darknet53_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: darknet_53 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 200 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 12 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 80 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [191, 200] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'coco' 44 | DATASET_DIR: './data/COCO' 45 | TRAIN_SETS: [['2017', 'train']] 46 | TEST_SETS: [['2017', 'val']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_coco' 50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_coco' 51 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_coco_27.3.pth' 52 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v3_darknet53_voc.yml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: darknet_53 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['B','B','B'], [23,'B','B','B'], [14,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 16 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 45 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'voc' 44 | DATASET_DIR: './data/VOCdevkit' 45 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 46 | TEST_SETS: [['2007', 'test']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_darknet_53_voc' 50 | LOG_DIR: './experiments/models/yolo_v3_darknet_53_voc' 51 | RESUME_CHECKPOINT: './weights/yolo/darknet53_yolo_v3_voc_79.3.pth' 52 | PHASE: ['test'] -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v3_mobilenetv1_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 
8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 16 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.004 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 60 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'coco' 44 | DATASET_DIR: './data/COCO' 45 | TRAIN_SETS: [['2017', 'train']] 46 | TEST_SETS: [['2017', 'val']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_coco' 50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv1_coco' 51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v3_coco_25.7.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v3_mobilenetv1_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v1 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['B','B','B'], [11,'B','B','B'], [5,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 32 17 | TRAINABLE_SCOPE: 
'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 50 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'voc' 44 | DATASET_DIR: './data/VOCdevkit' 45 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 46 | TEST_SETS: [['2007', 'test']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv1_voc' 50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv1_voc' 51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v1_yolo_v3_voc_78.2.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v3_mobilenetv2_coco.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 81 6 | FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 16 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.004 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 50 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | 
MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'coco' 44 | DATASET_DIR: './data/COCO' 45 | TRAIN_SETS: [['2017', 'train']] 46 | TEST_SETS: [['2017', 'val']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_coco' 50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_coco' 51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v3_coco_24.0.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- /experiments/cfgs/yolo_v3_mobilenetv2_voc.yml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | SSDS: yolo_v3 3 | NETS: mobilenet_v2 4 | IMAGE_SIZE: [416, 416] 5 | NUM_CLASSES: 21 6 | FEATURE_LAYER: [[['B','B','B'], [13,'B','B','B'], [6,'B','B','B']], 7 | [[1024,1024,1024], [256, 512, 512, 512], [128, 256, 256, 256]]] 8 | SIZES: [[416, 416], [416, 416], [416, 416]] 9 | ASPECT_RATIOS: [[[0.278,0.216], [0.375,0.475], [0.896,0.783]], 10 | [[0.072,0.146], [0.146,0.108], [0.141,0.286]], 11 | [[0.024,0.031], [0.038,0.072], [0.079,0.055]], ] 12 | 13 | TRAIN: 14 | MAX_EPOCHS: 100 15 | CHECKPOINTS_EPOCHS: 1 16 | BATCH_SIZE: 32 17 | TRAINABLE_SCOPE: 'base,extras,loc,conf' 18 | RESUME_SCOPE: 'base,extras,loc,conf' 19 | OPTIMIZER: 20 | OPTIMIZER: sgd 21 | LEARNING_RATE: 0.001 22 | MOMENTUM: 0.9 23 | WEIGHT_DECAY: 0.0001 24 | LR_SCHEDULER: 25 | SCHEDULER: SGDR 26 | WARM_UP_EPOCHS: 20 27 | 28 | TEST: 29 | BATCH_SIZE: 64 30 | TEST_SCOPE: [90, 100] 31 | 32 | MATCHER: 33 | MATCHED_THRESHOLD: 0.5 34 | UNMATCHED_THRESHOLD: 0.5 35 | NEGPOS_RATIO: 3 36 | 37 | POST_PROCESS: 38 | SCORE_THRESHOLD: 0.01 39 | IOU_THRESHOLD: 0.6 40 | MAX_DETECTIONS: 100 41 | 42 | DATASET: 43 | DATASET: 'voc' 44 | DATASET_DIR: './data/VOCdevkit' 45 | TRAIN_SETS: [['2007', 'trainval'], ['2012', 'trainval']] 46 | 
TEST_SETS: [['2007', 'test']] 47 | PROB: 0.6 48 | 49 | EXP_DIR: './experiments/models/yolo_v3_mobilenetv2_voc' 50 | LOG_DIR: './experiments/models/yolo_v3_mobilenetv2_voc' 51 | RESUME_CHECKPOINT: './weights/yolo/mobilenet_v2_yolo_v3_voc_75.8.pth' 52 | PHASE: ['test'] 53 | -------------------------------------------------------------------------------- /experiments/person.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/experiments/person.jpg -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/__init__.py -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/dataset/__init__.py -------------------------------------------------------------------------------- /lib/dataset/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from lib.dataset import voc 2 | from lib.dataset import coco 3 | 4 | dataset_map = { 5 | 'voc': voc.VOCDetection, 6 | 'coco': coco.COCODetection, 7 | } 8 | 9 | def gen_dataset_fn(name): 10 | """Returns a dataset func. 11 | 12 | Args: 13 | name: The name of the dataset. 14 | 15 | Returns: 16 | func: dataset_fn 17 | 18 | Raises: 19 | ValueError: If network `name` is not recognized. 
def detection_collate(batch):
    """Collate samples whose images carry differing numbers of annotations.

    Every element of each sample is inspected: torch tensors are treated as
    images, numpy arrays as annotation matrices (converted to float tensors).
    Elements of any other type are ignored.

    Arguments:
        batch: (iterable) samples, each an iterable of tensors / numpy arrays

    Return:
        A tuple containing:
            1) (tensor) the images stacked along a new dim 0
            2) (list of tensors) one float annotation tensor per numpy array
               found, in encounter order
    """
    images, annotations = [], []
    for sample in batch:
        for item in sample:
            if torch.is_tensor(item):
                images.append(item)
            elif isinstance(item, np.ndarray):
                annotations.append(torch.from_numpy(item).float())
    return torch.stack(images, 0), annotations
def parse_rec(filename):
    """Parse a PASCAL VOC annotation XML file.

    Args:
        filename: Path of the XML annotation file.

    Returns:
        A list with one dict per ``<object>`` element, holding the keys
        ``name``, ``pose``, ``truncated``, ``difficult`` and ``bbox``
        (``[xmin, ymin, xmax, ymax]`` as ints).
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
            'bbox': [int(bbox.find(tag).text)
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return objects


def voc_ap(rec, prec, use_07_metric=False):
    """Compute VOC average precision from recall and precision arrays.

    Args:
        rec: 1-D numpy array of recall values.
        prec: 1-D numpy array of precision values, aligned with ``rec``.
        use_07_metric: If true, use the VOC07 11-point interpolation;
            otherwise integrate the area under the precision envelope.

    Returns:
        The average precision as a float.
    """
    if use_07_metric:
        # 11 point metric: mean of the best precision at recall >= t
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # append sentinel values at both ends
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))

        # compute the precision envelope (monotonically non-increasing)
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

        # area under the PR curve: only points where recall changes count
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # sum (delta recall) * precision
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                cachedir,
                                [ovthresh],
                                [use_07_metric])

    Top level function that does the PASCAL VOC evaluation for one class.

    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
        Each line is "<image id> <confidence> <xmin> <ymin> <xmax> <ymax>",
        separated by single spaces.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name
    cachedir: Directory for caching the parsed annotations (created,
        including missing parents, when it does not exist)
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)

    Returns the cumulative recall array, the cumulative precision array and
    the average precision. An empty detection file yields empty arrays and
    an AP of 0.
    """
    # first load gt
    os.makedirs(cachedir, exist_ok=True)
    # NOTE: the cache file name does not depend on the image set, so a stale
    # cache must be deleted by hand when the image list changes. It is a
    # pickle written by this function; do not point cachedir at untrusted data.
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        imagenames = [x.strip() for x in f.readlines()]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            pickle.dump(recs, f)
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = pickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        # the builtin bool replaces the np.bool alias removed in NumPy 1.24
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        # difficult objects are not counted as positives
        npos = npos + int(np.sum(~difficult))
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets, ignoring blank lines
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        splitlines = [x.strip().split(' ') for x in f if x.strip()]

    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    if splitlines:
        BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
    else:
        # keep BB two-dimensional so the indexing below works with no dets
        BB = np.zeros((0, 4))

    # sort by descending confidence
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # intersection (the +1 treats coordinates as inclusive pixels)
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            # matches on difficult objects count as neither TP nor FP
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = True
                else:
                    # duplicate detection of an already matched object
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
16 | """ 17 | 18 | def __init__(self, cfg, priors): 19 | self.num_classes = cfg.NUM_CLASSES 20 | self.background_label = cfg.BACKGROUND_LABEL 21 | self.conf_thresh = cfg.SCORE_THRESHOLD 22 | self.nms_thresh = cfg.IOU_THRESHOLD 23 | self.top_k = cfg.MAX_DETECTIONS 24 | self.variance = cfg.VARIANCE 25 | self.priors = priors 26 | 27 | # def forward(self, predictions, prior): 28 | # """ 29 | # Args: 30 | # loc_data: (tensor) Loc preds from loc layers 31 | # Shape: [batch,num_priors*4] 32 | # conf_data: (tensor) Shape: Conf preds from conf layers 33 | # Shape: [batch*num_priors,num_classes] 34 | # prior_data: (tensor) Prior boxes and variances from priorbox layers 35 | # Shape: [1,num_priors,4] 36 | # """ 37 | # loc, conf = predictions 38 | 39 | # loc_data = loc.data 40 | # conf_data = conf.data 41 | # prior_data = prior.data 42 | 43 | # num = loc_data.size(0) # batch size 44 | # num_priors = prior_data.size(0) 45 | # self.boxes = torch.zeros(1, num_priors, 4) 46 | # self.scores = torch.zeros(1, num_priors, self.num_classes) 47 | 48 | # if num == 1: 49 | # # size batch x num_classes x num_priors 50 | # conf_preds = conf_data.unsqueeze(0) 51 | 52 | # else: 53 | # conf_preds = conf_data.view(num, num_priors, 54 | # self.num_classes) 55 | # self.boxes.expand_(num, num_priors, 4) 56 | # self.scores.expand_(num, num_priors, self.num_classes) 57 | 58 | # # Decode predictions into bboxes. 
59 | # for i in range(num): 60 | # decoded_boxes = decode(loc_data[i], prior_data, self.variance) 61 | # # For each class, perform nms 62 | # conf_scores = conf_preds[i].clone() 63 | # ''' 64 | # c_mask = conf_scores.gt(self.thresh) 65 | # decoded_boxes = decoded_boxes[c_mask] 66 | # conf_scores = conf_scores[c_mask] 67 | # ''' 68 | 69 | # conf_scores = conf_preds[i].clone() 70 | # num_det = 0 71 | # for cl in range(1, self.num_classes): 72 | # c_mask = conf_scores[cl].gt(self.conf_thresh) 73 | # scores = conf_scores[cl][c_mask] 74 | # if scores.dim() == 0: 75 | # continue 76 | # l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 77 | # boxes = decoded_boxes[l_mask].view(-1, 4) 78 | # ids, count = nms(boxes, scores, self.nms_thresh, self.top_k) 79 | # self.output[i, cl, :count] = \ 80 | # torch.cat((scores[ids[:count]].unsqueeze(1), 81 | # boxes[ids[:count]]), 1) 82 | 83 | # return self.output 84 | 85 | def forward(self, predictions): 86 | """ 87 | Args: 88 | loc_data: (tensor) Loc preds from loc layers 89 | Shape: [batch,num_priors*4] 90 | conf_data: (tensor) Shape: Conf preds from conf layers 91 | Shape: [batch*num_priors,num_classes] 92 | prior_data: (tensor) Prior boxes and variances from priorbox layers 93 | Shape: [1,num_priors,4] 94 | """ 95 | loc, conf = predictions 96 | 97 | loc_data = loc.data 98 | conf_data = conf.data 99 | prior_data = self.priors.data 100 | 101 | num = loc_data.size(0) # batch size 102 | num_priors = prior_data.size(0) 103 | # self.output.zero_() 104 | if num == 1: 105 | # size batch x num_classes x num_priors 106 | conf_preds = conf_data.t().contiguous().unsqueeze(0) 107 | else: 108 | conf_preds = conf_data.view(num, num_priors, 109 | self.num_classes).transpose(2, 1) 110 | # self.output.expand_(num, self.num_classes, self.top_k, 5) 111 | output = torch.zeros(num, self.num_classes, self.top_k, 5) 112 | 113 | # Decode predictions into bboxes. 
114 | for i in range(num): 115 | decoded_boxes = decode(loc_data[i], prior_data, self.variance) 116 | 117 | # print('decoded_boxes: {}'.format(decoded_boxes)) 118 | 119 | # For each class, perform nms 120 | conf_scores = conf_preds[i].clone() 121 | # print('conf_scores: {}'.format(conf_scores)) 122 | # print(conf_scores.size()) 123 | 124 | for cl in range(1, self.num_classes): 125 | # print(conf_scores[cl]) 126 | # print(conf_scores[cl].size()) 127 | c_mask = conf_scores[cl].gt(self.conf_thresh).nonzero().view(-1) 128 | # print('cmask: ', c_mask) 129 | if c_mask.dim() == 0: 130 | continue 131 | scores = conf_scores[cl][c_mask] 132 | if scores.dim() == 0: 133 | continue 134 | # l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes) 135 | # boxes = decoded_boxes[l_mask].view(-1, 4) 136 | boxes = decoded_boxes[c_mask, :] 137 | # print(scores, boxes) 138 | # idx of highest scoring and non-overlapping boxes per class 139 | # cls_dets = torch.cat((boxes, scores), 1) 140 | # _, order = torch.sort(scores, 0, True) 141 | # cls_dets = cls_dets[order] 142 | # keep = nms(cls_dets, self.nms_thresh) 143 | # cls_dets = cls_dets[keep.view(-1).long()] 144 | 145 | # print('before nms:') 146 | # print('boxes: {}'.format(boxes)) 147 | # print('scores: {}'.format(scores)) 148 | # why it is empty? 
class PriorBox(object):
    """Compute priorbox coordinates in center-offset form for each source
    feature map.

    Args:
        image_size: [height, width] of the network input, in pixels.
        feature_maps: [(height, width), ...] of every source feature map.
        aspect_ratios: per feature map, a list whose entries are either ints
            (1 emits a min-size and a max-size square box, any other int
            emits the ratio and its reciprocal) or [w, h] lists that
            multiply the layer scale directly. Entries of any other type
            are ignored.
        scale: either one [h, w] pixel size per feature map, or a
            [min_scale, max_scale] pair of floats that is linearly
            interpolated over the feature maps (1.0 is appended as the
            extra scale needed by the max-size box of the last layer).
        archor_stride: optional per-layer (h, w) anchor strides in pixels;
            defaults to one feature-map cell.
        archor_offest: optional per-layer (h, w) offsets in pixels of the
            first anchor centre; defaults to half a stride.
        clip: clamp the resulting boxes to [0, 1].

    Raises:
        ValueError: if ``scale`` is in neither supported format.
    """
    def __init__(self, image_size, feature_maps, aspect_ratios, scale, archor_stride=None, archor_offest=None, clip=True):
        super(PriorBox, self).__init__()
        self.image_size = image_size  # [height, width]
        self.feature_maps = feature_maps  # [(height, width), ...]
        self.aspect_ratios = aspect_ratios
        # length of the aspect ratio configuration (one entry per feature map)
        self.num_priors = len(aspect_ratios)
        self.clip = clip

        img_h, img_w = self.image_size[0], self.image_size[1]

        # scale value, relative to the image size
        if isinstance(scale[0], (list, tuple)):
            # pixel sizes: take the smaller of the two relative extents
            self.scales = [min(s[0] / img_h, s[1] / img_w) for s in scale]
        elif isinstance(scale[0], float) and len(scale) == 2:
            num_layers = len(feature_maps)
            min_scale, max_scale = scale
            # max(..., 1) keeps a single feature map from dividing by zero
            span = max(num_layers - 1, 1)
            self.scales = [min_scale + (max_scale - min_scale) * i / span
                           for i in range(num_layers)] + [1.0]
        else:
            # fail here instead of with an AttributeError inside forward()
            raise ValueError(
                'scale must be a list of [h, w] sizes or a '
                '[min_scale, max_scale] pair of floats, got %r' % (scale,))

        if archor_stride:
            self.steps = [(steps[0] / img_h, steps[1] / img_w) for steps in archor_stride]
        else:
            self.steps = [(1 / f_h, 1 / f_w) for f_h, f_w in feature_maps]

        if archor_offest:
            # both components are pixel offsets and must be normalised by
            # the image size (the x component used to be multiplied)
            self.offset = [[offset[0] / img_h, offset[1] / img_w] for offset in archor_offest]
        else:
            self.offset = [[steps[0] * 0.5, steps[1] * 0.5] for steps in self.steps]

    def forward(self):
        """Generate all prior boxes.

        Returns:
            A float tensor of shape [num_priors, 4] with rows
            (cx, cy, w, h) relative to the image size, clamped to [0, 1]
            when ``clip`` is set.
        """
        mean = []
        for k, f in enumerate(self.feature_maps):
            step_y, step_x = self.steps[k][0], self.steps[k][1]
            off_y, off_x = self.offset[k][0], self.offset[k][1]
            s_k = self.scales[k]
            for i, j in product(range(f[0]), range(f[1])):
                cx = j * step_x + off_x
                cy = i * step_y + off_y

                for ar in self.aspect_ratios[k]:
                    if isinstance(ar, int):
                        if ar == 1:
                            # aspect_ratio: 1 Min size
                            mean += [cx, cy, s_k, s_k]

                            # aspect_ratio: 1 Max size
                            # rel size: sqrt(s_k * s_(k+1))
                            s_k_prime = sqrt(s_k * self.scales[k + 1])
                            mean += [cx, cy, s_k_prime, s_k_prime]
                        else:
                            ar_sqrt = sqrt(ar)
                            mean += [cx, cy, s_k * ar_sqrt, s_k / ar_sqrt]
                            mean += [cx, cy, s_k / ar_sqrt, s_k * ar_sqrt]
                    elif isinstance(ar, list):
                        # explicit [w, h] multipliers of the layer scale
                        mean += [cx, cy, s_k * ar[0], s_k * ar[1]]
        # back to torch land
        output = torch.Tensor(mean).view(-1, 4)
        if self.clip:
            output.clamp_(max=1, min=0)
        return output
24 | """ 25 | 26 | def __init__(self, cfg, priors, use_gpu=True): 27 | super(FocalLoss, self).__init__() 28 | self.use_gpu = use_gpu 29 | self.num_classes = cfg.NUM_CLASSES 30 | self.background_label = cfg.BACKGROUND_LABEL 31 | self.negpos_ratio = cfg.NEGPOS_RATIO 32 | self.threshold = cfg.MATCHED_THRESHOLD 33 | self.unmatched_threshold = cfg.UNMATCHED_THRESHOLD 34 | self.variance = cfg.VARIANCE 35 | self.priors = priors 36 | 37 | self.alpha = Variable(torch.ones(self.num_classes, 1) * cfg.alpha) 38 | self.gamma = cfg.gamma 39 | 40 | 41 | def forward(self, predictions, targets): 42 | """Multibox Loss 43 | Args: 44 | predictions (tuple): A tuple containing loc preds, conf preds, 45 | and prior boxes from SSD net. 46 | conf shape: torch.size(batch_size,num_priors,num_classes) 47 | loc shape: torch.size(batch_size,num_priors,4) 48 | priors shape: torch.size(num_priors,4) 49 | ground_truth (tensor): Ground truth boxes and labels for a batch, 50 | shape: [batch_size,num_objs,5] (last idx is the label). 
51 | """ 52 | loc_data, conf_data = predictions 53 | num = loc_data.size(0) 54 | priors = self.priors 55 | # priors = priors[:loc_data.size(1), :] 56 | num_priors = (priors.size(0)) 57 | 58 | # match priors (default boxes) and ground truth boxes 59 | loc_t = torch.Tensor(num, num_priors, 4) 60 | conf_t = torch.LongTensor(num, num_priors) 61 | for idx in range(num): 62 | truths = targets[idx][:,:-1].data 63 | labels = targets[idx][:,-1].data 64 | defaults = priors.data 65 | match(self.threshold,truths,defaults,self.variance,labels,loc_t,conf_t,idx) 66 | if self.use_gpu: 67 | loc_t = loc_t.cuda() 68 | conf_t = conf_t.cuda() 69 | # wrap targets 70 | loc_t = Variable(loc_t, requires_grad=False) 71 | conf_t = Variable(conf_t,requires_grad=False) 72 | 73 | pos = conf_t > 0 74 | num_pos = pos.sum() 75 | 76 | # Localization Loss (Smooth L1) 77 | # Shape: [batch,num_priors,4] 78 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 79 | loc_p = loc_data[pos_idx].view(-1,4) 80 | loc_t = loc_t[pos_idx].view(-1,4) 81 | loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False) 82 | loss_l/=num_pos.data.sum() 83 | 84 | # Confidence Loss (Focal loss) 85 | # Shape: [batch,num_priors,1] 86 | loss_c = self.focal_loss(conf_data.view(-1, self.num_classes), conf_t.view(-1,1)) 87 | 88 | return loss_l,loss_c 89 | 90 | def focal_loss(self, inputs, targets): 91 | '''Focal loss. 92 | mean of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 93 | ''' 94 | N = inputs.size(0) 95 | C = inputs.size(1) 96 | P = F.softmax(inputs) 97 | 98 | class_mask = inputs.data.new(N, C).fill_(0) 99 | class_mask = Variable(class_mask) 100 | ids = targets.view(-1, 1) 101 | class_mask.scatter_(1, ids.data, 1.) 
class L2Norm(nn.Module):
    """Channel-wise L2 normalisation with a learnable per-channel scale.

    The input is divided by its L2 norm taken over dim 1 (plus a small
    epsilon) and multiplied by ``weight``, which starts at ``scale`` for
    every channel.
    """

    def __init__(self,n_channels, scale):
        super(L2Norm,self).__init__()
        self.n_channels = n_channels
        self.gamma = scale or None
        self.eps = 1e-10
        self.weight = nn.Parameter(torch.Tensor(self.n_channels))
        self.reset_parameters()

    def reset_parameters(self):
        """Fill every channel weight with the initial scale ``gamma``."""
        nn.init.constant_(self.weight, self.gamma)

    def forward(self, x):
        """Normalise ``x`` over dim 1 and apply the per-channel weight."""
        squared_sum = x.pow(2).sum(dim=1, keepdim=True)
        norm = squared_sum.sqrt() + self.eps
        normalised = x / norm
        # reshape to (1, C, 1, 1) so the weight broadcasts over a 4-D input
        per_channel = self.weight.view(1, -1, 1, 1)
        return per_channel * normalised
def forward(self, predictions, targets):
    """Compute the SSD localisation and confidence losses for one batch.

    Args:
        predictions (tuple): ``(loc_data, conf_data)``.
            loc_data is viewed as [batch_size, num_priors, 4] and conf_data
            as [batch_size, num_priors, num_classes].
        targets: per-image ground truth, indexed as ``targets[idx]``; every
            row holds the box in all but the last column and the label in
            the last column.

    Returns:
        tuple: ``(loss_l, loss_c)`` - the summed Smooth-L1 loss over positive
        priors and the summed cross-entropy over positives plus mined hard
        negatives, both divided by the number of positive priors.
    """
    loc_data, conf_data = predictions
    num = loc_data.size(0)  # batch size
    priors = self.priors
    num_priors = priors.size(0)

    # Match priors (default boxes) with ground-truth boxes.
    # NOTE(review): `match` is imported from lib.utils.box_utils; it is
    # presumed to fill loc_t[idx] / conf_t[idx] in place, which is how its
    # results are consumed below - confirm against its definition.
    loc_t = torch.Tensor(num, num_priors, 4).to(self.device)
    conf_t = torch.LongTensor(num, num_priors).to(self.device)
    for idx in range(num):
        truths = targets[idx][:, :-1].data
        labels = targets[idx][:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    loc_t = loc_t.detach()
    conf_t = conf_t.detach()

    # Priors with a label above 0 count as positives.
    pos = conf_t > 0

    # Localization loss (Smooth L1) over positive priors only.
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    # `reduction='sum'` is the supported spelling of `size_average=False`.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Hard negative mining. The per-prior loss is only used to rank the
    # negatives, so it is computed outside the autograd graph; this also
    # keeps the in-place zeroing below away from tensors that need gradients.
    with torch.no_grad():
        batch_conf = conf_data.view(-1, self.num_classes)
        rank_loss = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
        rank_loss = rank_loss.view(num, -1)
        rank_loss[pos] = 0  # positives never compete for negative slots
        _, loss_idx = rank_loss.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss over positives and the selected negatives.
    selected = pos | neg
    conf_p = conf_data[selected].view(-1, self.num_classes)
    targets_weighted = conf_t[selected]
    loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # L(x,c,l,g) = (Lconf(x, c) + alpha * Lloc(x,l,g)) / N
    # Clamp N to at least 1: a batch without any positive prior used to
    # divide 0 by 0 and return NaN losses.
    N = num_pos.sum().to(dtype=torch.float).clamp(min=1)
    loss_l = loss_l / N
    loss_c = loss_c / N
    return loss_l, loss_c
def _forward_features_size(model, img_size):
    """Probe ``model`` once and report the (H, W) of every feature map.

    The model is switched to eval mode and called with ``phase='feature'`` on
    a single random 3-channel image of size ``img_size[0] x img_size[1]``;
    dims 2 and 3 of each returned tensor are collected.

    Args:
        model: callable module accepting ``(x, phase='feature')``.
        img_size: indexable pair, read as ``img_size[0]`` and ``img_size[1]``.

    Returns:
        list of ``(height, width)`` tuples, one per feature map.
    """
    model.eval()
    height, width = img_size[0], img_size[1]
    probe = torch.rand(1, 3, height, width)
    with torch.no_grad():
        probe = torch.Tensor(probe)
        outputs = model(probe, phase='feature')
    sizes = []
    for fmap in outputs:
        shape = fmap.size()
        sizes.append((shape[2], shape[3]))
    return sizes
class _conv_bn(nn.Module):
    """3x3 convolution -> batch norm -> LeakyReLU(0.1) building block.

    Args:
        inp: number of input channels.
        oup: number of output channels; also stored as ``self.depth``.
        stride: stride of the 3x3 convolution (padding is fixed at 1).
    """

    def __init__(self, inp, oup, stride):
        super(_conv_bn, self).__init__()
        stages = [
            nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(oup),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),
        ]
        self.conv = nn.Sequential(*stages)
        # Output width, kept as an attribute alongside the layers.
        self.depth = oup

    def forward(self, x):
        """Run the conv/bn/activation stack on ``x``."""
        out = self.conv(x)
        return out
class _residual_block(nn.Module):
    """Darknet residual unit.

    When ``stride == 1`` and ``inp == oup`` the block is a 1x1 squeeze to
    ``int(oup * expand_ratio)`` channels followed by a 3x3 convolution back to
    ``oup``, and its output is added to the input. Otherwise it is a single
    3x3 conv/bn/LeakyReLU with the given stride and no shortcut.

    Args:
        inp: number of input channels.
        oup: number of output channels; also stored as ``self.depth``.
        stride: stride of the 3x3 convolution.
        expand_ratio: width of the 1x1 squeeze relative to ``oup``.
    """

    def __init__(self, inp, oup, stride, expand_ratio=0.5):
        super(_residual_block, self).__init__()
        self.use_res_connect = stride == 1 and inp == oup
        if self.use_res_connect:
            squeezed = int(oup * expand_ratio)
            stages = [
                nn.Conv2d(inp, squeezed, 1, 1, bias=False),
                nn.BatchNorm2d(squeezed),
                nn.LeakyReLU(0.1, inplace=True),
                nn.Conv2d(squeezed, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.LeakyReLU(0.1, inplace=True),
            ]
        else:
            stages = [
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.LeakyReLU(0.1, inplace=True),
            ]
        self.conv = nn.Sequential(*stages)
        self.depth = oup

    def forward(self, x):
        """Apply the block, adding the identity shortcut when it is enabled."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
def wrapped_partial(func, *args, **kwargs):
    """Build ``functools.partial(func, *args, **kwargs)`` that keeps func's metadata.

    A bare partial object has no ``__name__`` or ``__doc__`` of its own;
    ``functools.update_wrapper`` copies them over from ``func``.

    Returns:
        The partial object, carrying the wrapped function's metadata.
    """
    bound = functools.partial(func, *args, **kwargs)
    # update_wrapper returns the wrapper it was given.
    return functools.update_wrapper(bound, func)
class _inverted_residual_bottleneck(nn.Module):
    """Inverted residual unit: 1x1 expand -> 3x3 depthwise -> 1x1 linear project.

    The hidden width is ``inp * expand_ratio``. The input is added back to the
    output only when ``stride == 1`` and ``inp == oup``.

    Args:
        inp: number of input channels.
        oup: number of output channels; also stored as ``self.depth``.
        stride: stride of the depthwise 3x3 convolution.
        expand_ratio: multiplier applied to ``inp`` for the hidden width.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(_inverted_residual_bottleneck, self).__init__()
        self.use_res_connect = stride == 1 and inp == oup
        hidden = inp * expand_ratio
        expand = [
            nn.Conv2d(inp, hidden, 1, 1, 0, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
        ]
        depthwise = [
            # groups == channels makes this a per-channel (depthwise) conv
            nn.Conv2d(hidden, hidden, 3, stride, 1, groups=hidden, bias=False),
            nn.BatchNorm2d(hidden),
            nn.ReLU6(inplace=True),
        ]
        project = [
            # linear projection: no activation after the last batch norm
            nn.Conv2d(hidden, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*(expand + depthwise + project))
        self.depth = oup

    def forward(self, x):
        """Apply the bottleneck, adding the shortcut when it is enabled."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
def wrapped_partial(func, *args, **kwargs):
    """Pre-bind arguments to ``func`` while keeping its name and docstring.

    Args:
        func: callable to specialise.
        *args: positional arguments fixed in the resulting partial.
        **kwargs: keyword arguments fixed in the resulting partial.

    Returns:
        A ``functools.partial`` object updated with ``func``'s metadata via
        ``functools.update_wrapper``.
    """
    specialised = functools.partial(func, *args, **kwargs)
    functools.update_wrapper(wrapper=specialised, wrapped=func)
    return specialised
functools 6 | 7 | BasicBlock = namedtuple('BasicBlock', ['stride', 'depth', 'num', 't']) 8 | Bottleneck = namedtuple('Bottleneck', ['stride', 'depth', 'num', 't']) # t is the expension factor 9 | 10 | V18_CONV_DEFS = [ 11 | BasicBlock(stride=1, depth=64, num=2, t=1), 12 | BasicBlock(stride=2, depth=128, num=2, t=1), 13 | BasicBlock(stride=2, depth=256, num=2, t=1), 14 | # BasicBlock(stride=2, depth=512, num=2, t=1), 15 | ] 16 | 17 | V34_CONV_DEFS = [ 18 | BasicBlock(stride=1, depth=64, num=3, t=1), 19 | BasicBlock(stride=2, depth=128, num=4, t=1), 20 | BasicBlock(stride=2, depth=256, num=6, t=1), 21 | # BasicBlock(stride=2, depth=512, num=3, t=1), 22 | ] 23 | 24 | V50_CONV_DEFS = [ 25 | Bottleneck(stride=1, depth=64, num=3, t=4), 26 | Bottleneck(stride=2, depth=128, num=4, t=4), 27 | Bottleneck(stride=2, depth=256, num=6, t=4), 28 | # Bottleneck(stride=2, depth=512, num=3, t=4), 29 | ] 30 | 31 | V101_CONV_DEFS = [ 32 | Bottleneck(stride=1, depth=64, num=3, t=4), 33 | Bottleneck(stride=2, depth=128, num=4, t=4), 34 | Bottleneck(stride=2, depth=256, num=23, t=4), 35 | # Bottleneck(stride=2, depth=512, num=3, t=4), 36 | ] 37 | 38 | class _basicblock(nn.Module): 39 | def __init__(self, inplanes, planes, stride=1, expansion=1, downsample=None): 40 | super(_basicblock, self).__init__() 41 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, 42 | padding=1, bias=False) 43 | self.bn1 = nn.BatchNorm2d(planes) 44 | self.relu = nn.ReLU(inplace=True) 45 | self.conv2 = nn.Conv2d(planes, planes * expansion, kernel_size=3, stride=1, 46 | padding=1, bias=False) 47 | self.bn2 = nn.BatchNorm2d(planes * expansion) 48 | self.downsample = downsample 49 | self.stride = stride 50 | 51 | def forward(self, x): 52 | residual = x 53 | 54 | out = self.conv1(x) 55 | out = self.bn1(out) 56 | out = self.relu(out) 57 | 58 | out = self.conv2(out) 59 | out = self.bn2(out) 60 | 61 | if self.downsample is not None: 62 | residual = self.downsample(x) 63 | 64 | out += residual 65 | 
def resnet(conv_defs, depth_multiplier=1.0, min_depth=8):
    """Build the layer list of a ResNet feature extractor.

    The list starts with the fixed stem (7x7/2 conv to 64 channels, batch
    norm, ReLU, 3x3/2 max-pool) followed by ``conv_def.num`` residual blocks
    for every entry of ``conv_defs``.

    Args:
        conv_defs: sequence of ``BasicBlock`` / ``Bottleneck`` tuples
            (``stride``, ``depth``, ``num``, ``t``); ``t`` is the expansion
            factor passed to the block.
        depth_multiplier: factor applied to every configured depth.
        min_depth: lower bound for any scaled depth.

    Returns:
        list of ``nn.Module`` layers, in forward order.
    """
    def depth(d):
        return max(int(d * depth_multiplier), min_depth)

    layers = [
        nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    ]
    in_channels = 64
    for conv_def in conv_defs:
        out_channels = depth(conv_def.depth * conv_def.t)
        # A 1x1 projection on the shortcut is only needed when the stage
        # changes resolution or width. It must be reset to None otherwise:
        # the original left the name unbound, so a first stage that keeps
        # 64 channels at stride 1 (resnet_18 / resnet_34) raised
        # UnboundLocalError, and later stages could reuse a stale projection.
        if conv_def.stride != 1 or in_channels != out_channels:
            projection = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=conv_def.stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            projection = None

        if isinstance(conv_def, BasicBlock):
            block = _basicblock
        elif isinstance(conv_def, Bottleneck):
            block = _bottleneck
        else:
            # Entries of any other type contribute no layers, as before.
            continue

        for n in range(conv_def.num):
            # Only the first block of a stage strides and projects.
            first = n == 0
            stride = conv_def.stride if first else 1
            downsample = projection if first else None
            layers.append(block(in_channels, depth(conv_def.depth), stride,
                                conv_def.t, downsample))
            in_channels = out_channels
    return layers
def vgg(cfg, i, batch_norm=False):
    """Build the VGG base layers followed by the dilated conv6/conv7 tail.

    Args:
        cfg: sequence describing the base network. ``'M'`` adds a 2x2 max-pool,
            ``'C'`` adds a 2x2 max-pool with ``ceil_mode=True``, and any other
            entry is taken as the output width of a 3x3 convolution followed
            by ReLU (with batch norm in between when ``batch_norm`` is set).
        i: number of input channels of the first convolution.
        batch_norm: insert ``nn.BatchNorm2d`` after every configured conv.

    Returns:
        list of ``nn.Module`` layers: the configured base, then a 3x3/1
        max-pool, a 3x3 convolution with dilation 6 to 1024 channels, ReLU,
        a 1x1 convolution to 1024 channels and ReLU.
    """
    layers = []
    in_channels = i
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif v == 'C':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    layers += [
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        # Take the width of the last configured conv instead of a hard-coded
        # 512, so configurations that do not end at 512 channels still chain.
        nn.Conv2d(in_channels, 1024, kernel_size=3, padding=6, dilation=6),
        nn.ReLU(inplace=True),
        nn.Conv2d(1024, 1024, kernel_size=1),
        nn.ReLU(inplace=True)]
    return layers
def forward(self, x, phase='eval'):
    """Run the FSSD-lite network on ``x``.

    Args:
        x: input image batch.
        phase: ``'feature'`` returns the list of pyramid feature maps;
            ``'eval'`` returns ``(loc, softmax(conf))``; any other value
            returns ``(loc, conf)`` with raw confidences.

    Returns:
        For ``'feature'``: list of pyramid tensors. Otherwise a 2-tuple whose
        first item is ``loc`` viewed as [batch, -1, 4]. The second item is the
        softmax over ``conf`` viewed as [-1, num_classes] for ``'eval'``, or
        ``conf`` viewed as [batch, -1, num_classes] for every other phase.
    """
    sources, transformed, pyramids = [], [], []
    loc, conf = [], []

    # Base layers: keep the outputs whose index is listed in feature_layer.
    for index, layer in enumerate(self.base):
        x = layer(x)
        if index in self.feature_layer:
            sources.append(x)

    # Every extra layer contributes a source map.
    for layer in self.extras:
        x = layer(x)
        sources.append(x)
    assert len(self.transforms) == len(sources)

    # All sources but the first are resized to the first source's H x W
    # by their transform before concatenation.
    upsize = (sources[0].size()[2], sources[0].size()[3])
    for k, transform in enumerate(self.transforms):
        target_size = upsize if k != 0 else None
        transformed.append(transform(sources[k], target_size))
    x = self.norm(torch.cat(transformed, 1))

    # Pyramid layers are chained; each intermediate output is kept.
    for layer in self.pyramids:
        x = layer(x)
        pyramids.append(x)

    if phase == 'feature':
        return pyramids

    # Multibox heads: move channels last, then flatten per image.
    for feat, loc_head, conf_head in zip(pyramids, self.loc, self.conf):
        loc.append(loc_head(feat).permute(0, 2, 3, 1).contiguous())
        conf.append(conf_head(feat).permute(0, 2, 3, 1).contiguous())
    loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
    conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

    loc_out = loc.view(loc.size(0), -1, 4)
    if phase == 'eval':
        conf_out = self.softmax(conf.view(-1, self.num_classes))
    else:
        conf_out = conf.view(conf.size(0), -1, self.num_classes)
    return (loc_out, conf_out)
def add_extras(base, feature_layer, mbox, num_classes):
    """Assemble the extra, fusion, pyramid and head layers of FSSD-lite.

    Args:
        base: base-network layers, returned unchanged.
        feature_layer: pair of ``[layers, depths]`` lists.
            ``feature_layer[0]`` describes the source maps: ``'S'`` adds a
            stride-2 extra layer, ``''`` adds a stride-1 extra layer, and any
            other entry only records ``depth`` as the channel count of that
            source. ``feature_layer[1]`` describes the pyramid: ``'S'`` adds a
            stride-2 3x3 conv, ``''`` a stride-1 3x3 conv.
        mbox: number of boxes per location for each pyramid level.
        num_classes: number of classes predicted per box.

    Returns:
        tuple ``(base, extra_layers, (feature_transform_layers,
        pyramid_feature_layers), (loc_layers, conf_layers))``.

    Raises:
        AssertionError: if ``feature_layer[1][0]`` contains an entry other
            than ``'S'`` or ``''``.
    """
    extra_layers = []
    feature_transform_layers = []
    pyramid_feature_layers = []
    loc_layers = []
    conf_layers = []
    in_channels = None
    # Every source is squeezed to half the depth of the last source layer.
    feature_transform_channel = int(feature_layer[0][1][-1] / 2)
    for layer, depth in zip(feature_layer[0][0], feature_layer[0][1]):
        if layer == 'S':
            extra_layers += [_conv_dw(in_channels, depth, stride=2, padding=1, expand_ratio=1)]
        elif layer == '':
            extra_layers += [_conv_dw(in_channels, depth, stride=1, expand_ratio=1)]
        # Any other entry contributes no extra layer; in every case `depth`
        # becomes the channel count of this source.
        in_channels = depth
        feature_transform_layers += [
            BasicConv(in_channels, feature_transform_channel, kernel_size=1, padding=0)]

    # The transformed sources are concatenated along the channel axis.
    in_channels = len(feature_transform_layers) * feature_transform_channel
    for layer, depth, box in zip(feature_layer[1][0], feature_layer[1][1], mbox):
        if layer == 'S':
            pyramid_feature_layers += [
                BasicConv(in_channels, depth, kernel_size=3, stride=2, padding=1)]
            in_channels = depth
        elif layer == '':
            # Only the very first pyramid layer is padded.
            pad = 0 if pyramid_feature_layers else 1
            pyramid_feature_layers += [
                BasicConv(in_channels, depth, kernel_size=3, stride=1, padding=pad)]
            in_channels = depth
        else:
            # The original built this exception without raising it, so an
            # unknown layer code silently produced heads with no pyramid
            # layer behind them.
            raise AssertionError('Undefined layer')
        loc_layers += [nn.Conv2d(in_channels, box * 4, kernel_size=3, padding=1)]
        conf_layers += [nn.Conv2d(in_channels, box * num_classes, kernel_size=3, padding=1)]
    return base, extra_layers, (feature_transform_layers, pyramid_feature_layers), (loc_layers, conf_layers)
self.loc = nn.ModuleList(head[0]) 19 | self.conf = nn.ModuleList(head[1]) 20 | self.softmax = nn.Softmax(dim=-1) 21 | 22 | self.feature_layer = feature_layer[0] 23 | 24 | def _upsample_add(self, x, y): 25 | '''Upsample and add two feature maps. 26 | Args: 27 | x: (Variable) top feature map to be upsampled. 28 | y: (Variable) lateral feature map. 29 | Returns: 30 | (Variable) added feature map. 31 | Note in PyTorch, when input size is odd, the upsampled feature map 32 | with `F.upsample(..., scale_factor=2, mode='nearest')` 33 | maybe not equal to the lateral feature map size. 34 | e.g. 35 | original input size: [N,_,15,15] -> 36 | conv2d feature map size: [N,_,8,8] -> 37 | upsampled feature map size: [N,_,16,16] 38 | So we choose bilinear upsample which supports arbitrary output sizes. 39 | ''' 40 | _,_,H,W = y.size() 41 | return F.upsample(x, size=(H,W), mode='bilinear') + y 42 | 43 | def forward(self, x, phase='eval'): 44 | sources, loc, conf = [list() for _ in range(3)] 45 | 46 | # apply bases layers and cache source layer outputs 47 | for k in range(len(self.base)): 48 | x = self.base[k](x) 49 | if k in self.feature_layer: 50 | sources.append(x) 51 | 52 | for i in range(len(sources))[::-1]: 53 | if i != len(sources) -1: 54 | xx = self.extras[i](self._upsample_add(xx, self.transforms[i](sources[i]))) 55 | else: 56 | xx = self.transforms[i](sources[i]) 57 | sources[i] = xx 58 | 59 | # apply extra layers and cache source layer outputs 60 | for i, v in enumerate(self.extras): 61 | if i >= len(sources): 62 | x = v(x) 63 | sources.append(x) 64 | 65 | if phase == 'feature': 66 | return sources 67 | 68 | # apply multibox head to source layers 69 | for x in sources: 70 | loc.append(self.loc(x).permute(0, 2, 3, 1).contiguous()) 71 | conf.append(self.conf(x).permute(0, 2, 3, 1).contiguous()) 72 | loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1) 73 | conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1) 74 | 75 | if phase == 'eval': 76 | output = ( 77 | 
loc.view(loc.size(0), -1, 4), # loc preds 78 | self.softmax(conf.view(-1, self.num_classes)), # conf preds 79 | ) 80 | else: 81 | output = ( 82 | loc.view(loc.size(0), -1, 4), 83 | conf.view(conf.size(0), -1, self.num_classes), 84 | ) 85 | return output 86 | 87 | 88 | 89 | def add_extras(base, feature_layer, mbox, num_classes, version): 90 | extra_layers = [] 91 | transform_layers = [] 92 | loc_layers = [Retina_head(box * 4)] 93 | conf_layers = [Retina_head(box * num_classes)] 94 | 95 | for layer, in_channels, box in zip(feature_layer[0], feature_layer[1], mbox): 96 | if 'lite' in version: 97 | if layer == 'S': 98 | extra_layers += [ _conv_dw(in_channels, 256, stride=2, padding=1, expand_ratio=1) ] 99 | elif layer == '': 100 | extra_layers += [ _conv_dw(in_channels, 256, stride=1, expand_ratio=1) ] 101 | else: 102 | extra_layers += [ _conv_dw(256, 256, stride=1, padding=1, expand_ratio=1) ] 103 | transform_layers += [ _conv_pw(in_channels, 256) ] 104 | else: 105 | if layer == 'S': 106 | extra_layers += [ _conv(in_channels, 256, stride=2, padding=1) ] 107 | elif layer == '': 108 | extra_layers += [ _conv(in_channels, 256, stride=1) ] 109 | else: 110 | extra_layers += [ _conv(256, 256, stride=1, padding=1) ] 111 | transform_layers += [ _conv_pw(in_channels, 256) ] 112 | return base, (transform_layers, extra_layers), (loc_layers, conf_layers) 113 | 114 | def Retina_head(self, out_planes): 115 | layers = [] 116 | for _ in range(4): 117 | layers.append(nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)) 118 | layers.append(nn.ReLU(True)) 119 | layers.append(nn.Conv2d(256, out_planes, kernel_size=3, stride=1, padding=1)) 120 | return nn.Sequential(*layers) 121 | 122 | # based on the implementation in https://github.com/tensorflow/models/blob/master/research/object_detection/models/feature_map_generators.py#L213 123 | # when the expand_ratio is 1, the implemetation is nearly same. Since the shape is always change, I do not add the shortcut as what mobilenetv2 did. 
def build_retina_lite(base, feature_layer, mbox, num_classes):
    """Build the lite RetinaNet variant.

    See "Focal Loss for Dense Object Detection",
    https://arxiv.org/pdf/1708.02002.pdf, for more details.

    Args:
        base: zero-argument callable returning the backbone layers.
        feature_layer: feature-layer configuration forwarded to ``add_extras``
            and to the model.
        mbox: boxes-per-location configuration forwarded to ``add_extras``.
        num_classes: number of classes.

    Returns:
        A ``Retina`` model whose extra layers are built with
        ``version='retinanet_lite'``.
    """
    base_, extras_, head_ = add_extras(base(), feature_layer, mbox, num_classes, version='retinanet_lite')
    # The original returned SSD(...). No SSD is defined in this module's visible
    # code, and add_extras here returns a (transforms, extras) pair, which is
    # the layout Retina.__init__ unpacks.
    return Retina(base_, extras_, head_, feature_layer, num_classes)
def add_extras(base, feature_layer, mbox, num_classes, version):
    """Create the SSD extra layers and the per-source loc / conf heads.

    Args:
        base: backbone layers, returned untouched.
        feature_layer: ``feature_layer[0]`` holds one entry per source
            (``'S'`` adds a stride-2 extra block, ``''`` adds a stride-1 one,
            anything else adds no extra layer); ``feature_layer[1]`` holds the
            matching channel depths.
        mbox: number of boxes per location, one entry per source.
        num_classes: number of classes predicted per box.
        version: when it contains ``'lite'`` the extra blocks are built with
            ``_conv_dw``; otherwise each block is a 1x1 conv followed by a
            3x3 conv.

    Returns:
        ``(base, extra_layers, (loc_layers, conf_layers))``.
    """
    use_lite = 'lite' in version
    extra_layers, loc_layers, conf_layers = [], [], []
    prev_channels = None

    for kind, channels, num_boxes in zip(feature_layer[0], feature_layer[1], mbox):
        if kind == 'S' or kind == '':
            downsample = kind == 'S'
            if use_lite:
                if downsample:
                    extra_layers.append(
                        _conv_dw(prev_channels, channels, stride=2, padding=1, expand_ratio=1))
                else:
                    extra_layers.append(
                        _conv_dw(prev_channels, channels, stride=1, expand_ratio=1))
            else:
                squeezed = int(channels / 2)
                extra_layers.append(nn.Conv2d(prev_channels, squeezed, kernel_size=1))
                if downsample:
                    extra_layers.append(
                        nn.Conv2d(squeezed, channels, kernel_size=3, stride=2, padding=1))
                else:
                    extra_layers.append(nn.Conv2d(squeezed, channels, kernel_size=3))
        # Every source, including ones taken from the backbone, sets the
        # channel count seen by its heads and by the next extra block.
        prev_channels = channels

        loc_layers.append(nn.Conv2d(prev_channels, num_boxes * 4, kernel_size=3, padding=1))
        conf_layers.append(nn.Conv2d(prev_channels, num_boxes * num_classes, kernel_size=3, padding=1))

    return base, extra_layers, (loc_layers, conf_layers)
138 | def _conv_dw(inp, oup, stride=1, padding=0, expand_ratio=1): 139 | return nn.Sequential( 140 | # pw 141 | nn.Conv2d(inp, oup * expand_ratio, 1, 1, 0, bias=False), 142 | nn.BatchNorm2d(oup * expand_ratio), 143 | nn.ReLU6(inplace=True), 144 | # dw 145 | nn.Conv2d(oup * expand_ratio, oup * expand_ratio, 3, stride, padding, groups=oup * expand_ratio, bias=False), 146 | nn.BatchNorm2d(oup * expand_ratio), 147 | nn.ReLU6(inplace=True), 148 | # pw-linear 149 | nn.Conv2d(oup * expand_ratio, oup, 1, 1, 0, bias=False), 150 | nn.BatchNorm2d(oup), 151 | ) 152 | 153 | 154 | def _conv(inp, oup, stride=1, padding=0): 155 | return nn.Sequential( 156 | nn.Conv2d(inp, oup, 3, stride, padding, bias=False), 157 | nn.BatchNorm2d(oup), 158 | nn.ReLU(inplace=True), 159 | ) 160 | 161 | 162 | def build_ssd(base, feature_layer, mbox, num_classes): 163 | """Single Shot Multibox Architecture 164 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 165 | """ 166 | base_, extras_, head_ = add_extras(base(), feature_layer, mbox, num_classes, version='ssd') 167 | return SSD(base_, extras_, head_, feature_layer, num_classes) 168 | 169 | 170 | def build_ssd_lite(base, feature_layer, mbox, num_classes): 171 | """Single Shot Multibox Architecture for embeded system 172 | See: https://arxiv.org/pdf/1512.02325.pdf & 173 | https://arxiv.org/pdf/1801.04381.pdf for more details. 
class SSDLite(nn.Module):
    """Single Shot Multibox detector for embedded systems.

    See https://arxiv.org/pdf/1512.02325.pdf and
    https://arxiv.org/pdf/1801.04381.pdf for more details.

    Args:
        base: backbone layers, applied in order.
        extras: extra layers applied after the backbone; each one contributes
            a source feature map.
        head: pair ``(loc_layers, conf_layers)``, one conv per source.
        feature_layer: ``feature_layer[0]`` lists the backbone layer indices
            used as sources; ``feature_layer[1][0]`` is the channel count
            given to the L2Norm applied to the first source.
        num_classes: number of classes predicted per box.
    """

    def __init__(self, base, extras, head, feature_layer, num_classes):
        super(SSDLite, self).__init__()
        self.num_classes = num_classes
        loc_layers, conf_layers = head[0], head[1]
        # Sub-modules are registered in the same order as before so that
        # state_dict keys stay compatible with saved checkpoints.
        self.base = nn.ModuleList(base)
        self.norm = L2Norm(feature_layer[1][0], 20)
        self.extras = nn.ModuleList(extras)
        self.loc = nn.ModuleList(loc_layers)
        self.conf = nn.ModuleList(conf_layers)
        self.softmax = nn.Softmax(dim=-1)
        self.feature_layer = feature_layer[0]

    def forward(self, x, phase='eval'):
        """Run the network on ``x``.

        Args:
            x: input image batch.
            phase: ``'feature'`` returns the list of source feature maps;
                ``'eval'`` returns ``(loc, softmax(conf))`` with ``conf``
                flattened to ``[-1, num_classes]``; any other value returns
                ``(loc, conf)`` with ``conf`` shaped
                ``[batch, -1, num_classes]``. ``loc`` is always
                ``[batch, -1, 4]``.
        """
        sources = []

        # Backbone pass; only the very first source goes through L2Norm.
        for index, layer in enumerate(self.base):
            x = layer(x)
            if index not in self.feature_layer:
                continue
            sources.append(x if sources else self.norm(x))

        # Every extra layer output (after ReLU) is a source.
        for extra in self.extras:
            x = F.relu(extra(x), inplace=True)
            sources.append(x)

        if phase == 'feature':
            return sources

        # Heads: channels-last layout so the per-location predictions are
        # contiguous before flattening.
        loc_maps, conf_maps = [], []
        for feat, loc_head, conf_head in zip(sources, self.loc, self.conf):
            loc_maps.append(loc_head(feat).permute(0, 2, 3, 1).contiguous())
            conf_maps.append(conf_head(feat).permute(0, 2, 3, 1).contiguous())
        loc = torch.cat([m.view(m.size(0), -1) for m in loc_maps], 1)
        conf = torch.cat([m.view(m.size(0), -1) for m in conf_maps], 1)

        boxes = loc.view(loc.size(0), -1, 4)
        if phase == 'eval':
            return boxes, self.softmax(conf.view(-1, self.num_classes))
        return boxes, conf.view(conf.size(0), -1, self.num_classes)
class ObjectDetector:
    """Inference wrapper: builds the configured model, loads its weights and
    runs detection on single images.

    Everything is driven by the module-level ``cfg`` object (``cfg.MODEL``,
    ``cfg.DATASET.PIXEL_MEANS``, ``cfg.POST_PROCESS``,
    ``cfg.RESUME_CHECKPOINT``).
    """

    def __init__(self, viz_arch=False):
        """Build the model, move it to the GPU when available and load weights.

        Args:
            viz_arch: when ``True`` the model architecture is printed.

        Raises:
            AssertionError: if ``cfg.RESUME_CHECKPOINT`` is empty.
        """
        self.cfg = cfg

        # Build model
        print('===> Building model')
        self.model, self.priorbox = create_model(cfg.MODEL)
        # volatile=True is deprecated; no_grad is the supported way to build
        # tensors that do not track gradients.
        with torch.no_grad():
            self.priors = self.priorbox.forward()

        # Print the model architecture and parameters
        if viz_arch is True:
            print('Model architectures:\n{}\n'.format(self.model))

        # Utilize GPUs for computation
        self.use_gpu = torch.cuda.is_available()
        # 'gpu' is not a valid torch device string; the CUDA device is 'cuda'.
        self.device = torch.device('cuda' if self.use_gpu else 'cpu')
        self.half = False
        if self.use_gpu:
            print('Utilize GPUs for computation')
            print('Number of GPU available', torch.cuda.device_count())
            self.model.cuda()
            # Tensor.cuda() returns a copy, so the result must be kept.
            self.priors = self.priors.cuda()
            cudnn.benchmark = True
            # Utilize half precision
            self.half = cfg.MODEL.HALF_PRECISION
            if self.half:
                self.model = self.model.half()
                self.priors = self.priors.half()

        # Build preprocessor and detector
        self.preprocessor = preproc(cfg.MODEL.IMAGE_SIZE, cfg.DATASET.PIXEL_MEANS, -2)
        self.detector = Detect(cfg.POST_PROCESS, self.priors)

        # Load weight:
        if cfg.RESUME_CHECKPOINT == '':
            # The original built this exception without raising it.
            raise AssertionError('RESUME_CHECKPOINT can not be empty')
        print('=> loading checkpoint {:s}'.format(cfg.RESUME_CHECKPOINT))
        checkpoint = torch.load(cfg.RESUME_CHECKPOINT, map_location=self.device)
        self.model.load_state_dict(checkpoint)
        # test only
        self.model.eval()

    def predict(self, img, threshold=0.6):
        """Detect objects in one image.

        Args:
            img: image array whose third axis has size 3; ``img.shape[1]`` and
                ``img.shape[0]`` are used as width and height for rescaling.
            threshold: minimum score for a detection to be returned.

        Returns:
            ``(labels, scores, coords)`` lists. Labels are the class index
            minus one; coords are the box values multiplied by
            ``(w, h, w, h)``.
        """
        assert img.shape[2] == 3
        width_height = tuple(img.shape[1::-1])
        # Flat (w, h, w, h): a 2x2 tensor cannot broadcast against the box
        # slice below, which is expected to hold 4 coordinates.
        # NOTE(review): confirm the detector emits 4 coordinates per box.
        scale = torch.Tensor(width_height + width_height)

        x = self.preprocessor(img)[0].unsqueeze(0).to(self.device)
        if self.half:
            # The model was converted to half precision, so the input must match.
            x = x.half()

        # forward
        with torch.no_grad():
            out = self.model(x)  # forward pass

            print('before nms: ', out[0].size())
            print(out[1].size())
            detections = self.detector.forward(out)
        print('detections: ', detections)
        # Put the scale on the same device / dtype as the detections.
        scale = scale.type_as(detections)

        # output
        labels, scores, coords = [list() for _ in range(3)]
        batch = 0  # only the first (and only) image of the batch is read
        max_per_class = detections.size(2)
        for classes in range(detections.size(1)):
            num = 0
            # The bound check stops the scan from running past the last slot
            # when every slot of a class is above the threshold.
            while num < max_per_class and detections[batch, classes, num, 0] >= threshold:
                scores.append(detections[batch, classes, num, 0])
                labels.append(classes - 1)
                coords.append(detections[batch, classes, num, 1:] * scale)
                num += 1
        return labels, scores, coords
-------------------------------------------------------------------------------- /lib/utils/build/temp.linux-x86_64-3.6/pycocotools/_mask.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/build/temp.linux-x86_64-3.6/pycocotools/_mask.o -------------------------------------------------------------------------------- /lib/utils/build/temp.linux-x86_64-3.6/pycocotools/maskApi.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/build/temp.linux-x86_64-3.6/pycocotools/maskApi.o -------------------------------------------------------------------------------- /lib/utils/data_augment_test.py: -------------------------------------------------------------------------------- 1 | """Data augmentation functionality. Passed as callable transformations to 2 | Dataset classes. 
def copy_in_params(net, params):
    """Copy the data of ``params`` into the parameters of ``net``, by position.

    Args:
        net: module whose parameters are overwritten in place.
        params: sequence of parameters; the i-th entry is copied into the
            i-th parameter of ``net``.
    """
    targets = list(net.parameters())
    for index, source in enumerate(params):
        targets[index].data.copy_(source.data)
def BN_convert_float(module):
    """Convert every batch-norm layer inside ``module`` back to float32.

    The built-in ``.apply`` cannot be used for this because it offers no way
    to guard the conversion by module type, so the module tree is walked
    explicitly.

    Args:
        module: root module; modified in place.

    Returns:
        The same ``module`` object.
    """
    batchnorm_base = torch.nn.modules.batchnorm._BatchNorm
    pending = [module]
    while pending:
        current = pending.pop()
        if isinstance(current, batchnorm_base):
            current.float()
        pending.extend(current.children())
    return module
def _import_symbols(locals):
    """Export every attribute of the compiled ``_lib`` into ``locals``.

    Callables are wrapped with ``_wrap_function`` (bound to ``_ffi``); other
    attributes are copied unchanged. Each name is also appended to
    ``__all__``.

    Args:
        locals: namespace mapping that receives the symbols.
    """
    for name in dir(_lib):
        attr = getattr(_lib, name)
        locals[name] = _wrap_function(attr, _ffi) if callable(attr) else attr
        __all__.append(name)
def nms_gpu(dets, thresh):
    """Run the CUDA NMS extension on ``dets``.

    Args:
        dets: detections tensor; one output slot is allocated per row.
        thresh: overlap threshold passed to the extension.

    Returns:
        The leading rows of the ``keep`` buffer, as many as the extension
        reports in ``num_out``.
    """
    # Both output buffers are created from `dets` and filled in place by the
    # extension call.
    kept_count = dets.new(1).zero_().int()
    keep_buffer = dets.new(dets.size(0), 1).zero_().int()
    nms.nms_cuda(keep_buffer, dets, kept_count, thresh)
    return keep_buffer[:kept_count[0]]
// Make `device_id` the current CUDA device if it is not already.
void _set_device(int device_id) {
  int current_device;
  // Fix: the address-of expression had been mangled into a mis-encoded
  // character sequence; cudaGetDevice needs a pointer to the int.
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // The call to cudaSetDevice must come before any calls to Get, which
  // may perform initialization using the GPU.
  CUDA_CHECK(cudaSetDevice(device_id));
}

// Greedy non-maximum suppression on the GPU.
//
//   keep_out            out: indices of the kept boxes (caller-allocated,
//                       room for boxes_num ints)
//   num_out             out: number of indices written to keep_out
//   boxes_host          host array of boxes_num * boxes_dim floats
//   boxes_num           number of boxes
//   boxes_dim           floats per box; nms_kernel addresses boxes with a
//                       stride of 5, so this is expected to be 5
//   nms_overlap_thresh  IoU above which a later box is suppressed
//   device_id           CUDA device to run on
//
// Boxes are visited in index order and a kept box suppresses later ones,
// so callers presumably pass boxes sorted by descending score -- confirm.
void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh, int device_id) {
  _set_device(device_id);

  // Nothing to do for an empty input; also avoids a zero-sized kernel launch
  // and indexing into empty vectors below.
  if (boxes_num <= 0) {
    *num_out = 0;
    return;
  }

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  // One 64-bit mask word per block of threadsPerBlock boxes.
  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

  CUDA_CHECK(cudaMalloc(&boxes_dev,
                        boxes_num * boxes_dim * sizeof(float)));
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_num * boxes_dim * sizeof(float),
                        cudaMemcpyHostToDevice));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        boxes_num * col_blocks * sizeof(unsigned long long)));

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  // Fix: the launch configuration had been stripped from the source.
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);

  // Fix: the vector element type had been stripped from the source.
  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * boxes_num * col_blocks,
                        cudaMemcpyDeviceToHost));

  // Bit i of remv is set once box i has been suppressed by a kept box.
  // Value-initialised to zero, which replaces the former memset.
  std::vector<unsigned long long> remv(col_blocks, 0ULL);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out[num_to_keep++] = i;
      // Merge in the overlap mask of the box just kept; only blocks from
      // the current one onward can contain later boxes.
      unsigned long long *p = &mask_host[0] + i * col_blocks;
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }
  *num_out = num_to_keep;

  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
--------------------------------------------------------------------------------
/lib/utils/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
import torch
from lib.utils.nms.nms_gpu import nms_gpu

def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression on ``dets`` through ``nms_gpu``.

    Args:
        dets: Detections. Only ``dets.shape[0]`` is inspected here; the
            object is otherwise passed straight to ``nms_gpu``.
        thresh: Overlap threshold forwarded to ``nms_gpu``.
        force_cpu: Accepted for API compatibility but ignored -- this
            wrapper has no CPU code path.

    Returns:
        An empty list when ``dets`` has no rows, otherwise whatever
        ``nms_gpu(dets, thresh)`` returns.
    """
    if dets.shape[0] == 0:
        return []
    # ---numpy version---
    # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
    # ---pytorch version---
    return nms_gpu(dets, thresh)
--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
// NOTE(review): the two system header names were lost when this file was
// extracted; they are restored from the symbols used below (THCState,
// THCuda*Tensor) -- confirm against the repository.
#include <THC/THC.h>
#include <stdio.h>
#include "nms_cuda_kernel.h"

// this symbol will be resolved automatically from PyTorch libs
extern THCState *state;

// Entry point exposed to PyTorch: unwraps the tensors' data pointers and the
// box tensor's two sizes, and forwards them to nms_cuda_compute.
// Always returns 1.
int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
             THCudaIntTensor *num_out, float nms_overlap_thresh) {

  nms_cuda_compute(THCudaIntTensor_data(state, keep_out),
                   THCudaIntTensor_data(state, num_out),
                   THCudaTensor_data(state, boxes_host),
                   boxes_host->size[0],
                   boxes_host->size[1],
                   nms_overlap_thresh);

  return 1;
}
--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
// int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
//              THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);

int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
             THCudaIntTensor *num_out, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/lib/utils/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
// ------------------------------------------------------------------
// Faster R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Shaoqing Ren
// ------------------------------------------------------------------

// NOTE(review): the four system header names were lost when this file was
// extracted; they are restored from the symbols used below (std::vector,
// std::cout, the commented-out printf) -- confirm against the repository.
#include <stdbool.h>
#include <stdio.h>
#include <vector>
#include <iostream>
#include "nms_cuda_kernel.h"

// Evaluate a CUDA call once, print a message with the line number if it
// failed, then synchronize the device whether or not it failed.
// The call is stored in a local first: expanding the argument twice would
// re-run a failing call just to format its error string.
#define CUDA_WARN(XXX) \
    do { \
      cudaError_t cuda_warn_status = (XXX); \
      if (cuda_warn_status != cudaSuccess) { \
        std::cout << "CUDA Error: " \
                  << cudaGetErrorString(cuda_warn_status) \
                  << ", at line " << __LINE__ << std::endl; \
      } \
      cudaDeviceSynchronize(); \
    } while (0)

// Evaluate a CUDA call once and print its error string on failure.
// Execution continues afterwards; nothing is thrown or returned.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Integer division rounded up (for non-negative m and positive n).
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// One thread per bit of an unsigned long long mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Intersection-over-union of two boxes given as (x1, y1, x2, y2, ...).
// Widths and heights use the inclusive "+ 1" pixel convention.
__device__ inline float devIoU(float const * const a, float const * const b) {
  float left = max(a[0], b[0]), right = min(a[2], b[2]);
  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
  float interS = width * height;
  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return interS / (Sa + Sb - interS);
}

// Pairwise-overlap kernel used by nms_cuda_compute.
//
// The launch grid is two-dimensional: blockIdx.y selects a tile of "row"
// boxes and blockIdx.x a tile of "column" boxes, each tile holding up to
// threadsPerBlock boxes. Every thread owns one row box and writes a single
// mask word to dev_mask; bit i of that word is set when column box i of the
// tile overlaps the row box by more than nms_overlap_thresh.
//
//   n_boxes             number of boxes stored in dev_boxes
//   nms_overlap_thresh  IoU above which two boxes count as overlapping
//   dev_boxes           n_boxes * 5 floats; devIoU reads elements 0..3 of
//                       each box (element 4 is copied but unused here --
//                       presumably the score, TODO confirm)
//   dev_mask            n_boxes * DIVUP(n_boxes, threadsPerBlock) words,
//                       indexed [box][column tile]
__global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
                           float *dev_boxes, unsigned long long *dev_mask) {
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last tile in each direction may be only partially filled.
  const int row_size =
        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile in shared memory so every thread of the block can
  // compare its own row box against all of them.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal tile only compare against later boxes, so a box is
    // never marked as overlapping itself or an earlier box of the same tile.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}

// Greedy non-maximum suppression of boxes_num boxes.
//
//   keep_out            device int buffer with room for boxes_num entries;
//                       its first *num_out entries receive the indices of
//                       the kept boxes in increasing order (entries past
//                       that are left untouched)
//   num_out             device int; receives the number of kept boxes
//   boxes_host          boxes_num * boxes_dim floats (the kernel indexes it
//                       with a stride of 5 floats per box)
//   boxes_num           number of boxes
//   boxes_dim           floats per box, used for the buffer size
//   nms_overlap_thresh  IoU above which a later box is suppressed
//
// Boxes are visited in index order and a kept box suppresses every later box
// it overlaps, so the caller presumably passes boxes sorted by descending
// score -- TODO confirm against nms_gpu.py.
//
// CUDA failures are only printed (see CUDA_CHECK / CUDA_WARN); they are not
// reported to the caller.
void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh) {

  // Nothing to suppress. Reporting zero here also avoids launching an empty
  // grid and taking the address of element 0 of an empty vector below.
  if (boxes_num <= 0) {
    int none_kept = 0;
    CUDA_WARN(cudaMemcpy(num_out, &none_kept, sizeof(int), cudaMemcpyHostToDevice));
    return;
  }

  float* boxes_dev = NULL;
  unsigned long long* mask_dev = NULL;

  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
  // Sizes are computed in size_t so the products are not formed in int.
  const size_t boxes_bytes = (size_t)boxes_num * boxes_dim * sizeof(float);
  const size_t mask_words = (size_t)boxes_num * col_blocks;

  CUDA_CHECK(cudaMalloc(&boxes_dev, boxes_bytes));
  // Despite its name, boxes_host comes from THCudaTensor_data in the visible
  // caller (nms_cuda.c), so it presumably points at device memory. Let the
  // runtime infer the copy direction from the pointers instead of asserting
  // host-to-device (cudaMemcpyDefault relies on unified virtual addressing).
  CUDA_CHECK(cudaMemcpy(boxes_dev,
                        boxes_host,
                        boxes_bytes,
                        cudaMemcpyDefault));

  CUDA_CHECK(cudaMalloc(&mask_dev,
                        mask_words * sizeof(unsigned long long)));

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);

  // printf("i am at line %d\n", boxes_num);
  // printf("i am at line %d\n", boxes_dim);

  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
  // A bad launch configuration is only reported through cudaGetLastError.
  CUDA_CHECK(cudaGetLastError());

  std::vector<unsigned long long> mask_host(mask_words);
  CUDA_CHECK(cudaMemcpy(&mask_host[0],
                        mask_dev,
                        sizeof(unsigned long long) * mask_words,
                        cudaMemcpyDeviceToHost));

  // remv accumulates, one bit per box, everything suppressed so far.
  std::vector<unsigned long long> remv(col_blocks, 0);

  // keep_out lives on the GPU, so the kept indices are collected in a host
  // buffer first and copied over afterwards.
  std::vector<int> keep_out_cpu(boxes_num);

  int num_to_keep = 0;
  for (int i = 0; i < boxes_num; i++) {
    int nblock = i / threadsPerBlock;
    int inblock = i % threadsPerBlock;

    if (!(remv[nblock] & (1ULL << inblock))) {
      keep_out_cpu[num_to_keep++] = i;
      unsigned long long *p = &mask_host[0] + (size_t)i * col_blocks;
      // Tiles before nblock only hold boxes that were already decided.
      for (int j = nblock; j < col_blocks; j++) {
        remv[j] |= p[j];
      }
    }
  }

  // Copy only the entries that were written; box 0 is always kept, so
  // num_to_keep is at least 1 here.
  CUDA_WARN(cudaMemcpy(keep_out, &keep_out_cpu[0], num_to_keep * sizeof(int),
                       cudaMemcpyHostToDevice));

  // num_out lives on the GPU as well.
  CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),
                       cudaMemcpyHostToDevice));

  // release cuda memory
  CUDA_CHECK(cudaFree(boxes_dev));
  CUDA_CHECK(cudaFree(mask_dev));
}
-------------------------------------------------------------------------------- /lib/utils/nms/src/nms_cuda_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/nms/src/nms_cuda_kernel.cu.o -------------------------------------------------------------------------------- /lib/utils/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/utils/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/utils/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/lib/utils/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/utils/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import lib.utils.pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. 
For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=False ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criterion. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use the modified criterion above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 
19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | matplotlib 3 | tensorboardX 4 | torchvision -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lucasjinreal/ssds_pytorch/00c69f1aa909d0eee3af8560a5d18e190d3718fc/setup.py -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import argparse 6 | import numpy as np 7 | if '/data/software/opencv-3.4.0/lib/python2.7/dist-packages' in sys.path: 8 | sys.path.remove('/data/software/opencv-3.4.0/lib/python2.7/dist-packages') 9 | if '/data/software/opencv-3.3.1/lib/python2.7/dist-packages' in sys.path: 10 | sys.path.remove('/data/software/opencv-3.3.1/lib/python2.7/dist-packages') 11 | import cv2 12 | from datetime import datetime 13 | 14 | import torch 15 | import torch.nn as nn 16 | import torch.backends.cudnn as cudnn 17 | from torch.autograd import Variable 18 | 19 | from lib.utils.config_parse import cfg_from_file 20 | from 
lib.ssds_train import test_model 21 | 22 | def parse_args(): 23 | """ 24 | Parse input arguments 25 | """ 26 | parser = argparse.ArgumentParser(description='Train a ssds.pytorch network') 27 | parser.add_argument('--cfg', dest='config_file', 28 | help='optional config file', default=None, type=str) 29 | 30 | if len(sys.argv) == 1: 31 | parser.print_help() 32 | sys.exit(1) 33 | 34 | args = parser.parse_args() 35 | return args 36 | 37 | def test(): 38 | args = parse_args() 39 | if args.config_file is not None: 40 | cfg_from_file(args.config_file) 41 | test_model() 42 | 43 | if __name__ == '__main__': 44 | test() 45 | -------------------------------------------------------------------------------- /time_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # test gpu speed 4 | for file in ./experiments/cfgs/*.yml 5 | do 6 | echo $file 7 | python demo.py --cfg=$file --demo=./experiments/person.jpg -t=time 8 | done 9 | 10 | # test cpu speed 11 | # export CUDA_VISIBLE_DEVICES='' 12 | # for file in ./experiments/cfgs/*.yml 13 | # do 14 | # echo $file 15 | # python demo.py --cfg=$file --demo=./experiments/person.jpg -t=time 16 | # done -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import sys 4 | import os 5 | import argparse 6 | import numpy as np 7 | import cv2 8 | from datetime import datetime 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.backends.cudnn as cudnn 13 | 14 | from lib.utils.config_parse import cfg_from_file 15 | from lib.ssds_train import train_model 16 | 17 | 18 | def parse_args(): 19 | """ 20 | Parse input arguments 21 | """ 22 | parser = argparse.ArgumentParser(description='Train a ssds.pytorch network') 23 | parser.add_argument('--cfg', dest='config_file', 24 | help='optional config file', 
default=None, type=str) 25 | 26 | if len(sys.argv) == 1: 27 | parser.print_help() 28 | sys.exit(1) 29 | 30 | args = parser.parse_args() 31 | return args 32 | 33 | 34 | def train(): 35 | args = parse_args() 36 | if args.config_file is not None: 37 | cfg_from_file(args.config_file) 38 | # os.environ["CUDA_LAUNCH_BLOCKING"] = "1" 39 | train_model() 40 | 41 | 42 | if __name__ == '__main__': 43 | train() 44 | --------------------------------------------------------------------------------