├── .gitignore ├── LICENSE ├── README.md ├── README_CN.md ├── config ├── __init__.py ├── data_config │ ├── dataset_config.py │ └── transform_config.py └── model_config │ ├── official_yaml │ └── yolov7.yaml │ ├── yolov1_config.py │ ├── yolov2_config.py │ ├── yolov3_config.py │ ├── yolov4_config.py │ ├── yolov5_config.py │ ├── yolov7_config.py │ ├── yolov8_config.py │ └── yolox_config.py ├── dataset ├── __init__.py ├── build.py ├── coco.py ├── crowdhuman.py ├── customed.py ├── data_augment │ ├── ssd_augment.py │ ├── strong_augment.py │ └── yolov5_augment.py ├── demo │ ├── images │ │ ├── 000000000632.jpg │ │ ├── 000000000785.jpg │ │ ├── 000000000872.jpg │ │ ├── 000000000885.jpg │ │ ├── 000000001000.jpg │ │ ├── 000000001268.jpg │ │ ├── 000000001296.jpg │ │ ├── 000000001503.jpg │ │ └── 000000001532.jpg │ └── videos │ │ └── 000006.mp4 ├── scripts │ ├── COCO2017.sh │ ├── VOC2007.sh │ ├── VOC2012.sh │ └── data_to_h5py.py ├── voc.py └── widerface.py ├── demo.py ├── deployment ├── ONNXRuntime │ ├── README.md │ └── onnx_inference.py └── test_image.jpg ├── engine.py ├── eval.py ├── evaluator ├── build.py ├── coco_evaluator.py ├── crowdhuman_evaluator.py ├── crowdhuman_tools │ ├── APMRToolkits │ │ ├── __init__.py │ │ ├── database.py │ │ └── image.py │ ├── JIToolkits │ │ ├── JI_tools.py │ │ └── matching.py │ ├── __init__.py │ ├── compute_APMR.py │ └── compute_JI.py ├── customed_evaluator.py ├── voc_evaluator.py └── widerface_evaluator.py ├── img_files ├── video_detection_demo.gif ├── video_tracking_demo.gif └── yolo_tutorial.png ├── models ├── detectors │ ├── __init__.py │ ├── yolov1 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov1.py │ │ ├── yolov1_backbone.py │ │ ├── yolov1_basic.py │ │ ├── yolov1_head.py │ │ └── yolov1_neck.py │ ├── yolov2 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov2.py │ │ ├── yolov2_backbone.py │ │ ├── yolov2_basic.py │ │ ├── yolov2_head.py │ │ └── yolov2_neck.py │ ├── yolov3 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov3.py │ │ ├── yolov3_backbone.py │ │ ├── yolov3_basic.py │ │ ├── yolov3_fpn.py │ │ ├── yolov3_head.py │ │ └── yolov3_neck.py │ ├── yolov4 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov4.py │ │ ├── yolov4_backbone.py │ │ ├── yolov4_basic.py │ │ ├── yolov4_head.py │ │ ├── yolov4_neck.py │ │ └── yolov4_pafpn.py │ ├── yolov5 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov5.py │ │ ├── yolov5_backbone.py │ │ ├── yolov5_basic.py │ │ ├── yolov5_head.py │ │ ├── yolov5_neck.py │ │ └── yolov5_pafpn.py │ ├── yolov7 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov7.py │ │ ├── yolov7_backbone.py │ │ ├── yolov7_basic.py │ │ ├── yolov7_head.py │ │ ├── yolov7_neck.py │ │ └── yolov7_pafpn.py │ ├── yolov8 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov8.py │ │ ├── yolov8_backbone.py │ │ ├── yolov8_basic.py │ │ ├── yolov8_head.py │ │ ├── yolov8_neck.py │ │ ├── yolov8_pafpn.py │ │ └── yolov8_pred.py │ └── yolox │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolox.py │ │ ├── yolox_backbone.py │ │ ├── yolox_basic.py │ │ ├── yolox_head.py │ │ ├── yolox_neck.py │ │ └── yolox_pafpn.py └── trackers │ ├── __init__.py │ └── byte_tracker │ ├── basetrack.py │ ├── build.py │ ├── byte_tracker.py │ ├── kalman_filter.py │ └── matching.py ├── requirements.txt ├── test.py ├── tools ├── __init__.py ├── clean_coco.py ├── 
convert_crowdhuman_to_coco.py ├── convert_ours_to_coco.py ├── convert_widerface_to_coco.py └── export_onnx.py ├── track.py ├── train.py ├── train.sh └── utils ├── __init__.py ├── box_ops.py ├── distributed_utils.py ├── kmeans_anchor.py ├── misc.py ├── solver ├── __init__.py ├── lr_scheduler.py └── optimizer.py └── vis_tools.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | *.pth 3 | *.pkl 4 | *.onnx 5 | *.pyc 6 | *.zip 7 | weights 8 | __pycache__ 9 | det_results 10 | .vscode 11 | deployment/OpenVINO/cpp/build 12 | cluster.json 13 | train_nebula.py 14 | train_nebula.sh 15 | make_data_nebula.sh 16 | dataset/make_dataset_nebula.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jianhua Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
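The next file, `config/__init__.py`, exposes three builders: `build_dataset_config`, `build_trans_config`, and `build_model_config`. Below is a minimal sketch of how they might be chained by a driver script such as `train.py`. The argparse fields (`args.dataset`, `args.model`) and the `trans_type` key are taken from the code that follows; this particular wiring is an illustrative assumption, not copied from the repository:

```python
# Hypothetical driver snippet (illustration only), chaining the three
# config builders defined in config/__init__.py.
from argparse import Namespace

from config import build_dataset_config, build_model_config, build_trans_config

args = Namespace(dataset='coco', model='yolov1')

data_cfg  = build_dataset_config(args)   # class names / indexes for the chosen dataset
model_cfg = build_model_config(args)     # backbone, neck, head, loss and trainer settings
trans_cfg = build_trans_config(model_cfg['trans_type'])  # e.g. 'ssd' or 'yolo_l' augmentation preset

print(data_cfg['num_classes'], model_cfg['backbone'])
```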
-------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------ Dataset Config ------------------ 2 | from .data_config.dataset_config import dataset_cfg 3 | 4 | 5 | def build_dataset_config(args): 6 | if args.dataset in ['coco', 'coco-val', 'coco-test']: 7 | cfg = dataset_cfg['coco'] 8 | else: 9 | cfg = dataset_cfg[args.dataset] 10 | 11 | print('==============================') 12 | print('Dataset Config: {} \n'.format(cfg)) 13 | 14 | return cfg 15 | 16 | 17 | # ------------------ Transform Config ------------------ 18 | from .data_config.transform_config import ( 19 | # SSD-Style 20 | ssd_trans_config, 21 | # YOLOv5-Style 22 | yolo_p_trans_config, 23 | yolo_n_trans_config, 24 | yolo_s_trans_config, 25 | yolo_m_trans_config, 26 | yolo_l_trans_config, 27 | yolo_x_trans_config, 28 | # YOLOX-Style 29 | yolox_p_trans_config, 30 | yolox_n_trans_config, 31 | yolox_s_trans_config, 32 | yolox_m_trans_config, 33 | yolox_l_trans_config, 34 | yolox_x_trans_config, 35 | ) 36 | 37 | def build_trans_config(trans_config='ssd'): 38 | print('==============================') 39 | print('Transform: {}-Style ...'.format(trans_config)) 40 | 41 | # SSD-style transform 42 | if trans_config == 'ssd': 43 | cfg = ssd_trans_config 44 | 45 | # YOLOv5-style transform 46 | elif trans_config == 'yolo_p': 47 | cfg = yolo_p_trans_config 48 | elif trans_config == 'yolo_n': 49 | cfg = yolo_n_trans_config 50 | elif trans_config == 'yolo_s': 51 | cfg = yolo_s_trans_config 52 | elif trans_config == 'yolo_m': 53 | cfg = yolo_m_trans_config 54 | elif trans_config == 'yolo_l': 55 | cfg = yolo_l_trans_config 56 | elif trans_config == 'yolo_x': 57 | cfg = yolo_x_trans_config 58 | 59 | # YOLOX-style transform 60 | elif trans_config == 'yolox_p': 61 | cfg = yolox_p_trans_config 62 | elif trans_config == 'yolox_n': 63 | cfg = yolox_n_trans_config 64 | elif trans_config == 'yolox_s': 65 | cfg = yolox_s_trans_config 66 | elif trans_config == 'yolox_m': 67 | cfg = yolox_m_trans_config 68 | elif trans_config == 'yolox_l': 69 | cfg = yolox_l_trans_config 70 | elif trans_config == 'yolox_x': 71 | cfg = yolox_x_trans_config 72 | 73 | else: 74 | raise NotImplementedError("Unknown transform config: {}".format(trans_config)) 75 | print('Transform Config: {} \n'.format(cfg)) 76 | 77 | return cfg 78 | 79 | 80 | # ------------------ Model Config ------------------ 81 | ## YOLO series 82 | from .model_config.yolov1_config import yolov1_cfg 83 | from .model_config.yolov2_config import yolov2_cfg 84 | from .model_config.yolov3_config import yolov3_cfg 85 | from .model_config.yolov4_config import yolov4_cfg 86 | from .model_config.yolov5_config import yolov5_cfg 87 | from .model_config.yolov7_config import yolov7_cfg 88 | from .model_config.yolov8_config import yolov8_cfg 89 | from .model_config.yolox_config import yolox_cfg 90 | 91 | def build_model_config(args): 92 | print('==============================') 93 | print('Model: {} ...'.format(args.model.upper())) 94 | # YOLOv1 95 | if args.model == 'yolov1': 96 | cfg = yolov1_cfg 97 | # YOLOv2 98 | elif args.model == 'yolov2': 99 | cfg = yolov2_cfg 100 | # YOLOv3 101 | elif args.model in ['yolov3', 'yolov3_tiny']: 102 | cfg = yolov3_cfg[args.model] 103 | # YOLOv4 104 | elif args.model in ['yolov4', 'yolov4_tiny']: 105 | cfg = yolov4_cfg[args.model] 106 | # YOLOv5 107 | elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']: 108 | cfg = 
yolov5_cfg[args.model] 109 | # YOLOv7 110 | elif args.model in ['yolov7_tiny', 'yolov7', 'yolov7_x']: 111 | cfg = yolov7_cfg[args.model] 112 | # YOLOv8 113 | elif args.model in ['yolov8_n', 'yolov8_s', 'yolov8_m', 'yolov8_l', 'yolov8_x']: 114 | cfg = yolov8_cfg[args.model] 115 | # YOLOX 116 | elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']: 117 | cfg = yolox_cfg[args.model] 118 | 119 | return cfg 120 | 121 | -------------------------------------------------------------------------------- /config/data_config/dataset_config.py: -------------------------------------------------------------------------------- 1 | # Dataset config 2 | 3 | dataset_cfg = { 4 | 'voc': { 5 | 'data_name': 'VOCdevkit', 6 | 'num_classes': 20, 7 | 'class_indexs': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], 9 | 'class_names': ('aeroplane', 'bicycle', 'bird', 'boat', 10 | 'bottle', 'bus', 'car', 'cat', 'chair', 11 | 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorbike', 'person', 'pottedplant', 13 | 'sheep', 'sofa', 'train', 'tvmonitor'), 14 | }, 15 | 16 | 'coco':{ 17 | 'data_name': 'COCO', 18 | 'num_classes': 80, 19 | 'class_indexs': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 20 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 21 | 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 22 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 23 | 80, 81, 82, 84, 85, 86, 87, 88, 89, 90], 24 | 'class_names': ('background', 25 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 26 | 'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign', 27 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 28 | 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', 29 | 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 30 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 31 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass', 32 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 33 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 34 | 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk', 35 | 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 36 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', 37 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'), 38 | }, 39 | 40 | 'widerface':{ 41 | 'data_name': 'WiderFace', 42 | 'num_classes': 1, 43 | 'class_indexs': [0], 44 | 'class_names': ('face',), 45 | }, 46 | 47 | 'crowdhuman':{ 48 | 'data_name': 'CrowdHuman', 49 | 'num_classes': 1, 50 | 'class_indexs': [0], 51 | 'class_names': ('person',), 52 | }, 53 | 54 | 'customed':{ 55 | 'data_name': 'AnimalDataset', 56 | 'num_classes': 9, 57 | 'class_indexs': [0, 1, 2, 3, 4, 5, 6, 7, 8], 58 | 'class_names': ('bird', 'butterfly', 'cat', 'cow', 'dog', 'lion', 'person', 'pig', 'tiger', ), 59 | }, 60 | 61 | } -------------------------------------------------------------------------------- /config/model_config/official_yaml/yolov7.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - 
[36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, 
Concat, [1]], 125 | 126 | [-1, 1, Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, RepConv, [256, 3, 1]], 136 | [88, 1, RepConv, [512, 3, 1]], 137 | [101, 1, RepConv, [1024, 3, 1]], 138 | 139 | [[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 140 | ] 141 | -------------------------------------------------------------------------------- /config/model_config/yolov1_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv1 Config 2 | 3 | yolov1_cfg = { 4 | # ---------------- Model config ---------------- 5 | ## Backbone 6 | 'backbone': 'resnet18', 7 | 'pretrained': True, 8 | 'stride': 32, # P5 9 | 'max_stride': 32, 10 | ## Neck 11 | 'neck': 'sppf', 12 | 'neck_act': 'lrelu', 13 | 'neck_norm': 'BN', 14 | 'neck_depthwise': False, 15 | 'expand_ratio': 0.5, 16 | 'pooling_size': 5, 17 | ## Head 18 | 'head': 'decoupled_head', 19 | 'head_act': 'lrelu', 20 | 'head_norm': 'BN', 21 | 'num_cls_head': 2, 22 | 'num_reg_head': 2, 23 | 'head_depthwise': False, 24 | # ---------------- Data process config ---------------- 25 | ## Input 26 | 'multi_scale': [0.5, 1.5], # 320 -> 960 27 | 'trans_type': 'ssd', 28 | # ---------------- Loss config ---------------- 29 | 'loss_obj_weight': 1.0, 30 | 'loss_cls_weight': 1.0, 31 | 'loss_box_weight': 5.0, 32 | # ---------------- Trainer config ---------------- 33 | 'trainer_type': 'yolo', 34 | } -------------------------------------------------------------------------------- /config/model_config/yolov2_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv2 Config 2 | 3 | yolov2_cfg = { 4 | # ---------------- Model config ---------------- 5 | ## Backbone 6 | 'backbone': 'darknet19', 7 | 'pretrained': True, 8 | 'stride': 32, # P5 9 | 'max_stride': 32, 10 | ## Neck 11 | 'neck': 'sppf', 12 | 'neck_act': 'lrelu', 13 | 'neck_norm': 'BN', 14 | 'neck_depthwise': False, 15 | 'expand_ratio': 0.5, 16 | 'pooling_size': 5, 17 | ## Head 18 | 'head': 'decoupled_head', 19 | 'head_act': 'lrelu', 20 | 'head_norm': 'BN', 21 | 'num_cls_head': 2, 22 | 'num_reg_head': 2, 23 | 'head_depthwise': False, 24 | 'anchor_size': [[17, 25], 25 | [55, 75], 26 | [92, 206], 27 | [202, 21], 28 | [289, 311]], # 416 scale 29 | # ---------------- Data process config ---------------- 30 | ## Input 31 | 'multi_scale': [0.5, 1.5], # 320 -> 960 32 | 'trans_type': 'ssd', 33 | # ---------------- Matcher config ---------------- 34 | 'iou_thresh': 0.5, 35 | # ---------------- Loss config ---------------- 36 | 'loss_obj_weight': 1.0, 37 | 'loss_cls_weight': 1.0, 38 | 'loss_box_weight': 5.0, 39 | # ---------------- Trainer config ---------------- 40 | 'trainer_type': 'yolo', 41 | } -------------------------------------------------------------------------------- /config/model_config/yolov3_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv3 Config 2 | 3 | yolov3_cfg = { 4 | 'yolov3':{ 5 | # ---------------- Model config ---------------- 6 | ## Backbone 7 | 'backbone': 'darknet53', 8 | 'pretrained': True, 9 | 'stride': [8, 16, 32], # P3, P4, P5 10 | 'width': 1.0, 11 | 'depth': 1.0, 12 | 'max_stride': 32, 13 | ## Neck 14 | 'neck': 'sppf', 15 | 'neck_act': 'silu', 16 | 'neck_norm': 'BN', 17 | 'neck_depthwise': 
False, 18 | 'expand_ratio': 0.5, 19 | 'pooling_size': 5, 20 | ## FPN 21 | 'fpn': 'yolov3_fpn', 22 | 'fpn_act': 'silu', 23 | 'fpn_norm': 'BN', 24 | 'fpn_depthwise': False, 25 | ## Head 26 | 'head': 'decoupled_head', 27 | 'head_act': 'silu', 28 | 'head_norm': 'BN', 29 | 'num_cls_head': 2, 30 | 'num_reg_head': 2, 31 | 'head_depthwise': False, 32 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 33 | [30, 61], [62, 45], [59, 119], # P4 34 | [116, 90], [156, 198], [373, 326]], # P5 35 | # ---------------- Data process config ---------------- 36 | 'trans_type': 'yolo_l', 37 | 'multi_scale': [0.5, 1.25], # 320 -> 800 38 | # ---------------- Assignment config ---------------- 39 | ## matcher 40 | 'iou_thresh': 0.5, 41 | # ---------------- Loss config ---------------- 42 | ## loss weight 43 | 'loss_obj_weight': 1.0, 44 | 'loss_cls_weight': 1.0, 45 | 'loss_box_weight': 5.0, 46 | # ---------------- Train config ---------------- 47 | 'trainer_type': 'yolo', 48 | }, 49 | 50 | 'yolov3_tiny':{ 51 | # ---------------- Model config ---------------- 52 | ## Backbone 53 | 'backbone': 'darknet_tiny', 54 | 'pretrained': True, 55 | 'stride': [8, 16, 32], # P3, P4, P5 56 | 'width': 0.25, 57 | 'depth': 0.34, 58 | 'max_stride': 32, 59 | ## Neck 60 | 'neck': 'sppf', 61 | 'neck_act': 'silu', 62 | 'neck_norm': 'BN', 63 | 'neck_depthwise': False, 64 | 'expand_ratio': 0.5, 65 | 'pooling_size': 5, 66 | ## FPN 67 | 'fpn': 'yolov3_fpn', 68 | 'fpn_act': 'silu', 69 | 'fpn_norm': 'BN', 70 | 'fpn_depthwise': False, 71 | ## Head 72 | 'head': 'decoupled_head', 73 | 'head_act': 'silu', 74 | 'head_norm': 'BN', 75 | 'num_cls_head': 2, 76 | 'num_reg_head': 2, 77 | 'head_depthwise': False, 78 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 79 | [30, 61], [62, 45], [59, 119], # P4 80 | [116, 90], [156, 198], [373, 326]], # P5 81 | # ---------------- Data process config ---------------- 82 | ## input 83 | 'trans_type': 'yolo_n', 84 | 'multi_scale': [0.5, 1.25], # 320 -> 800 85 | # ---------------- Assignment config ---------------- 86 | ## matcher 87 | 'iou_thresh': 0.5, 88 | # ---------------- Loss config ---------------- 89 | ## loss weight 90 | 'loss_obj_weight': 1.0, 91 | 'loss_cls_weight': 1.0, 92 | 'loss_box_weight': 5.0, 93 | # ---------------- Train config ---------------- 94 | 'trainer_type': 'yolo', 95 | }, 96 | 97 | } -------------------------------------------------------------------------------- /config/model_config/yolov4_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv4 Config 2 | 3 | yolov4_cfg = { 4 | 'yolov4':{ 5 | # ---------------- Model config ---------------- 6 | ## Backbone 7 | 'backbone': 'cspdarknet53', 8 | 'pretrained': True, 9 | 'stride': [8, 16, 32], # P3, P4, P5 10 | 'width': 1.0, 11 | 'depth': 1.0, 12 | 'max_stride': 32, 13 | ## Neck 14 | 'neck': 'csp_sppf', 15 | 'expand_ratio': 0.5, 16 | 'pooling_size': 5, 17 | 'neck_act': 'silu', 18 | 'neck_norm': 'BN', 19 | 'neck_depthwise': False, 20 | ## FPN 21 | 'fpn': 'yolov4_pafpn', 22 | 'fpn_act': 'silu', 23 | 'fpn_norm': 'BN', 24 | 'fpn_depthwise': False, 25 | ## Head 26 | 'head': 'decoupled_head', 27 | 'head_act': 'silu', 28 | 'head_norm': 'BN', 29 | 'num_cls_head': 2, 30 | 'num_reg_head': 2, 31 | 'head_depthwise': False, 32 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 33 | [30, 61], [62, 45], [59, 119], # P4 34 | [116, 90], [156, 198], [373, 326]], # P5 35 | # ---------------- Data process config ---------------- 36 | 'trans_type': 'yolo_l', 37 | 'multi_scale': [0.5, 1.25], # 320 -> 800 38 
| # ---------------- Assignment config ---------------- 39 | ## matcher 40 | 'iou_thresh': 0.5, 41 | # ---------------- Loss config ---------------- 42 | ## loss weight 43 | 'loss_obj_weight': 1.0, 44 | 'loss_cls_weight': 1.0, 45 | 'loss_box_weight': 5.0, 46 | # ---------------- Train config ---------------- 47 | 'trainer_type': 'yolo', 48 | }, 49 | 50 | 'yolov4_tiny':{ 51 | # ---------------- Model config ---------------- 52 | ## Backbone 53 | 'backbone': 'cspdarknet_tiny', 54 | 'pretrained': True, 55 | 'stride': [8, 16, 32], # P3, P4, P5 56 | 'width': 0.25, 57 | 'depth': 0.34, 58 | 'max_stride': 32, 59 | ## Neck 60 | 'neck': 'csp_sppf', 61 | 'neck_act': 'silu', 62 | 'neck_norm': 'BN', 63 | 'neck_depthwise': False, 64 | 'expand_ratio': 0.5, 65 | 'pooling_size': 5, 66 | ## FPN 67 | 'fpn': 'yolov4_pafpn', 68 | 'fpn_act': 'silu', 69 | 'fpn_norm': 'BN', 70 | 'fpn_depthwise': False, 71 | ## Head 72 | 'head': 'decoupled_head', 73 | 'head_act': 'silu', 74 | 'head_norm': 'BN', 75 | 'num_cls_head': 2, 76 | 'num_reg_head': 2, 77 | 'head_depthwise': False, 78 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 79 | [30, 61], [62, 45], [59, 119], # P4 80 | [116, 90], [156, 198], [373, 326]], # P5 81 | # ---------------- Data process config ---------------- 82 | 'trans_type': 'yolo_n', 83 | 'multi_scale': [0.5, 1.25], # 320 -> 800 84 | # ---------------- Assignment config ---------------- 85 | ## matcher 86 | 'iou_thresh': 0.5, 87 | # ---------------- Loss config ---------------- 88 | ## loss weight 89 | 'loss_obj_weight': 1.0, 90 | 'loss_cls_weight': 1.0, 91 | 'loss_box_weight': 5.0, 92 | # ---------------- Train config ---------------- 93 | 'trainer_type': 'yolo', 94 | }, 95 | 96 | } -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/demo/images/000000000632.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000632.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000000785.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000785.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000000872.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000872.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000000885.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000885.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001000.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001000.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001268.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001268.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001296.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001296.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001503.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001503.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001532.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001532.jpg -------------------------------------------------------------------------------- /dataset/demo/videos/000006.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/videos/000006.mp4 -------------------------------------------------------------------------------- /dataset/scripts/COCO2017.sh: -------------------------------------------------------------------------------- 1 | mkdir COCO 2 | cd COCO 3 | 4 | wget http://images.cocodataset.org/zips/train2017.zip 5 | wget http://images.cocodataset.org/zips/val2017.zip 6 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 7 | wget http://images.cocodataset.org/zips/test2017.zip 8 | wget http://images.cocodataset.org/annotations/image_info_test2017.zip  9 | 10 | unzip train2017.zip 11 | unzip val2017.zip 12 | unzip annotations_trainval2017.zip 13 | unzip test2017.zip 14 | unzip image_info_test2017.zip 15 | 16 | # rm -f train2017.zip 17 | # rm -f val2017.zip 18 | # rm -f annotations_trainval2017.zip 19 | # rm -f test2017.zip 20 | # rm -f image_info_test2017.zip 21 | -------------------------------------------------------------------------------- /dataset/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /dataset/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /dataset/scripts/data_to_h5py.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import h5py 3 | import os 4 | import argparse 5 | import numpy as np 6 | import sys 7 | 8 | sys.path.append('..') 9 | from voc import VOCDetection 10 | from coco import COCODataset 11 | 12 | # ---------------------- Opt ---------------------- 13 | parser = argparse.ArgumentParser(description='Cache-Dataset') 14 | parser.add_argument('-d', '--dataset', default='voc', 15 | help='coco, voc, widerface, crowdhuman') 16 | parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/', 17 | help='data root') 18 | parser.add_argument('-size', '--img_size', default=640, type=int, 19 | help='input image size.') 20 | parser.add_argument('--mosaic', default=None, type=float, 21 | help='mosaic augmentation.') 22 | parser.add_argument('--mixup', default=None, type=float, 23 | help='mixup augmentation.') 24 | parser.add_argument('--keep_ratio', action="store_true", default=False, 25 | help='keep aspect ratio.') 26 | parser.add_argument('--show', action="store_true", default=False, 27 | help='keep aspect ratio.') 28 | 29 | args = parser.parse_args() 30 | 31 | 32 | # ---------------------- Build Dataset ---------------------- 33 | if args.dataset == 'voc': 34 | root = os.path.join(args.root, 'VOCdevkit') 35 | dataset = VOCDetection(args.img_size, root) 36 | elif args.dataset == 'coco': 37 | root = os.path.join(args.root, 'COCO') 38 | dataset = COCODataset(args.img_size, args.root) 39 | print('Data length: ', len(dataset)) 40 | 41 | 42 | # ---------------------- Main Process ---------------------- 43 | cached_image = [] 44 | dataset_size = len(dataset) 45 | for i in range(len(dataset)): 46 | if i % 5000 == 0: 47 | print("[{} / {}]".format(i, dataset_size)) 48 | # load an image 49 | image, image_id = dataset.pull_image(i) 50 | 
orig_h, orig_w, _ = image.shape 51 | 52 | # resize image 53 | if args.keep_ratio: 54 | r = args.img_size / max(orig_h, orig_w) 55 | if r != 1: 56 | interp = cv2.INTER_LINEAR 57 | new_size = (int(orig_w * r), int(orig_h * r)) 58 | image = cv2.resize(image, new_size, interpolation=interp) 59 | else: 60 | image = cv2.resize(image, (int(args.img_size), int(args.img_size))) 61 | 62 | cached_image.append(image) 63 | if args.show: 64 | cv2.imshow('image', image) 65 | # cv2.imwrite(str(i)+'.jpg', img) 66 | cv2.waitKey(0) 67 | 68 | save_path = "dataset/cache/" 69 | os.makedirs(save_path, exist_ok=True) 70 | np.save(save_path + '{}_train_images.npy'.format(args.dataset), cached_image) 71 | -------------------------------------------------------------------------------- /deployment/ONNXRuntime/README.md: -------------------------------------------------------------------------------- 1 | ## YOLO ONNXRuntime 2 | 3 | 4 | ### Convert Your Model to ONNX 5 | 6 | First, you should move to by: 7 | ```shell 8 | cd 9 | cd tools/ 10 | ``` 11 | Then, you can: 12 | 13 | 1. Convert a standard YOLO model by: 14 | ```shell 15 | python3 export_onnx.py -m yolov1 --weight ../weight/coco/yolov1/yolov1_coco.pth -nc 80 --img_size 640 16 | ``` 17 | 18 | Notes: 19 | * -n: specify a model name. The model name must be one of the [yolox-s,m,l,x and yolox-nano, yolox-tiny, yolov3] 20 | * -c: the model you have trained 21 | * -o: opset version, default 11. **However, if you will further convert your onnx model to [OpenVINO](https://github.com/Megvii-BaseDetection/YOLOX/demo/OpenVINO/), please specify the opset version to 10.** 22 | * --no-onnxsim: disable onnxsim 23 | * To customize an input shape for onnx model, modify the following code in tools/export_onnx.py: 24 | 25 | ```python 26 | dummy_input = torch.randn(args.batch_size, 3, args.img_size, args.img_size) 27 | ``` 28 | 29 | ### ONNXRuntime Demo 30 | 31 | Step1. 32 | ```shell 33 | cd /deployment/ONNXRuntime 34 | ``` 35 | 36 | Step2. 37 | ```shell 38 | python3 onnx_inference.py --model ../../weights/onnx/11/yolov1.onnx -i ../test_image.jpg -s 0.3 --img_size 640 39 | ``` 40 | Notes: 41 | * --model: your converted onnx model 42 | * -i: input_image 43 | * -s: score threshold for visualization. 44 | * --img_size: should be consistent with the shape you used for onnx convertion. 45 | -------------------------------------------------------------------------------- /deployment/ONNXRuntime/onnx_inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
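# Overview of the inference flow implemented below:
#   1) PreProcessor resizes the input image to --img_size and returns the resize ratio;
#   2) an onnxruntime.InferenceSession runs the exported model on the preprocessed tensor;
#   3) PostProcessor applies the score threshold and NMS, and the boxes are divided by the
#      ratio to map them back to the original image before visualization and saving.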
4 | 5 | import argparse 6 | import os 7 | 8 | import cv2 9 | import time 10 | import numpy as np 11 | import sys 12 | sys.path.append('../../') 13 | 14 | import onnxruntime 15 | from utils.misc import PreProcessor, PostProcessor 16 | from utils.vis_tools import visualize 17 | 18 | 19 | def make_parser(): 20 | parser = argparse.ArgumentParser("onnxruntime inference sample") 21 | parser.add_argument("-m", "--model", type=str, default="../../weights/onnx/11/yolov1.onnx", 22 | help="Input your onnx model.") 23 | parser.add_argument("-i", "--image_path", type=str, default='../test_image.jpg', 24 | help="Path to your input image.") 25 | parser.add_argument("-o", "--output_dir", type=str, default='../../det_results/onnx/', 26 | help="Path to your output directory.") 27 | parser.add_argument("-s", "--score_thr", type=float, default=0.35, 28 | help="Score threshould to filter the result.") 29 | parser.add_argument("-size", "--img_size", type=int, default=640, 30 | help="Specify an input shape for inference.") 31 | return parser 32 | 33 | 34 | if __name__ == '__main__': 35 | args = make_parser().parse_args() 36 | 37 | # class color for better visualization 38 | np.random.seed(0) 39 | class_colors = [(np.random.randint(255), 40 | np.random.randint(255), 41 | np.random.randint(255)) for _ in range(80)] 42 | 43 | # preprocessor 44 | prepocess = PreProcessor(img_size=args.img_size) 45 | 46 | # postprocessor 47 | postprocess = PostProcessor(num_classes=80, conf_thresh=args.score_thr, nms_thresh=0.5) 48 | 49 | # read an image 50 | input_shape = tuple([args.img_size, args.img_size]) 51 | origin_img = cv2.imread(args.image_path) 52 | 53 | # preprocess 54 | x, ratio = prepocess(origin_img) 55 | 56 | t0 = time.time() 57 | # inference 58 | session = onnxruntime.InferenceSession(args.model) 59 | 60 | ort_inputs = {session.get_inputs()[0].name: x[None, :, :, :]} 61 | output = session.run(None, ort_inputs) 62 | print("inference time: {:.1f} ms".format((time.time() - t0)*1000)) 63 | 64 | t0 = time.time() 65 | # post process 66 | bboxes, scores, labels = postprocess(output[0]) 67 | bboxes /= ratio 68 | print("post-process time: {:.1f} ms".format((time.time() - t0)*1000)) 69 | 70 | # visualize detection 71 | origin_img = visualize( 72 | img=origin_img, 73 | bboxes=bboxes, 74 | scores=scores, 75 | labels=labels, 76 | vis_thresh=args.score_thr, 77 | class_colors=class_colors 78 | ) 79 | 80 | # show 81 | cv2.imshow('onnx detection', origin_img) 82 | cv2.waitKey(0) 83 | 84 | # save results 85 | os.makedirs(args.output_dir, exist_ok=True) 86 | output_path = os.path.join(args.output_dir, os.path.basename(args.image_path)) 87 | cv2.imwrite(output_path, origin_img) 88 | -------------------------------------------------------------------------------- /deployment/test_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/deployment/test_image.jpg -------------------------------------------------------------------------------- /evaluator/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from evaluator.coco_evaluator import COCOAPIEvaluator 4 | from evaluator.voc_evaluator import VOCAPIEvaluator 5 | from evaluator.crowdhuman_evaluator import CrowdHumanEvaluator 6 | from evaluator.customed_evaluator import CustomedEvaluator 7 | 8 | 9 | 10 | def build_evluator(args, data_cfg, transform, device): 11 | # Basic parameters 12 | data_dir = 
os.path.join(args.root, data_cfg['data_name']) 13 | 14 | # Evaluator 15 | ## VOC Evaluator 16 | if args.dataset == 'voc': 17 | evaluator = VOCAPIEvaluator(data_dir = data_dir, 18 | device = device, 19 | transform = transform 20 | ) 21 | ## COCO Evaluator 22 | elif args.dataset == 'coco': 23 | evaluator = COCOAPIEvaluator(data_dir = data_dir, 24 | device = device, 25 | transform = transform 26 | ) 27 | ## CrowdHuman Evaluator 28 | elif args.dataset == 'crowdhuman': 29 | evaluator = CrowdHumanEvaluator(data_dir = data_dir, 30 | device = device, 31 | image_set = 'val', 32 | transform = transform 33 | ) 34 | ## Custom dataset Evaluator 35 | elif args.dataset == 'ourdataset': 36 | evaluator = CustomedEvaluator(data_dir = data_dir, 37 | device = device, 38 | image_set = 'val', 39 | transform = transform 40 | ) 41 | 42 | return evaluator 43 | -------------------------------------------------------------------------------- /evaluator/coco_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | import torch 4 | from dataset.coco import COCODataset 5 | from utils.box_ops import rescale_bboxes 6 | 7 | try: 8 | from pycocotools.cocoeval import COCOeval 9 | except: 10 | print("It seems that the COCOAPI is not installed.") 11 | 12 | 13 | class COCOAPIEvaluator(): 14 | """ 15 | COCO AP Evaluation class. 16 | All the data in the val2017 dataset are processed \ 17 | and evaluated by COCO API. 18 | """ 19 | def __init__(self, data_dir, device, testset=False, transform=None): 20 | """ 21 | Args: 22 | data_dir (str): dataset root directory 23 | img_size (int): image size after preprocess. images are resized \ 24 | to squares whose shape is (img_size, img_size). 25 | confthre (float): 26 | confidence threshold ranging from 0 to 1, \ 27 | which is defined in the config file. 28 | nmsthre (float): 29 | IoU threshold of non-max supression ranging from 0 to 1. 30 | """ 31 | # ----------------- Basic parameters ----------------- 32 | self.image_set = 'test2017' if testset else 'val2017' 33 | self.transform = transform 34 | self.device = device 35 | self.testset = testset 36 | # ----------------- Metrics ----------------- 37 | self.map = 0. 38 | self.ap50_95 = 0. 39 | self.ap50 = 0. 40 | # ----------------- Dataset ----------------- 41 | self.dataset = COCODataset(data_dir=data_dir, image_set=self.image_set) 42 | 43 | 44 | @torch.no_grad() 45 | def evaluate(self, model): 46 | """ 47 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 48 | and the results are evaluated by COCO API. 
49 | Args: 50 | model : model object 51 | Returns: 52 | ap50_95 (float) : calculated COCO AP for IoU=50:95 53 | ap50 (float) : calculated COCO AP for IoU=50 54 | """ 55 | model.eval() 56 | ids = [] 57 | data_dict = [] 58 | num_images = len(self.dataset) 59 | print('total number of images: %d' % (num_images)) 60 | 61 | # start testing 62 | for index in range(num_images): # all the data in val2017 63 | if index % 500 == 0: 64 | print('[Eval: %d / %d]'%(index, num_images)) 65 | 66 | # load an image 67 | img, id_ = self.dataset.pull_image(index) 68 | orig_h, orig_w, _ = img.shape 69 | 70 | # preprocess 71 | x, _, ratio = self.transform(img) 72 | x = x.unsqueeze(0).to(self.device) 73 | 74 | id_ = int(id_) 75 | ids.append(id_) 76 | 77 | # inference 78 | outputs = model(x) 79 | scores = outputs['scores'] 80 | labels = outputs['labels'] 81 | bboxes = outputs['bboxes'] 82 | 83 | # rescale bboxes 84 | bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio) 85 | 86 | # process outputs 87 | for i, box in enumerate(bboxes): 88 | x1 = float(box[0]) 89 | y1 = float(box[1]) 90 | x2 = float(box[2]) 91 | y2 = float(box[3]) 92 | label = self.dataset.class_ids[int(labels[i])] 93 | 94 | bbox = [x1, y1, x2 - x1, y2 - y1] 95 | score = float(scores[i]) # object score * class score 96 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 97 | "score": score} # COCO json format 98 | data_dict.append(A) 99 | 100 | annType = ['segm', 'bbox', 'keypoints'] 101 | 102 | # Evaluate the Dt (detection) json comparing with the ground truth 103 | if len(data_dict) > 0: 104 | print('evaluating ......') 105 | cocoGt = self.dataset.coco 106 | # workaround: temporarily write data to json file because pycocotools can't process dict in py36. 107 | if self.testset: 108 | json.dump(data_dict, open('coco_test-dev.json', 'w')) 109 | cocoDt = cocoGt.loadRes('coco_test-dev.json') 110 | return -1, -1 111 | else: 112 | _, tmp = tempfile.mkstemp() 113 | json.dump(data_dict, open(tmp, 'w')) 114 | cocoDt = cocoGt.loadRes(tmp) 115 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 116 | cocoEval.params.imgIds = ids 117 | cocoEval.evaluate() 118 | cocoEval.accumulate() 119 | cocoEval.summarize() 120 | 121 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 122 | print('ap50_95 : ', ap50_95) 123 | print('ap50 : ', ap50) 124 | self.map = ap50_95 125 | self.ap50_95 = ap50_95 126 | self.ap50 = ap50 127 | 128 | return ap50, ap50_95 129 | else: 130 | return 0, 0 131 | 132 | -------------------------------------------------------------------------------- /evaluator/crowdhuman_tools/APMRToolkits/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf8 -*- 2 | __author__ = 'jyn' 3 | __email__ = 'jyn@megvii.com' 4 | 5 | from .image import * 6 | from .database import * 7 | -------------------------------------------------------------------------------- /evaluator/crowdhuman_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/evaluator/crowdhuman_tools/__init__.py -------------------------------------------------------------------------------- /evaluator/crowdhuman_tools/compute_APMR.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from .APMRToolkits import * 3 | 4 | dbName = 'human' 5 | def compute_APMR(dt_path, gt_path, target_key=None, mode=0): 6 | database = Database(gt_path, 
dt_path, target_key, None, mode) 7 | database.compare() 8 | mAP,_ = database.eval_AP() 9 | mMR,_ = database.eval_MR() 10 | line = 'AP:{:.4f}, MR:{:.4f}.'.format(mAP, mMR) 11 | return mAP, mMR 12 | 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser(description='Analyze a json result file with iou match') 15 | parser.add_argument('--detfile', required=True, help='path of json result file to load') 16 | parser.add_argument('--target_key', default=None, required=True) 17 | args = parser.parse_args() 18 | compute_APMR(args.detfile, args.target_key, 0) -------------------------------------------------------------------------------- /evaluator/customed_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | import torch 4 | from dataset.customed import CustomedDataset 5 | from utils.box_ops import rescale_bboxes 6 | 7 | try: 8 | from pycocotools.cocoeval import COCOeval 9 | except: 10 | print("It seems that the COCOAPI is not installed.") 11 | 12 | 13 | class CustomedEvaluator(): 14 | def __init__(self, data_dir, device, image_set='val', transform=None): 15 | # ----------------- Basic parameters ----------------- 16 | self.image_set = image_set 17 | self.transform = transform 18 | self.device = device 19 | # ----------------- Metrics ----------------- 20 | self.map = 0. 21 | self.ap50_95 = 0. 22 | self.ap50 = 0. 23 | # ----------------- Dataset ----------------- 24 | self.dataset = CustomedDataset(data_dir=data_dir, image_set=image_set) 25 | 26 | 27 | @torch.no_grad() 28 | def evaluate(self, model): 29 | """ 30 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 31 | and the results are evaluated by COCO API. 32 | Args: 33 | model : model object 34 | Returns: 35 | ap50_95 (float) : calculated COCO AP for IoU=50:95 36 | ap50 (float) : calculated COCO AP for IoU=50 37 | """ 38 | model.eval() 39 | ids = [] 40 | data_dict = [] 41 | num_images = len(self.dataset) 42 | print('total number of images: %d' % (num_images)) 43 | 44 | # start testing 45 | for index in range(num_images): # all the data in val2017 46 | if index % 500 == 0: 47 | print('[Eval: %d / %d]'%(index, num_images)) 48 | 49 | # load an image 50 | img, id_ = self.dataset.pull_image(index) 51 | orig_h, orig_w, _ = img.shape 52 | 53 | # preprocess 54 | x, _, ratio = self.transform(img) 55 | x = x.unsqueeze(0).to(self.device) 56 | 57 | id_ = int(id_) 58 | ids.append(id_) 59 | 60 | # inference 61 | outputs = model(x) 62 | scores = outputs['scores'] 63 | labels = outputs['labels'] 64 | bboxes = outputs['bboxes'] 65 | 66 | # rescale bboxes 67 | bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio) 68 | 69 | for i, box in enumerate(bboxes): 70 | x1 = float(box[0]) 71 | y1 = float(box[1]) 72 | x2 = float(box[2]) 73 | y2 = float(box[3]) 74 | label = self.dataset.class_ids[int(labels[i])] 75 | 76 | bbox = [x1, y1, x2 - x1, y2 - y1] 77 | score = float(scores[i]) # object score * class score 78 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 79 | "score": score} # COCO json format 80 | data_dict.append(A) 81 | 82 | annType = ['segm', 'bbox', 'keypoints'] 83 | 84 | # Evaluate the Dt (detection) json comparing with the ground truth 85 | if len(data_dict) > 0: 86 | print('evaluating ......') 87 | cocoGt = self.dataset.coco 88 | # workaround: temporarily write data to json file because pycocotools can't process dict in py36. 
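# The detections are dumped to a temporary COCO-format JSON file, reloaded with
# cocoGt.loadRes(), and scored with the standard COCOeval pipeline
# (evaluate -> accumulate -> summarize); stats[0] is AP@[0.5:0.95] and stats[1] is AP@0.5.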
89 | _, tmp = tempfile.mkstemp() 90 | json.dump(data_dict, open(tmp, 'w')) 91 | cocoDt = cocoGt.loadRes(tmp) 92 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 93 | cocoEval.params.imgIds = ids 94 | cocoEval.evaluate() 95 | cocoEval.accumulate() 96 | cocoEval.summarize() 97 | 98 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 99 | print('ap50_95 : ', ap50_95) 100 | print('ap50 : ', ap50) 101 | self.map = ap50_95 102 | self.ap50_95 = ap50_95 103 | self.ap50 = ap50 104 | 105 | return ap50, ap50_95 106 | else: 107 | return 0, 0 108 | 109 | -------------------------------------------------------------------------------- /evaluator/widerface_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | import torch 4 | import numpy as np 5 | from dataset.widerface import WiderFaceDataset 6 | from utils.box_ops import rescale_bboxes 7 | 8 | try: 9 | from pycocotools.cocoeval import COCOeval 10 | except: 11 | print("It seems that the COCOAPI is not installed.") 12 | 13 | 14 | class WiderFaceEvaluator(): 15 | """ 16 | COCO AP Evaluation class. 17 | All the data in the val2017 dataset are processed \ 18 | and evaluated by COCO API. 19 | """ 20 | def __init__(self, data_dir, device, image_set='val', transform=None): 21 | """ 22 | data_dir (str): dataset root directory 23 | device: (int): CUDA or CPU. 24 | image_set: train or val. 25 | transform: used to preprocess inputs. 26 | """ 27 | # ----------------- Basic parameters ----------------- 28 | self.image_set = image_set 29 | self.transform = transform 30 | self.device = device 31 | # ----------------- Metrics ----------------- 32 | self.map = 0. 33 | self.ap50_95 = 0. 34 | self.ap50 = 0. 35 | # ----------------- Dataset ----------------- 36 | self.dataset = WiderFaceDataset(data_dir=data_dir, image_set=image_set) 37 | 38 | 39 | @torch.no_grad() 40 | def evaluate(self, model): 41 | """ 42 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 43 | and the results are evaluated by COCO API. 
44 | Args: 45 | model : model object 46 | Returns: 47 | ap50_95 (float) : calculated COCO AP for IoU=50:95 48 | ap50 (float) : calculated COCO AP for IoU=50 49 | """ 50 | model.eval() 51 | ids = [] 52 | data_dict = [] 53 | num_images = len(self.dataset) 54 | print('total number of images: %d' % (num_images)) 55 | 56 | # start testing 57 | for index in range(num_images): # all the data in val2017 58 | if index % 500 == 0: 59 | print('[Eval: %d / %d]'%(index, num_images)) 60 | 61 | # load an image 62 | img, id_ = self.dataset.pull_image(index) 63 | orig_h, orig_w, _ = img.shape 64 | 65 | # preprocess 66 | x, _, ratio = self.transform(img) 67 | x = x.unsqueeze(0).to(self.device) 68 | 69 | id_ = int(id_) 70 | ids.append(id_) 71 | 72 | # inference 73 | outputs = model(x) 74 | scores = outputs['scores'] 75 | labels = outputs['labels'] 76 | bboxes = outputs['bboxes'] 77 | 78 | # rescale bboxes 79 | bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio) 80 | 81 | for i, box in enumerate(bboxes): 82 | x1 = float(box[0]) 83 | y1 = float(box[1]) 84 | x2 = float(box[2]) 85 | y2 = float(box[3]) 86 | label = self.dataset.class_ids[int(labels[i])] 87 | 88 | bbox = [x1, y1, x2 - x1, y2 - y1] 89 | score = float(scores[i]) # object score * class score 90 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 91 | "score": score} # COCO json format 92 | data_dict.append(A) 93 | 94 | annType = ['segm', 'bbox', 'keypoints'] 95 | 96 | # Evaluate the Dt (detection) json comparing with the ground truth 97 | if len(data_dict) > 0: 98 | print('evaluating ......') 99 | cocoGt = self.dataset.coco 100 | # workaround: temporarily write data to json file because pycocotools can't process dict in py36. 101 | _, tmp = tempfile.mkstemp() 102 | json.dump(data_dict, open(tmp, 'w')) 103 | cocoDt = cocoGt.loadRes(tmp) 104 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 105 | cocoEval.params.imgIds = ids 106 | cocoEval.evaluate() 107 | cocoEval.accumulate() 108 | cocoEval.summarize() 109 | 110 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 111 | print('ap50_95 : ', ap50_95) 112 | print('ap50 : ', ap50) 113 | self.map = ap50_95 114 | self.ap50_95 = ap50_95 115 | self.ap50 = ap50 116 | 117 | return ap50, ap50_95 118 | else: 119 | return 0, 0 120 | 121 | -------------------------------------------------------------------------------- /img_files/video_detection_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/img_files/video_detection_demo.gif -------------------------------------------------------------------------------- /img_files/video_tracking_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/img_files/video_tracking_demo.gif -------------------------------------------------------------------------------- /img_files/yolo_tutorial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/img_files/yolo_tutorial.png -------------------------------------------------------------------------------- /models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | # YOLO series 6 | from .yolov1.build 
import build_yolov1 7 | from .yolov2.build import build_yolov2 8 | from .yolov3.build import build_yolov3 9 | from .yolov4.build import build_yolov4 10 | from .yolov5.build import build_yolov5 11 | from .yolov7.build import build_yolov7 12 | from .yolov8.build import build_yolov8 13 | from .yolox.build import build_yolox 14 | 15 | 16 | # build object detector 17 | def build_model(args, 18 | model_cfg, 19 | device, 20 | num_classes=80, 21 | trainable=False, 22 | deploy=False): 23 | # YOLOv1 24 | if args.model == 'yolov1': 25 | model, criterion = build_yolov1( 26 | args, model_cfg, device, num_classes, trainable, deploy) 27 | # YOLOv2 28 | elif args.model == 'yolov2': 29 | model, criterion = build_yolov2( 30 | args, model_cfg, device, num_classes, trainable, deploy) 31 | # YOLOv3 32 | elif args.model in ['yolov3', 'yolov3_tiny']: 33 | model, criterion = build_yolov3( 34 | args, model_cfg, device, num_classes, trainable, deploy) 35 | # YOLOv4 36 | elif args.model in ['yolov4', 'yolov4_tiny']: 37 | model, criterion = build_yolov4( 38 | args, model_cfg, device, num_classes, trainable, deploy) 39 | # YOLOv5 40 | elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']: 41 | model, criterion = build_yolov5( 42 | args, model_cfg, device, num_classes, trainable, deploy) 43 | # YOLOv5-AdamW 44 | elif args.model in ['yolov5_n_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']: 45 | model, criterion = build_yolov5( 46 | args, model_cfg, device, num_classes, trainable, deploy) 47 | # YOLOv7 48 | elif args.model in ['yolov7_tiny', 'yolov7', 'yolov7_x']: 49 | model, criterion = build_yolov7( 50 | args, model_cfg, device, num_classes, trainable, deploy) 51 | # YOLOv8 52 | elif args.model in ['yolov8_n', 'yolov8_s', 'yolov8_m', 'yolov8_l', 'yolov8_x']: 53 | model, criterion = build_yolov8( 54 | args, model_cfg, device, num_classes, trainable, deploy) 55 | # YOLOX 56 | elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']: 57 | model, criterion = build_yolox( 58 | args, model_cfg, device, num_classes, trainable, deploy) 59 | # YOLOX-AdamW 60 | elif args.model in ['yolox_n_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']: 61 | model, criterion = build_yolox( 62 | args, model_cfg, device, num_classes, trainable, deploy) 63 | 64 | if trainable: 65 | # Load pretrained weight 66 | if args.pretrained is not None: 67 | print('Loading COCO pretrained weight ...') 68 | checkpoint = torch.load(args.pretrained, map_location='cpu') 69 | # checkpoint state dict 70 | checkpoint_state_dict = checkpoint.pop("model") 71 | # model state dict 72 | model_state_dict = model.state_dict() 73 | # check 74 | for k in list(checkpoint_state_dict.keys()): 75 | if k in model_state_dict: 76 | shape_model = tuple(model_state_dict[k].shape) 77 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 78 | if shape_model != shape_checkpoint: 79 | checkpoint_state_dict.pop(k) 80 | print(k) 81 | else: 82 | checkpoint_state_dict.pop(k) 83 | print(k) 84 | 85 | model.load_state_dict(checkpoint_state_dict, strict=False) 86 | 87 | # keep training 88 | if args.resume and args.resume != "None": 89 | checkpoint = torch.load(args.resume, map_location='cpu') 90 | # checkpoint state dict 91 | try: 92 | checkpoint_state_dict = checkpoint.pop("model") 93 | print('Load model from the checkpoint: ', args.resume) 94 | model.load_state_dict(checkpoint_state_dict) 95 | del checkpoint, checkpoint_state_dict 96 | except: 97 | print("No model in the given 
checkpoint.") 98 | 99 | return model, criterion 100 | 101 | else: 102 | return model -------------------------------------------------------------------------------- /models/detectors/yolov1/README.md: -------------------------------------------------------------------------------- 1 | # Redesigned YOLOv1: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |--------|------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv1 | ResNet-18 | 1xb16 | 640 | 27.9 | 47.5 | 37.8 | 21.3 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov1_coco.pth) | 6 | 7 | - For training, we train redesigned YOLOv1 with 150 epochs on COCO. 8 | - For data augmentation, we only use the large scale jitter (LSJ), no Mosaic or Mixup augmentation. 9 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 10 | - For learning rate scheduler, we use linear decay scheduler. 11 | 12 | 13 | ## Train YOLOv1 14 | ### Single GPU 15 | Taking training YOLOv1 on COCO as the example, 16 | ```Shell 17 | python train.py --cuda -d coco --root path/to/coco -m yolov1 -bs 16 -size 640 --wp_epoch 3 --max_epoch 150 --eval_epoch 10 --no_aug_epoch 10 --ema --fp16 --multi_scale 18 | ``` 19 | 20 | ### Multi GPU 21 | Taking training YOLOv1 on COCO as the example, 22 | ```Shell 23 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov1 -bs 128 -size 640 --wp_epoch 3 --max_epoch 150 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 24 | ``` 25 | 26 | ## Test YOLOv1 27 | Taking testing YOLOv1 on COCO-val as the example, 28 | ```Shell 29 | python test.py --cuda -d coco --root path/to/coco -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show 30 | ``` 31 | 32 | ## Evaluate YOLOv1 33 | Taking evaluating YOLOv1 on COCO-val as the example, 34 | ```Shell 35 | python eval.py --cuda -d coco --root path/to/coco -m yolov1 --weight path/to/yolov1_coco.pth 36 | ``` 37 | 38 | ## Demo 39 | ### Detect with Image 40 | ```Shell 41 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show 42 | ``` 43 | 44 | ### Detect with Video 45 | ```Shell 46 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show --gif 47 | ``` 48 | 49 | ### Detect with Camera 50 | ```Shell 51 | python demo.py --mode camera --cuda -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show --gif 52 | ``` 53 | -------------------------------------------------------------------------------- /models/detectors/yolov1/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov1 import YOLOv1 9 | 10 | 11 | # build object detector 12 | def build_yolov1(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv1(cfg = cfg, 21 | device = device, 22 | img_size = args.img_size, 23 | num_classes = num_classes, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | trainable = trainable, 27 | deploy = deploy, 28 | nms_class_agnostic = args.nms_class_agnostic 29 | ) 30 | 31 | # -------------- Initialize YOLO -------------- 32 | # Init bias 33 | init_prob = 0.01 34 | bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) 35 | # obj pred 36 | b = model.obj_pred.bias.view(1, -1) 37 | b.data.fill_(bias_value.item()) 38 | model.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 39 | # cls pred 40 | b = model.cls_pred.bias.view(1, -1) 41 | b.data.fill_(bias_value.item()) 42 | model.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 43 | # reg pred 44 | b = model.reg_pred.bias.view(-1, ) 45 | b.data.fill_(1.0) 46 | model.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 47 | w = model.reg_pred.weight 48 | w.data.fill_(0.) 49 | model.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 50 | 51 | 52 | # -------------- Build criterion -------------- 53 | criterion = None 54 | if trainable: 55 | # build criterion for training 56 | criterion = build_criterion(cfg, device, num_classes) 57 | 58 | return model, criterion 59 | -------------------------------------------------------------------------------- /models/detectors/yolov1/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import YoloMatcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | self.loss_obj_weight = cfg['loss_obj_weight'] 14 | self.loss_cls_weight = cfg['loss_cls_weight'] 15 | self.loss_box_weight = cfg['loss_box_weight'] 16 | 17 | # matcher 18 | self.matcher = YoloMatcher(num_classes=num_classes) 19 | 20 | 21 | def loss_objectness(self, pred_obj, gt_obj): 22 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 23 | 24 | return loss_obj 25 | 26 | 27 | def loss_classes(self, pred_cls, gt_label): 28 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 29 | 30 | return loss_cls 31 | 32 | 33 | def loss_bboxes(self, pred_box, gt_box): 34 | # regression loss 35 | ious = get_ious(pred_box, 36 | gt_box, 37 | box_mode="xyxy", 38 | iou_type='giou') 39 | loss_box = 1.0 - ious 40 | 41 | return loss_box 42 | 43 | 44 | def __call__(self, outputs, targets, epoch=0): 45 | device = outputs['pred_cls'][0].device 46 | stride = outputs['stride'] 47 | fmp_size = outputs['fmp_size'] 48 | ( 49 | gt_objectness, 50 | gt_classes, 51 | gt_bboxes, 52 | ) = self.matcher(fmp_size=fmp_size, 53 | stride=stride, 54 | targets=targets) 55 | # List[B, M, C] -> [B, M, C] -> [BM, C] 56 | pred_obj = outputs['pred_obj'].view(-1) # [BM,] 57 | pred_cls = outputs['pred_cls'].view(-1, self.num_classes) # [BM, C] 58 | pred_box = outputs['pred_box'].view(-1, 4) # [BM, 4] 59 | 60 | gt_objectness = gt_objectness.view(-1).to(device).float() # [BM,] 61 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 62 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 63 | 64 | pos_masks = (gt_objectness > 0) 65 | num_fgs = pos_masks.sum() 66 | 67 | if is_dist_avail_and_initialized(): 68 | torch.distributed.all_reduce(num_fgs) 69 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 70 | 71 | # obj loss 72 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 73 | loss_obj = loss_obj.sum() / num_fgs 74 | 75 | # cls loss 76 | pred_cls_pos = pred_cls[pos_masks] 77 | gt_classes_pos = gt_classes[pos_masks] 78 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 
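# Note: like loss_obj above, loss_cls is an unreduced BCE-with-logits map. It is
# computed only on the positive (object-bearing) cells and, just below, is summed
# and divided by num_fgs (the foreground count, averaged across ranks when training
# is distributed and clamped to at least 1) instead of by the batch size.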
79 | loss_cls = loss_cls.sum() / num_fgs 80 | 81 | # box loss 82 | pred_box_pos = pred_box[pos_masks] 83 | gt_bboxes_pos = gt_bboxes[pos_masks] 84 | loss_box = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 85 | loss_box = loss_box.sum() / num_fgs 86 | 87 | # total loss 88 | losses = self.loss_obj_weight * loss_obj + \ 89 | self.loss_cls_weight * loss_cls + \ 90 | self.loss_box_weight * loss_box 91 | 92 | loss_dict = dict( 93 | loss_obj = loss_obj, 94 | loss_cls = loss_cls, 95 | loss_box = loss_box, 96 | losses = losses 97 | ) 98 | 99 | return loss_dict 100 | 101 | 102 | def build_criterion(cfg, device, num_classes): 103 | criterion = Criterion( 104 | cfg=cfg, 105 | device=device, 106 | num_classes=num_classes 107 | ) 108 | 109 | return criterion 110 | 111 | 112 | if __name__ == "__main__": 113 | pass 114 | -------------------------------------------------------------------------------- /models/detectors/yolov1/matcher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class YoloMatcher(object): 6 | def __init__(self, num_classes): 7 | self.num_classes = num_classes 8 | 9 | 10 | @torch.no_grad() 11 | def __call__(self, fmp_size, stride, targets): 12 | """ 13 | img_size: (Int) input image size 14 | stride: (Int) -> stride of YOLOv1 output. 15 | targets: (Dict) dict{'boxes': [...], 16 | 'labels': [...], 17 | 'orig_size': ...} 18 | """ 19 | # prepare 20 | bs = len(targets) 21 | fmp_h, fmp_w = fmp_size 22 | gt_objectness = np.zeros([bs, fmp_h, fmp_w, 1]) 23 | gt_classes = np.zeros([bs, fmp_h, fmp_w, self.num_classes]) 24 | gt_bboxes = np.zeros([bs, fmp_h, fmp_w, 4]) 25 | 26 | for batch_index in range(bs): 27 | targets_per_image = targets[batch_index] 28 | # [N,] 29 | tgt_cls = targets_per_image["labels"].numpy() 30 | # [N, 4] 31 | tgt_box = targets_per_image['boxes'].numpy() 32 | 33 | for gt_box, gt_label in zip(tgt_box, tgt_cls): 34 | x1, y1, x2, y2 = gt_box 35 | # xyxy -> cxcywh 36 | xc, yc = (x2 + x1) * 0.5, (y2 + y1) * 0.5 37 | bw, bh = x2 - x1, y2 - y1 38 | 39 | # check 40 | if bw < 1. 
or bh < 1.: 41 | continue 42 | 43 | # grid 44 | xs_c = xc / stride 45 | ys_c = yc / stride 46 | grid_x = int(xs_c) 47 | grid_y = int(ys_c) 48 | 49 | if grid_x < fmp_w and grid_y < fmp_h: 50 | # obj 51 | gt_objectness[batch_index, grid_y, grid_x] = 1.0 52 | # cls 53 | cls_ont_hot = np.zeros(self.num_classes) 54 | cls_ont_hot[int(gt_label)] = 1.0 55 | gt_classes[batch_index, grid_y, grid_x] = cls_ont_hot 56 | # box 57 | gt_bboxes[batch_index, grid_y, grid_x] = np.array([x1, y1, x2, y2]) 58 | 59 | # [B, M, C] 60 | gt_objectness = gt_objectness.reshape(bs, -1, 1) 61 | gt_classes = gt_classes.reshape(bs, -1, self.num_classes) 62 | gt_bboxes = gt_bboxes.reshape(bs, -1, 4) 63 | 64 | # to tensor 65 | gt_objectness = torch.from_numpy(gt_objectness).float() 66 | gt_classes = torch.from_numpy(gt_classes).float() 67 | gt_bboxes = torch.from_numpy(gt_bboxes).float() 68 | 69 | return gt_objectness, gt_classes, gt_bboxes 70 | -------------------------------------------------------------------------------- /models/detectors/yolov1/yolov1_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | -------------------------------------------------------------------------------- /models/detectors/yolov1/yolov1_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov1_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def 
__init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov1/yolov1_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov1_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | def build_neck(cfg, in_dim, out_dim): 28 | model = cfg['neck'] 29 | print('==============================') 30 | print('Neck: {}'.format(model)) 31 | # build neck 32 | if model == 'sppf': 33 | neck = SPPF( 34 | in_dim=in_dim, 35 | out_dim=out_dim, 36 | expand_ratio=cfg['expand_ratio'], 37 | pooling_size=cfg['pooling_size'], 38 | act_type=cfg['neck_act'], 39 | norm_type=cfg['neck_norm'] 40 | ) 41 | 42 | return neck 43 | -------------------------------------------------------------------------------- /models/detectors/yolov2/README.md: 
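The SPPF neck above chains a single 5x5 max-pool three times instead of running the 5/9/13 pools of the original SPP in parallel. A minimal standalone check of that equivalence (not part of this repository; the pool modules and the random tensor below are made up for illustration):

```Python
import torch
import torch.nn as nn

# stride-1 max-pools with "same" padding
m5  = nn.MaxPool2d(kernel_size=5,  stride=1, padding=2)
m9  = nn.MaxPool2d(kernel_size=9,  stride=1, padding=4)
m13 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6)

x  = torch.randn(1, 8, 32, 32)
y1 = m5(x)    # 5x5 receptive field
y2 = m5(y1)   # two chained 5x5 pools == one 9x9 pool
y3 = m5(y2)   # three chained 5x5 pools == one 13x13 pool

print(torch.allclose(y2, m9(x)))    # True
print(torch.allclose(y3, m13(x)))   # True
```

SPPF's `torch.cat((x, y1, y2, self.m(y2)), 1)` therefore concatenates the same four feature maps an SPP block with kernels {5, 9, 13} would produce, while reusing each intermediate pooling result.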
-------------------------------------------------------------------------------- 1 | # Redesigned YOLOv2: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |--------|------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv2 | DarkNet-19 | 1xb16 | 640 | 32.7 | 50.9 | 53.9 | 30.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov2_coco.pth) | 6 | 7 | - For training, we train redesigned YOLOv2 with 150 epochs on COCO. 8 | - For data augmentation, we only use the large scale jitter (LSJ), no Mosaic or Mixup augmentation. 9 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 10 | - For learning rate scheduler, we use linear decay scheduler. 11 | 12 | ## Train YOLOv2 13 | ### Single GPU 14 | Taking training YOLOv2 on COCO as the example, 15 | ```Shell 16 | python train.py --cuda -d coco --root path/to/coco -m yolov2 -bs 16 -size 640 --wp_epoch 3 --max_epoch 200 --eval_epoch 10 --no_aug_epoch 15 --ema --fp16 --multi_scale 17 | ``` 18 | 19 | ### Multi GPU 20 | Taking training YOLOv2 on COCO as the example, 21 | ```Shell 22 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov2 -bs 128 -size 640 --wp_epoch 3 --max_epoch 200 --eval_epoch 10 --no_aug_epoch 15 --ema --fp16 --sybn --multi_scale --save_folder weights/ 23 | ``` 24 | 25 | ## Test YOLOv2 26 | Taking testing YOLOv2 on COCO-val as the example, 27 | ```Shell 28 | python test.py --cuda -d coco --root path/to/coco -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show 29 | ``` 30 | 31 | ## Evaluate YOLOv2 32 | Taking evaluating YOLOv2 on COCO-val as the example, 33 | ```Shell 34 | python eval.py --cuda -d coco --root path/to/coco -m yolov2 --weight path/to/yolov2_coco.pth 35 | ``` 36 | 37 | ## Demo 38 | ### Detect with Image 39 | ```Shell 40 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show 41 | ``` 42 | 43 | ### Detect with Video 44 | ```Shell 45 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show --gif 46 | ``` 47 | 48 | ### Detect with Camera 49 | ```Shell 50 | python demo.py --mode camera --cuda -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show --gif 51 | ``` 52 | -------------------------------------------------------------------------------- /models/detectors/yolov2/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov2 import YOLOv2 9 | 10 | 11 | # build object detector 12 | def build_yolov2(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv2(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | # Init bias 34 | init_prob = 0.01 35 | bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) 36 | # obj pred 37 | b = model.obj_pred.bias.view(1, -1) 38 | b.data.fill_(bias_value.item()) 39 | model.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 40 | # cls pred 41 | b = model.cls_pred.bias.view(1, -1) 42 | b.data.fill_(bias_value.item()) 43 | model.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 44 | # reg pred 45 | b = model.reg_pred.bias.view(-1, ) 46 | b.data.fill_(1.0) 47 | model.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 48 | w = model.reg_pred.weight 49 | w.data.fill_(0.) 50 | model.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 51 | 52 | 53 | # -------------- Build criterion -------------- 54 | criterion = None 55 | if trainable: 56 | # build criterion for training 57 | criterion = build_criterion(cfg, device, num_classes) 58 | return model, criterion 59 | -------------------------------------------------------------------------------- /models/detectors/yolov2/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov2Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov2Matcher(cfg['iou_thresh'], num_classes, cfg['anchor_size']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'].device 47 | stride = outputs['stride'] 48 | fmp_size = outputs['fmp_size'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_size=fmp_size, 54 | stride=stride, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = outputs['pred_obj'].view(-1) # [BM,] 58 | pred_cls = outputs['pred_cls'].view(-1, self.num_classes) # [BM, C] 59 | pred_box = outputs['pred_box'].view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = gt_objectness.view(-1).to(device).float() # [BM,] 62 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | 
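# Here the one-hot class targets of the positive cells are rescaled by the
# predicted-box/ground-truth overlap returned by loss_bboxes (a GIoU value
# clamped at 0), so the classifier is trained on IoU-aware soft targets
# rather than hard 0/1 labels.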
pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | model_urls = { 6 | "darknet19": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet19.pth", 7 | } 8 | 9 | 10 | __all__ = ['darknet19'] 11 | 12 | 13 | # --------------------- Basic Module ----------------------- 14 | class Conv_BN_LeakyReLU(nn.Module): 15 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 16 | super(Conv_BN_LeakyReLU, self).__init__() 17 | self.convs = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 19 | nn.BatchNorm2d(out_channels), 20 | nn.LeakyReLU(0.1, inplace=True) 21 | ) 22 | 23 | def forward(self, x): 24 | return self.convs(x) 25 | 26 | 27 | # --------------------- DarkNet-19 ----------------------- 28 | class DarkNet19(nn.Module): 29 | def __init__(self): 30 | 31 | super(DarkNet19, self).__init__() 32 | # backbone network : DarkNet-19 33 | # output : stride = 2, c = 32 34 | self.conv_1 = nn.Sequential( 35 | Conv_BN_LeakyReLU(3, 32, 3, 1), 36 | nn.MaxPool2d((2,2), 2), 37 | ) 38 | 39 | # output : stride = 4, c = 64 40 | self.conv_2 = nn.Sequential( 41 | Conv_BN_LeakyReLU(32, 64, 3, 1), 42 | nn.MaxPool2d((2,2), 2) 43 | ) 44 | 45 | # output : stride = 8, c = 128 46 | self.conv_3 = nn.Sequential( 47 | Conv_BN_LeakyReLU(64, 128, 3, 1), 48 | Conv_BN_LeakyReLU(128, 64, 1), 49 | Conv_BN_LeakyReLU(64, 128, 3, 1), 50 | nn.MaxPool2d((2,2), 2) 51 | ) 52 | 53 | # output : stride = 8, c = 256 54 | self.conv_4 = nn.Sequential( 55 | Conv_BN_LeakyReLU(128, 256, 3, 1), 56 | Conv_BN_LeakyReLU(256, 128, 1), 57 | Conv_BN_LeakyReLU(128, 256, 3, 1), 58 | ) 59 | 60 | # output : stride = 16, c = 512 61 | self.maxpool_4 = nn.MaxPool2d((2, 2), 2) 62 | self.conv_5 = nn.Sequential( 63 | Conv_BN_LeakyReLU(256, 512, 3, 1), 64 | Conv_BN_LeakyReLU(512, 256, 1), 65 | Conv_BN_LeakyReLU(256, 512, 3, 1), 66 | Conv_BN_LeakyReLU(512, 256, 1), 67 | Conv_BN_LeakyReLU(256, 512, 3, 1), 68 | ) 69 | 70 | # output : stride = 32, c = 1024 71 | self.maxpool_5 = nn.MaxPool2d((2, 2), 2) 72 | self.conv_6 = nn.Sequential( 73 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 74 | Conv_BN_LeakyReLU(1024, 512, 1), 75 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 76 | Conv_BN_LeakyReLU(1024, 512, 1), 77 | Conv_BN_LeakyReLU(512, 1024, 3, 1) 78 | ) 79 | 80 | 81 | def forward(self, x): 82 | c1 = self.conv_1(x) # c1 83 | c2 = self.conv_2(c1) # c2 84 | c3 
= self.conv_3(c2) # c3 85 | c3 = self.conv_4(c3) # c3 86 | c4 = self.conv_5(self.maxpool_4(c3)) # c4 87 | c5 = self.conv_6(self.maxpool_5(c4)) # c5 88 | 89 | return c5 90 | 91 | 92 | # --------------------- Fsnctions ----------------------- 93 | def build_backbone(model_name='darknet19', pretrained=False): 94 | if model_name == 'darknet19': 95 | # model 96 | model = DarkNet19() 97 | feat_dim = 1024 98 | 99 | # load weight 100 | if pretrained: 101 | print('Loading pretrained weight ...') 102 | url = model_urls['darknet19'] 103 | # checkpoint state dict 104 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 105 | url=url, map_location="cpu", check_hash=True) 106 | # model state dict 107 | model_state_dict = model.state_dict() 108 | # check 109 | for k in list(checkpoint_state_dict.keys()): 110 | if k in model_state_dict: 111 | shape_model = tuple(model_state_dict[k].shape) 112 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 113 | if shape_model != shape_checkpoint: 114 | checkpoint_state_dict.pop(k) 115 | else: 116 | checkpoint_state_dict.pop(k) 117 | print('Unused key: ', k) 118 | 119 | model.load_state_dict(checkpoint_state_dict) 120 | 121 | return model, feat_dim 122 | 123 | 124 | if __name__ == '__main__': 125 | import time 126 | model, feat_dim = build_backbone(pretrained=True) 127 | x = torch.randn(1, 3, 224, 224) 128 | t0 = time.time() 129 | y = model(x) 130 | t1 = time.time() 131 | print('Time: ', t1 - t0) 132 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | 
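# get_norm maps 'BN' to nn.BatchNorm2d and 'GN' to a 32-group nn.GroupNorm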
convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov2_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov2_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | def 
build_neck(cfg, in_dim, out_dim): 28 | model = cfg['neck'] 29 | print('==============================') 30 | print('Neck: {}'.format(model)) 31 | # build neck 32 | if model == 'sppf': 33 | neck = SPPF( 34 | in_dim=in_dim, 35 | out_dim=out_dim, 36 | expand_ratio=cfg['expand_ratio'], 37 | pooling_size=cfg['pooling_size'], 38 | act_type=cfg['neck_act'], 39 | norm_type=cfg['neck_norm'] 40 | ) 41 | 42 | return neck 43 | -------------------------------------------------------------------------------- /models/detectors/yolov3/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |-------------|--------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv3-Tiny | DarkNet-Tiny | 1xb16 | 640 | 25.4 | 43.4 | 7.0 | 2.3 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov3_t_coco.pth) | 6 | | YOLOv3 | DarkNet-53 | 1xb16 | 640 | 42.9 | 63.5 | 167.4 | 54.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov3_coco.pth) | 7 | 8 | - For training, we train YOLOv3 and YOLOv3-Tiny with 250 epochs on COCO. 9 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following the setting of [YOLOv5](https://github.com/ultralytics/yolov5). 10 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 11 | - For learning rate scheduler, we use linear decay scheduler. 12 | - For YOLOv3's structure, we use decoupled head, following the setting of [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX). 13 | 14 | ## Train YOLOv3 15 | ### Single GPU 16 | Taking training YOLOv3 on COCO as the example, 17 | ```Shell 18 | python train.py --cuda -d coco --root path/to/coco -m yolov3 -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 19 | ``` 20 | 21 | ### Multi GPU 22 | Taking training YOLOv3 on COCO as the example, 23 | ```Shell 24 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov3 -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 25 | ``` 26 | 27 | ## Test YOLOv3 28 | Taking testing YOLOv3 on COCO-val as the example, 29 | ```Shell 30 | python test.py --cuda -d coco --root path/to/coco -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show 31 | ``` 32 | 33 | ## Evaluate YOLOv3 34 | Taking evaluating YOLOv3 on COCO-val as the example, 35 | ```Shell 36 | python eval.py --cuda -d coco --root path/to/coco -m yolov3 --weight path/to/yolov3_coco.pth 37 | ``` 38 | 39 | ## Demo 40 | ### Detect with Image 41 | ```Shell 42 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show 43 | ``` 44 | 45 | ### Detect with Video 46 | ```Shell 47 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show --gif 48 | ``` 49 | 50 | ### Detect with Camera 51 | ```Shell 52 | python demo.py --mode camera --cuda -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show --gif 53 | ``` 54 | -------------------------------------------------------------------------------- /models/detectors/yolov3/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov3 import YOLOv3 9 | 10 | 11 | # build object detector 12 | def build_yolov3(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv3(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = 
trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolov3/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov3Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov3Matcher(num_classes, 3, cfg['anchor_size'], cfg['iou_thresh']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'][0].device 47 | fpn_strides = outputs['strides'] 48 | fmp_sizes = outputs['fmp_sizes'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_sizes=fmp_sizes, 54 | fpn_strides=fpn_strides, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = torch.cat(outputs['pred_obj'], dim=1).view(-1) # [BM,] 58 | pred_cls = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes) # [BM, C] 59 | pred_box = torch.cat(outputs['pred_box'], dim=1).view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = 
gt_objectness.view(-1).to(device).float() # [BM,] 62 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | 
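# standard (non-depthwise) path: a single full k x k convolution with groups=1;
# the conv bias is omitted (add_bias=False) whenever a norm layer follows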
convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | 79 | 80 | # BottleNeck 81 | class Bottleneck(nn.Module): 82 | def __init__(self, 83 | in_dim, 84 | out_dim, 85 | expand_ratio=0.5, 86 | shortcut=False, 87 | depthwise=False, 88 | act_type='silu', 89 | norm_type='BN'): 90 | super(Bottleneck, self).__init__() 91 | inter_dim = int(out_dim * expand_ratio) # hidden channels 92 | self.cv1 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 93 | self.cv2 = Conv(inter_dim, out_dim, k=3, p=1, norm_type=norm_type, act_type=act_type, depthwise=depthwise) 94 | self.shortcut = shortcut and in_dim == out_dim 95 | 96 | def forward(self, x): 97 | h = self.cv2(self.cv1(x)) 98 | 99 | return x + h if self.shortcut else h 100 | 101 | 102 | # ResBlock 103 | class ResBlock(nn.Module): 104 | def __init__(self, 105 | in_dim, 106 | out_dim, 107 | nblocks=1, 108 | act_type='silu', 109 | norm_type='BN'): 110 | super(ResBlock, self).__init__() 111 | assert in_dim == out_dim 112 | self.m = nn.Sequential(*[ 113 | Bottleneck(in_dim, out_dim, expand_ratio=0.5, shortcut=True, 114 | norm_type=norm_type, act_type=act_type) 115 | for _ in range(nblocks) 116 | ]) 117 | 118 | def forward(self, x): 119 | return self.m(x) 120 | 121 | 122 | # ConvBlocks 123 | class ConvBlocks(nn.Module): 124 | def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False): 125 | super().__init__() 126 | inter_dim = out_dim // 2 127 | self.convs = nn.Sequential( 128 | Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type), 129 | Conv(out_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 130 | Conv(inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type), 131 | Conv(out_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 132 | Conv(inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type) 133 | ) 134 | 135 | def forward(self, x): 136 | return self.convs(x) 137 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_fpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .yolov3_basic import Conv, ConvBlocks 6 | 7 | 8 | # Yolov3FPN 9 | class Yolov3FPN(nn.Module): 10 | def __init__(self, 11 | in_dims=[256, 512, 1024], 12 | width=1.0, 13 | depth=1.0, 14 | out_dim=None, 15 | act_type='silu', 16 | norm_type='BN'): 17 | super(Yolov3FPN, self).__init__() 18 | self.in_dims = in_dims 19 | self.out_dim = out_dim 20 | c3, c4, c5 = in_dims 21 | 22 | # P5 -> P4 23 | self.top_down_layer_1 = ConvBlocks(c5, int(512*width), act_type=act_type, norm_type=norm_type) 24 | self.reduce_layer_1 = Conv(int(512*width), int(256*width), k=1, act_type=act_type, norm_type=norm_type) 25 | 26 | # P4 -> P3 27 | self.top_down_layer_2 = ConvBlocks(c4 + int(256*width), int(256*width), act_type=act_type, norm_type=norm_type) 28 | self.reduce_layer_2 = Conv(int(256*width), int(128*width), k=1, act_type=act_type, norm_type=norm_type) 29 | 30 | # P3 31 | self.top_down_layer_3 = ConvBlocks(c3 + int(128*width), int(128*width), act_type=act_type, norm_type=norm_type) 32 | 33 | # output proj layers 34 | if out_dim is 
not None: 35 | # output proj layers 36 | self.out_layers = nn.ModuleList([ 37 | Conv(in_dim, out_dim, k=1, 38 | norm_type=norm_type, act_type=act_type) 39 | for in_dim in [int(128 * width), int(256 * width), int(512 * width)] 40 | ]) 41 | self.out_dim = [out_dim] * 3 42 | 43 | else: 44 | self.out_layers = None 45 | self.out_dim = [int(128 * width), int(256 * width), int(512 * width)] 46 | 47 | 48 | def forward(self, features): 49 | c3, c4, c5 = features 50 | 51 | # p5/32 52 | p5 = self.top_down_layer_1(c5) 53 | 54 | # p4/16 55 | p5_up = F.interpolate(self.reduce_layer_1(p5), scale_factor=2.0) 56 | p4 = self.top_down_layer_2(torch.cat([c4, p5_up], dim=1)) 57 | 58 | # P3/8 59 | p4_up = F.interpolate(self.reduce_layer_2(p4), scale_factor=2.0) 60 | p3 = self.top_down_layer_3(torch.cat([c3, p4_up], dim=1)) 61 | 62 | out_feats = [p3, p4, p5] 63 | 64 | # output proj layers 65 | if self.out_layers is not None: 66 | # output proj layers 67 | out_feats_proj = [] 68 | for feat, layer in zip(out_feats, self.out_layers): 69 | out_feats_proj.append(layer(feat)) 70 | return out_feats_proj 71 | 72 | return out_feats 73 | 74 | 75 | def build_fpn(cfg, in_dims, out_dim=None): 76 | model = cfg['fpn'] 77 | # build neck 78 | if model == 'yolov3_fpn': 79 | fpn_net = Yolov3FPN(in_dims=in_dims, 80 | out_dim=out_dim, 81 | width=cfg['width'], 82 | depth=cfg['depth'], 83 | act_type=cfg['fpn_act'], 84 | norm_type=cfg['fpn_norm'] 85 | ) 86 | 87 | return fpn_net 88 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov3_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, 
in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov3_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | def build_neck(cfg, in_dim, out_dim): 28 | model = cfg['neck'] 29 | print('==============================') 30 | print('Neck: {}'.format(model)) 31 | # build neck 32 | if model == 'sppf': 33 | neck = SPPF( 34 | in_dim=in_dim, 35 | out_dim=out_dim, 36 | expand_ratio=cfg['expand_ratio'], 37 | pooling_size=cfg['pooling_size'], 38 | act_type=cfg['neck_act'], 39 | norm_type=cfg['neck_norm'] 40 | ) 41 | 42 | return neck 43 | -------------------------------------------------------------------------------- /models/detectors/yolov4/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv4: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |-------------|-----------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv4-Tiny | CSPDarkNet-Tiny | 1xb16 | 640 | 31.0 | 49.1 | 8.1 | 2.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov4_t_coco.pth) | 6 | | YOLOv4 | CSPDarkNet-53 | 1xb16 | 640 | 46.6 | 65.8 | 162.7 | 61.5 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov4_coco.pth) | 7 | 8 | - For training, we train YOLOv4 and YOLOv4-Tiny with 250 epochs on COCO. 9 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following the setting of [YOLOv5](https://github.com/ultralytics/yolov5). 10 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 11 | - For learning rate scheduler, we use linear decay scheduler. 12 | - For YOLOv4's structure, we use decoupled head, following the setting of [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX). 13 | 14 | ## Train YOLOv4 15 | ### Single GPU 16 | Taking training YOLOv4 on COCO as the example, 17 | ```Shell 18 | python train.py --cuda -d coco --root path/to/coco -m yolov4 -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 19 | ``` 20 | 21 | ### Multi GPU 22 | Taking training YOLOv4 on COCO as the example, 23 | ```Shell 24 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov4 -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 25 | ``` 26 | 27 | ## Test YOLOv4 28 | Taking testing YOLOv4 on COCO-val as the example, 29 | ```Shell 30 | python test.py --cuda -d coco --root path/to/coco -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show 31 | ``` 32 | 33 | ## Evaluate YOLOv4 34 | Taking evaluating YOLOv4 on COCO-val as the example, 35 | ```Shell 36 | python eval.py --cuda -d coco --root path/to/coco -m yolov4 --weight path/to/yolov4_coco.pth 37 | ``` 38 | 39 | ## Demo 40 | ### Detect with Image 41 | ```Shell 42 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show 43 | ``` 44 | 45 | ### Detect with Video 46 | ```Shell 47 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show --gif 48 | ``` 49 | 50 | ### Detect with Camera 51 | ```Shell 52 | python demo.py --mode camera --cuda -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show --gif 53 | ``` 54 | -------------------------------------------------------------------------------- /models/detectors/yolov4/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov4 import YOLOv4 9 | 10 | 11 | # build object detector 12 | def build_yolov4(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv4(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = 
trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolov4/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov4Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov4Matcher(num_classes, 3, cfg['anchor_size'], cfg['iou_thresh']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'][0].device 47 | fpn_strides = outputs['strides'] 48 | fmp_sizes = outputs['fmp_sizes'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_sizes=fmp_sizes, 54 | fpn_strides=fpn_strides, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = torch.cat(outputs['pred_obj'], dim=1).view(-1) # [BM,] 58 | pred_cls = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes) # [BM, C] 59 | pred_box = torch.cat(outputs['pred_box'], dim=1).view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = 
gt_objectness.view(-1).to(device).float() # [BM,] 62 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov4/yolov4_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | 
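# The plain branch below uses a single k x k Conv2d (bias is added only when no norm
# layer follows), then the optional norm and activation. The depthwise branch above
# factorizes the same convolution into a k x k grouped conv (groups = c1) plus a
# 1 x 1 pointwise conv, each with its own norm/activation, to cut FLOPs and parameters.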
convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | 79 | 80 | # BottleNeck 81 | class Bottleneck(nn.Module): 82 | def __init__(self, 83 | in_dim, 84 | out_dim, 85 | expand_ratio=0.5, 86 | shortcut=False, 87 | depthwise=False, 88 | act_type='silu', 89 | norm_type='BN'): 90 | super(Bottleneck, self).__init__() 91 | inter_dim = int(out_dim * expand_ratio) # hidden channels 92 | self.cv1 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 93 | self.cv2 = Conv(inter_dim, out_dim, k=3, p=1, norm_type=norm_type, act_type=act_type, depthwise=depthwise) 94 | self.shortcut = shortcut and in_dim == out_dim 95 | 96 | def forward(self, x): 97 | h = self.cv2(self.cv1(x)) 98 | 99 | return x + h if self.shortcut else h 100 | 101 | 102 | # CSP-stage block 103 | class CSPBlock(nn.Module): 104 | def __init__(self, 105 | in_dim, 106 | out_dim, 107 | expand_ratio=0.5, 108 | nblocks=1, 109 | shortcut=False, 110 | depthwise=False, 111 | act_type='silu', 112 | norm_type='BN'): 113 | super(CSPBlock, self).__init__() 114 | inter_dim = int(out_dim * expand_ratio) 115 | self.cv1 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 116 | self.cv2 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 117 | self.cv3 = Conv(2 * inter_dim, out_dim, k=1, norm_type=norm_type, act_type=act_type) 118 | self.m = nn.Sequential(*[ 119 | Bottleneck(inter_dim, inter_dim, expand_ratio=1.0, shortcut=shortcut, 120 | norm_type=norm_type, act_type=act_type, depthwise=depthwise) 121 | for _ in range(nblocks) 122 | ]) 123 | 124 | def forward(self, x): 125 | x1 = self.cv1(x) 126 | x2 = self.cv2(x) 127 | x3 = self.m(x1) 128 | out = self.cv3(torch.cat([x3, x2], dim=1)) 129 | 130 | return out 131 | -------------------------------------------------------------------------------- /models/detectors/yolov4/yolov4_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov4_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | 
Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov4/yolov4_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov4_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | # SPPF block with CSP module 28 | class SPPFBlockCSP(nn.Module): 29 | """ 30 | CSP Spatial Pyramid Pooling Block 31 | """ 32 | def __init__(self, 33 | in_dim, 34 | out_dim, 35 | expand_ratio=0.5, 36 | pooling_size=5, 37 | act_type='lrelu', 38 | norm_type='BN', 39 | depthwise=False 40 | ): 41 | super(SPPFBlockCSP, self).__init__() 42 | inter_dim = int(in_dim * expand_ratio) 43 | self.out_dim = out_dim 44 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 45 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.m = nn.Sequential( 47 | Conv(inter_dim, inter_dim, k=3, p=1, 48 | act_type=act_type, norm_type=norm_type, 49 | depthwise=depthwise), 50 | SPPF(inter_dim, 51 | inter_dim, 52 | expand_ratio=1.0, 53 | pooling_size=pooling_size, 54 | act_type=act_type, 55 | norm_type=norm_type), 56 | Conv(inter_dim, inter_dim, k=3, p=1, 57 | act_type=act_type, norm_type=norm_type, 58 | depthwise=depthwise) 59 | ) 60 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 61 | 62 | 63 | def forward(self, x): 64 | x1 = self.cv1(x) 65 | x2 = self.cv2(x) 66 | x3 = self.m(x2) 67 | y = self.cv3(torch.cat([x1, x3], dim=1)) 68 | 69 | return y 70 | 71 | 72 | def build_neck(cfg, in_dim, out_dim): 73 | model = cfg['neck'] 74 | print('==============================') 75 | print('Neck: {}'.format(model)) 76 | # build neck 77 | if model == 'sppf': 78 | neck = SPPF( 79 | in_dim=in_dim, 80 | out_dim=out_dim, 81 | expand_ratio=cfg['expand_ratio'], 82 | pooling_size=cfg['pooling_size'], 83 | act_type=cfg['neck_act'], 84 | norm_type=cfg['neck_norm'] 85 | ) 86 | elif model == 'csp_sppf': 87 | neck = SPPFBlockCSP( 88 | in_dim=in_dim, 89 | out_dim=out_dim, 90 | 
expand_ratio=cfg['expand_ratio'], 91 | pooling_size=cfg['pooling_size'], 92 | act_type=cfg['neck_act'], 93 | norm_type=cfg['neck_norm'], 94 | depthwise=cfg['neck_depthwise'] 95 | ) 96 | 97 | return neck 98 | -------------------------------------------------------------------------------- /models/detectors/yolov5/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv5: 2 | 3 | | Model | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |-----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv5-N | 8xb16 | 640 | | | | | | 6 | | YOLOv5-S | 8xb16 | 640 | 39.2 | 57.9 | 27.3 | 9.0 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov5_s_coco_adamw.pth) | 7 | | YOLOv5-M | 8xb16 | 640 | | | | | | 8 | | YOLOv5-L | 8xb16 | 640 | | | | | | 9 | | YOLOv5-X | 8xb16 | 640 | | | | | | 10 | 11 | - For training, we train YOLOv5 series with 300 epochs on COCO. 12 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following the setting of [YOLOv5](https://github.com/ultralytics/yolov5). 13 | - For optimizer, we use AdamW with weight decay 0.05 and base per image lr 0.001 / 64. We are not good at using SGD. 14 | - For learning rate scheduler, we use linear decay scheduler. 15 | - We use decoupled head in our reproduced YOLOv5, which is different from the official YOLOv5'head. 16 | 17 | 18 | ## Train YOLOv5 19 | ### Single GPU 20 | Taking training YOLOv5-S on COCO as the example, 21 | ```Shell 22 | python train.py --cuda -d coco --root path/to/coco -m yolov5_s -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 23 | ``` 24 | 25 | ### Multi GPU 26 | Taking training YOLOv5 on COCO as the example, 27 | ```Shell 28 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov5_s -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 29 | ``` 30 | 31 | ## Test YOLOv5 32 | Taking testing YOLOv5 on COCO-val as the example, 33 | ```Shell 34 | python test.py --cuda -d coco --root path/to/coco -m yolov5_s --weight path/to/yolov5_coco.pth -size 640 --show 35 | ``` 36 | 37 | ## Evaluate YOLOv5 38 | Taking evaluating YOLOv5 on COCO-val as the example, 39 | ```Shell 40 | python eval.py --cuda -d coco --root path/to/coco -m yolov5_s --weight path/to/yolov5_coco.pth 41 | ``` 42 | 43 | ## Demo 44 | ### Detect with Image 45 | ```Shell 46 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov5_s --weight path/to/yolov5_coco.pth -size 640 --show 47 | ``` 48 | 49 | ### Detect with Video 50 | ```Shell 51 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov5_s --weight path/to/yolov5_coco.pth -size 640 --show --gif 52 | ``` 53 | 54 | ### Detect with Camera 55 | ```Shell 56 | python demo.py --mode camera --cuda -m yolov5_s --weight path/to/weight -size 640 --show --gif 57 | ``` 58 | -------------------------------------------------------------------------------- /models/detectors/yolov5/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov5 import YOLOv5 9 | 10 | 11 | # build object detector 12 | def build_yolov5(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv5(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = 
args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolov5/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov5Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov5Matcher(num_classes, 3, cfg['anchor_size'], cfg['anchor_thresh']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'][0].device 47 | fpn_strides = outputs['strides'] 48 | fmp_sizes = outputs['fmp_sizes'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_sizes=fmp_sizes, 54 | fpn_strides=fpn_strides, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = torch.cat(outputs['pred_obj'], dim=1).view(-1) # [BM,] 58 | pred_cls = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes) # [BM, C] 59 | pred_box = torch.cat(outputs['pred_box'], dim=1).view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = gt_objectness.view(-1).to(device).float() # [BM,] 62 | 
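# Predictions and matched targets are flattened to a single [B*M, ...] layout
# (M = total number of predictions per image across all FPN levels), so the
# positive-sample mask (gt_objectness > 0) computed below can index the
# classification and box-regression losses directly.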
gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov5/yolov5_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov5_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | # --------- Basic Parameters ---------- 13 | self.in_dim = in_dim 14 | self.num_cls_head=cfg['num_cls_head'] 15 | self.num_reg_head=cfg['num_reg_head'] 16 | 17 | # --------- Network Parameters ---------- 18 | ## cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=cfg['head_act'], 26 | norm_type=cfg['head_norm'], 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=cfg['head_act'], 33 | norm_type=cfg['head_norm'], 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | ## reg head 37 | reg_feats = [] 38 | self.reg_out_dim = max(out_dim, 64) 39 | for i in range(cfg['num_reg_head']): 40 | if i == 0: 41 | reg_feats.append( 42 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 43 | act_type=cfg['head_act'], 44 | norm_type=cfg['head_norm'], 45 | depthwise=cfg['head_depthwise']) 46 | ) 47 | else: 48 | reg_feats.append( 49 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 50 | act_type=cfg['head_act'], 51 | norm_type=cfg['head_norm'], 52 | depthwise=cfg['head_depthwise']) 53 | ) 54 | 55 | self.cls_feats = nn.Sequential(*cls_feats) 56 | self.reg_feats = nn.Sequential(*reg_feats) 57 | 58 | 59 | def forward(self, x): 60 | """ 61 | in_feats: (Tensor) [B, C, H, W] 62 | """ 63 | cls_feats = self.cls_feats(x) 64 | reg_feats = self.reg_feats(x) 65 | 66 | return 
cls_feats, reg_feats 67 | 68 | 69 | # build detection head 70 | def build_head(cfg, in_dim, out_dim, num_classes=80): 71 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 72 | 73 | return head 74 | -------------------------------------------------------------------------------- /models/detectors/yolov5/yolov5_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov5_basic import Conv 5 | 6 | 7 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 8 | class SPPF(nn.Module): 9 | """ 10 | This code referenced to https://github.com/ultralytics/yolov5 11 | """ 12 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='silu', norm_type='BN'): 13 | super().__init__() 14 | inter_dim = int(in_dim * expand_ratio) 15 | self.out_dim = out_dim 16 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 18 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 19 | 20 | def forward(self, x): 21 | x = self.cv1(x) 22 | y1 = self.m(x) 23 | y2 = self.m(y1) 24 | 25 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 26 | 27 | 28 | # SPPF block with CSP module 29 | class SPPFBlockCSP(nn.Module): 30 | """ 31 | CSP Spatial Pyramid Pooling Block 32 | """ 33 | def __init__(self, 34 | in_dim, 35 | out_dim, 36 | expand_ratio=0.5, 37 | pooling_size=5, 38 | act_type='silu', 39 | norm_type='BN', 40 | depthwise=False 41 | ): 42 | super(SPPFBlockCSP, self).__init__() 43 | inter_dim = int(in_dim * expand_ratio) 44 | self.out_dim = out_dim 45 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 47 | self.m = nn.Sequential( 48 | Conv(inter_dim, inter_dim, k=3, p=1, 49 | act_type=act_type, norm_type=norm_type, 50 | depthwise=depthwise), 51 | SPPF(inter_dim, 52 | inter_dim, 53 | expand_ratio=1.0, 54 | pooling_size=pooling_size, 55 | act_type=act_type, 56 | norm_type=norm_type), 57 | Conv(inter_dim, inter_dim, k=3, p=1, 58 | act_type=act_type, norm_type=norm_type, 59 | depthwise=depthwise) 60 | ) 61 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 62 | 63 | 64 | def forward(self, x): 65 | x1 = self.cv1(x) 66 | x2 = self.cv2(x) 67 | x3 = self.m(x2) 68 | y = self.cv3(torch.cat([x1, x3], dim=1)) 69 | 70 | return y 71 | 72 | 73 | def build_neck(cfg, in_dim, out_dim): 74 | model = cfg['neck'] 75 | print('==============================') 76 | print('Neck: {}'.format(model)) 77 | # build neck 78 | if model == 'sppf': 79 | neck = SPPF( 80 | in_dim=in_dim, 81 | out_dim=out_dim, 82 | expand_ratio=cfg['expand_ratio'], 83 | pooling_size=cfg['pooling_size'], 84 | act_type=cfg['neck_act'], 85 | norm_type=cfg['neck_norm'] 86 | ) 87 | elif model == 'csp_sppf': 88 | neck = SPPFBlockCSP( 89 | in_dim=in_dim, 90 | out_dim=out_dim, 91 | expand_ratio=cfg['expand_ratio'], 92 | pooling_size=cfg['pooling_size'], 93 | act_type=cfg['neck_act'], 94 | norm_type=cfg['neck_norm'], 95 | depthwise=cfg['neck_depthwise'] 96 | ) 97 | 98 | return neck 99 | -------------------------------------------------------------------------------- /models/detectors/yolov5/yolov5_pafpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import 
torch.nn.functional as F 4 | 5 | from .yolov5_basic import (Conv, build_reduce_layer, build_downsample_layer, build_fpn_block) 6 | 7 | 8 | # YOLO-Style PaFPN 9 | class Yolov5PaFPN(nn.Module): 10 | def __init__(self, cfg, in_dims=[256, 512, 1024], out_dim=None): 11 | super(Yolov5PaFPN, self).__init__() 12 | # --------------------------- Basic Parameters --------------------------- 13 | self.in_dims = in_dims 14 | c3, c4, c5 = in_dims 15 | width = cfg['width'] 16 | 17 | # --------------------------- Network Parameters --------------------------- 18 | ## top dwon 19 | ### P5 -> P4 20 | self.reduce_layer_1 = build_reduce_layer(cfg, c5, round(512*width)) 21 | self.top_down_layer_1 = build_fpn_block(cfg, c4 + round(512*width), round(512*width)) 22 | 23 | ### P4 -> P3 24 | self.reduce_layer_2 = build_reduce_layer(cfg, round(512*width), round(256*width)) 25 | self.top_down_layer_2 = build_fpn_block(cfg, c3 + round(256*width), round(256*width)) 26 | 27 | ## bottom up 28 | ### P3 -> P4 29 | self.downsample_layer_1 = build_downsample_layer(cfg, round(256*width), round(256*width)) 30 | self.bottom_up_layer_1 = build_fpn_block(cfg, round(256*width) + round(256*width), round(512*width)) 31 | 32 | ### P4 -> P5 33 | self.downsample_layer_2 = build_downsample_layer(cfg, round(512*width), round(512*width)) 34 | self.bottom_up_layer_2 = build_fpn_block(cfg, round(512*width) + round(512*width), round(1024*width)) 35 | 36 | ## output proj layers 37 | if out_dim is not None: 38 | self.out_layers = nn.ModuleList([ 39 | Conv(in_dim, out_dim, k=1, 40 | act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm']) 41 | for in_dim in [round(256*width), round(512*width), round(1024*width)] 42 | ]) 43 | self.out_dim = [out_dim] * 3 44 | else: 45 | self.out_layers = None 46 | self.out_dim = [round(256*width), round(512*width), round(1024*width)] 47 | 48 | 49 | def forward(self, features): 50 | c3, c4, c5 = features 51 | 52 | # Top down 53 | ## P5 -> P4 54 | c6 = self.reduce_layer_1(c5) 55 | c7 = F.interpolate(c6, scale_factor=2.0) 56 | c8 = torch.cat([c7, c4], dim=1) 57 | c9 = self.top_down_layer_1(c8) 58 | ## P4 -> P3 59 | c10 = self.reduce_layer_2(c9) 60 | c11 = F.interpolate(c10, scale_factor=2.0) 61 | c12 = torch.cat([c11, c3], dim=1) 62 | c13 = self.top_down_layer_2(c12) 63 | 64 | # Bottom up 65 | ## p3 -> P4 66 | c14 = self.downsample_layer_1(c13) 67 | c15 = torch.cat([c14, c10], dim=1) 68 | c16 = self.bottom_up_layer_1(c15) 69 | ## P4 -> P5 70 | c17 = self.downsample_layer_2(c16) 71 | c18 = torch.cat([c17, c6], dim=1) 72 | c19 = self.bottom_up_layer_2(c18) 73 | 74 | out_feats = [c13, c16, c19] # [P3, P4, P5] 75 | 76 | # output proj layers 77 | if self.out_layers is not None: 78 | out_feats_proj = [] 79 | for feat, layer in zip(out_feats, self.out_layers): 80 | out_feats_proj.append(layer(feat)) 81 | return out_feats_proj 82 | 83 | return out_feats 84 | 85 | 86 | def build_fpn(cfg, in_dims, out_dim=None): 87 | model = cfg['fpn'] 88 | # build pafpn 89 | if model == 'yolov5_pafpn': 90 | fpn_net = Yolov5PaFPN(cfg, in_dims, out_dim) 91 | 92 | return fpn_net -------------------------------------------------------------------------------- /models/detectors/yolov7/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv7: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |-------------|---------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv7-Tiny | ELANNet-Tiny | 8xb16 | 640 | 39.5 | 58.5 | 22.6 | 7.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_tiny_coco.pth) | 6 | | YOLOv7 | ELANNet-Large | 8xb16 | 640 | 49.5 | 68.8 | 144.6 | 44.0 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_coco.pth) | 7 | | YOLOv7-X | ELANNet-Huge | | 640 | | | | | | 8 | 9 | - For training, we train `YOLOv7` and `YOLOv7-Tiny` with 300 epochs on 8 GPUs. 10 | - For data augmentation, we use the [YOLOX-style](https://github.com/Megvii-BaseDetection/YOLOX) augmentation including the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation. 11 | - For optimizer, we use `AdamW` with weight decay 0.05 and per image learning rate 0.001 / 64. 12 | - For learning rate scheduler, we use Cosine decay scheduler. 13 | - For YOLOv7's structure, we replace the coupled head with the YOLOX-style decoupled head. 14 | - I think YOLOv7 uses too many training tricks, such as `anchor box`, `AuxiliaryHead`, `RepConv`, `Mosaic9x` and so on, making the picture of YOLO too complicated, which is against the development concept of the YOLO series. Otherwise, why don't we use the DETR series? It's nothing more than doing some acceleration optimization on DETR. Therefore, I was faithful to my own technical aesthetics and realized a cleaner and simpler YOLOv7, but without the blessing of so many tricks, I did not reproduce all the performance, which is a pity. 15 | - I have no more GPUs to train my `YOLOv7-X`. 16 | 17 | ## Train YOLOv7 18 | ### Single GPU 19 | Taking training YOLOv7-Tiny on COCO as the example, 20 | ```Shell 21 | python train.py --cuda -d coco --root path/to/coco -m yolov7_tiny -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 22 | ``` 23 | 24 | ### Multi GPU 25 | Taking training YOLOv7-Tiny on COCO as the example, 26 | ```Shell 27 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov7_tiny -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 28 | ``` 29 | 30 | ## Test YOLOv7 31 | Taking testing YOLOv7-Tiny on COCO-val as the example, 32 | ```Shell 33 | python test.py --cuda -d coco --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth -size 640 -vt 0.4 --show 34 | ``` 35 | 36 | ## Evaluate YOLOv7 37 | Taking evaluating YOLOv7-Tiny on COCO-val as the example, 38 | ```Shell 39 | python eval.py --cuda -d coco-val --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth 40 | ``` 41 | 42 | ## Demo 43 | ### Detect with Image 44 | ```Shell 45 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show 46 | ``` 47 | 48 | ### Detect with Video 49 | ```Shell 50 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif 51 | ``` 52 | 53 | ### Detect with Camera 54 | ```Shell 55 | python demo.py --mode camera --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif 56 | ``` 57 | -------------------------------------------------------------------------------- /models/detectors/yolov7/build.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov7 import YOLOv7 9 | 10 | 11 | # build object detector 12 | def build_yolov7(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv7(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 
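# Initialization scheme used by this builder: the obj/cls prediction biases are set to
# -log((1 - 0.01) / 0.01) ~= -4.6, so every location starts with a sigmoid score of
# roughly 0.01 (the RetinaNet-style prior that keeps early training stable), while the
# box-regression layer starts from zero weights and a bias of 1.0.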
57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(args, cfg, device, num_classes) 65 | 66 | return model, criterion 67 | -------------------------------------------------------------------------------- /models/detectors/yolov7/yolov7_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov7_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov7/yolov7_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov7_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = 
self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | # SPPF block with CSP module 28 | class SPPFBlockCSP(nn.Module): 29 | """ 30 | CSP Spatial Pyramid Pooling Block 31 | """ 32 | def __init__(self, 33 | in_dim, 34 | out_dim, 35 | expand_ratio=0.5, 36 | pooling_size=5, 37 | act_type='lrelu', 38 | norm_type='BN', 39 | depthwise=False 40 | ): 41 | super(SPPFBlockCSP, self).__init__() 42 | inter_dim = int(in_dim * expand_ratio) 43 | self.out_dim = out_dim 44 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 45 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.m = nn.Sequential( 47 | Conv(inter_dim, inter_dim, k=3, p=1, 48 | act_type=act_type, norm_type=norm_type, 49 | depthwise=depthwise), 50 | SPPF(inter_dim, 51 | inter_dim, 52 | expand_ratio=1.0, 53 | pooling_size=pooling_size, 54 | act_type=act_type, 55 | norm_type=norm_type), 56 | Conv(inter_dim, inter_dim, k=3, p=1, 57 | act_type=act_type, norm_type=norm_type, 58 | depthwise=depthwise) 59 | ) 60 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 61 | 62 | 63 | def forward(self, x): 64 | x1 = self.cv1(x) 65 | x2 = self.cv2(x) 66 | x3 = self.m(x2) 67 | y = self.cv3(torch.cat([x1, x3], dim=1)) 68 | 69 | return y 70 | 71 | 72 | def build_neck(cfg, in_dim, out_dim): 73 | model = cfg['neck'] 74 | print('==============================') 75 | print('Neck: {}'.format(model)) 76 | # build neck 77 | if model == 'sppf': 78 | neck = SPPF( 79 | in_dim=in_dim, 80 | out_dim=out_dim, 81 | expand_ratio=cfg['expand_ratio'], 82 | pooling_size=cfg['pooling_size'], 83 | act_type=cfg['neck_act'], 84 | norm_type=cfg['neck_norm'] 85 | ) 86 | elif model == 'csp_sppf': 87 | neck = SPPFBlockCSP( 88 | in_dim=in_dim, 89 | out_dim=out_dim, 90 | expand_ratio=cfg['expand_ratio'], 91 | pooling_size=cfg['pooling_size'], 92 | act_type=cfg['neck_act'], 93 | norm_type=cfg['neck_norm'], 94 | depthwise=cfg['neck_depthwise'] 95 | ) 96 | 97 | return neck 98 | -------------------------------------------------------------------------------- /models/detectors/yolov8/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv8: 2 | 3 | | Model | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |-----------|--------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv8-N | 8xb16 | 640 | 37.0 | 52.9 | 8.8 | 3.2 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_n_coco.pth) | 6 | | YOLOv8-S | 8xb16 | 640 | 43.5 | 60.4 | 28.8 | 11.2 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_s_coco.pth) | 7 | | YOLOv8-M | 8xb16 | 640 | | | | | | 8 | | YOLOv8-L | 8xb16 | 640 | 50.7 | 68.3 | 165.7 | 43.7 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_l_coco.pth) | 9 | 10 | - For training, we train YOLOv8 series with 500 epochs on COCO. 11 | - For data augmentation, we use the random affine, hsv augmentation, mosaic augmentation and mixup augmentation, following the setting of [YOLOv8](https://github.com/ultralytics/yolov8). 12 | - For optimizer, we use AdamW with weight decay 0.05 and base per image lr 0.001 / 64, which is different from the official YOLOv8. We have tried SGD, but it has weakened performance. For example, when using SGD, YOLOv8-N's AP was only 35.8%, lower than the current result (36.8 %), perhaps because some hyperparameters were not set properly. 13 | - For learning rate scheduler, we use linear decay scheduler. 14 | 15 | 16 | ## Train YOLOv8 17 | ### Single GPU 18 | Taking training YOLOv8-S on COCO as the example, 19 | ```Shell 20 | python train.py --cuda -d coco --root path/to/coco -m yolov8_s -bs 16 -size 640 --wp_epoch 3 --max_epoch 500 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 21 | ``` 22 | 23 | ### Multi GPU 24 | Taking training YOLOv8 on COCO as the example, 25 | ```Shell 26 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov8_s -bs 128 -size 640 --wp_epoch 3 --max_epoch 500 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 27 | ``` 28 | 29 | ## Test YOLOv8 30 | Taking testing YOLOv8 on COCO-val as the example, 31 | ```Shell 32 | python test.py --cuda -d coco --root path/to/coco -m yolov8_s --weight path/to/yolov8.pth -size 640 -vt 0.4 --show 33 | ``` 34 | 35 | ## Evaluate YOLOv8 36 | Taking evaluating YOLOv8 on COCO-val as the example, 37 | ```Shell 38 | python eval.py --cuda -d coco-val --root path/to/coco -m yolov8_s --weight path/to/yolov8.pth 39 | ``` 40 | 41 | ## Demo 42 | ### Detect with Image 43 | ```Shell 44 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov8_s --weight path/to/weight -size 640 -vt 0.4 --show 45 | ``` 46 | 47 | ### Detect with Video 48 | ```Shell 49 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov8_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 50 | ``` 51 | 52 | ### Detect with Camera 53 | ```Shell 54 | python demo.py --mode camera --cuda -m yolov8_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 55 | ``` 56 | -------------------------------------------------------------------------------- /models/detectors/yolov8/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov8 import YOLOv8 9 | 10 | 11 | # build object detector 12 | def build_yolov8(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | 
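# Note: unlike build_yolov4 / build_yolov5 / build_yolov7 above, this builder only
# adjusts BatchNorm eps/momentum below and does not re-initialize any prediction-layer
# biases here.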
print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv8(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | 38 | # -------------- Build criterion -------------- 39 | criterion = None 40 | if trainable: 41 | # build criterion for training 42 | criterion = build_criterion(cfg, device, num_classes) 43 | 44 | return model, criterion 45 | -------------------------------------------------------------------------------- /models/detectors/yolov8/yolov8_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov8_basic import Conv 5 | 6 | 7 | # Single-level Head 8 | class SingleLevelHead(nn.Module): 9 | def __init__(self, 10 | in_dim :int = 256, 11 | cls_head_dim :int = 256, 12 | reg_head_dim :int = 256, 13 | num_cls_head :int = 2, 14 | num_reg_head :int = 2, 15 | act_type :str = "silu", 16 | norm_type :str = "BN", 17 | depthwise :bool = False): 18 | super().__init__() 19 | # --------- Basic Parameters ---------- 20 | self.in_dim = in_dim 21 | self.num_cls_head = num_cls_head 22 | self.num_reg_head = num_reg_head 23 | self.act_type = act_type 24 | self.norm_type = norm_type 25 | self.depthwise = depthwise 26 | 27 | # --------- Network Parameters ---------- 28 | ## cls head 29 | cls_feats = [] 30 | self.cls_head_dim = cls_head_dim 31 | for i in range(num_cls_head): 32 | if i == 0: 33 | cls_feats.append( 34 | Conv(in_dim, self.cls_head_dim, k=3, p=1, s=1, 35 | act_type=act_type, 36 | norm_type=norm_type, 37 | depthwise=depthwise) 38 | ) 39 | else: 40 | cls_feats.append( 41 | Conv(self.cls_head_dim, self.cls_head_dim, k=3, p=1, s=1, 42 | act_type=act_type, 43 | norm_type=norm_type, 44 | depthwise=depthwise) 45 | ) 46 | ## reg head 47 | reg_feats = [] 48 | self.reg_head_dim = reg_head_dim 49 | for i in range(num_reg_head): 50 | if i == 0: 51 | reg_feats.append( 52 | Conv(in_dim, self.reg_head_dim, k=3, p=1, s=1, 53 | act_type=act_type, 54 | norm_type=norm_type, 55 | depthwise=depthwise) 56 | ) 57 | else: 58 | reg_feats.append( 59 | Conv(self.reg_head_dim, self.reg_head_dim, k=3, p=1, s=1, 60 | act_type=act_type, 61 | norm_type=norm_type, 62 | depthwise=depthwise) 63 | ) 64 | self.cls_feats = nn.Sequential(*cls_feats) 65 | self.reg_feats = nn.Sequential(*reg_feats) 66 | 67 | self.init_weights() 68 | 69 | def init_weights(self): 70 | """Initialize the parameters.""" 71 | for m in self.modules(): 72 | if isinstance(m, torch.nn.Conv2d): 73 | # In order to be consistent with the source code, 74 | # reset the Conv2d initialization parameters 75 | m.reset_parameters() 76 | 77 | def forward(self, x): 78 | """ 79 | in_feats: (Tensor) [B, C, H, W] 80 | """ 81 | cls_feats = self.cls_feats(x) 82 | reg_feats = self.reg_feats(x) 83 | 84 | return cls_feats, reg_feats 85 | 86 | # Multi-level Head 87 | class MultiLevelHead(nn.Module): 88 | def __init__(self, cfg, in_dims, num_levels=3, num_classes=80, reg_max=16): 89 | super().__init__() 90 | ## 
----------- Network Parameters ----------- 91 | self.multi_level_heads = nn.ModuleList( 92 | [SingleLevelHead(in_dim = in_dims[level], 93 | cls_head_dim = max(in_dims[0], min(num_classes, 100)), 94 | reg_head_dim = max(in_dims[0]//4, 16, 4*reg_max), 95 | num_cls_head = cfg['num_cls_head'], 96 | num_reg_head = cfg['num_reg_head'], 97 | act_type = cfg['head_act'], 98 | norm_type = cfg['head_norm'], 99 | depthwise = cfg['head_depthwise']) 100 | for level in range(num_levels) 101 | ]) 102 | # --------- Basic Parameters ---------- 103 | self.in_dims = in_dims 104 | self.cls_head_dim = self.multi_level_heads[0].cls_head_dim 105 | self.reg_head_dim = self.multi_level_heads[0].reg_head_dim 106 | 107 | 108 | def forward(self, feats): 109 | """ 110 | feats: List[(Tensor)] [[B, C, H, W], ...] 111 | """ 112 | cls_feats = [] 113 | reg_feats = [] 114 | for feat, head in zip(feats, self.multi_level_heads): 115 | # ---------------- Pred ---------------- 116 | cls_feat, reg_feat = head(feat) 117 | 118 | cls_feats.append(cls_feat) 119 | reg_feats.append(reg_feat) 120 | 121 | return cls_feats, reg_feats 122 | 123 | 124 | # build detection head 125 | def build_det_head(cfg, in_dims, num_levels=3, num_classes=80, reg_max=16): 126 | if cfg['head'] == 'decoupled_head': 127 | head = MultiLevelHead(cfg, in_dims, num_levels, num_classes, reg_max) 128 | 129 | return head 130 | -------------------------------------------------------------------------------- /models/detectors/yolov8/yolov8_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov8_basic import Conv 5 | 6 | 7 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 8 | class SPPF(nn.Module): 9 | """ 10 | This code referenced to https://github.com/ultralytics/yolov5 11 | """ 12 | def __init__(self, cfg, in_dim, out_dim, expand_ratio=0.5): 13 | super().__init__() 14 | inter_dim = int(in_dim * expand_ratio) 15 | self.out_dim = out_dim 16 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 17 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 18 | self.m = nn.MaxPool2d(kernel_size=cfg['pooling_size'], stride=1, padding=cfg['pooling_size'] // 2) 19 | 20 | def forward(self, x): 21 | x = self.cv1(x) 22 | y1 = self.m(x) 23 | y2 = self.m(y1) 24 | 25 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 26 | 27 | 28 | # SPPF block with CSP module 29 | class SPPFBlockCSP(nn.Module): 30 | """ 31 | CSP Spatial Pyramid Pooling Block 32 | """ 33 | def __init__(self, cfg, in_dim, out_dim, expand_ratio): 34 | super(SPPFBlockCSP, self).__init__() 35 | inter_dim = int(in_dim * expand_ratio) 36 | self.out_dim = out_dim 37 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 38 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 39 | self.m = nn.Sequential( 40 | Conv(inter_dim, inter_dim, k=3, p=1, 41 | act_type=cfg['neck_act'], norm_type=cfg['neck_norm'], 42 | depthwise=cfg['neck_depthwise']), 43 | SPPF(cfg, inter_dim, inter_dim, expand_ratio=1.0), 44 | Conv(inter_dim, inter_dim, k=3, p=1, 45 | act_type=cfg['neck_act'], norm_type=cfg['neck_norm'], 46 | depthwise=cfg['neck_depthwise']) 47 | ) 48 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 49 | 50 | 51 | def forward(self, x): 52 | x1 = self.cv1(x) 53 | x2 = self.cv2(x) 54 
| x3 = self.m(x2) 55 | y = self.cv3(torch.cat([x1, x3], dim=1)) 56 | 57 | return y 58 | 59 | 60 | def build_neck(cfg, in_dim, out_dim): 61 | model = cfg['neck'] 62 | print('==============================') 63 | print('Neck: {}'.format(model)) 64 | # build neck 65 | if model == 'sppf': 66 | neck = SPPF(cfg, in_dim, out_dim, cfg['neck_expand_ratio']) 67 | elif model == 'csp_sppf': 68 | neck = SPPFBlockCSP(cfg, in_dim, out_dim, cfg['neck_expand_ratio']) 69 | 70 | return neck 71 | -------------------------------------------------------------------------------- /models/detectors/yolox/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX: 2 | 3 | | Model | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |---------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOX-S | 8xb8 | 640 | 40.1 | 60.3 | 26.8 | 8.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolox_s_coco.pth) | 6 | | YOLOX-M | 8xb8 | 640 | 46.2 | 66.0 | 74.3 | 25.4 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolox_m_coco.pth) | 7 | | YOLOX-L | 8xb8 | 640 | 48.7 | 68.0 | 155.4 | 54.2 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolox_l_coco.pth) | 8 | | YOLOX-X | 8xb8 | 640 | | | | | | 9 | 10 | - For training, we train YOLOX series with 300 epochs on COCO. 11 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation. 12 | - For optimizer, we use SGD with weight decay 0.0005 and base per image lr 0.01 / 64,. 13 | - For learning rate scheduler, we use Cosine decay scheduler. 14 | 15 | ## Train YOLOX 16 | ### Single GPU 17 | Taking training YOLOX-S on COCO as the example, 18 | ```Shell 19 | python train.py --cuda -d coco --root path/to/coco -m yolox_s -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 20 | ``` 21 | 22 | ### Multi GPU 23 | Taking training YOLOX-S on COCO as the example, 24 | ```Shell 25 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolox_s -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 26 | ``` 27 | 28 | ## Test YOLOX 29 | Taking testing YOLOX-S on COCO-val as the example, 30 | ```Shell 31 | python test.py --cuda -d coco --root path/to/coco -m yolox_s --weight path/to/yolox_s.pth -size 640 -vt 0.4 --show 32 | ``` 33 | 34 | ## Evaluate YOLOX 35 | Taking evaluating YOLOX-S on COCO-val as the example, 36 | ```Shell 37 | python eval.py --cuda -d coco-val --root path/to/coco -m yolox_s --weight path/to/yolox_s.pth 38 | ``` 39 | 40 | ## Demo 41 | ### Detect with Image 42 | ```Shell 43 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolox_s --weight path/to/weight -size 640 -vt 0.4 --show 44 | ``` 45 | 46 | ### Detect with Video 47 | ```Shell 48 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolox_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 49 | ``` 50 | 51 | ### Detect with Camera 52 | ```Shell 53 | python demo.py --mode camera --cuda -m yolox_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 54 | ``` -------------------------------------------------------------------------------- /models/detectors/yolox/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolox import YOLOX 9 | 10 | 11 | # build object detector 12 | def build_yolox(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOX(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = 
args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(args, cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolox/yolox_backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | try: 5 | from .yolox_basic import Conv, CSPBlock 6 | from .yolox_neck import SPPF 7 | except: 8 | from yolox_basic import Conv, CSPBlock 9 | from yolox_neck import SPPF 10 | 11 | 12 | # CSPDarkNet 13 | class CSPDarkNet(nn.Module): 14 | def __init__(self, depth=1.0, width=1.0, act_type='silu', norm_type='BN', depthwise=False): 15 | super(CSPDarkNet, self).__init__() 16 | self.feat_dims = [round(64 * width), round(128 * width), round(256 * width), round(512 * width), round(1024 * width)] 17 | # P1/2 18 | self.layer_1 = Conv(3, self.feat_dims[0], k=6, p=2, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise) 19 | # P2/4 20 | self.layer_2 = nn.Sequential( 21 | Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 22 | CSPBlock(in_dim = self.feat_dims[1], 23 | out_dim = self.feat_dims[1], 24 | expand_ratio = 0.5, 25 | nblocks = round(3*depth), 26 | shortcut = True, 27 | act_type = act_type, 28 | norm_type = norm_type, 29 | depthwise = depthwise) 30 | ) 31 | # P3/8 32 | self.layer_3 = nn.Sequential( 33 | Conv(self.feat_dims[1], self.feat_dims[2], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 34 | CSPBlock(in_dim = self.feat_dims[2], 35 | out_dim = self.feat_dims[2], 36 | expand_ratio = 0.5, 37 | nblocks = round(9*depth), 38 | shortcut = True, 39 | act_type = act_type, 40 | norm_type = norm_type, 41 | depthwise = depthwise) 42 | ) 43 | # P4/16 44 | self.layer_4 = nn.Sequential( 45 | Conv(self.feat_dims[2], self.feat_dims[3], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 46 | CSPBlock(in_dim = self.feat_dims[3], 47 | out_dim = self.feat_dims[3], 48 | expand_ratio = 0.5, 49 | nblocks = round(9*depth), 50 | shortcut = True, 51 | act_type = act_type, 52 | norm_type = norm_type, 53 | depthwise = depthwise) 54 | ) 55 | # P5/32 56 | self.layer_5 = nn.Sequential( 57 
| Conv(self.feat_dims[3], self.feat_dims[4], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 58 | SPPF(self.feat_dims[4], self.feat_dims[4], expand_ratio=0.5), 59 | CSPBlock(in_dim = self.feat_dims[4], 60 | out_dim = self.feat_dims[4], 61 | expand_ratio = 0.5, 62 | nblocks = round(3*depth), 63 | shortcut = True, 64 | act_type = act_type, 65 | norm_type = norm_type, 66 | depthwise = depthwise) 67 | ) 68 | 69 | 70 | def forward(self, x): 71 | c1 = self.layer_1(x) 72 | c2 = self.layer_2(c1) 73 | c3 = self.layer_3(c2) 74 | c4 = self.layer_4(c3) 75 | c5 = self.layer_5(c4) 76 | 77 | outputs = [c3, c4, c5] 78 | 79 | return outputs 80 | 81 | 82 | # ---------------------------- Functions ---------------------------- 83 | ## build CSPDarkNet 84 | def build_backbone(cfg): 85 | # Build backbone 86 | backbone = CSPDarkNet(cfg['depth'], cfg['width'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw']) 87 | feat_dims = backbone.feat_dims[-3:] 88 | 89 | return backbone, feat_dims 90 | 91 | 92 | if __name__ == '__main__': 93 | import time 94 | from thop import profile 95 | cfg = { 96 | 'bk_act': 'lrelu', 97 | 'bk_norm': 'BN', 98 | 'bk_dpw': False, 99 | 'p6_feat': False, 100 | 'p7_feat': False, 101 | 'width': 1.0, 102 | 'depth': 1.0, 103 | } 104 | model, feats = build_backbone(cfg) 105 | x = torch.randn(1, 3, 640, 640) 106 | t0 = time.time() 107 | outputs = model(x) 108 | t1 = time.time() 109 | print('Time: ', t1 - t0) 110 | for out in outputs: 111 | print(out.shape) 112 | 113 | print('==============================') 114 | flops, params = profile(model, inputs=(x, ), verbose=False) 115 | print('==============================') 116 | print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2)) 117 | print('Params : {:.2f} M'.format(params / 1e6)) -------------------------------------------------------------------------------- /models/detectors/yolox/yolox_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolox_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | # --------- Basic Parameters ---------- 13 | self.in_dim = in_dim 14 | self.num_cls_head=cfg['num_cls_head'] 15 | self.num_reg_head=cfg['num_reg_head'] 16 | 17 | # --------- Network Parameters ---------- 18 | ## cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=cfg['head_act'], 26 | norm_type=cfg['head_norm'], 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=cfg['head_act'], 33 | norm_type=cfg['head_norm'], 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | ## reg head 37 | reg_feats = [] 38 | self.reg_out_dim = max(out_dim, 64) 39 | for i in range(cfg['num_reg_head']): 40 | if i == 0: 41 | reg_feats.append( 42 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 43 | act_type=cfg['head_act'], 44 | norm_type=cfg['head_norm'], 45 | depthwise=cfg['head_depthwise']) 46 | ) 47 | else: 48 | reg_feats.append( 49 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 50 | act_type=cfg['head_act'], 51 | norm_type=cfg['head_norm'], 52 | depthwise=cfg['head_depthwise']) 53 | ) 54 | 55 | self.cls_feats = 
nn.Sequential(*cls_feats) 56 | self.reg_feats = nn.Sequential(*reg_feats) 57 | 58 | 59 | def forward(self, x): 60 | """ 61 | in_feats: (Tensor) [B, C, H, W] 62 | """ 63 | cls_feats = self.cls_feats(x) 64 | reg_feats = self.reg_feats(x) 65 | 66 | return cls_feats, reg_feats 67 | 68 | 69 | # build detection head 70 | def build_head(cfg, in_dim, out_dim, num_classes=80): 71 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 72 | 73 | return head 74 | -------------------------------------------------------------------------------- /models/detectors/yolox/yolox_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolox_basic import Conv 5 | 6 | 7 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 8 | class SPPF(nn.Module): 9 | """ 10 | This code referenced to https://github.com/ultralytics/yolov5 11 | """ 12 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='silu', norm_type='BN'): 13 | super().__init__() 14 | inter_dim = int(in_dim * expand_ratio) 15 | self.out_dim = out_dim 16 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 18 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 19 | 20 | def forward(self, x): 21 | x = self.cv1(x) 22 | y1 = self.m(x) 23 | y2 = self.m(y1) 24 | 25 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 26 | 27 | 28 | # SPPF block with CSP module 29 | class SPPFBlockCSP(nn.Module): 30 | """ 31 | CSP Spatial Pyramid Pooling Block 32 | """ 33 | def __init__(self, 34 | in_dim, 35 | out_dim, 36 | expand_ratio=0.5, 37 | pooling_size=5, 38 | act_type='silu', 39 | norm_type='BN', 40 | depthwise=False 41 | ): 42 | super(SPPFBlockCSP, self).__init__() 43 | inter_dim = int(in_dim * expand_ratio) 44 | self.out_dim = out_dim 45 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 47 | self.m = nn.Sequential( 48 | Conv(inter_dim, inter_dim, k=3, p=1, 49 | act_type=act_type, norm_type=norm_type, 50 | depthwise=depthwise), 51 | SPPF(inter_dim, 52 | inter_dim, 53 | expand_ratio=1.0, 54 | pooling_size=pooling_size, 55 | act_type=act_type, 56 | norm_type=norm_type), 57 | Conv(inter_dim, inter_dim, k=3, p=1, 58 | act_type=act_type, norm_type=norm_type, 59 | depthwise=depthwise) 60 | ) 61 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 62 | 63 | 64 | def forward(self, x): 65 | x1 = self.cv1(x) 66 | x2 = self.cv2(x) 67 | x3 = self.m(x2) 68 | y = self.cv3(torch.cat([x1, x3], dim=1)) 69 | 70 | return y 71 | 72 | 73 | def build_neck(cfg, in_dim, out_dim): 74 | model = cfg['neck'] 75 | print('==============================') 76 | print('Neck: {}'.format(model)) 77 | # build neck 78 | if model == 'sppf': 79 | neck = SPPF( 80 | in_dim=in_dim, 81 | out_dim=out_dim, 82 | expand_ratio=cfg['expand_ratio'], 83 | pooling_size=cfg['pooling_size'], 84 | act_type=cfg['neck_act'], 85 | norm_type=cfg['neck_norm'] 86 | ) 87 | elif model == 'csp_sppf': 88 | neck = SPPFBlockCSP( 89 | in_dim=in_dim, 90 | out_dim=out_dim, 91 | expand_ratio=cfg['expand_ratio'], 92 | pooling_size=cfg['pooling_size'], 93 | act_type=cfg['neck_act'], 94 | norm_type=cfg['neck_norm'], 95 | depthwise=cfg['neck_depthwise'] 96 | ) 97 | 98 | return neck 99 | 
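Below is a minimal, hypothetical usage sketch for the `build_neck` factory defined above; it is not part of the repository. It assumes the repository root is on `PYTHONPATH` so the package-style import resolves, and the cfg keys simply mirror the ones `build_neck` reads, with illustrative values rather than the project's defaults.

```python
# Hypothetical sketch: build the CSP-SPPF neck and push a dummy C5 feature map through it.
# Assumes this is run from the repository root so the package import resolves.
import torch
from models.detectors.yolox.yolox_neck import build_neck

neck_cfg = {
    'neck': 'csp_sppf',       # or 'sppf' for the plain SPPF variant
    'expand_ratio': 0.5,
    'pooling_size': 5,
    'neck_act': 'silu',
    'neck_norm': 'BN',
    'neck_depthwise': False,  # only read by the 'csp_sppf' branch
}
neck = build_neck(neck_cfg, in_dim=512, out_dim=512)

x = torch.randn(1, 512, 20, 20)   # e.g. a C5 map for a 640x640 input with width=0.5
y = neck(x)
print(y.shape)                    # torch.Size([1, 512, 20, 20]); spatial size is preserved
```

Note that the backbone above already applies the plain `SPPF` inside its last stage, while this factory lets a model config choose either variant through the `neck` key.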
-------------------------------------------------------------------------------- /models/detectors/yolox/yolox_pafpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .yolox_basic import (Conv, build_reduce_layer, build_downsample_layer, build_fpn_block) 6 | 7 | 8 | # YOLO-Style PaFPN 9 | class YoloxPaFPN(nn.Module): 10 | def __init__(self, cfg, in_dims=[256, 512, 1024], out_dim=None): 11 | super(YoloxPaFPN, self).__init__() 12 | # --------------------------- Basic Parameters --------------------------- 13 | self.in_dims = in_dims 14 | c3, c4, c5 = in_dims 15 | width = cfg['width'] 16 | 17 | # --------------------------- Network Parameters --------------------------- 18 | ## top dwon 19 | ### P5 -> P4 20 | self.reduce_layer_1 = build_reduce_layer(cfg, c5, round(512*width)) 21 | self.top_down_layer_1 = build_fpn_block(cfg, c4 + round(512*width), round(512*width)) 22 | 23 | ### P4 -> P3 24 | self.reduce_layer_2 = build_reduce_layer(cfg, round(512*width), round(256*width)) 25 | self.top_down_layer_2 = build_fpn_block(cfg, c3 + round(256*width), round(256*width)) 26 | 27 | ## bottom up 28 | ### P3 -> P4 29 | self.reduce_layer_3 = build_downsample_layer(cfg, round(256*width), round(256*width)) 30 | self.bottom_up_layer_1 = build_fpn_block(cfg, round(256*width) + round(256*width), round(512*width)) 31 | 32 | ### P4 -> P5 33 | self.reduce_layer_4 = build_downsample_layer(cfg, round(512*width), round(512*width)) 34 | self.bottom_up_layer_2 = build_fpn_block(cfg, round(512*width) + round(512*width), round(1024*width)) 35 | 36 | ## output proj layers 37 | if out_dim is not None: 38 | self.out_layers = nn.ModuleList([ 39 | Conv(in_dim, out_dim, k=1, 40 | act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm']) 41 | for in_dim in [round(256*width), round(512*width), round(1024*width)] 42 | ]) 43 | self.out_dim = [out_dim] * 3 44 | else: 45 | self.out_layers = None 46 | self.out_dim = [round(256*width), round(512*width), round(1024*width)] 47 | 48 | 49 | def forward(self, features): 50 | c3, c4, c5 = features 51 | 52 | # Top down 53 | ## P5 -> P4 54 | c6 = self.reduce_layer_1(c5) 55 | c7 = F.interpolate(c6, scale_factor=2.0) 56 | c8 = torch.cat([c7, c4], dim=1) 57 | c9 = self.top_down_layer_1(c8) 58 | ## P4 -> P3 59 | c10 = self.reduce_layer_2(c9) 60 | c11 = F.interpolate(c10, scale_factor=2.0) 61 | c12 = torch.cat([c11, c3], dim=1) 62 | c13 = self.top_down_layer_2(c12) 63 | 64 | # Bottom up 65 | ## p3 -> P4 66 | c14 = self.reduce_layer_3(c13) 67 | c15 = torch.cat([c14, c10], dim=1) 68 | c16 = self.bottom_up_layer_1(c15) 69 | ## P4 -> P5 70 | c17 = self.reduce_layer_4(c16) 71 | c18 = torch.cat([c17, c6], dim=1) 72 | c19 = self.bottom_up_layer_2(c18) 73 | 74 | out_feats = [c13, c16, c19] # [P3, P4, P5] 75 | 76 | # output proj layers 77 | if self.out_layers is not None: 78 | out_feats_proj = [] 79 | for feat, layer in zip(out_feats, self.out_layers): 80 | out_feats_proj.append(layer(feat)) 81 | return out_feats_proj 82 | 83 | return out_feats 84 | 85 | 86 | def build_fpn(cfg, in_dims, out_dim=None): 87 | model = cfg['fpn'] 88 | # build pafpn 89 | if model == 'yolox_pafpn': 90 | fpn_net = YoloxPaFPN(cfg, in_dims, out_dim) 91 | 92 | return fpn_net 93 | -------------------------------------------------------------------------------- /models/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | from .byte_tracker.build import 
build_byte_tracker 2 | 3 | 4 | 5 | def build_tracker(args): 6 | if args.tracker == 'byte_tracker': 7 | return build_byte_tracker(args) 8 | else: 9 | raise NotImplementedError 10 | -------------------------------------------------------------------------------- /models/trackers/byte_tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /models/trackers/byte_tracker/build.py: -------------------------------------------------------------------------------- 1 | from .byte_tracker import ByteTracker 2 | 3 | 4 | def build_byte_tracker(args): 5 | tracker = ByteTracker( 6 | track_thresh=args.track_thresh, 7 | track_buffer=args.track_buffer, 8 | frame_rate=args.fps, 9 | match_thresh=args.match_thresh, 10 | mot20=args.mot20 11 | ) 12 | 13 | return tracker 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | 3 | torchvision 4 | 5 | opencv-python 6 | 7 | thop 8 | 9 | scipy 10 | 11 | matplotlib 12 | 13 | numpy 14 | 15 | imageio 16 | 17 | pycocotools 18 | 19 | onnxsim 20 | 21 | onnxruntime 22 | 23 | openvino 24 | 25 | loguru 26 | 27 | albumentations 28 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/tools/__init__.py -------------------------------------------------------------------------------- /tools/clean_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | 5 | if __name__ == "__main__": 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description='COCO-Dataset') 9 | 10 | # --------------- opt parameters --------------- 11 | parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/COCO/', 12 | help='data root') 13 | parser.add_argument('--image_set', type=str, default='val', 14 | help='augmentation type') 15 | parser.add_argument('--task', type=str, default='det', 16 | help='augmentation type') 17 | 18 | args = parser.parse_args() 19 | 20 | # --------------- load json --------------- 21 | if args.task == 'det': 22 | task_prefix = 'instances_{}2017.json' 23 | 
clean_task_prefix = 'instances_{}2017_clean.json' 24 | elif args.task == 'pos': 25 | task_prefix = 'person_keypoints_{}2017.json' 26 | clean_task_prefix = 'person_keypoints_{}2017_clean.json' 27 | else: 28 | raise NotImplementedError('Unkown task !') 29 | 30 | json_path = os.path.join(args.root, 'annotations', task_prefix.format(args.image_set)) 31 | 32 | clean_json_file = dict() 33 | with open(json_path, 'r') as file: 34 | json_file = json.load(file) 35 | # json_file is a Dict: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories']) 36 | clean_json_file['info'] = json_file['info'] 37 | clean_json_file['licenses'] = json_file['licenses'] 38 | clean_json_file['categories'] = json_file['categories'] 39 | 40 | images_list = json_file['images'] 41 | annots_list = json_file['annotations'] 42 | num_images = len(images_list) 43 | 44 | # -------------- Filter annotations -------------- 45 | print("Processing annotations ...") 46 | valid_image_ids = [] 47 | clean_annots_list = [] 48 | for i, anno in enumerate(annots_list): 49 | if i % 5000 == 0: 50 | print("[{}] / [{}] ...".format(i, len(annots_list))) 51 | x1, y1, bw, bh = anno['bbox'] 52 | if bw > 0 and bh > 0: 53 | clean_annots_list.append(anno) 54 | if anno['image_id'] not in valid_image_ids: 55 | valid_image_ids.append(anno['image_id']) 56 | print("Valid number of images: ", len(valid_image_ids)) 57 | print("Valid number of annots: ", len(clean_annots_list)) 58 | print("Original number of annots: ", len(annots_list)) 59 | 60 | # -------------- Filter images -------------- 61 | print("Processing images ...") 62 | clean_images_list = [] 63 | for i in range(num_images): 64 | if args.image_set == 'train' and i % 5000 == 0: 65 | print("[{}] / [{}] ...".format(i, num_images)) 66 | if args.image_set == 'val' and i % 500 == 0: 67 | print("[{}] / [{}] ...".format(i, num_images)) 68 | 69 | # A single image dict 70 | image_dict = images_list[i] 71 | image_id = image_dict['id'] 72 | 73 | if image_id in valid_image_ids: 74 | clean_images_list.append(image_dict) 75 | 76 | print('Number of images after cleaning: ', len(clean_images_list)) 77 | print('Number of annotations after cleaning: ', len(clean_annots_list)) 78 | 79 | clean_json_file['images'] = clean_images_list 80 | clean_json_file['annotations'] = clean_annots_list 81 | 82 | # --------------- Save filterd json file --------------- 83 | new_json_path = os.path.join(args.root, 'annotations', clean_task_prefix.format(args.image_set)) 84 | with open(new_json_path, 'w') as f: 85 | json.dump(clean_json_file, f) 86 | -------------------------------------------------------------------------------- /tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | import argparse 6 | 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='YOLO-Tutorial') 11 | 12 | # dataset 13 | parser.add_argument('--root', default='/mnt/share/ssd2/dataset/CrowdHuman/', 14 | help='data root') 15 | 16 | return parser.parse_args() 17 | 18 | 19 | def load_func(fpath): 20 | print('fpath', fpath) 21 | assert os.path.exists(fpath) 22 | with open(fpath,'r') as fid: 23 | lines = fid.readlines() 24 | records =[json.loads(line.strip('\n')) for line in lines] 25 | return records 26 | 27 | if __name__ == '__main__': 28 | args = parse_args() 29 | 30 | DATA_PATH = args.root 31 | OUT_PATH = DATA_PATH + 'annotations/' 32 | SPLITS = ['val', 'train'] 33 | DEBUG = False 
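# NOTE: the conversion loop below walks each record of annotation_{split}.odgt, adds one
# COCO 'images' entry per CrowdHuman image (width/height read from the JPEG via PIL), and
# converts every ground-truth full-body box ('fbox') into a COCO 'annotations' entry with
# category_id=1 ('person'). Boxes whose 'extra' field carries ignore == 1 are kept but
# flagged with iscrowd=1 so that COCO-style evaluators treat them as ignore regions.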
34 | 35 | if not os.path.exists(OUT_PATH): 36 | os.mkdir(OUT_PATH) 37 | for split in SPLITS: 38 | data_path = DATA_PATH + split 39 | out_path = OUT_PATH + '{}.json'.format(split) 40 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 41 | ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split) 42 | anns_data = load_func(ann_path) 43 | image_cnt = 0 44 | ann_cnt = 0 45 | video_cnt = 0 46 | for ann_data in anns_data: 47 | image_cnt += 1 48 | file_path = DATA_PATH + 'CrowdHuman_{}/'.format(split) + 'Images/' + '{}.jpg'.format(ann_data['ID']) 49 | im = Image.open(file_path) 50 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 51 | 'id': image_cnt, 52 | 'height': im.size[1], 53 | 'width': im.size[0]} 54 | out['images'].append(image_info) 55 | if split != 'test': 56 | anns = ann_data['gtboxes'] 57 | for i in range(len(anns)): 58 | ann_cnt += 1 59 | fbox = anns[i]['fbox'] 60 | ann = {'id': ann_cnt, 61 | 'category_id': 1, 62 | 'image_id': image_cnt, 63 | 'track_id': -1, 64 | 'bbox_vis': anns[i]['vbox'], 65 | 'bbox': fbox, 66 | 'area': fbox[2] * fbox[3], 67 | 'iscrowd': 1 if 'extra' in anns[i] and \ 68 | 'ignore' in anns[i]['extra'] and \ 69 | anns[i]['extra']['ignore'] == 1 else 0} 70 | out['annotations'].append(ann) 71 | print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) 72 | json.dump(out, open(out_path, 'w')) -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | # Args parameters 2 | MODEL=$1 3 | DATASET=$2 4 | DATASET_ROOT=$3 5 | BATCH_SIZE=$4 6 | WORLD_SIZE=$5 7 | MASTER_PORT=$6 8 | RESUME=$7 9 | 10 | # MODEL setting 11 | IMAGE_SIZE=640 12 | FIND_UNUSED_PARAMS=False 13 | if [[ $MODEL == *"yolov8"* ]]; then 14 | # Epoch setting 15 | MAX_EPOCH=500 16 | WP_EPOCH=3 17 | EVAL_EPOCH=10 18 | NO_AUG_EPOCH=20 19 | elif [[ $MODEL == *"yolox"* ]]; then 20 | # Epoch setting 21 | MAX_EPOCH=300 22 | WP_EPOCH=3 23 | EVAL_EPOCH=10 24 | NO_AUG_EPOCH=20 25 | elif [[ $MODEL == *"yolov7"* ]]; then 26 | # Epoch setting 27 | MAX_EPOCH=300 28 | WP_EPOCH=3 29 | EVAL_EPOCH=10 30 | NO_AUG_EPOCH=20 31 | elif [[ $MODEL == *"yolov5"* ]]; then 32 | # Epoch setting 33 | MAX_EPOCH=300 34 | WP_EPOCH=3 35 | EVAL_EPOCH=10 36 | NO_AUG_EPOCH=20 37 | elif [[ $MODEL == *"yolov4"* ]]; then 38 | # Epoch setting 39 | MAX_EPOCH=300 40 | WP_EPOCH=3 41 | EVAL_EPOCH=10 42 | NO_AUG_EPOCH=20 43 | elif [[ $MODEL == *"yolov3"* ]]; then 44 | # Epoch setting 45 | MAX_EPOCH=300 46 | WP_EPOCH=3 47 | EVAL_EPOCH=10 48 | NO_AUG_EPOCH=20 49 | else 50 | # Epoch setting 51 | MAX_EPOCH=150 52 | WP_EPOCH=3 53 | EVAL_EPOCH=10 54 | NO_AUG_EPOCH=10 55 | fi 56 | 57 | # -------------------------- Train Pipeline -------------------------- 58 | if [ $WORLD_SIZE == 1 ]; then 59 | python train.py \ 60 | --cuda \ 61 | --dataset ${DATASET} \ 62 | --root ${DATASET_ROOT} \ 63 | --model ${MODEL} \ 64 | --batch_size ${BATCH_SIZE} \ 65 | --img_size ${IMAGE_SIZE} \ 66 | --wp_epoch ${WP_EPOCH} \ 67 | --max_epoch ${MAX_EPOCH} \ 68 | --eval_epoch ${EVAL_EPOCH} \ 69 | --no_aug_epoch ${NO_AUG_EPOCH} \ 70 | --resume ${RESUME} \ 71 | --ema \ 72 | --fp16 \ 73 | --find_unused_parameters ${FIND_UNUSED_PARAMS} \ 74 | --multi_scale 75 | elif [[ $WORLD_SIZE -gt 1 && $WORLD_SIZE -le 8 ]]; then 76 | python -m torch.distributed.run --nproc_per_node=${WORLD_SIZE} --master_port ${MASTER_PORT} train.py \ 77 | --cuda \ 78 | -dist \ 79 | --dataset ${DATASET} \ 80 | 
--root ${DATASET_ROOT} \ 81 | --model ${MODEL} \ 82 | --batch_size ${BATCH_SIZE} \ 83 | --img_size ${IMAGE_SIZE} \ 84 | --wp_epoch ${WP_EPOCH} \ 85 | --max_epoch ${MAX_EPOCH} \ 86 | --eval_epoch ${EVAL_EPOCH} \ 87 | --no_aug_epoch ${NO_AUG_EPOCH} \ 88 | --resume ${RESUME} \ 89 | --ema \ 90 | --fp16 \ 91 | --find_unused_parameters ${FIND_UNUSED_PARAMS} \ 92 | --multi_scale \ 93 | --sybn 94 | else 95 | echo "The WORLD_SIZE is set to a value greater than 8, indicating the use of multi-machine \ 96 | multi-card training mode, which is currently unsupported." 97 | exit 1 98 | fi -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/utils/__init__.py -------------------------------------------------------------------------------- /utils/solver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/utils/solver/__init__.py -------------------------------------------------------------------------------- /utils/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | 4 | 5 | # ------------------------- WarmUp LR Scheduler ------------------------- 6 | ## Warmup LR Scheduler 7 | class LinearWarmUpScheduler(object): 8 | def __init__(self, base_lr=0.01, wp_iter=500, warmup_factor=0.00066667): 9 | self.base_lr = base_lr 10 | self.wp_iter = wp_iter 11 | self.warmup_factor = warmup_factor 12 | 13 | 14 | def set_lr(self, optimizer, lr, base_lr): 15 | for param_group in optimizer.param_groups: 16 | init_lr = param_group['initial_lr'] 17 | ratio = init_lr / base_lr 18 | param_group['lr'] = lr * ratio 19 | 20 | 21 | def __call__(self, iter, optimizer): 22 | # warmup 23 | alpha = iter / self.wp_iter 24 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 25 | tmp_lr = self.base_lr * warmup_factor 26 | self.set_lr(optimizer, tmp_lr, self.base_lr) 27 | 28 | ## Build WP LR Scheduler 29 | def build_wp_lr_scheduler(cfg, base_lr=0.01): 30 | print('==============================') 31 | print('WarmUpScheduler: {}'.format(cfg['warmup'])) 32 | print('--base_lr: {}'.format(base_lr)) 33 | print('--warmup_iters: {}'.format(cfg['warmup_iters'])) 34 | print('--warmup_factor: {}'.format(cfg['warmup_factor'])) 35 | 36 | if cfg['warmup'] == 'linear': 37 | wp_lr_scheduler = LinearWarmUpScheduler(base_lr, cfg['warmup_iters'], cfg['warmup_factor']) 38 | 39 | return wp_lr_scheduler 40 | 41 | 42 | # ------------------------- LR Scheduler ------------------------- 43 | def build_lr_scheduler(cfg, optimizer, resume=None): 44 | print('==============================') 45 | print('LR Scheduler: {}'.format(cfg['lr_scheduler'])) 46 | 47 | if cfg['lr_scheduler'] == 'step': 48 | assert 'lr_epoch' in cfg 49 | print('--lr_epoch: {}'.format(cfg['lr_epoch'])) 50 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=cfg['lr_epoch']) 51 | elif cfg['lr_scheduler'] == 'cosine': 52 | pass 53 | 54 | if resume is not None and resume.lower() != "none": 55 | print('keep training: ', resume) 56 | checkpoint = torch.load(resume) 57 | # checkpoint state dict 58 | checkpoint_state_dict = checkpoint.pop("lr_scheduler") 59 | lr_scheduler.load_state_dict(checkpoint_state_dict) 60 | 61 | return 
lr_scheduler 62 | 63 | 64 | def build_lambda_lr_scheduler(cfg, optimizer, epochs): 65 | """Build learning rate scheduler from cfg file.""" 66 | print('==============================') 67 | print('Lr Scheduler: {}'.format(cfg['scheduler'])) 68 | # Cosine LR scheduler 69 | if cfg['scheduler'] == 'cosine': 70 | lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg['lrf'] - 1) + 1 71 | # Linear LR scheduler 72 | elif cfg['scheduler'] == 'linear': 73 | lf = lambda x: (1 - x / epochs) * (1.0 - cfg['lrf']) + cfg['lrf'] 74 | 75 | else: 76 | print('unknown lr scheduler.') 77 | exit(0) 78 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 79 | 80 | return scheduler, lf 81 | -------------------------------------------------------------------------------- /utils/solver/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def build_optimizer(cfg, model, resume=None): 5 | print('==============================') 6 | print('Optimizer: {}'.format(cfg['optimizer'])) 7 | print('--base lr: {}'.format(cfg['lr0'])) 8 | print('--momentum: {}'.format(cfg['momentum'])) 9 | print('--weight_decay: {}'.format(cfg['weight_decay'])) 10 | 11 | # ------------- Divide model's parameters ------------- 12 | param_dicts = [], [], [] 13 | norm_names = ["norm"] + ["norm{}".format(i) for i in range(10000)] 14 | for n, p in model.named_parameters(): 15 | if p.requires_grad: 16 | if "bias" == n.split(".")[-1]: 17 | param_dicts[0].append(p) # no weight decay for all layers' bias 18 | else: 19 | if n.split(".")[-2] in norm_names: 20 | param_dicts[1].append(p) # no weight decay for all NormLayers' weight 21 | else: 22 | param_dicts[2].append(p) # weight decay for all Non-NormLayers' weight 23 | 24 | # Build optimizer 25 | if cfg['optimizer'] == 'sgd': 26 | optimizer = torch.optim.SGD(param_dicts[0], lr=cfg['lr0'], momentum=cfg['momentum'], weight_decay=0.0) 27 | elif cfg['optimizer'] =='adamw': 28 | optimizer = torch.optim.AdamW(param_dicts[0], lr=cfg['lr0'], weight_decay=0.0) 29 | else: 30 | raise NotImplementedError("Unknown optimizer: {}".format(cfg['optimizer'])) 31 | 32 | # Add param groups 33 | optimizer.add_param_group({"params": param_dicts[1], "weight_decay": 0.0}) 34 | optimizer.add_param_group({"params": param_dicts[2], "weight_decay": cfg['weight_decay']}) 35 | 36 | start_epoch = 0 37 | if resume and resume != 'None': 38 | checkpoint = torch.load(resume) 39 | # checkpoint state dict 40 | try: 41 | checkpoint_state_dict = checkpoint.pop("optimizer") 42 | print('Load optimizer from the checkpoint: ', resume) 43 | optimizer.load_state_dict(checkpoint_state_dict) 44 | start_epoch = checkpoint.pop("epoch") + 1 45 | del checkpoint, checkpoint_state_dict 46 | except: 47 | print("No optimzier in the given checkpoint.") 48 | 49 | return optimizer, start_epoch 50 | --------------------------------------------------------------------------------