├── .gitignore ├── LICENSE ├── README.md ├── README_CN.md ├── config ├── __init__.py ├── data_config │ ├── dataset_config.py │ └── transform_config.py └── model_config │ ├── official_yaml │ └── yolov7.yaml │ ├── yolov1_config.py │ ├── yolov2_config.py │ ├── yolov3_config.py │ ├── yolov4_config.py │ ├── yolov5_config.py │ ├── yolov7_config.py │ ├── yolov8_config.py │ └── yolox_config.py ├── dataset ├── __init__.py ├── build.py ├── coco.py ├── crowdhuman.py ├── customed.py ├── data_augment │ ├── ssd_augment.py │ ├── strong_augment.py │ └── yolov5_augment.py ├── demo │ ├── images │ │ ├── 000000000632.jpg │ │ ├── 000000000785.jpg │ │ ├── 000000000872.jpg │ │ ├── 000000000885.jpg │ │ ├── 000000001000.jpg │ │ ├── 000000001268.jpg │ │ ├── 000000001296.jpg │ │ ├── 000000001503.jpg │ │ └── 000000001532.jpg │ └── videos │ │ └── 000006.mp4 ├── scripts │ ├── COCO2017.sh │ ├── VOC2007.sh │ ├── VOC2012.sh │ └── data_to_h5py.py ├── voc.py └── widerface.py ├── demo.py ├── deployment ├── ONNXRuntime │ ├── README.md │ └── onnx_inference.py └── test_image.jpg ├── engine.py ├── eval.py ├── evaluator ├── build.py ├── coco_evaluator.py ├── crowdhuman_evaluator.py ├── crowdhuman_tools │ ├── APMRToolkits │ │ ├── __init__.py │ │ ├── database.py │ │ └── image.py │ ├── JIToolkits │ │ ├── JI_tools.py │ │ └── matching.py │ ├── __init__.py │ ├── compute_APMR.py │ └── compute_JI.py ├── customed_evaluator.py ├── voc_evaluator.py └── widerface_evaluator.py ├── img_files ├── video_detection_demo.gif ├── video_tracking_demo.gif └── yolo_tutorial.png ├── models ├── detectors │ ├── __init__.py │ ├── yolov1 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov1.py │ │ ├── yolov1_backbone.py │ │ ├── yolov1_basic.py │ │ ├── yolov1_head.py │ │ └── yolov1_neck.py │ ├── yolov2 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov2.py │ │ ├── yolov2_backbone.py │ │ ├── yolov2_basic.py │ │ ├── yolov2_head.py │ │ └── yolov2_neck.py │ ├── yolov3 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov3.py │ │ ├── yolov3_backbone.py │ │ ├── yolov3_basic.py │ │ ├── yolov3_fpn.py │ │ ├── yolov3_head.py │ │ └── yolov3_neck.py │ ├── yolov4 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov4.py │ │ ├── yolov4_backbone.py │ │ ├── yolov4_basic.py │ │ ├── yolov4_head.py │ │ ├── yolov4_neck.py │ │ └── yolov4_pafpn.py │ ├── yolov5 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov5.py │ │ ├── yolov5_backbone.py │ │ ├── yolov5_basic.py │ │ ├── yolov5_head.py │ │ ├── yolov5_neck.py │ │ └── yolov5_pafpn.py │ ├── yolov7 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov7.py │ │ ├── yolov7_backbone.py │ │ ├── yolov7_basic.py │ │ ├── yolov7_head.py │ │ ├── yolov7_neck.py │ │ └── yolov7_pafpn.py │ ├── yolov8 │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolov8.py │ │ ├── yolov8_backbone.py │ │ ├── yolov8_basic.py │ │ ├── yolov8_head.py │ │ ├── yolov8_neck.py │ │ ├── yolov8_pafpn.py │ │ └── yolov8_pred.py │ └── yolox │ │ ├── README.md │ │ ├── build.py │ │ ├── loss.py │ │ ├── matcher.py │ │ ├── yolox.py │ │ ├── yolox_backbone.py │ │ ├── yolox_basic.py │ │ ├── yolox_head.py │ │ ├── yolox_neck.py │ │ └── yolox_pafpn.py └── trackers │ ├── __init__.py │ └── byte_tracker │ ├── basetrack.py │ ├── build.py │ ├── byte_tracker.py │ ├── kalman_filter.py │ └── matching.py ├── requirements.txt ├── test.py ├── tools ├── __init__.py ├── clean_coco.py ├── 
convert_crowdhuman_to_coco.py ├── convert_ours_to_coco.py ├── convert_widerface_to_coco.py └── export_onnx.py ├── track.py ├── train.py ├── train.sh └── utils ├── __init__.py ├── box_ops.py ├── distributed_utils.py ├── kmeans_anchor.py ├── misc.py ├── solver ├── __init__.py ├── lr_scheduler.py └── optimizer.py └── vis_tools.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | *.pth 3 | *.pkl 4 | *.onnx 5 | *.pyc 6 | *.zip 7 | weights 8 | __pycache__ 9 | det_results 10 | .vscode 11 | deployment/OpenVINO/cpp/build 12 | cluster.json 13 | train_nebula.py 14 | train_nebula.sh 15 | make_data_nebula.sh 16 | dataset/make_dataset_nebula.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jianhua Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
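The next file, `config/__init__.py`, exposes three builders: `build_dataset_config`, `build_trans_config`, and `build_model_config`. Below is a minimal sketch of how they might be chained by a driver script such as `train.py`. The argparse fields (`args.dataset`, `args.model`) and the `trans_type` key are taken from the code that follows; this particular wiring is an illustrative assumption, not copied from the repository:

```python
# Hypothetical driver snippet (illustration only), chaining the three
# config builders defined in config/__init__.py.
from argparse import Namespace

from config import build_dataset_config, build_model_config, build_trans_config

args = Namespace(dataset='coco', model='yolov1')

data_cfg  = build_dataset_config(args)   # class names / indexes for the chosen dataset
model_cfg = build_model_config(args)     # backbone, neck, head, loss and trainer settings
trans_cfg = build_trans_config(model_cfg['trans_type'])  # e.g. 'ssd' or 'yolo_l' augmentation preset

print(data_cfg['num_classes'], model_cfg['backbone'])
```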
-------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------ Dataset Config ------------------ 2 | from .data_config.dataset_config import dataset_cfg 3 | 4 | 5 | def build_dataset_config(args): 6 | if args.dataset in ['coco', 'coco-val', 'coco-test']: 7 | cfg = dataset_cfg['coco'] 8 | else: 9 | cfg = dataset_cfg[args.dataset] 10 | 11 | print('==============================') 12 | print('Dataset Config: {} \n'.format(cfg)) 13 | 14 | return cfg 15 | 16 | 17 | # ------------------ Transform Config ------------------ 18 | from .data_config.transform_config import ( 19 | # SSD-Style 20 | ssd_trans_config, 21 | # YOLOv5-Style 22 | yolo_p_trans_config, 23 | yolo_n_trans_config, 24 | yolo_s_trans_config, 25 | yolo_m_trans_config, 26 | yolo_l_trans_config, 27 | yolo_x_trans_config, 28 | # YOLOX-Style 29 | yolox_p_trans_config, 30 | yolox_n_trans_config, 31 | yolox_s_trans_config, 32 | yolox_m_trans_config, 33 | yolox_l_trans_config, 34 | yolox_x_trans_config, 35 | ) 36 | 37 | def build_trans_config(trans_config='ssd'): 38 | print('==============================') 39 | print('Transform: {}-Style ...'.format(trans_config)) 40 | 41 | # SSD-style transform 42 | if trans_config == 'ssd': 43 | cfg = ssd_trans_config 44 | 45 | # YOLOv5-style transform 46 | elif trans_config == 'yolo_p': 47 | cfg = yolo_p_trans_config 48 | elif trans_config == 'yolo_n': 49 | cfg = yolo_n_trans_config 50 | elif trans_config == 'yolo_s': 51 | cfg = yolo_s_trans_config 52 | elif trans_config == 'yolo_m': 53 | cfg = yolo_m_trans_config 54 | elif trans_config == 'yolo_l': 55 | cfg = yolo_l_trans_config 56 | elif trans_config == 'yolo_x': 57 | cfg = yolo_x_trans_config 58 | 59 | # YOLOX-style transform 60 | elif trans_config == 'yolox_p': 61 | cfg = yolox_p_trans_config 62 | elif trans_config == 'yolox_n': 63 | cfg = yolox_n_trans_config 64 | elif trans_config == 'yolox_s': 65 | cfg = yolox_s_trans_config 66 | elif trans_config == 'yolox_m': 67 | cfg = yolox_m_trans_config 68 | elif trans_config == 'yolox_l': 69 | cfg = yolox_l_trans_config 70 | elif trans_config == 'yolox_x': 71 | cfg = yolox_x_trans_config 72 | 73 | else: 74 | raise NotImplementedError("Unknown transform config: {}".format(trans_config)) 75 | print('Transform Config: {} \n'.format(cfg)) 76 | 77 | return cfg 78 | 79 | 80 | # ------------------ Model Config ------------------ 81 | ## YOLO series 82 | from .model_config.yolov1_config import yolov1_cfg 83 | from .model_config.yolov2_config import yolov2_cfg 84 | from .model_config.yolov3_config import yolov3_cfg 85 | from .model_config.yolov4_config import yolov4_cfg 86 | from .model_config.yolov5_config import yolov5_cfg 87 | from .model_config.yolov7_config import yolov7_cfg 88 | from .model_config.yolov8_config import yolov8_cfg 89 | from .model_config.yolox_config import yolox_cfg 90 | 91 | def build_model_config(args): 92 | print('==============================') 93 | print('Model: {} ...'.format(args.model.upper())) 94 | # YOLOv1 95 | if args.model == 'yolov1': 96 | cfg = yolov1_cfg 97 | # YOLOv2 98 | elif args.model == 'yolov2': 99 | cfg = yolov2_cfg 100 | # YOLOv3 101 | elif args.model in ['yolov3', 'yolov3_tiny']: 102 | cfg = yolov3_cfg[args.model] 103 | # YOLOv4 104 | elif args.model in ['yolov4', 'yolov4_tiny']: 105 | cfg = yolov4_cfg[args.model] 106 | # YOLOv5 107 | elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']: 108 | cfg = 
yolov5_cfg[args.model] 109 | # YOLOv7 110 | elif args.model in ['yolov7_tiny', 'yolov7', 'yolov7_x']: 111 | cfg = yolov7_cfg[args.model] 112 | # YOLOv8 113 | elif args.model in ['yolov8_n', 'yolov8_s', 'yolov8_m', 'yolov8_l', 'yolov8_x']: 114 | cfg = yolov8_cfg[args.model] 115 | # YOLOX 116 | elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']: 117 | cfg = yolox_cfg[args.model] 118 | 119 | return cfg 120 | 121 | -------------------------------------------------------------------------------- /config/data_config/dataset_config.py: -------------------------------------------------------------------------------- 1 | # Dataset config 2 | 3 | dataset_cfg = { 4 | 'voc': { 5 | 'data_name': 'VOCdevkit', 6 | 'num_classes': 20, 7 | 'class_indexs': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 8 | 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], 9 | 'class_names': ('aeroplane', 'bicycle', 'bird', 'boat', 10 | 'bottle', 'bus', 'car', 'cat', 'chair', 11 | 'cow', 'diningtable', 'dog', 'horse', 12 | 'motorbike', 'person', 'pottedplant', 13 | 'sheep', 'sofa', 'train', 'tvmonitor'), 14 | }, 15 | 16 | 'coco':{ 17 | 'data_name': 'COCO', 18 | 'num_classes': 80, 19 | 'class_indexs': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 20 | 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 21 | 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 22 | 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 23 | 80, 81, 82, 84, 85, 86, 87, 88, 89, 90], 24 | 'class_names': ('background', 25 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 26 | 'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign', 27 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 28 | 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', 29 | 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 30 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 31 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass', 32 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 33 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 34 | 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk', 35 | 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 36 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', 37 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'), 38 | }, 39 | 40 | 'widerface':{ 41 | 'data_name': 'WiderFace', 42 | 'num_classes': 1, 43 | 'class_indexs': [0], 44 | 'class_names': ('face',), 45 | }, 46 | 47 | 'crowdhuman':{ 48 | 'data_name': 'CrowdHuman', 49 | 'num_classes': 1, 50 | 'class_indexs': [0], 51 | 'class_names': ('person',), 52 | }, 53 | 54 | 'customed':{ 55 | 'data_name': 'AnimalDataset', 56 | 'num_classes': 9, 57 | 'class_indexs': [0, 1, 2, 3, 4, 5, 6, 7, 8], 58 | 'class_names': ('bird', 'butterfly', 'cat', 'cow', 'dog', 'lion', 'person', 'pig', 'tiger', ), 59 | }, 60 | 61 | } -------------------------------------------------------------------------------- /config/model_config/official_yaml/yolov7.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [12,16, 19,36, 40,28] # P3/8 9 | - 
[36,75, 76,55, 72,146] # P4/16 10 | - [142,110, 192,243, 459,401] # P5/32 11 | 12 | # yolov7 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | 17 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 18 | [-1, 1, Conv, [64, 3, 1]], 19 | 20 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 21 | [-1, 1, Conv, [64, 1, 1]], 22 | [-2, 1, Conv, [64, 1, 1]], 23 | [-1, 1, Conv, [64, 3, 1]], 24 | [-1, 1, Conv, [64, 3, 1]], 25 | [-1, 1, Conv, [64, 3, 1]], 26 | [-1, 1, Conv, [64, 3, 1]], 27 | [[-1, -3, -5, -6], 1, Concat, [1]], 28 | [-1, 1, Conv, [256, 1, 1]], # 11 29 | 30 | [-1, 1, MP, []], 31 | [-1, 1, Conv, [128, 1, 1]], 32 | [-3, 1, Conv, [128, 1, 1]], 33 | [-1, 1, Conv, [128, 3, 2]], 34 | [[-1, -3], 1, Concat, [1]], # 16-P3/8 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-2, 1, Conv, [128, 1, 1]], 37 | [-1, 1, Conv, [128, 3, 1]], 38 | [-1, 1, Conv, [128, 3, 1]], 39 | [-1, 1, Conv, [128, 3, 1]], 40 | [-1, 1, Conv, [128, 3, 1]], 41 | [[-1, -3, -5, -6], 1, Concat, [1]], 42 | [-1, 1, Conv, [512, 1, 1]], # 24 43 | 44 | [-1, 1, MP, []], 45 | [-1, 1, Conv, [256, 1, 1]], 46 | [-3, 1, Conv, [256, 1, 1]], 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, -3], 1, Concat, [1]], # 29-P4/16 49 | [-1, 1, Conv, [256, 1, 1]], 50 | [-2, 1, Conv, [256, 1, 1]], 51 | [-1, 1, Conv, [256, 3, 1]], 52 | [-1, 1, Conv, [256, 3, 1]], 53 | [-1, 1, Conv, [256, 3, 1]], 54 | [-1, 1, Conv, [256, 3, 1]], 55 | [[-1, -3, -5, -6], 1, Concat, [1]], 56 | [-1, 1, Conv, [1024, 1, 1]], # 37 57 | 58 | [-1, 1, MP, []], 59 | [-1, 1, Conv, [512, 1, 1]], 60 | [-3, 1, Conv, [512, 1, 1]], 61 | [-1, 1, Conv, [512, 3, 2]], 62 | [[-1, -3], 1, Concat, [1]], # 42-P5/32 63 | [-1, 1, Conv, [256, 1, 1]], 64 | [-2, 1, Conv, [256, 1, 1]], 65 | [-1, 1, Conv, [256, 3, 1]], 66 | [-1, 1, Conv, [256, 3, 1]], 67 | [-1, 1, Conv, [256, 3, 1]], 68 | [-1, 1, Conv, [256, 3, 1]], 69 | [[-1, -3, -5, -6], 1, Concat, [1]], 70 | [-1, 1, Conv, [1024, 1, 1]], # 50 71 | ] 72 | 73 | # yolov7 head 74 | head: 75 | [[-1, 1, SPPCSPC, [512]], # 51 76 | 77 | [-1, 1, Conv, [256, 1, 1]], 78 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 79 | [37, 1, Conv, [256, 1, 1]], # route backbone P4 80 | [[-1, -2], 1, Concat, [1]], 81 | 82 | [-1, 1, Conv, [256, 1, 1]], 83 | [-2, 1, Conv, [256, 1, 1]], 84 | [-1, 1, Conv, [128, 3, 1]], 85 | [-1, 1, Conv, [128, 3, 1]], 86 | [-1, 1, Conv, [128, 3, 1]], 87 | [-1, 1, Conv, [128, 3, 1]], 88 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 89 | [-1, 1, Conv, [256, 1, 1]], # 63 90 | 91 | [-1, 1, Conv, [128, 1, 1]], 92 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 93 | [24, 1, Conv, [128, 1, 1]], # route backbone P3 94 | [[-1, -2], 1, Concat, [1]], 95 | 96 | [-1, 1, Conv, [128, 1, 1]], 97 | [-2, 1, Conv, [128, 1, 1]], 98 | [-1, 1, Conv, [64, 3, 1]], 99 | [-1, 1, Conv, [64, 3, 1]], 100 | [-1, 1, Conv, [64, 3, 1]], 101 | [-1, 1, Conv, [64, 3, 1]], 102 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 103 | [-1, 1, Conv, [128, 1, 1]], # 75 104 | 105 | [-1, 1, MP, []], 106 | [-1, 1, Conv, [128, 1, 1]], 107 | [-3, 1, Conv, [128, 1, 1]], 108 | [-1, 1, Conv, [128, 3, 2]], 109 | [[-1, -3, 63], 1, Concat, [1]], 110 | 111 | [-1, 1, Conv, [256, 1, 1]], 112 | [-2, 1, Conv, [256, 1, 1]], 113 | [-1, 1, Conv, [128, 3, 1]], 114 | [-1, 1, Conv, [128, 3, 1]], 115 | [-1, 1, Conv, [128, 3, 1]], 116 | [-1, 1, Conv, [128, 3, 1]], 117 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 118 | [-1, 1, Conv, [256, 1, 1]], # 88 119 | 120 | [-1, 1, MP, []], 121 | [-1, 1, Conv, [256, 1, 1]], 122 | [-3, 1, Conv, [256, 1, 1]], 123 | [-1, 1, Conv, [256, 3, 2]], 124 | [[-1, -3, 51], 1, 
Concat, [1]], 125 | 126 | [-1, 1, Conv, [512, 1, 1]], 127 | [-2, 1, Conv, [512, 1, 1]], 128 | [-1, 1, Conv, [256, 3, 1]], 129 | [-1, 1, Conv, [256, 3, 1]], 130 | [-1, 1, Conv, [256, 3, 1]], 131 | [-1, 1, Conv, [256, 3, 1]], 132 | [[-1, -2, -3, -4, -5, -6], 1, Concat, [1]], 133 | [-1, 1, Conv, [512, 1, 1]], # 101 134 | 135 | [75, 1, RepConv, [256, 3, 1]], 136 | [88, 1, RepConv, [512, 3, 1]], 137 | [101, 1, RepConv, [1024, 3, 1]], 138 | 139 | [[102,103,104], 1, IDetect, [nc, anchors]], # Detect(P3, P4, P5) 140 | ] 141 | -------------------------------------------------------------------------------- /config/model_config/yolov1_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv1 Config 2 | 3 | yolov1_cfg = { 4 | # ---------------- Model config ---------------- 5 | ## Backbone 6 | 'backbone': 'resnet18', 7 | 'pretrained': True, 8 | 'stride': 32, # P5 9 | 'max_stride': 32, 10 | ## Neck 11 | 'neck': 'sppf', 12 | 'neck_act': 'lrelu', 13 | 'neck_norm': 'BN', 14 | 'neck_depthwise': False, 15 | 'expand_ratio': 0.5, 16 | 'pooling_size': 5, 17 | ## Head 18 | 'head': 'decoupled_head', 19 | 'head_act': 'lrelu', 20 | 'head_norm': 'BN', 21 | 'num_cls_head': 2, 22 | 'num_reg_head': 2, 23 | 'head_depthwise': False, 24 | # ---------------- Data process config ---------------- 25 | ## Input 26 | 'multi_scale': [0.5, 1.5], # 320 -> 960 27 | 'trans_type': 'ssd', 28 | # ---------------- Loss config ---------------- 29 | 'loss_obj_weight': 1.0, 30 | 'loss_cls_weight': 1.0, 31 | 'loss_box_weight': 5.0, 32 | # ---------------- Trainer config ---------------- 33 | 'trainer_type': 'yolo', 34 | } -------------------------------------------------------------------------------- /config/model_config/yolov2_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv2 Config 2 | 3 | yolov2_cfg = { 4 | # ---------------- Model config ---------------- 5 | ## Backbone 6 | 'backbone': 'darknet19', 7 | 'pretrained': True, 8 | 'stride': 32, # P5 9 | 'max_stride': 32, 10 | ## Neck 11 | 'neck': 'sppf', 12 | 'neck_act': 'lrelu', 13 | 'neck_norm': 'BN', 14 | 'neck_depthwise': False, 15 | 'expand_ratio': 0.5, 16 | 'pooling_size': 5, 17 | ## Head 18 | 'head': 'decoupled_head', 19 | 'head_act': 'lrelu', 20 | 'head_norm': 'BN', 21 | 'num_cls_head': 2, 22 | 'num_reg_head': 2, 23 | 'head_depthwise': False, 24 | 'anchor_size': [[17, 25], 25 | [55, 75], 26 | [92, 206], 27 | [202, 21], 28 | [289, 311]], # 416 scale 29 | # ---------------- Data process config ---------------- 30 | ## Input 31 | 'multi_scale': [0.5, 1.5], # 320 -> 960 32 | 'trans_type': 'ssd', 33 | # ---------------- Matcher config ---------------- 34 | 'iou_thresh': 0.5, 35 | # ---------------- Loss config ---------------- 36 | 'loss_obj_weight': 1.0, 37 | 'loss_cls_weight': 1.0, 38 | 'loss_box_weight': 5.0, 39 | # ---------------- Trainer config ---------------- 40 | 'trainer_type': 'yolo', 41 | } -------------------------------------------------------------------------------- /config/model_config/yolov3_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv3 Config 2 | 3 | yolov3_cfg = { 4 | 'yolov3':{ 5 | # ---------------- Model config ---------------- 6 | ## Backbone 7 | 'backbone': 'darknet53', 8 | 'pretrained': True, 9 | 'stride': [8, 16, 32], # P3, P4, P5 10 | 'width': 1.0, 11 | 'depth': 1.0, 12 | 'max_stride': 32, 13 | ## Neck 14 | 'neck': 'sppf', 15 | 'neck_act': 'silu', 16 | 'neck_norm': 'BN', 17 | 'neck_depthwise': 
False, 18 | 'expand_ratio': 0.5, 19 | 'pooling_size': 5, 20 | ## FPN 21 | 'fpn': 'yolov3_fpn', 22 | 'fpn_act': 'silu', 23 | 'fpn_norm': 'BN', 24 | 'fpn_depthwise': False, 25 | ## Head 26 | 'head': 'decoupled_head', 27 | 'head_act': 'silu', 28 | 'head_norm': 'BN', 29 | 'num_cls_head': 2, 30 | 'num_reg_head': 2, 31 | 'head_depthwise': False, 32 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 33 | [30, 61], [62, 45], [59, 119], # P4 34 | [116, 90], [156, 198], [373, 326]], # P5 35 | # ---------------- Data process config ---------------- 36 | 'trans_type': 'yolo_l', 37 | 'multi_scale': [0.5, 1.25], # 320 -> 800 38 | # ---------------- Assignment config ---------------- 39 | ## matcher 40 | 'iou_thresh': 0.5, 41 | # ---------------- Loss config ---------------- 42 | ## loss weight 43 | 'loss_obj_weight': 1.0, 44 | 'loss_cls_weight': 1.0, 45 | 'loss_box_weight': 5.0, 46 | # ---------------- Train config ---------------- 47 | 'trainer_type': 'yolo', 48 | }, 49 | 50 | 'yolov3_tiny':{ 51 | # ---------------- Model config ---------------- 52 | ## Backbone 53 | 'backbone': 'darknet_tiny', 54 | 'pretrained': True, 55 | 'stride': [8, 16, 32], # P3, P4, P5 56 | 'width': 0.25, 57 | 'depth': 0.34, 58 | 'max_stride': 32, 59 | ## Neck 60 | 'neck': 'sppf', 61 | 'neck_act': 'silu', 62 | 'neck_norm': 'BN', 63 | 'neck_depthwise': False, 64 | 'expand_ratio': 0.5, 65 | 'pooling_size': 5, 66 | ## FPN 67 | 'fpn': 'yolov3_fpn', 68 | 'fpn_act': 'silu', 69 | 'fpn_norm': 'BN', 70 | 'fpn_depthwise': False, 71 | ## Head 72 | 'head': 'decoupled_head', 73 | 'head_act': 'silu', 74 | 'head_norm': 'BN', 75 | 'num_cls_head': 2, 76 | 'num_reg_head': 2, 77 | 'head_depthwise': False, 78 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 79 | [30, 61], [62, 45], [59, 119], # P4 80 | [116, 90], [156, 198], [373, 326]], # P5 81 | # ---------------- Data process config ---------------- 82 | ## input 83 | 'trans_type': 'yolo_n', 84 | 'multi_scale': [0.5, 1.25], # 320 -> 800 85 | # ---------------- Assignment config ---------------- 86 | ## matcher 87 | 'iou_thresh': 0.5, 88 | # ---------------- Loss config ---------------- 89 | ## loss weight 90 | 'loss_obj_weight': 1.0, 91 | 'loss_cls_weight': 1.0, 92 | 'loss_box_weight': 5.0, 93 | # ---------------- Train config ---------------- 94 | 'trainer_type': 'yolo', 95 | }, 96 | 97 | } -------------------------------------------------------------------------------- /config/model_config/yolov4_config.py: -------------------------------------------------------------------------------- 1 | # YOLOv4 Config 2 | 3 | yolov4_cfg = { 4 | 'yolov4':{ 5 | # ---------------- Model config ---------------- 6 | ## Backbone 7 | 'backbone': 'cspdarknet53', 8 | 'pretrained': True, 9 | 'stride': [8, 16, 32], # P3, P4, P5 10 | 'width': 1.0, 11 | 'depth': 1.0, 12 | 'max_stride': 32, 13 | ## Neck 14 | 'neck': 'csp_sppf', 15 | 'expand_ratio': 0.5, 16 | 'pooling_size': 5, 17 | 'neck_act': 'silu', 18 | 'neck_norm': 'BN', 19 | 'neck_depthwise': False, 20 | ## FPN 21 | 'fpn': 'yolov4_pafpn', 22 | 'fpn_act': 'silu', 23 | 'fpn_norm': 'BN', 24 | 'fpn_depthwise': False, 25 | ## Head 26 | 'head': 'decoupled_head', 27 | 'head_act': 'silu', 28 | 'head_norm': 'BN', 29 | 'num_cls_head': 2, 30 | 'num_reg_head': 2, 31 | 'head_depthwise': False, 32 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 33 | [30, 61], [62, 45], [59, 119], # P4 34 | [116, 90], [156, 198], [373, 326]], # P5 35 | # ---------------- Data process config ---------------- 36 | 'trans_type': 'yolo_l', 37 | 'multi_scale': [0.5, 1.25], # 320 -> 800 38 
| # ---------------- Assignment config ---------------- 39 | ## matcher 40 | 'iou_thresh': 0.5, 41 | # ---------------- Loss config ---------------- 42 | ## loss weight 43 | 'loss_obj_weight': 1.0, 44 | 'loss_cls_weight': 1.0, 45 | 'loss_box_weight': 5.0, 46 | # ---------------- Train config ---------------- 47 | 'trainer_type': 'yolo', 48 | }, 49 | 50 | 'yolov4_tiny':{ 51 | # ---------------- Model config ---------------- 52 | ## Backbone 53 | 'backbone': 'cspdarknet_tiny', 54 | 'pretrained': True, 55 | 'stride': [8, 16, 32], # P3, P4, P5 56 | 'width': 0.25, 57 | 'depth': 0.34, 58 | 'max_stride': 32, 59 | ## Neck 60 | 'neck': 'csp_sppf', 61 | 'neck_act': 'silu', 62 | 'neck_norm': 'BN', 63 | 'neck_depthwise': False, 64 | 'expand_ratio': 0.5, 65 | 'pooling_size': 5, 66 | ## FPN 67 | 'fpn': 'yolov4_pafpn', 68 | 'fpn_act': 'silu', 69 | 'fpn_norm': 'BN', 70 | 'fpn_depthwise': False, 71 | ## Head 72 | 'head': 'decoupled_head', 73 | 'head_act': 'silu', 74 | 'head_norm': 'BN', 75 | 'num_cls_head': 2, 76 | 'num_reg_head': 2, 77 | 'head_depthwise': False, 78 | 'anchor_size': [[10, 13], [16, 30], [33, 23], # P3 79 | [30, 61], [62, 45], [59, 119], # P4 80 | [116, 90], [156, 198], [373, 326]], # P5 81 | # ---------------- Data process config ---------------- 82 | 'trans_type': 'yolo_n', 83 | 'multi_scale': [0.5, 1.25], # 320 -> 800 84 | # ---------------- Assignment config ---------------- 85 | ## matcher 86 | 'iou_thresh': 0.5, 87 | # ---------------- Loss config ---------------- 88 | ## loss weight 89 | 'loss_obj_weight': 1.0, 90 | 'loss_cls_weight': 1.0, 91 | 'loss_box_weight': 5.0, 92 | # ---------------- Train config ---------------- 93 | 'trainer_type': 'yolo', 94 | }, 95 | 96 | } -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/demo/images/000000000632.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000632.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000000785.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000785.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000000872.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000872.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000000885.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000000885.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001000.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001000.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001268.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001268.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001296.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001296.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001503.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001503.jpg -------------------------------------------------------------------------------- /dataset/demo/images/000000001532.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/images/000000001532.jpg -------------------------------------------------------------------------------- /dataset/demo/videos/000006.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/dataset/demo/videos/000006.mp4 -------------------------------------------------------------------------------- /dataset/scripts/COCO2017.sh: -------------------------------------------------------------------------------- 1 | mkdir COCO 2 | cd COCO 3 | 4 | wget http://images.cocodataset.org/zips/train2017.zip 5 | wget http://images.cocodataset.org/zips/val2017.zip 6 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 7 | wget http://images.cocodataset.org/zips/test2017.zip 8 | wget http://images.cocodataset.org/annotations/image_info_test2017.zip  9 | 10 | unzip train2017.zip 11 | unzip val2017.zip 12 | unzip annotations_trainval2017.zip 13 | unzip test2017.zip 14 | unzip image_info_test2017.zip 15 | 16 | # rm -f train2017.zip 17 | # rm -f val2017.zip 18 | # rm -f annotations_trainval2017.zip 19 | # rm -f test2017.zip 20 | # rm -f image_info_test2017.zip 21 | -------------------------------------------------------------------------------- /dataset/scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 
27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /dataset/scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p ~/data 12 | cd ~/data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /dataset/scripts/data_to_h5py.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import h5py 3 | import os 4 | import argparse 5 | import numpy as np 6 | import sys 7 | 8 | sys.path.append('..') 9 | from voc import VOCDetection 10 | from coco import COCODataset 11 | 12 | # ---------------------- Opt ---------------------- 13 | parser = argparse.ArgumentParser(description='Cache-Dataset') 14 | parser.add_argument('-d', '--dataset', default='voc', 15 | help='coco, voc, widerface, crowdhuman') 16 | parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/', 17 | help='data root') 18 | parser.add_argument('-size', '--img_size', default=640, type=int, 19 | help='input image size.') 20 | parser.add_argument('--mosaic', default=None, type=float, 21 | help='mosaic augmentation.') 22 | parser.add_argument('--mixup', default=None, type=float, 23 | help='mixup augmentation.') 24 | parser.add_argument('--keep_ratio', action="store_true", default=False, 25 | help='keep aspect ratio.') 26 | parser.add_argument('--show', action="store_true", default=False, 27 | help='keep aspect ratio.') 28 | 29 | args = parser.parse_args() 30 | 31 | 32 | # ---------------------- Build Dataset ---------------------- 33 | if args.dataset == 'voc': 34 | root = os.path.join(args.root, 'VOCdevkit') 35 | dataset = VOCDetection(args.img_size, root) 36 | elif args.dataset == 'coco': 37 | root = os.path.join(args.root, 'COCO') 38 | dataset = COCODataset(args.img_size, args.root) 39 | print('Data length: ', len(dataset)) 40 | 41 | 42 | # ---------------------- Main Process ---------------------- 43 | cached_image = [] 44 | dataset_size = len(dataset) 45 | for i in range(len(dataset)): 46 | if i % 5000 == 0: 47 | print("[{} / {}]".format(i, dataset_size)) 48 | # load an image 49 | image, image_id = dataset.pull_image(i) 50 | 
orig_h, orig_w, _ = image.shape 51 | 52 | # resize image 53 | if args.keep_ratio: 54 | r = args.img_size / max(orig_h, orig_w) 55 | if r != 1: 56 | interp = cv2.INTER_LINEAR 57 | new_size = (int(orig_w * r), int(orig_h * r)) 58 | image = cv2.resize(image, new_size, interpolation=interp) 59 | else: 60 | image = cv2.resize(image, (int(args.img_size), int(args.img_size))) 61 | 62 | cached_image.append(image) 63 | if args.show: 64 | cv2.imshow('image', image) 65 | # cv2.imwrite(str(i)+'.jpg', img) 66 | cv2.waitKey(0) 67 | 68 | save_path = "dataset/cache/" 69 | os.makedirs(save_path, exist_ok=True) 70 | np.save(save_path + '{}_train_images.npy'.format(args.dataset), cached_image) 71 | -------------------------------------------------------------------------------- /deployment/ONNXRuntime/README.md: -------------------------------------------------------------------------------- 1 | ## YOLO ONNXRuntime 2 | 3 | 4 | ### Convert Your Model to ONNX 5 | 6 | First, you should move to by: 7 | ```shell 8 | cd 9 | cd tools/ 10 | ``` 11 | Then, you can: 12 | 13 | 1. Convert a standard YOLO model by: 14 | ```shell 15 | python3 export_onnx.py -m yolov1 --weight ../weight/coco/yolov1/yolov1_coco.pth -nc 80 --img_size 640 16 | ``` 17 | 18 | Notes: 19 | * -n: specify a model name. The model name must be one of the [yolox-s,m,l,x and yolox-nano, yolox-tiny, yolov3] 20 | * -c: the model you have trained 21 | * -o: opset version, default 11. **However, if you will further convert your onnx model to [OpenVINO](https://github.com/Megvii-BaseDetection/YOLOX/demo/OpenVINO/), please specify the opset version to 10.** 22 | * --no-onnxsim: disable onnxsim 23 | * To customize an input shape for onnx model, modify the following code in tools/export_onnx.py: 24 | 25 | ```python 26 | dummy_input = torch.randn(args.batch_size, 3, args.img_size, args.img_size) 27 | ``` 28 | 29 | ### ONNXRuntime Demo 30 | 31 | Step1. 32 | ```shell 33 | cd /deployment/ONNXRuntime 34 | ``` 35 | 36 | Step2. 37 | ```shell 38 | python3 onnx_inference.py --model ../../weights/onnx/11/yolov1.onnx -i ../test_image.jpg -s 0.3 --img_size 640 39 | ``` 40 | Notes: 41 | * --model: your converted onnx model 42 | * -i: input_image 43 | * -s: score threshold for visualization. 44 | * --img_size: should be consistent with the shape you used for onnx convertion. 45 | -------------------------------------------------------------------------------- /deployment/ONNXRuntime/onnx_inference.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
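# Overview of the inference flow implemented below:
#   1) PreProcessor resizes the input image to --img_size and returns the resize ratio;
#   2) an onnxruntime.InferenceSession runs the exported model on the preprocessed tensor;
#   3) PostProcessor applies the score threshold and NMS, and the boxes are divided by the
#      ratio to map them back to the original image before visualization and saving.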
4 | 5 | import argparse 6 | import os 7 | 8 | import cv2 9 | import time 10 | import numpy as np 11 | import sys 12 | sys.path.append('../../') 13 | 14 | import onnxruntime 15 | from utils.misc import PreProcessor, PostProcessor 16 | from utils.vis_tools import visualize 17 | 18 | 19 | def make_parser(): 20 | parser = argparse.ArgumentParser("onnxruntime inference sample") 21 | parser.add_argument("-m", "--model", type=str, default="../../weights/onnx/11/yolov1.onnx", 22 | help="Input your onnx model.") 23 | parser.add_argument("-i", "--image_path", type=str, default='../test_image.jpg', 24 | help="Path to your input image.") 25 | parser.add_argument("-o", "--output_dir", type=str, default='../../det_results/onnx/', 26 | help="Path to your output directory.") 27 | parser.add_argument("-s", "--score_thr", type=float, default=0.35, 28 | help="Score threshould to filter the result.") 29 | parser.add_argument("-size", "--img_size", type=int, default=640, 30 | help="Specify an input shape for inference.") 31 | return parser 32 | 33 | 34 | if __name__ == '__main__': 35 | args = make_parser().parse_args() 36 | 37 | # class color for better visualization 38 | np.random.seed(0) 39 | class_colors = [(np.random.randint(255), 40 | np.random.randint(255), 41 | np.random.randint(255)) for _ in range(80)] 42 | 43 | # preprocessor 44 | prepocess = PreProcessor(img_size=args.img_size) 45 | 46 | # postprocessor 47 | postprocess = PostProcessor(num_classes=80, conf_thresh=args.score_thr, nms_thresh=0.5) 48 | 49 | # read an image 50 | input_shape = tuple([args.img_size, args.img_size]) 51 | origin_img = cv2.imread(args.image_path) 52 | 53 | # preprocess 54 | x, ratio = prepocess(origin_img) 55 | 56 | t0 = time.time() 57 | # inference 58 | session = onnxruntime.InferenceSession(args.model) 59 | 60 | ort_inputs = {session.get_inputs()[0].name: x[None, :, :, :]} 61 | output = session.run(None, ort_inputs) 62 | print("inference time: {:.1f} ms".format((time.time() - t0)*1000)) 63 | 64 | t0 = time.time() 65 | # post process 66 | bboxes, scores, labels = postprocess(output[0]) 67 | bboxes /= ratio 68 | print("post-process time: {:.1f} ms".format((time.time() - t0)*1000)) 69 | 70 | # visualize detection 71 | origin_img = visualize( 72 | img=origin_img, 73 | bboxes=bboxes, 74 | scores=scores, 75 | labels=labels, 76 | vis_thresh=args.score_thr, 77 | class_colors=class_colors 78 | ) 79 | 80 | # show 81 | cv2.imshow('onnx detection', origin_img) 82 | cv2.waitKey(0) 83 | 84 | # save results 85 | os.makedirs(args.output_dir, exist_ok=True) 86 | output_path = os.path.join(args.output_dir, os.path.basename(args.image_path)) 87 | cv2.imwrite(output_path, origin_img) 88 | -------------------------------------------------------------------------------- /deployment/test_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/deployment/test_image.jpg -------------------------------------------------------------------------------- /evaluator/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from evaluator.coco_evaluator import COCOAPIEvaluator 4 | from evaluator.voc_evaluator import VOCAPIEvaluator 5 | from evaluator.crowdhuman_evaluator import CrowdHumanEvaluator 6 | from evaluator.customed_evaluator import CustomedEvaluator 7 | 8 | 9 | 10 | def build_evluator(args, data_cfg, transform, device): 11 | # Basic parameters 12 | data_dir = 
os.path.join(args.root, data_cfg['data_name']) 13 | 14 | # Evaluator 15 | ## VOC Evaluator 16 | if args.dataset == 'voc': 17 | evaluator = VOCAPIEvaluator(data_dir = data_dir, 18 | device = device, 19 | transform = transform 20 | ) 21 | ## COCO Evaluator 22 | elif args.dataset == 'coco': 23 | evaluator = COCOAPIEvaluator(data_dir = data_dir, 24 | device = device, 25 | transform = transform 26 | ) 27 | ## CrowdHuman Evaluator 28 | elif args.dataset == 'crowdhuman': 29 | evaluator = CrowdHumanEvaluator(data_dir = data_dir, 30 | device = device, 31 | image_set = 'val', 32 | transform = transform 33 | ) 34 | ## Custom dataset Evaluator 35 | elif args.dataset == 'ourdataset': 36 | evaluator = CustomedEvaluator(data_dir = data_dir, 37 | device = device, 38 | image_set = 'val', 39 | transform = transform 40 | ) 41 | 42 | return evaluator 43 | -------------------------------------------------------------------------------- /evaluator/coco_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | import torch 4 | from dataset.coco import COCODataset 5 | from utils.box_ops import rescale_bboxes 6 | 7 | try: 8 | from pycocotools.cocoeval import COCOeval 9 | except: 10 | print("It seems that the COCOAPI is not installed.") 11 | 12 | 13 | class COCOAPIEvaluator(): 14 | """ 15 | COCO AP Evaluation class. 16 | All the data in the val2017 dataset are processed \ 17 | and evaluated by COCO API. 18 | """ 19 | def __init__(self, data_dir, device, testset=False, transform=None): 20 | """ 21 | Args: 22 | data_dir (str): dataset root directory 23 | img_size (int): image size after preprocess. images are resized \ 24 | to squares whose shape is (img_size, img_size). 25 | confthre (float): 26 | confidence threshold ranging from 0 to 1, \ 27 | which is defined in the config file. 28 | nmsthre (float): 29 | IoU threshold of non-max supression ranging from 0 to 1. 30 | """ 31 | # ----------------- Basic parameters ----------------- 32 | self.image_set = 'test2017' if testset else 'val2017' 33 | self.transform = transform 34 | self.device = device 35 | self.testset = testset 36 | # ----------------- Metrics ----------------- 37 | self.map = 0. 38 | self.ap50_95 = 0. 39 | self.ap50 = 0. 40 | # ----------------- Dataset ----------------- 41 | self.dataset = COCODataset(data_dir=data_dir, image_set=self.image_set) 42 | 43 | 44 | @torch.no_grad() 45 | def evaluate(self, model): 46 | """ 47 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 48 | and the results are evaluated by COCO API. 
49 | Args: 50 | model : model object 51 | Returns: 52 | ap50_95 (float) : calculated COCO AP for IoU=50:95 53 | ap50 (float) : calculated COCO AP for IoU=50 54 | """ 55 | model.eval() 56 | ids = [] 57 | data_dict = [] 58 | num_images = len(self.dataset) 59 | print('total number of images: %d' % (num_images)) 60 | 61 | # start testing 62 | for index in range(num_images): # all the data in val2017 63 | if index % 500 == 0: 64 | print('[Eval: %d / %d]'%(index, num_images)) 65 | 66 | # load an image 67 | img, id_ = self.dataset.pull_image(index) 68 | orig_h, orig_w, _ = img.shape 69 | 70 | # preprocess 71 | x, _, ratio = self.transform(img) 72 | x = x.unsqueeze(0).to(self.device) 73 | 74 | id_ = int(id_) 75 | ids.append(id_) 76 | 77 | # inference 78 | outputs = model(x) 79 | scores = outputs['scores'] 80 | labels = outputs['labels'] 81 | bboxes = outputs['bboxes'] 82 | 83 | # rescale bboxes 84 | bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio) 85 | 86 | # process outputs 87 | for i, box in enumerate(bboxes): 88 | x1 = float(box[0]) 89 | y1 = float(box[1]) 90 | x2 = float(box[2]) 91 | y2 = float(box[3]) 92 | label = self.dataset.class_ids[int(labels[i])] 93 | 94 | bbox = [x1, y1, x2 - x1, y2 - y1] 95 | score = float(scores[i]) # object score * class score 96 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 97 | "score": score} # COCO json format 98 | data_dict.append(A) 99 | 100 | annType = ['segm', 'bbox', 'keypoints'] 101 | 102 | # Evaluate the Dt (detection) json comparing with the ground truth 103 | if len(data_dict) > 0: 104 | print('evaluating ......') 105 | cocoGt = self.dataset.coco 106 | # workaround: temporarily write data to json file because pycocotools can't process dict in py36. 107 | if self.testset: 108 | json.dump(data_dict, open('coco_test-dev.json', 'w')) 109 | cocoDt = cocoGt.loadRes('coco_test-dev.json') 110 | return -1, -1 111 | else: 112 | _, tmp = tempfile.mkstemp() 113 | json.dump(data_dict, open(tmp, 'w')) 114 | cocoDt = cocoGt.loadRes(tmp) 115 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 116 | cocoEval.params.imgIds = ids 117 | cocoEval.evaluate() 118 | cocoEval.accumulate() 119 | cocoEval.summarize() 120 | 121 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 122 | print('ap50_95 : ', ap50_95) 123 | print('ap50 : ', ap50) 124 | self.map = ap50_95 125 | self.ap50_95 = ap50_95 126 | self.ap50 = ap50 127 | 128 | return ap50, ap50_95 129 | else: 130 | return 0, 0 131 | 132 | -------------------------------------------------------------------------------- /evaluator/crowdhuman_tools/APMRToolkits/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf8 -*- 2 | __author__ = 'jyn' 3 | __email__ = 'jyn@megvii.com' 4 | 5 | from .image import * 6 | from .database import * 7 | -------------------------------------------------------------------------------- /evaluator/crowdhuman_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/evaluator/crowdhuman_tools/__init__.py -------------------------------------------------------------------------------- /evaluator/crowdhuman_tools/compute_APMR.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from .APMRToolkits import * 3 | 4 | dbName = 'human' 5 | def compute_APMR(dt_path, gt_path, target_key=None, mode=0): 6 | database = Database(gt_path, 
dt_path, target_key, None, mode) 7 | database.compare() 8 | mAP,_ = database.eval_AP() 9 | mMR,_ = database.eval_MR() 10 | line = 'AP:{:.4f}, MR:{:.4f}.'.format(mAP, mMR) 11 | return mAP, mMR 12 | 13 | if __name__ == "__main__": 14 | parser = argparse.ArgumentParser(description='Analyze a json result file with iou match') 15 | parser.add_argument('--detfile', required=True, help='path of json result file to load') 16 | parser.add_argument('--target_key', default=None, required=True) 17 | args = parser.parse_args() 18 | compute_APMR(args.detfile, args.target_key, 0) -------------------------------------------------------------------------------- /evaluator/customed_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | import torch 4 | from dataset.customed import CustomedDataset 5 | from utils.box_ops import rescale_bboxes 6 | 7 | try: 8 | from pycocotools.cocoeval import COCOeval 9 | except: 10 | print("It seems that the COCOAPI is not installed.") 11 | 12 | 13 | class CustomedEvaluator(): 14 | def __init__(self, data_dir, device, image_set='val', transform=None): 15 | # ----------------- Basic parameters ----------------- 16 | self.image_set = image_set 17 | self.transform = transform 18 | self.device = device 19 | # ----------------- Metrics ----------------- 20 | self.map = 0. 21 | self.ap50_95 = 0. 22 | self.ap50 = 0. 23 | # ----------------- Dataset ----------------- 24 | self.dataset = CustomedDataset(data_dir=data_dir, image_set=image_set) 25 | 26 | 27 | @torch.no_grad() 28 | def evaluate(self, model): 29 | """ 30 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 31 | and the results are evaluated by COCO API. 32 | Args: 33 | model : model object 34 | Returns: 35 | ap50_95 (float) : calculated COCO AP for IoU=50:95 36 | ap50 (float) : calculated COCO AP for IoU=50 37 | """ 38 | model.eval() 39 | ids = [] 40 | data_dict = [] 41 | num_images = len(self.dataset) 42 | print('total number of images: %d' % (num_images)) 43 | 44 | # start testing 45 | for index in range(num_images): # all the data in val2017 46 | if index % 500 == 0: 47 | print('[Eval: %d / %d]'%(index, num_images)) 48 | 49 | # load an image 50 | img, id_ = self.dataset.pull_image(index) 51 | orig_h, orig_w, _ = img.shape 52 | 53 | # preprocess 54 | x, _, ratio = self.transform(img) 55 | x = x.unsqueeze(0).to(self.device) 56 | 57 | id_ = int(id_) 58 | ids.append(id_) 59 | 60 | # inference 61 | outputs = model(x) 62 | scores = outputs['scores'] 63 | labels = outputs['labels'] 64 | bboxes = outputs['bboxes'] 65 | 66 | # rescale bboxes 67 | bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio) 68 | 69 | for i, box in enumerate(bboxes): 70 | x1 = float(box[0]) 71 | y1 = float(box[1]) 72 | x2 = float(box[2]) 73 | y2 = float(box[3]) 74 | label = self.dataset.class_ids[int(labels[i])] 75 | 76 | bbox = [x1, y1, x2 - x1, y2 - y1] 77 | score = float(scores[i]) # object score * class score 78 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 79 | "score": score} # COCO json format 80 | data_dict.append(A) 81 | 82 | annType = ['segm', 'bbox', 'keypoints'] 83 | 84 | # Evaluate the Dt (detection) json comparing with the ground truth 85 | if len(data_dict) > 0: 86 | print('evaluating ......') 87 | cocoGt = self.dataset.coco 88 | # workaround: temporarily write data to json file because pycocotools can't process dict in py36. 
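# The detections are dumped to a temporary COCO-format JSON file, reloaded with
# cocoGt.loadRes(), and scored with the standard COCOeval pipeline
# (evaluate -> accumulate -> summarize); stats[0] is AP@[0.5:0.95] and stats[1] is AP@0.5.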
89 | _, tmp = tempfile.mkstemp() 90 | json.dump(data_dict, open(tmp, 'w')) 91 | cocoDt = cocoGt.loadRes(tmp) 92 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 93 | cocoEval.params.imgIds = ids 94 | cocoEval.evaluate() 95 | cocoEval.accumulate() 96 | cocoEval.summarize() 97 | 98 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 99 | print('ap50_95 : ', ap50_95) 100 | print('ap50 : ', ap50) 101 | self.map = ap50_95 102 | self.ap50_95 = ap50_95 103 | self.ap50 = ap50 104 | 105 | return ap50, ap50_95 106 | else: 107 | return 0, 0 108 | 109 | -------------------------------------------------------------------------------- /evaluator/widerface_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | import torch 4 | import numpy as np 5 | from dataset.widerface import WiderFaceDataset 6 | from utils.box_ops import rescale_bboxes 7 | 8 | try: 9 | from pycocotools.cocoeval import COCOeval 10 | except: 11 | print("It seems that the COCOAPI is not installed.") 12 | 13 | 14 | class WiderFaceEvaluator(): 15 | """ 16 | COCO AP Evaluation class. 17 | All the data in the val2017 dataset are processed \ 18 | and evaluated by COCO API. 19 | """ 20 | def __init__(self, data_dir, device, image_set='val', transform=None): 21 | """ 22 | data_dir (str): dataset root directory 23 | device: (int): CUDA or CPU. 24 | image_set: train or val. 25 | transform: used to preprocess inputs. 26 | """ 27 | # ----------------- Basic parameters ----------------- 28 | self.image_set = image_set 29 | self.transform = transform 30 | self.device = device 31 | # ----------------- Metrics ----------------- 32 | self.map = 0. 33 | self.ap50_95 = 0. 34 | self.ap50 = 0. 35 | # ----------------- Dataset ----------------- 36 | self.dataset = WiderFaceDataset(data_dir=data_dir, image_set=image_set) 37 | 38 | 39 | @torch.no_grad() 40 | def evaluate(self, model): 41 | """ 42 | COCO average precision (AP) Evaluation. Iterate inference on the test dataset 43 | and the results are evaluated by COCO API. 
44 | Args: 45 | model : model object 46 | Returns: 47 | ap50_95 (float) : calculated COCO AP for IoU=50:95 48 | ap50 (float) : calculated COCO AP for IoU=50 49 | """ 50 | model.eval() 51 | ids = [] 52 | data_dict = [] 53 | num_images = len(self.dataset) 54 | print('total number of images: %d' % (num_images)) 55 | 56 | # start testing 57 | for index in range(num_images): # all the data in val2017 58 | if index % 500 == 0: 59 | print('[Eval: %d / %d]'%(index, num_images)) 60 | 61 | # load an image 62 | img, id_ = self.dataset.pull_image(index) 63 | orig_h, orig_w, _ = img.shape 64 | 65 | # preprocess 66 | x, _, ratio = self.transform(img) 67 | x = x.unsqueeze(0).to(self.device) 68 | 69 | id_ = int(id_) 70 | ids.append(id_) 71 | 72 | # inference 73 | outputs = model(x) 74 | scores = outputs['scores'] 75 | labels = outputs['labels'] 76 | bboxes = outputs['bboxes'] 77 | 78 | # rescale bboxes 79 | bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio) 80 | 81 | for i, box in enumerate(bboxes): 82 | x1 = float(box[0]) 83 | y1 = float(box[1]) 84 | x2 = float(box[2]) 85 | y2 = float(box[3]) 86 | label = self.dataset.class_ids[int(labels[i])] 87 | 88 | bbox = [x1, y1, x2 - x1, y2 - y1] 89 | score = float(scores[i]) # object score * class score 90 | A = {"image_id": id_, "category_id": label, "bbox": bbox, 91 | "score": score} # COCO json format 92 | data_dict.append(A) 93 | 94 | annType = ['segm', 'bbox', 'keypoints'] 95 | 96 | # Evaluate the Dt (detection) json comparing with the ground truth 97 | if len(data_dict) > 0: 98 | print('evaluating ......') 99 | cocoGt = self.dataset.coco 100 | # workaround: temporarily write data to json file because pycocotools can't process dict in py36. 101 | _, tmp = tempfile.mkstemp() 102 | json.dump(data_dict, open(tmp, 'w')) 103 | cocoDt = cocoGt.loadRes(tmp) 104 | cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1]) 105 | cocoEval.params.imgIds = ids 106 | cocoEval.evaluate() 107 | cocoEval.accumulate() 108 | cocoEval.summarize() 109 | 110 | ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1] 111 | print('ap50_95 : ', ap50_95) 112 | print('ap50 : ', ap50) 113 | self.map = ap50_95 114 | self.ap50_95 = ap50_95 115 | self.ap50 = ap50 116 | 117 | return ap50, ap50_95 118 | else: 119 | return 0, 0 120 | 121 | -------------------------------------------------------------------------------- /img_files/video_detection_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/img_files/video_detection_demo.gif -------------------------------------------------------------------------------- /img_files/video_tracking_demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/img_files/video_tracking_demo.gif -------------------------------------------------------------------------------- /img_files/yolo_tutorial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/img_files/yolo_tutorial.png -------------------------------------------------------------------------------- /models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | # YOLO series 6 | from .yolov1.build 
import build_yolov1 7 | from .yolov2.build import build_yolov2 8 | from .yolov3.build import build_yolov3 9 | from .yolov4.build import build_yolov4 10 | from .yolov5.build import build_yolov5 11 | from .yolov7.build import build_yolov7 12 | from .yolov8.build import build_yolov8 13 | from .yolox.build import build_yolox 14 | 15 | 16 | # build object detector 17 | def build_model(args, 18 | model_cfg, 19 | device, 20 | num_classes=80, 21 | trainable=False, 22 | deploy=False): 23 | # YOLOv1 24 | if args.model == 'yolov1': 25 | model, criterion = build_yolov1( 26 | args, model_cfg, device, num_classes, trainable, deploy) 27 | # YOLOv2 28 | elif args.model == 'yolov2': 29 | model, criterion = build_yolov2( 30 | args, model_cfg, device, num_classes, trainable, deploy) 31 | # YOLOv3 32 | elif args.model in ['yolov3', 'yolov3_tiny']: 33 | model, criterion = build_yolov3( 34 | args, model_cfg, device, num_classes, trainable, deploy) 35 | # YOLOv4 36 | elif args.model in ['yolov4', 'yolov4_tiny']: 37 | model, criterion = build_yolov4( 38 | args, model_cfg, device, num_classes, trainable, deploy) 39 | # YOLOv5 40 | elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']: 41 | model, criterion = build_yolov5( 42 | args, model_cfg, device, num_classes, trainable, deploy) 43 | # YOLOv5-AdamW 44 | elif args.model in ['yolov5_n_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']: 45 | model, criterion = build_yolov5( 46 | args, model_cfg, device, num_classes, trainable, deploy) 47 | # YOLOv7 48 | elif args.model in ['yolov7_tiny', 'yolov7', 'yolov7_x']: 49 | model, criterion = build_yolov7( 50 | args, model_cfg, device, num_classes, trainable, deploy) 51 | # YOLOv8 52 | elif args.model in ['yolov8_n', 'yolov8_s', 'yolov8_m', 'yolov8_l', 'yolov8_x']: 53 | model, criterion = build_yolov8( 54 | args, model_cfg, device, num_classes, trainable, deploy) 55 | # YOLOX 56 | elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']: 57 | model, criterion = build_yolox( 58 | args, model_cfg, device, num_classes, trainable, deploy) 59 | # YOLOX-AdamW 60 | elif args.model in ['yolox_n_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']: 61 | model, criterion = build_yolox( 62 | args, model_cfg, device, num_classes, trainable, deploy) 63 | 64 | if trainable: 65 | # Load pretrained weight 66 | if args.pretrained is not None: 67 | print('Loading COCO pretrained weight ...') 68 | checkpoint = torch.load(args.pretrained, map_location='cpu') 69 | # checkpoint state dict 70 | checkpoint_state_dict = checkpoint.pop("model") 71 | # model state dict 72 | model_state_dict = model.state_dict() 73 | # check 74 | for k in list(checkpoint_state_dict.keys()): 75 | if k in model_state_dict: 76 | shape_model = tuple(model_state_dict[k].shape) 77 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 78 | if shape_model != shape_checkpoint: 79 | checkpoint_state_dict.pop(k) 80 | print(k) 81 | else: 82 | checkpoint_state_dict.pop(k) 83 | print(k) 84 | 85 | model.load_state_dict(checkpoint_state_dict, strict=False) 86 | 87 | # keep training 88 | if args.resume and args.resume != "None": 89 | checkpoint = torch.load(args.resume, map_location='cpu') 90 | # checkpoint state dict 91 | try: 92 | checkpoint_state_dict = checkpoint.pop("model") 93 | print('Load model from the checkpoint: ', args.resume) 94 | model.load_state_dict(checkpoint_state_dict) 95 | del checkpoint, checkpoint_state_dict 96 | except: 97 | print("No model in the given 
checkpoint.") 98 | 99 | return model, criterion 100 | 101 | else: 102 | return model -------------------------------------------------------------------------------- /models/detectors/yolov1/README.md: -------------------------------------------------------------------------------- 1 | # Redesigned YOLOv1: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |--------|------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv1 | ResNet-18 | 1xb16 | 640 | 27.9 | 47.5 | 37.8 | 21.3 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov1_coco.pth) | 6 | 7 | - For training, we train redesigned YOLOv1 with 150 epochs on COCO. 8 | - For data augmentation, we only use the large scale jitter (LSJ), no Mosaic or Mixup augmentation. 9 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 10 | - For learning rate scheduler, we use linear decay scheduler. 11 | 12 | 13 | ## Train YOLOv1 14 | ### Single GPU 15 | Taking training YOLOv1 on COCO as the example, 16 | ```Shell 17 | python train.py --cuda -d coco --root path/to/coco -m yolov1 -bs 16 -size 640 --wp_epoch 3 --max_epoch 150 --eval_epoch 10 --no_aug_epoch 10 --ema --fp16 --multi_scale 18 | ``` 19 | 20 | ### Multi GPU 21 | Taking training YOLOv1 on COCO as the example, 22 | ```Shell 23 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov1 -bs 128 -size 640 --wp_epoch 3 --max_epoch 150 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 24 | ``` 25 | 26 | ## Test YOLOv1 27 | Taking testing YOLOv1 on COCO-val as the example, 28 | ```Shell 29 | python test.py --cuda -d coco --root path/to/coco -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show 30 | ``` 31 | 32 | ## Evaluate YOLOv1 33 | Taking evaluating YOLOv1 on COCO-val as the example, 34 | ```Shell 35 | python eval.py --cuda -d coco --root path/to/coco -m yolov1 --weight path/to/yolov1_coco.pth 36 | ``` 37 | 38 | ## Demo 39 | ### Detect with Image 40 | ```Shell 41 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show 42 | ``` 43 | 44 | ### Detect with Video 45 | ```Shell 46 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show --gif 47 | ``` 48 | 49 | ### Detect with Camera 50 | ```Shell 51 | python demo.py --mode camera --cuda -m yolov1 --weight path/to/yolov1_coco.pth -size 640 --show --gif 52 | ``` 53 | -------------------------------------------------------------------------------- /models/detectors/yolov1/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov1 import YOLOv1 9 | 10 | 11 | # build object detector 12 | def build_yolov1(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv1(cfg = cfg, 21 | device = device, 22 | img_size = args.img_size, 23 | num_classes = num_classes, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | trainable = trainable, 27 | deploy = deploy, 28 | nms_class_agnostic = args.nms_class_agnostic 29 | ) 30 | 31 | # -------------- Initialize YOLO -------------- 32 | # Init bias 33 | init_prob = 0.01 34 | bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) 35 | # obj pred 36 | b = model.obj_pred.bias.view(1, -1) 37 | b.data.fill_(bias_value.item()) 38 | model.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 39 | # cls pred 40 | b = model.cls_pred.bias.view(1, -1) 41 | b.data.fill_(bias_value.item()) 42 | model.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 43 | # reg pred 44 | b = model.reg_pred.bias.view(-1, ) 45 | b.data.fill_(1.0) 46 | model.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 47 | w = model.reg_pred.weight 48 | w.data.fill_(0.) 49 | model.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 50 | 51 | 52 | # -------------- Build criterion -------------- 53 | criterion = None 54 | if trainable: 55 | # build criterion for training 56 | criterion = build_criterion(cfg, device, num_classes) 57 | 58 | return model, criterion 59 | -------------------------------------------------------------------------------- /models/detectors/yolov1/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import YoloMatcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | self.loss_obj_weight = cfg['loss_obj_weight'] 14 | self.loss_cls_weight = cfg['loss_cls_weight'] 15 | self.loss_box_weight = cfg['loss_box_weight'] 16 | 17 | # matcher 18 | self.matcher = YoloMatcher(num_classes=num_classes) 19 | 20 | 21 | def loss_objectness(self, pred_obj, gt_obj): 22 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 23 | 24 | return loss_obj 25 | 26 | 27 | def loss_classes(self, pred_cls, gt_label): 28 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 29 | 30 | return loss_cls 31 | 32 | 33 | def loss_bboxes(self, pred_box, gt_box): 34 | # regression loss 35 | ious = get_ious(pred_box, 36 | gt_box, 37 | box_mode="xyxy", 38 | iou_type='giou') 39 | loss_box = 1.0 - ious 40 | 41 | return loss_box 42 | 43 | 44 | def __call__(self, outputs, targets, epoch=0): 45 | device = outputs['pred_cls'][0].device 46 | stride = outputs['stride'] 47 | fmp_size = outputs['fmp_size'] 48 | ( 49 | gt_objectness, 50 | gt_classes, 51 | gt_bboxes, 52 | ) = self.matcher(fmp_size=fmp_size, 53 | stride=stride, 54 | targets=targets) 55 | # List[B, M, C] -> [B, M, C] -> [BM, C] 56 | pred_obj = outputs['pred_obj'].view(-1) # [BM,] 57 | pred_cls = outputs['pred_cls'].view(-1, self.num_classes) # [BM, C] 58 | pred_box = outputs['pred_box'].view(-1, 4) # [BM, 4] 59 | 60 | gt_objectness = gt_objectness.view(-1).to(device).float() # [BM,] 61 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 62 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 63 | 64 | pos_masks = (gt_objectness > 0) 65 | num_fgs = pos_masks.sum() 66 | 67 | if is_dist_avail_and_initialized(): 68 | torch.distributed.all_reduce(num_fgs) 69 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 70 | 71 | # obj loss 72 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 73 | loss_obj = loss_obj.sum() / num_fgs 74 | 75 | # cls loss 76 | pred_cls_pos = pred_cls[pos_masks] 77 | gt_classes_pos = gt_classes[pos_masks] 78 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 
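# Note: like loss_obj above, loss_cls is an unreduced BCE-with-logits map. It is
# computed only on the positive (object-bearing) cells and, just below, is summed
# and divided by num_fgs (the foreground count, averaged across ranks when training
# is distributed and clamped to at least 1) instead of by the batch size.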
79 | loss_cls = loss_cls.sum() / num_fgs 80 | 81 | # box loss 82 | pred_box_pos = pred_box[pos_masks] 83 | gt_bboxes_pos = gt_bboxes[pos_masks] 84 | loss_box = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 85 | loss_box = loss_box.sum() / num_fgs 86 | 87 | # total loss 88 | losses = self.loss_obj_weight * loss_obj + \ 89 | self.loss_cls_weight * loss_cls + \ 90 | self.loss_box_weight * loss_box 91 | 92 | loss_dict = dict( 93 | loss_obj = loss_obj, 94 | loss_cls = loss_cls, 95 | loss_box = loss_box, 96 | losses = losses 97 | ) 98 | 99 | return loss_dict 100 | 101 | 102 | def build_criterion(cfg, device, num_classes): 103 | criterion = Criterion( 104 | cfg=cfg, 105 | device=device, 106 | num_classes=num_classes 107 | ) 108 | 109 | return criterion 110 | 111 | 112 | if __name__ == "__main__": 113 | pass 114 | -------------------------------------------------------------------------------- /models/detectors/yolov1/matcher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class YoloMatcher(object): 6 | def __init__(self, num_classes): 7 | self.num_classes = num_classes 8 | 9 | 10 | @torch.no_grad() 11 | def __call__(self, fmp_size, stride, targets): 12 | """ 13 | img_size: (Int) input image size 14 | stride: (Int) -> stride of YOLOv1 output. 15 | targets: (Dict) dict{'boxes': [...], 16 | 'labels': [...], 17 | 'orig_size': ...} 18 | """ 19 | # prepare 20 | bs = len(targets) 21 | fmp_h, fmp_w = fmp_size 22 | gt_objectness = np.zeros([bs, fmp_h, fmp_w, 1]) 23 | gt_classes = np.zeros([bs, fmp_h, fmp_w, self.num_classes]) 24 | gt_bboxes = np.zeros([bs, fmp_h, fmp_w, 4]) 25 | 26 | for batch_index in range(bs): 27 | targets_per_image = targets[batch_index] 28 | # [N,] 29 | tgt_cls = targets_per_image["labels"].numpy() 30 | # [N, 4] 31 | tgt_box = targets_per_image['boxes'].numpy() 32 | 33 | for gt_box, gt_label in zip(tgt_box, tgt_cls): 34 | x1, y1, x2, y2 = gt_box 35 | # xyxy -> cxcywh 36 | xc, yc = (x2 + x1) * 0.5, (y2 + y1) * 0.5 37 | bw, bh = x2 - x1, y2 - y1 38 | 39 | # check 40 | if bw < 1. 
or bh < 1.: 41 | continue 42 | 43 | # grid 44 | xs_c = xc / stride 45 | ys_c = yc / stride 46 | grid_x = int(xs_c) 47 | grid_y = int(ys_c) 48 | 49 | if grid_x < fmp_w and grid_y < fmp_h: 50 | # obj 51 | gt_objectness[batch_index, grid_y, grid_x] = 1.0 52 | # cls 53 | cls_ont_hot = np.zeros(self.num_classes) 54 | cls_ont_hot[int(gt_label)] = 1.0 55 | gt_classes[batch_index, grid_y, grid_x] = cls_ont_hot 56 | # box 57 | gt_bboxes[batch_index, grid_y, grid_x] = np.array([x1, y1, x2, y2]) 58 | 59 | # [B, M, C] 60 | gt_objectness = gt_objectness.reshape(bs, -1, 1) 61 | gt_classes = gt_classes.reshape(bs, -1, self.num_classes) 62 | gt_bboxes = gt_bboxes.reshape(bs, -1, 4) 63 | 64 | # to tensor 65 | gt_objectness = torch.from_numpy(gt_objectness).float() 66 | gt_classes = torch.from_numpy(gt_classes).float() 67 | gt_bboxes = torch.from_numpy(gt_bboxes).float() 68 | 69 | return gt_objectness, gt_classes, gt_bboxes 70 | -------------------------------------------------------------------------------- /models/detectors/yolov1/yolov1_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | -------------------------------------------------------------------------------- /models/detectors/yolov1/yolov1_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov1_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def 
__init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov1/yolov1_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov1_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | def build_neck(cfg, in_dim, out_dim): 28 | model = cfg['neck'] 29 | print('==============================') 30 | print('Neck: {}'.format(model)) 31 | # build neck 32 | if model == 'sppf': 33 | neck = SPPF( 34 | in_dim=in_dim, 35 | out_dim=out_dim, 36 | expand_ratio=cfg['expand_ratio'], 37 | pooling_size=cfg['pooling_size'], 38 | act_type=cfg['neck_act'], 39 | norm_type=cfg['neck_norm'] 40 | ) 41 | 42 | return neck 43 | -------------------------------------------------------------------------------- /models/detectors/yolov2/README.md: 
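The SPPF neck above chains a single 5x5 max-pool three times instead of running the 5/9/13 pools of the original SPP in parallel. A minimal standalone check of that equivalence (not part of this repository; the pool modules and the random tensor below are made up for illustration):

```Python
import torch
import torch.nn as nn

# stride-1 max-pools with "same" padding
m5  = nn.MaxPool2d(kernel_size=5,  stride=1, padding=2)
m9  = nn.MaxPool2d(kernel_size=9,  stride=1, padding=4)
m13 = nn.MaxPool2d(kernel_size=13, stride=1, padding=6)

x  = torch.randn(1, 8, 32, 32)
y1 = m5(x)    # 5x5 receptive field
y2 = m5(y1)   # two chained 5x5 pools == one 9x9 pool
y3 = m5(y2)   # three chained 5x5 pools == one 13x13 pool

print(torch.allclose(y2, m9(x)))    # True
print(torch.allclose(y3, m13(x)))   # True
```

SPPF's `torch.cat((x, y1, y2, self.m(y2)), 1)` therefore concatenates the same four feature maps an SPP block with kernels {5, 9, 13} would produce, while reusing each intermediate pooling result.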
-------------------------------------------------------------------------------- 1 | # Redesigned YOLOv2: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |--------|------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv2 | DarkNet-19 | 1xb16 | 640 | 32.7 | 50.9 | 53.9 | 30.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov2_coco.pth) | 6 | 7 | - For training, we train redesigned YOLOv2 with 150 epochs on COCO. 8 | - For data augmentation, we only use the large scale jitter (LSJ), no Mosaic or Mixup augmentation. 9 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 10 | - For learning rate scheduler, we use linear decay scheduler. 11 | 12 | ## Train YOLOv2 13 | ### Single GPU 14 | Taking training YOLOv2 on COCO as the example, 15 | ```Shell 16 | python train.py --cuda -d coco --root path/to/coco -m yolov2 -bs 16 -size 640 --wp_epoch 3 --max_epoch 200 --eval_epoch 10 --no_aug_epoch 15 --ema --fp16 --multi_scale 17 | ``` 18 | 19 | ### Multi GPU 20 | Taking training YOLOv2 on COCO as the example, 21 | ```Shell 22 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov2 -bs 128 -size 640 --wp_epoch 3 --max_epoch 200 --eval_epoch 10 --no_aug_epoch 15 --ema --fp16 --sybn --multi_scale --save_folder weights/ 23 | ``` 24 | 25 | ## Test YOLOv2 26 | Taking testing YOLOv2 on COCO-val as the example, 27 | ```Shell 28 | python test.py --cuda -d coco --root path/to/coco -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show 29 | ``` 30 | 31 | ## Evaluate YOLOv2 32 | Taking evaluating YOLOv2 on COCO-val as the example, 33 | ```Shell 34 | python eval.py --cuda -d coco --root path/to/coco -m yolov2 --weight path/to/yolov2_coco.pth 35 | ``` 36 | 37 | ## Demo 38 | ### Detect with Image 39 | ```Shell 40 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show 41 | ``` 42 | 43 | ### Detect with Video 44 | ```Shell 45 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show --gif 46 | ``` 47 | 48 | ### Detect with Camera 49 | ```Shell 50 | python demo.py --mode camera --cuda -m yolov2 --weight path/to/yolov2_coco.pth -size 640 --show --gif 51 | ``` 52 | -------------------------------------------------------------------------------- /models/detectors/yolov2/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov2 import YOLOv2 9 | 10 | 11 | # build object detector 12 | def build_yolov2(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv2(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | # Init bias 34 | init_prob = 0.01 35 | bias_value = -torch.log(torch.tensor((1. 
- init_prob) / init_prob)) 36 | # obj pred 37 | b = model.obj_pred.bias.view(1, -1) 38 | b.data.fill_(bias_value.item()) 39 | model.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 40 | # cls pred 41 | b = model.cls_pred.bias.view(1, -1) 42 | b.data.fill_(bias_value.item()) 43 | model.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 44 | # reg pred 45 | b = model.reg_pred.bias.view(-1, ) 46 | b.data.fill_(1.0) 47 | model.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 48 | w = model.reg_pred.weight 49 | w.data.fill_(0.) 50 | model.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 51 | 52 | 53 | # -------------- Build criterion -------------- 54 | criterion = None 55 | if trainable: 56 | # build criterion for training 57 | criterion = build_criterion(cfg, device, num_classes) 58 | return model, criterion 59 | -------------------------------------------------------------------------------- /models/detectors/yolov2/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov2Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov2Matcher(cfg['iou_thresh'], num_classes, cfg['anchor_size']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'].device 47 | stride = outputs['stride'] 48 | fmp_size = outputs['fmp_size'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_size=fmp_size, 54 | stride=stride, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = outputs['pred_obj'].view(-1) # [BM,] 58 | pred_cls = outputs['pred_cls'].view(-1, self.num_classes) # [BM, C] 59 | pred_box = outputs['pred_box'].view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = gt_objectness.view(-1).to(device).float() # [BM,] 62 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | 
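# Here the one-hot class targets of the positive cells are rescaled by the
# predicted-box/ground-truth overlap returned by loss_bboxes (a GIoU value
# clamped at 0), so the classifier is trained on IoU-aware soft targets
# rather than hard 0/1 labels.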
pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | model_urls = { 6 | "darknet19": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet19.pth", 7 | } 8 | 9 | 10 | __all__ = ['darknet19'] 11 | 12 | 13 | # --------------------- Basic Module ----------------------- 14 | class Conv_BN_LeakyReLU(nn.Module): 15 | def __init__(self, in_channels, out_channels, ksize, padding=0, stride=1, dilation=1): 16 | super(Conv_BN_LeakyReLU, self).__init__() 17 | self.convs = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, ksize, padding=padding, stride=stride, dilation=dilation), 19 | nn.BatchNorm2d(out_channels), 20 | nn.LeakyReLU(0.1, inplace=True) 21 | ) 22 | 23 | def forward(self, x): 24 | return self.convs(x) 25 | 26 | 27 | # --------------------- DarkNet-19 ----------------------- 28 | class DarkNet19(nn.Module): 29 | def __init__(self): 30 | 31 | super(DarkNet19, self).__init__() 32 | # backbone network : DarkNet-19 33 | # output : stride = 2, c = 32 34 | self.conv_1 = nn.Sequential( 35 | Conv_BN_LeakyReLU(3, 32, 3, 1), 36 | nn.MaxPool2d((2,2), 2), 37 | ) 38 | 39 | # output : stride = 4, c = 64 40 | self.conv_2 = nn.Sequential( 41 | Conv_BN_LeakyReLU(32, 64, 3, 1), 42 | nn.MaxPool2d((2,2), 2) 43 | ) 44 | 45 | # output : stride = 8, c = 128 46 | self.conv_3 = nn.Sequential( 47 | Conv_BN_LeakyReLU(64, 128, 3, 1), 48 | Conv_BN_LeakyReLU(128, 64, 1), 49 | Conv_BN_LeakyReLU(64, 128, 3, 1), 50 | nn.MaxPool2d((2,2), 2) 51 | ) 52 | 53 | # output : stride = 8, c = 256 54 | self.conv_4 = nn.Sequential( 55 | Conv_BN_LeakyReLU(128, 256, 3, 1), 56 | Conv_BN_LeakyReLU(256, 128, 1), 57 | Conv_BN_LeakyReLU(128, 256, 3, 1), 58 | ) 59 | 60 | # output : stride = 16, c = 512 61 | self.maxpool_4 = nn.MaxPool2d((2, 2), 2) 62 | self.conv_5 = nn.Sequential( 63 | Conv_BN_LeakyReLU(256, 512, 3, 1), 64 | Conv_BN_LeakyReLU(512, 256, 1), 65 | Conv_BN_LeakyReLU(256, 512, 3, 1), 66 | Conv_BN_LeakyReLU(512, 256, 1), 67 | Conv_BN_LeakyReLU(256, 512, 3, 1), 68 | ) 69 | 70 | # output : stride = 32, c = 1024 71 | self.maxpool_5 = nn.MaxPool2d((2, 2), 2) 72 | self.conv_6 = nn.Sequential( 73 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 74 | Conv_BN_LeakyReLU(1024, 512, 1), 75 | Conv_BN_LeakyReLU(512, 1024, 3, 1), 76 | Conv_BN_LeakyReLU(1024, 512, 1), 77 | Conv_BN_LeakyReLU(512, 1024, 3, 1) 78 | ) 79 | 80 | 81 | def forward(self, x): 82 | c1 = self.conv_1(x) # c1 83 | c2 = self.conv_2(c1) # c2 84 | c3 
= self.conv_3(c2) # c3 85 | c3 = self.conv_4(c3) # c3 86 | c4 = self.conv_5(self.maxpool_4(c3)) # c4 87 | c5 = self.conv_6(self.maxpool_5(c4)) # c5 88 | 89 | return c5 90 | 91 | 92 | # --------------------- Fsnctions ----------------------- 93 | def build_backbone(model_name='darknet19', pretrained=False): 94 | if model_name == 'darknet19': 95 | # model 96 | model = DarkNet19() 97 | feat_dim = 1024 98 | 99 | # load weight 100 | if pretrained: 101 | print('Loading pretrained weight ...') 102 | url = model_urls['darknet19'] 103 | # checkpoint state dict 104 | checkpoint_state_dict = torch.hub.load_state_dict_from_url( 105 | url=url, map_location="cpu", check_hash=True) 106 | # model state dict 107 | model_state_dict = model.state_dict() 108 | # check 109 | for k in list(checkpoint_state_dict.keys()): 110 | if k in model_state_dict: 111 | shape_model = tuple(model_state_dict[k].shape) 112 | shape_checkpoint = tuple(checkpoint_state_dict[k].shape) 113 | if shape_model != shape_checkpoint: 114 | checkpoint_state_dict.pop(k) 115 | else: 116 | checkpoint_state_dict.pop(k) 117 | print('Unused key: ', k) 118 | 119 | model.load_state_dict(checkpoint_state_dict) 120 | 121 | return model, feat_dim 122 | 123 | 124 | if __name__ == '__main__': 125 | import time 126 | model, feat_dim = build_backbone(pretrained=True) 127 | x = torch.randn(1, 3, 224, 224) 128 | t0 = time.time() 129 | y = model(x) 130 | t1 = time.time() 131 | print('Time: ', t1 - t0) 132 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | 
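# get_norm maps 'BN' to nn.BatchNorm2d and 'GN' to a 32-group nn.GroupNorm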
convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov2_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov2/yolov2_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov2_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | def 
build_neck(cfg, in_dim, out_dim): 28 | model = cfg['neck'] 29 | print('==============================') 30 | print('Neck: {}'.format(model)) 31 | # build neck 32 | if model == 'sppf': 33 | neck = SPPF( 34 | in_dim=in_dim, 35 | out_dim=out_dim, 36 | expand_ratio=cfg['expand_ratio'], 37 | pooling_size=cfg['pooling_size'], 38 | act_type=cfg['neck_act'], 39 | norm_type=cfg['neck_norm'] 40 | ) 41 | 42 | return neck 43 | -------------------------------------------------------------------------------- /models/detectors/yolov3/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |-------------|--------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv3-Tiny | DarkNet-Tiny | 1xb16 | 640 | 25.4 | 43.4 | 7.0 | 2.3 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov3_t_coco.pth) | 6 | | YOLOv3 | DarkNet-53 | 1xb16 | 640 | 42.9 | 63.5 | 167.4 | 54.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov3_coco.pth) | 7 | 8 | - For training, we train YOLOv3 and YOLOv3-Tiny with 250 epochs on COCO. 9 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following the setting of [YOLOv5](https://github.com/ultralytics/yolov5). 10 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 11 | - For learning rate scheduler, we use linear decay scheduler. 12 | - For YOLOv3's structure, we use decoupled head, following the setting of [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX). 13 | 14 | ## Train YOLOv3 15 | ### Single GPU 16 | Taking training YOLOv3 on COCO as the example, 17 | ```Shell 18 | python train.py --cuda -d coco --root path/to/coco -m yolov3 -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 19 | ``` 20 | 21 | ### Multi GPU 22 | Taking training YOLOv3 on COCO as the example, 23 | ```Shell 24 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov3 -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 25 | ``` 26 | 27 | ## Test YOLOv3 28 | Taking testing YOLOv3 on COCO-val as the example, 29 | ```Shell 30 | python test.py --cuda -d coco --root path/to/coco -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show 31 | ``` 32 | 33 | ## Evaluate YOLOv3 34 | Taking evaluating YOLOv3 on COCO-val as the example, 35 | ```Shell 36 | python eval.py --cuda -d coco --root path/to/coco -m yolov3 --weight path/to/yolov3_coco.pth 37 | ``` 38 | 39 | ## Demo 40 | ### Detect with Image 41 | ```Shell 42 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show 43 | ``` 44 | 45 | ### Detect with Video 46 | ```Shell 47 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show --gif 48 | ``` 49 | 50 | ### Detect with Camera 51 | ```Shell 52 | python demo.py --mode camera --cuda -m yolov3 --weight path/to/yolov3_coco.pth -size 640 --show --gif 53 | ``` 54 | -------------------------------------------------------------------------------- /models/detectors/yolov3/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov3 import YOLOv3 9 | 10 | 11 | # build object detector 12 | def build_yolov3(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv3(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = 
trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolov3/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov3Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov3Matcher(num_classes, 3, cfg['anchor_size'], cfg['iou_thresh']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'][0].device 47 | fpn_strides = outputs['strides'] 48 | fmp_sizes = outputs['fmp_sizes'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_sizes=fmp_sizes, 54 | fpn_strides=fpn_strides, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = torch.cat(outputs['pred_obj'], dim=1).view(-1) # [BM,] 58 | pred_cls = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes) # [BM, C] 59 | pred_box = torch.cat(outputs['pred_box'], dim=1).view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = 
gt_objectness.view(-1).to(device).float() # [BM,] 62 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | 
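# standard (non-depthwise) path: a single full k x k convolution with groups=1;
# the conv bias is omitted (add_bias=False) whenever a norm layer follows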
convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | 79 | 80 | # BottleNeck 81 | class Bottleneck(nn.Module): 82 | def __init__(self, 83 | in_dim, 84 | out_dim, 85 | expand_ratio=0.5, 86 | shortcut=False, 87 | depthwise=False, 88 | act_type='silu', 89 | norm_type='BN'): 90 | super(Bottleneck, self).__init__() 91 | inter_dim = int(out_dim * expand_ratio) # hidden channels 92 | self.cv1 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 93 | self.cv2 = Conv(inter_dim, out_dim, k=3, p=1, norm_type=norm_type, act_type=act_type, depthwise=depthwise) 94 | self.shortcut = shortcut and in_dim == out_dim 95 | 96 | def forward(self, x): 97 | h = self.cv2(self.cv1(x)) 98 | 99 | return x + h if self.shortcut else h 100 | 101 | 102 | # ResBlock 103 | class ResBlock(nn.Module): 104 | def __init__(self, 105 | in_dim, 106 | out_dim, 107 | nblocks=1, 108 | act_type='silu', 109 | norm_type='BN'): 110 | super(ResBlock, self).__init__() 111 | assert in_dim == out_dim 112 | self.m = nn.Sequential(*[ 113 | Bottleneck(in_dim, out_dim, expand_ratio=0.5, shortcut=True, 114 | norm_type=norm_type, act_type=act_type) 115 | for _ in range(nblocks) 116 | ]) 117 | 118 | def forward(self, x): 119 | return self.m(x) 120 | 121 | 122 | # ConvBlocks 123 | class ConvBlocks(nn.Module): 124 | def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False): 125 | super().__init__() 126 | inter_dim = out_dim // 2 127 | self.convs = nn.Sequential( 128 | Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type), 129 | Conv(out_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 130 | Conv(inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type), 131 | Conv(out_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 132 | Conv(inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type) 133 | ) 134 | 135 | def forward(self, x): 136 | return self.convs(x) 137 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_fpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .yolov3_basic import Conv, ConvBlocks 6 | 7 | 8 | # Yolov3FPN 9 | class Yolov3FPN(nn.Module): 10 | def __init__(self, 11 | in_dims=[256, 512, 1024], 12 | width=1.0, 13 | depth=1.0, 14 | out_dim=None, 15 | act_type='silu', 16 | norm_type='BN'): 17 | super(Yolov3FPN, self).__init__() 18 | self.in_dims = in_dims 19 | self.out_dim = out_dim 20 | c3, c4, c5 = in_dims 21 | 22 | # P5 -> P4 23 | self.top_down_layer_1 = ConvBlocks(c5, int(512*width), act_type=act_type, norm_type=norm_type) 24 | self.reduce_layer_1 = Conv(int(512*width), int(256*width), k=1, act_type=act_type, norm_type=norm_type) 25 | 26 | # P4 -> P3 27 | self.top_down_layer_2 = ConvBlocks(c4 + int(256*width), int(256*width), act_type=act_type, norm_type=norm_type) 28 | self.reduce_layer_2 = Conv(int(256*width), int(128*width), k=1, act_type=act_type, norm_type=norm_type) 29 | 30 | # P3 31 | self.top_down_layer_3 = ConvBlocks(c3 + int(128*width), int(128*width), act_type=act_type, norm_type=norm_type) 32 | 33 | # output proj layers 34 | if out_dim is 
not None: 35 | # output proj layers 36 | self.out_layers = nn.ModuleList([ 37 | Conv(in_dim, out_dim, k=1, 38 | norm_type=norm_type, act_type=act_type) 39 | for in_dim in [int(128 * width), int(256 * width), int(512 * width)] 40 | ]) 41 | self.out_dim = [out_dim] * 3 42 | 43 | else: 44 | self.out_layers = None 45 | self.out_dim = [int(128 * width), int(256 * width), int(512 * width)] 46 | 47 | 48 | def forward(self, features): 49 | c3, c4, c5 = features 50 | 51 | # p5/32 52 | p5 = self.top_down_layer_1(c5) 53 | 54 | # p4/16 55 | p5_up = F.interpolate(self.reduce_layer_1(p5), scale_factor=2.0) 56 | p4 = self.top_down_layer_2(torch.cat([c4, p5_up], dim=1)) 57 | 58 | # P3/8 59 | p4_up = F.interpolate(self.reduce_layer_2(p4), scale_factor=2.0) 60 | p3 = self.top_down_layer_3(torch.cat([c3, p4_up], dim=1)) 61 | 62 | out_feats = [p3, p4, p5] 63 | 64 | # output proj layers 65 | if self.out_layers is not None: 66 | # output proj layers 67 | out_feats_proj = [] 68 | for feat, layer in zip(out_feats, self.out_layers): 69 | out_feats_proj.append(layer(feat)) 70 | return out_feats_proj 71 | 72 | return out_feats 73 | 74 | 75 | def build_fpn(cfg, in_dims, out_dim=None): 76 | model = cfg['fpn'] 77 | # build neck 78 | if model == 'yolov3_fpn': 79 | fpn_net = Yolov3FPN(in_dims=in_dims, 80 | out_dim=out_dim, 81 | width=cfg['width'], 82 | depth=cfg['depth'], 83 | act_type=cfg['fpn_act'], 84 | norm_type=cfg['fpn_norm'] 85 | ) 86 | 87 | return fpn_net 88 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov3_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, 
in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov3/yolov3_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov3_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | def build_neck(cfg, in_dim, out_dim): 28 | model = cfg['neck'] 29 | print('==============================') 30 | print('Neck: {}'.format(model)) 31 | # build neck 32 | if model == 'sppf': 33 | neck = SPPF( 34 | in_dim=in_dim, 35 | out_dim=out_dim, 36 | expand_ratio=cfg['expand_ratio'], 37 | pooling_size=cfg['pooling_size'], 38 | act_type=cfg['neck_act'], 39 | norm_type=cfg['neck_norm'] 40 | ) 41 | 42 | return neck 43 | -------------------------------------------------------------------------------- /models/detectors/yolov4/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv4: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval 0.5 | FLOPs (G) | Params
(M) | Weight | 4 | |-------------|-----------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv4-Tiny | CSPDarkNet-Tiny | 1xb16 | 640 | 31.0 | 49.1 | 8.1 | 2.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov4_t_coco.pth) | 6 | | YOLOv4 | CSPDarkNet-53 | 1xb16 | 640 | 46.6 | 65.8 | 162.7 | 61.5 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov4_coco.pth) | 7 | 8 | - For training, we train YOLOv4 and YOLOv4-Tiny with 250 epochs on COCO. 9 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following the setting of [YOLOv5](https://github.com/ultralytics/yolov5). 10 | - For optimizer, we use SGD with momentum 0.937, weight decay 0.0005 and base lr 0.01. 11 | - For learning rate scheduler, we use linear decay scheduler. 12 | - For YOLOv4's structure, we use decoupled head, following the setting of [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX). 13 | 14 | ## Train YOLOv4 15 | ### Single GPU 16 | Taking training YOLOv4 on COCO as the example, 17 | ```Shell 18 | python train.py --cuda -d coco --root path/to/coco -m yolov4 -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 19 | ``` 20 | 21 | ### Multi GPU 22 | Taking training YOLOv4 on COCO as the example, 23 | ```Shell 24 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov4 -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 25 | ``` 26 | 27 | ## Test YOLOv4 28 | Taking testing YOLOv4 on COCO-val as the example, 29 | ```Shell 30 | python test.py --cuda -d coco --root path/to/coco -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show 31 | ``` 32 | 33 | ## Evaluate YOLOv4 34 | Taking evaluating YOLOv4 on COCO-val as the example, 35 | ```Shell 36 | python eval.py --cuda -d coco --root path/to/coco -m yolov4 --weight path/to/yolov4_coco.pth 37 | ``` 38 | 39 | ## Demo 40 | ### Detect with Image 41 | ```Shell 42 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show 43 | ``` 44 | 45 | ### Detect with Video 46 | ```Shell 47 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show --gif 48 | ``` 49 | 50 | ### Detect with Camera 51 | ```Shell 52 | python demo.py --mode camera --cuda -m yolov4 --weight path/to/yolov4_coco.pth -size 640 --show --gif 53 | ``` 54 | -------------------------------------------------------------------------------- /models/detectors/yolov4/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov4 import YOLOv4 9 | 10 | 11 | # build object detector 12 | def build_yolov4(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv4(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = 
trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolov4/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov4Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov4Matcher(num_classes, 3, cfg['anchor_size'], cfg['iou_thresh']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'][0].device 47 | fpn_strides = outputs['strides'] 48 | fmp_sizes = outputs['fmp_sizes'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_sizes=fmp_sizes, 54 | fpn_strides=fpn_strides, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = torch.cat(outputs['pred_obj'], dim=1).view(-1) # [BM,] 58 | pred_cls = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes) # [BM, C] 59 | pred_box = torch.cat(outputs['pred_box'], dim=1).view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = 
gt_objectness.view(-1).to(device).float() # [BM,] 62 | gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov4/yolov4_basic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SiLU(nn.Module): 6 | """export-friendly version of nn.SiLU()""" 7 | 8 | @staticmethod 9 | def forward(x): 10 | return x * torch.sigmoid(x) 11 | 12 | 13 | def get_conv2d(c1, c2, k, p, s, d, g, bias=False): 14 | conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias) 15 | 16 | return conv 17 | 18 | 19 | def get_activation(act_type=None): 20 | if act_type == 'relu': 21 | return nn.ReLU(inplace=True) 22 | elif act_type == 'lrelu': 23 | return nn.LeakyReLU(0.1, inplace=True) 24 | elif act_type == 'mish': 25 | return nn.Mish(inplace=True) 26 | elif act_type == 'silu': 27 | return nn.SiLU(inplace=True) 28 | 29 | 30 | def get_norm(norm_type, dim): 31 | if norm_type == 'BN': 32 | return nn.BatchNorm2d(dim) 33 | elif norm_type == 'GN': 34 | return nn.GroupNorm(num_groups=32, num_channels=dim) 35 | 36 | 37 | # Basic conv layer 38 | class Conv(nn.Module): 39 | def __init__(self, 40 | c1, # in channels 41 | c2, # out channels 42 | k=1, # kernel size 43 | p=0, # padding 44 | s=1, # padding 45 | d=1, # dilation 46 | act_type='lrelu', # activation 47 | norm_type='BN', # normalization 48 | depthwise=False): 49 | super(Conv, self).__init__() 50 | convs = [] 51 | add_bias = False if norm_type else True 52 | if depthwise: 53 | convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias)) 54 | # depthwise conv 55 | if norm_type: 56 | convs.append(get_norm(norm_type, c1)) 57 | if act_type: 58 | convs.append(get_activation(act_type)) 59 | # pointwise conv 60 | convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias)) 61 | if norm_type: 62 | convs.append(get_norm(norm_type, c2)) 63 | if act_type: 64 | convs.append(get_activation(act_type)) 65 | 66 | else: 67 | 
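# The plain branch below uses a single k x k Conv2d (bias is added only when no norm
# layer follows), then the optional norm and activation. The depthwise branch above
# factorizes the same convolution into a k x k grouped conv (groups = c1) plus a
# 1 x 1 pointwise conv, each with its own norm/activation, to cut FLOPs and parameters.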
convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias)) 68 | if norm_type: 69 | convs.append(get_norm(norm_type, c2)) 70 | if act_type: 71 | convs.append(get_activation(act_type)) 72 | 73 | self.convs = nn.Sequential(*convs) 74 | 75 | 76 | def forward(self, x): 77 | return self.convs(x) 78 | 79 | 80 | # BottleNeck 81 | class Bottleneck(nn.Module): 82 | def __init__(self, 83 | in_dim, 84 | out_dim, 85 | expand_ratio=0.5, 86 | shortcut=False, 87 | depthwise=False, 88 | act_type='silu', 89 | norm_type='BN'): 90 | super(Bottleneck, self).__init__() 91 | inter_dim = int(out_dim * expand_ratio) # hidden channels 92 | self.cv1 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 93 | self.cv2 = Conv(inter_dim, out_dim, k=3, p=1, norm_type=norm_type, act_type=act_type, depthwise=depthwise) 94 | self.shortcut = shortcut and in_dim == out_dim 95 | 96 | def forward(self, x): 97 | h = self.cv2(self.cv1(x)) 98 | 99 | return x + h if self.shortcut else h 100 | 101 | 102 | # CSP-stage block 103 | class CSPBlock(nn.Module): 104 | def __init__(self, 105 | in_dim, 106 | out_dim, 107 | expand_ratio=0.5, 108 | nblocks=1, 109 | shortcut=False, 110 | depthwise=False, 111 | act_type='silu', 112 | norm_type='BN'): 113 | super(CSPBlock, self).__init__() 114 | inter_dim = int(out_dim * expand_ratio) 115 | self.cv1 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 116 | self.cv2 = Conv(in_dim, inter_dim, k=1, norm_type=norm_type, act_type=act_type) 117 | self.cv3 = Conv(2 * inter_dim, out_dim, k=1, norm_type=norm_type, act_type=act_type) 118 | self.m = nn.Sequential(*[ 119 | Bottleneck(inter_dim, inter_dim, expand_ratio=1.0, shortcut=shortcut, 120 | norm_type=norm_type, act_type=act_type, depthwise=depthwise) 121 | for _ in range(nblocks) 122 | ]) 123 | 124 | def forward(self, x): 125 | x1 = self.cv1(x) 126 | x2 = self.cv2(x) 127 | x3 = self.m(x1) 128 | out = self.cv3(torch.cat([x3, x2], dim=1)) 129 | 130 | return out 131 | -------------------------------------------------------------------------------- /models/detectors/yolov4/yolov4_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov4_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | 
Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov4/yolov4_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov4_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | # SPPF block with CSP module 28 | class SPPFBlockCSP(nn.Module): 29 | """ 30 | CSP Spatial Pyramid Pooling Block 31 | """ 32 | def __init__(self, 33 | in_dim, 34 | out_dim, 35 | expand_ratio=0.5, 36 | pooling_size=5, 37 | act_type='lrelu', 38 | norm_type='BN', 39 | depthwise=False 40 | ): 41 | super(SPPFBlockCSP, self).__init__() 42 | inter_dim = int(in_dim * expand_ratio) 43 | self.out_dim = out_dim 44 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 45 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.m = nn.Sequential( 47 | Conv(inter_dim, inter_dim, k=3, p=1, 48 | act_type=act_type, norm_type=norm_type, 49 | depthwise=depthwise), 50 | SPPF(inter_dim, 51 | inter_dim, 52 | expand_ratio=1.0, 53 | pooling_size=pooling_size, 54 | act_type=act_type, 55 | norm_type=norm_type), 56 | Conv(inter_dim, inter_dim, k=3, p=1, 57 | act_type=act_type, norm_type=norm_type, 58 | depthwise=depthwise) 59 | ) 60 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 61 | 62 | 63 | def forward(self, x): 64 | x1 = self.cv1(x) 65 | x2 = self.cv2(x) 66 | x3 = self.m(x2) 67 | y = self.cv3(torch.cat([x1, x3], dim=1)) 68 | 69 | return y 70 | 71 | 72 | def build_neck(cfg, in_dim, out_dim): 73 | model = cfg['neck'] 74 | print('==============================') 75 | print('Neck: {}'.format(model)) 76 | # build neck 77 | if model == 'sppf': 78 | neck = SPPF( 79 | in_dim=in_dim, 80 | out_dim=out_dim, 81 | expand_ratio=cfg['expand_ratio'], 82 | pooling_size=cfg['pooling_size'], 83 | act_type=cfg['neck_act'], 84 | norm_type=cfg['neck_norm'] 85 | ) 86 | elif model == 'csp_sppf': 87 | neck = SPPFBlockCSP( 88 | in_dim=in_dim, 89 | out_dim=out_dim, 90 | 
expand_ratio=cfg['expand_ratio'], 91 | pooling_size=cfg['pooling_size'], 92 | act_type=cfg['neck_act'], 93 | norm_type=cfg['neck_norm'], 94 | depthwise=cfg['neck_depthwise'] 95 | ) 96 | 97 | return neck 98 | -------------------------------------------------------------------------------- /models/detectors/yolov5/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv5: 2 | 3 | | Model | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |-----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv5-N | 8xb16 | 640 | | | | | | 6 | | YOLOv5-S | 8xb16 | 640 | 39.2 | 57.9 | 27.3 | 9.0 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov5_s_coco_adamw.pth) | 7 | | YOLOv5-M | 8xb16 | 640 | | | | | | 8 | | YOLOv5-L | 8xb16 | 640 | | | | | | 9 | | YOLOv5-X | 8xb16 | 640 | | | | | | 10 | 11 | - For training, we train YOLOv5 series with 300 epochs on COCO. 12 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following the setting of [YOLOv5](https://github.com/ultralytics/yolov5). 13 | - For optimizer, we use AdamW with weight decay 0.05 and base per image lr 0.001 / 64. We are not good at using SGD. 14 | - For learning rate scheduler, we use linear decay scheduler. 15 | - We use decoupled head in our reproduced YOLOv5, which is different from the official YOLOv5'head. 16 | 17 | 18 | ## Train YOLOv5 19 | ### Single GPU 20 | Taking training YOLOv5-S on COCO as the example, 21 | ```Shell 22 | python train.py --cuda -d coco --root path/to/coco -m yolov5_s -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 23 | ``` 24 | 25 | ### Multi GPU 26 | Taking training YOLOv5 on COCO as the example, 27 | ```Shell 28 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov5_s -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 29 | ``` 30 | 31 | ## Test YOLOv5 32 | Taking testing YOLOv5 on COCO-val as the example, 33 | ```Shell 34 | python test.py --cuda -d coco --root path/to/coco -m yolov5_s --weight path/to/yolov5_coco.pth -size 640 --show 35 | ``` 36 | 37 | ## Evaluate YOLOv5 38 | Taking evaluating YOLOv5 on COCO-val as the example, 39 | ```Shell 40 | python eval.py --cuda -d coco --root path/to/coco -m yolov5_s --weight path/to/yolov5_coco.pth 41 | ``` 42 | 43 | ## Demo 44 | ### Detect with Image 45 | ```Shell 46 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov5_s --weight path/to/yolov5_coco.pth -size 640 --show 47 | ``` 48 | 49 | ### Detect with Video 50 | ```Shell 51 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov5_s --weight path/to/yolov5_coco.pth -size 640 --show --gif 52 | ``` 53 | 54 | ### Detect with Camera 55 | ```Shell 56 | python demo.py --mode camera --cuda -m yolov5_s --weight path/to/weight -size 640 --show --gif 57 | ``` 58 | -------------------------------------------------------------------------------- /models/detectors/yolov5/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov5 import YOLOv5 9 | 10 | 11 | # build object detector 12 | def build_yolov5(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv5(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = 
args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolov5/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from .matcher import Yolov5Matcher 4 | from utils.box_ops import get_ious 5 | from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized 6 | 7 | 8 | class Criterion(object): 9 | def __init__(self, cfg, device, num_classes=80): 10 | self.cfg = cfg 11 | self.device = device 12 | self.num_classes = num_classes 13 | # loss weight 14 | self.loss_obj_weight = cfg['loss_obj_weight'] 15 | self.loss_cls_weight = cfg['loss_cls_weight'] 16 | self.loss_box_weight = cfg['loss_box_weight'] 17 | 18 | # matcher 19 | self.matcher = Yolov5Matcher(num_classes, 3, cfg['anchor_size'], cfg['anchor_thresh']) 20 | 21 | 22 | def loss_objectness(self, pred_obj, gt_obj): 23 | loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none') 24 | 25 | return loss_obj 26 | 27 | 28 | def loss_classes(self, pred_cls, gt_label): 29 | loss_cls = F.binary_cross_entropy_with_logits(pred_cls, gt_label, reduction='none') 30 | 31 | return loss_cls 32 | 33 | 34 | def loss_bboxes(self, pred_box, gt_box): 35 | # regression loss 36 | ious = get_ious(pred_box, 37 | gt_box, 38 | box_mode="xyxy", 39 | iou_type='giou') 40 | loss_box = 1.0 - ious 41 | 42 | return loss_box, ious 43 | 44 | 45 | def __call__(self, outputs, targets, epoch=0): 46 | device = outputs['pred_cls'][0].device 47 | fpn_strides = outputs['strides'] 48 | fmp_sizes = outputs['fmp_sizes'] 49 | ( 50 | gt_objectness, 51 | gt_classes, 52 | gt_bboxes, 53 | ) = self.matcher(fmp_sizes=fmp_sizes, 54 | fpn_strides=fpn_strides, 55 | targets=targets) 56 | # List[B, M, C] -> [B, M, C] -> [BM, C] 57 | pred_obj = torch.cat(outputs['pred_obj'], dim=1).view(-1) # [BM,] 58 | pred_cls = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes) # [BM, C] 59 | pred_box = torch.cat(outputs['pred_box'], dim=1).view(-1, 4) # [BM, 4] 60 | 61 | gt_objectness = gt_objectness.view(-1).to(device).float() # [BM,] 62 | 
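# Predictions and matched targets are flattened to a single [B*M, ...] layout
# (M = total number of predictions per image across all FPN levels), so the
# positive-sample mask (gt_objectness > 0) computed below can index the
# classification and box-regression losses directly.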
gt_classes = gt_classes.view(-1, self.num_classes).to(device).float() # [BM, C] 63 | gt_bboxes = gt_bboxes.view(-1, 4).to(device).float() # [BM, 4] 64 | 65 | pos_masks = (gt_objectness > 0) 66 | num_fgs = pos_masks.sum() 67 | 68 | if is_dist_avail_and_initialized(): 69 | torch.distributed.all_reduce(num_fgs) 70 | num_fgs = (num_fgs / get_world_size()).clamp(1.0) 71 | 72 | # box loss 73 | pred_box_pos = pred_box[pos_masks] 74 | gt_bboxes_pos = gt_bboxes[pos_masks] 75 | loss_box, ious = self.loss_bboxes(pred_box_pos, gt_bboxes_pos) 76 | loss_box = loss_box.sum() / num_fgs 77 | 78 | # cls loss 79 | pred_cls_pos = pred_cls[pos_masks] 80 | gt_classes_pos = gt_classes[pos_masks] * ious.unsqueeze(-1).clamp(0.) 81 | loss_cls = self.loss_classes(pred_cls_pos, gt_classes_pos) 82 | loss_cls = loss_cls.sum() / num_fgs 83 | 84 | # obj loss 85 | loss_obj = self.loss_objectness(pred_obj, gt_objectness) 86 | loss_obj = loss_obj.sum() / num_fgs 87 | 88 | # total loss 89 | losses = self.loss_obj_weight * loss_obj + \ 90 | self.loss_cls_weight * loss_cls + \ 91 | self.loss_box_weight * loss_box 92 | 93 | loss_dict = dict( 94 | loss_obj = loss_obj, 95 | loss_cls = loss_cls, 96 | loss_box = loss_box, 97 | losses = losses 98 | ) 99 | 100 | return loss_dict 101 | 102 | 103 | def build_criterion(cfg, device, num_classes): 104 | criterion = Criterion( 105 | cfg=cfg, 106 | device=device, 107 | num_classes=num_classes 108 | ) 109 | 110 | return criterion 111 | 112 | 113 | if __name__ == "__main__": 114 | pass 115 | -------------------------------------------------------------------------------- /models/detectors/yolov5/yolov5_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov5_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | # --------- Basic Parameters ---------- 13 | self.in_dim = in_dim 14 | self.num_cls_head=cfg['num_cls_head'] 15 | self.num_reg_head=cfg['num_reg_head'] 16 | 17 | # --------- Network Parameters ---------- 18 | ## cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=cfg['head_act'], 26 | norm_type=cfg['head_norm'], 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=cfg['head_act'], 33 | norm_type=cfg['head_norm'], 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | ## reg head 37 | reg_feats = [] 38 | self.reg_out_dim = max(out_dim, 64) 39 | for i in range(cfg['num_reg_head']): 40 | if i == 0: 41 | reg_feats.append( 42 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 43 | act_type=cfg['head_act'], 44 | norm_type=cfg['head_norm'], 45 | depthwise=cfg['head_depthwise']) 46 | ) 47 | else: 48 | reg_feats.append( 49 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 50 | act_type=cfg['head_act'], 51 | norm_type=cfg['head_norm'], 52 | depthwise=cfg['head_depthwise']) 53 | ) 54 | 55 | self.cls_feats = nn.Sequential(*cls_feats) 56 | self.reg_feats = nn.Sequential(*reg_feats) 57 | 58 | 59 | def forward(self, x): 60 | """ 61 | in_feats: (Tensor) [B, C, H, W] 62 | """ 63 | cls_feats = self.cls_feats(x) 64 | reg_feats = self.reg_feats(x) 65 | 66 | return 
cls_feats, reg_feats 67 | 68 | 69 | # build detection head 70 | def build_head(cfg, in_dim, out_dim, num_classes=80): 71 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 72 | 73 | return head 74 | -------------------------------------------------------------------------------- /models/detectors/yolov5/yolov5_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov5_basic import Conv 5 | 6 | 7 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 8 | class SPPF(nn.Module): 9 | """ 10 | This code referenced to https://github.com/ultralytics/yolov5 11 | """ 12 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='silu', norm_type='BN'): 13 | super().__init__() 14 | inter_dim = int(in_dim * expand_ratio) 15 | self.out_dim = out_dim 16 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 18 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 19 | 20 | def forward(self, x): 21 | x = self.cv1(x) 22 | y1 = self.m(x) 23 | y2 = self.m(y1) 24 | 25 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 26 | 27 | 28 | # SPPF block with CSP module 29 | class SPPFBlockCSP(nn.Module): 30 | """ 31 | CSP Spatial Pyramid Pooling Block 32 | """ 33 | def __init__(self, 34 | in_dim, 35 | out_dim, 36 | expand_ratio=0.5, 37 | pooling_size=5, 38 | act_type='silu', 39 | norm_type='BN', 40 | depthwise=False 41 | ): 42 | super(SPPFBlockCSP, self).__init__() 43 | inter_dim = int(in_dim * expand_ratio) 44 | self.out_dim = out_dim 45 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 47 | self.m = nn.Sequential( 48 | Conv(inter_dim, inter_dim, k=3, p=1, 49 | act_type=act_type, norm_type=norm_type, 50 | depthwise=depthwise), 51 | SPPF(inter_dim, 52 | inter_dim, 53 | expand_ratio=1.0, 54 | pooling_size=pooling_size, 55 | act_type=act_type, 56 | norm_type=norm_type), 57 | Conv(inter_dim, inter_dim, k=3, p=1, 58 | act_type=act_type, norm_type=norm_type, 59 | depthwise=depthwise) 60 | ) 61 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 62 | 63 | 64 | def forward(self, x): 65 | x1 = self.cv1(x) 66 | x2 = self.cv2(x) 67 | x3 = self.m(x2) 68 | y = self.cv3(torch.cat([x1, x3], dim=1)) 69 | 70 | return y 71 | 72 | 73 | def build_neck(cfg, in_dim, out_dim): 74 | model = cfg['neck'] 75 | print('==============================') 76 | print('Neck: {}'.format(model)) 77 | # build neck 78 | if model == 'sppf': 79 | neck = SPPF( 80 | in_dim=in_dim, 81 | out_dim=out_dim, 82 | expand_ratio=cfg['expand_ratio'], 83 | pooling_size=cfg['pooling_size'], 84 | act_type=cfg['neck_act'], 85 | norm_type=cfg['neck_norm'] 86 | ) 87 | elif model == 'csp_sppf': 88 | neck = SPPFBlockCSP( 89 | in_dim=in_dim, 90 | out_dim=out_dim, 91 | expand_ratio=cfg['expand_ratio'], 92 | pooling_size=cfg['pooling_size'], 93 | act_type=cfg['neck_act'], 94 | norm_type=cfg['neck_norm'], 95 | depthwise=cfg['neck_depthwise'] 96 | ) 97 | 98 | return neck 99 | -------------------------------------------------------------------------------- /models/detectors/yolov5/yolov5_pafpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import 
torch.nn.functional as F 4 | 5 | from .yolov5_basic import (Conv, build_reduce_layer, build_downsample_layer, build_fpn_block) 6 | 7 | 8 | # YOLO-Style PaFPN 9 | class Yolov5PaFPN(nn.Module): 10 | def __init__(self, cfg, in_dims=[256, 512, 1024], out_dim=None): 11 | super(Yolov5PaFPN, self).__init__() 12 | # --------------------------- Basic Parameters --------------------------- 13 | self.in_dims = in_dims 14 | c3, c4, c5 = in_dims 15 | width = cfg['width'] 16 | 17 | # --------------------------- Network Parameters --------------------------- 18 | ## top dwon 19 | ### P5 -> P4 20 | self.reduce_layer_1 = build_reduce_layer(cfg, c5, round(512*width)) 21 | self.top_down_layer_1 = build_fpn_block(cfg, c4 + round(512*width), round(512*width)) 22 | 23 | ### P4 -> P3 24 | self.reduce_layer_2 = build_reduce_layer(cfg, round(512*width), round(256*width)) 25 | self.top_down_layer_2 = build_fpn_block(cfg, c3 + round(256*width), round(256*width)) 26 | 27 | ## bottom up 28 | ### P3 -> P4 29 | self.downsample_layer_1 = build_downsample_layer(cfg, round(256*width), round(256*width)) 30 | self.bottom_up_layer_1 = build_fpn_block(cfg, round(256*width) + round(256*width), round(512*width)) 31 | 32 | ### P4 -> P5 33 | self.downsample_layer_2 = build_downsample_layer(cfg, round(512*width), round(512*width)) 34 | self.bottom_up_layer_2 = build_fpn_block(cfg, round(512*width) + round(512*width), round(1024*width)) 35 | 36 | ## output proj layers 37 | if out_dim is not None: 38 | self.out_layers = nn.ModuleList([ 39 | Conv(in_dim, out_dim, k=1, 40 | act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm']) 41 | for in_dim in [round(256*width), round(512*width), round(1024*width)] 42 | ]) 43 | self.out_dim = [out_dim] * 3 44 | else: 45 | self.out_layers = None 46 | self.out_dim = [round(256*width), round(512*width), round(1024*width)] 47 | 48 | 49 | def forward(self, features): 50 | c3, c4, c5 = features 51 | 52 | # Top down 53 | ## P5 -> P4 54 | c6 = self.reduce_layer_1(c5) 55 | c7 = F.interpolate(c6, scale_factor=2.0) 56 | c8 = torch.cat([c7, c4], dim=1) 57 | c9 = self.top_down_layer_1(c8) 58 | ## P4 -> P3 59 | c10 = self.reduce_layer_2(c9) 60 | c11 = F.interpolate(c10, scale_factor=2.0) 61 | c12 = torch.cat([c11, c3], dim=1) 62 | c13 = self.top_down_layer_2(c12) 63 | 64 | # Bottom up 65 | ## p3 -> P4 66 | c14 = self.downsample_layer_1(c13) 67 | c15 = torch.cat([c14, c10], dim=1) 68 | c16 = self.bottom_up_layer_1(c15) 69 | ## P4 -> P5 70 | c17 = self.downsample_layer_2(c16) 71 | c18 = torch.cat([c17, c6], dim=1) 72 | c19 = self.bottom_up_layer_2(c18) 73 | 74 | out_feats = [c13, c16, c19] # [P3, P4, P5] 75 | 76 | # output proj layers 77 | if self.out_layers is not None: 78 | out_feats_proj = [] 79 | for feat, layer in zip(out_feats, self.out_layers): 80 | out_feats_proj.append(layer(feat)) 81 | return out_feats_proj 82 | 83 | return out_feats 84 | 85 | 86 | def build_fpn(cfg, in_dims, out_dim=None): 87 | model = cfg['fpn'] 88 | # build pafpn 89 | if model == 'yolov5_pafpn': 90 | fpn_net = Yolov5PaFPN(cfg, in_dims, out_dim) 91 | 92 | return fpn_net -------------------------------------------------------------------------------- /models/detectors/yolov7/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv7: 2 | 3 | | Model | Backbone | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |-------------|---------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv7-Tiny | ELANNet-Tiny | 8xb16 | 640 | 39.5 | 58.5 | 22.6 | 7.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_tiny_coco.pth) | 6 | | YOLOv7 | ELANNet-Large | 8xb16 | 640 | 49.5 | 68.8 | 144.6 | 44.0 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_coco.pth) | 7 | | YOLOv7-X | ELANNet-Huge | | 640 | | | | | | 8 | 9 | - For training, we train `YOLOv7` and `YOLOv7-Tiny` with 300 epochs on 8 GPUs. 10 | - For data augmentation, we use the [YOLOX-style](https://github.com/Megvii-BaseDetection/YOLOX) augmentation including the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation. 11 | - For optimizer, we use `AdamW` with weight decay 0.05 and per image learning rate 0.001 / 64. 12 | - For learning rate scheduler, we use Cosine decay scheduler. 13 | - For YOLOv7's structure, we replace the coupled head with the YOLOX-style decoupled head. 14 | - I think YOLOv7 uses too many training tricks, such as `anchor box`, `AuxiliaryHead`, `RepConv`, `Mosaic9x` and so on, making the picture of YOLO too complicated, which is against the development concept of the YOLO series. Otherwise, why don't we use the DETR series? It's nothing more than doing some acceleration optimization on DETR. Therefore, I was faithful to my own technical aesthetics and realized a cleaner and simpler YOLOv7, but without the blessing of so many tricks, I did not reproduce all the performance, which is a pity. 15 | - I have no more GPUs to train my `YOLOv7-X`. 16 | 17 | ## Train YOLOv7 18 | ### Single GPU 19 | Taking training YOLOv7-Tiny on COCO as the example, 20 | ```Shell 21 | python train.py --cuda -d coco --root path/to/coco -m yolov7_tiny -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 22 | ``` 23 | 24 | ### Multi GPU 25 | Taking training YOLOv7-Tiny on COCO as the example, 26 | ```Shell 27 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov7_tiny -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 28 | ``` 29 | 30 | ## Test YOLOv7 31 | Taking testing YOLOv7-Tiny on COCO-val as the example, 32 | ```Shell 33 | python test.py --cuda -d coco --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth -size 640 -vt 0.4 --show 34 | ``` 35 | 36 | ## Evaluate YOLOv7 37 | Taking evaluating YOLOv7-Tiny on COCO-val as the example, 38 | ```Shell 39 | python eval.py --cuda -d coco-val --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth 40 | ``` 41 | 42 | ## Demo 43 | ### Detect with Image 44 | ```Shell 45 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show 46 | ``` 47 | 48 | ### Detect with Video 49 | ```Shell 50 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif 51 | ``` 52 | 53 | ### Detect with Camera 54 | ```Shell 55 | python demo.py --mode camera --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif 56 | ``` 57 | -------------------------------------------------------------------------------- /models/detectors/yolov7/build.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov7 import YOLOv7 9 | 10 | 11 | # build object detector 12 | def build_yolov7(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv7(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 
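# Initialization scheme used by this builder: the obj/cls prediction biases are set to
# -log((1 - 0.01) / 0.01) ~= -4.6, so every location starts with a sigmoid score of
# roughly 0.01 (the RetinaNet-style prior that keeps early training stable), while the
# box-regression layer starts from zero weights and a bias of 1.0.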
57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(args, cfg, device, num_classes) 65 | 66 | return model, criterion 67 | -------------------------------------------------------------------------------- /models/detectors/yolov7/yolov7_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov7_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | self.in_dim = in_dim 13 | self.num_cls_head=cfg['num_cls_head'] 14 | self.num_reg_head=cfg['num_reg_head'] 15 | self.act_type=cfg['head_act'] 16 | self.norm_type=cfg['head_norm'] 17 | 18 | # cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=self.act_type, 26 | norm_type=self.norm_type, 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=self.act_type, 33 | norm_type=self.norm_type, 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | 37 | # reg head 38 | reg_feats = [] 39 | self.reg_out_dim = max(out_dim, 64) 40 | for i in range(cfg['num_reg_head']): 41 | if i == 0: 42 | reg_feats.append( 43 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 44 | act_type=self.act_type, 45 | norm_type=self.norm_type, 46 | depthwise=cfg['head_depthwise']) 47 | ) 48 | else: 49 | reg_feats.append( 50 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 51 | act_type=self.act_type, 52 | norm_type=self.norm_type, 53 | depthwise=cfg['head_depthwise']) 54 | ) 55 | 56 | self.cls_feats = nn.Sequential(*cls_feats) 57 | self.reg_feats = nn.Sequential(*reg_feats) 58 | 59 | 60 | def forward(self, x): 61 | """ 62 | in_feats: (Tensor) [B, C, H, W] 63 | """ 64 | cls_feats = self.cls_feats(x) 65 | reg_feats = self.reg_feats(x) 66 | 67 | return cls_feats, reg_feats 68 | 69 | 70 | # build detection head 71 | def build_head(cfg, in_dim, out_dim, num_classes=80): 72 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 73 | 74 | return head 75 | -------------------------------------------------------------------------------- /models/detectors/yolov7/yolov7_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .yolov7_basic import Conv 4 | 5 | 6 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 7 | class SPPF(nn.Module): 8 | """ 9 | This code referenced to https://github.com/ultralytics/yolov5 10 | """ 11 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='lrelu', norm_type='BN'): 12 | super().__init__() 13 | inter_dim = int(in_dim * expand_ratio) 14 | self.out_dim = out_dim 15 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 16 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 18 | 19 | def forward(self, x): 20 | x = self.cv1(x) 21 | y1 = self.m(x) 22 | y2 = 
self.m(y1) 23 | 24 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 25 | 26 | 27 | # SPPF block with CSP module 28 | class SPPFBlockCSP(nn.Module): 29 | """ 30 | CSP Spatial Pyramid Pooling Block 31 | """ 32 | def __init__(self, 33 | in_dim, 34 | out_dim, 35 | expand_ratio=0.5, 36 | pooling_size=5, 37 | act_type='lrelu', 38 | norm_type='BN', 39 | depthwise=False 40 | ): 41 | super(SPPFBlockCSP, self).__init__() 42 | inter_dim = int(in_dim * expand_ratio) 43 | self.out_dim = out_dim 44 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 45 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.m = nn.Sequential( 47 | Conv(inter_dim, inter_dim, k=3, p=1, 48 | act_type=act_type, norm_type=norm_type, 49 | depthwise=depthwise), 50 | SPPF(inter_dim, 51 | inter_dim, 52 | expand_ratio=1.0, 53 | pooling_size=pooling_size, 54 | act_type=act_type, 55 | norm_type=norm_type), 56 | Conv(inter_dim, inter_dim, k=3, p=1, 57 | act_type=act_type, norm_type=norm_type, 58 | depthwise=depthwise) 59 | ) 60 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 61 | 62 | 63 | def forward(self, x): 64 | x1 = self.cv1(x) 65 | x2 = self.cv2(x) 66 | x3 = self.m(x2) 67 | y = self.cv3(torch.cat([x1, x3], dim=1)) 68 | 69 | return y 70 | 71 | 72 | def build_neck(cfg, in_dim, out_dim): 73 | model = cfg['neck'] 74 | print('==============================') 75 | print('Neck: {}'.format(model)) 76 | # build neck 77 | if model == 'sppf': 78 | neck = SPPF( 79 | in_dim=in_dim, 80 | out_dim=out_dim, 81 | expand_ratio=cfg['expand_ratio'], 82 | pooling_size=cfg['pooling_size'], 83 | act_type=cfg['neck_act'], 84 | norm_type=cfg['neck_norm'] 85 | ) 86 | elif model == 'csp_sppf': 87 | neck = SPPFBlockCSP( 88 | in_dim=in_dim, 89 | out_dim=out_dim, 90 | expand_ratio=cfg['expand_ratio'], 91 | pooling_size=cfg['pooling_size'], 92 | act_type=cfg['neck_act'], 93 | norm_type=cfg['neck_norm'], 94 | depthwise=cfg['neck_depthwise'] 95 | ) 96 | 97 | return neck 98 | -------------------------------------------------------------------------------- /models/detectors/yolov8/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv8: 2 | 3 | | Model | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |-----------|--------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOv8-N | 8xb16 | 640 | 37.0 | 52.9 | 8.8 | 3.2 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_n_coco.pth) | 6 | | YOLOv8-S | 8xb16 | 640 | 43.5 | 60.4 | 28.8 | 11.2 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_s_coco.pth) | 7 | | YOLOv8-M | 8xb16 | 640 | | | | | | 8 | | YOLOv8-L | 8xb16 | 640 | 50.7 | 68.3 | 165.7 | 43.7 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_l_coco.pth) | 9 | 10 | - For training, we train YOLOv8 series with 500 epochs on COCO. 11 | - For data augmentation, we use the random affine, hsv augmentation, mosaic augmentation and mixup augmentation, following the setting of [YOLOv8](https://github.com/ultralytics/yolov8). 12 | - For optimizer, we use AdamW with weight decay 0.05 and base per image lr 0.001 / 64, which is different from the official YOLOv8. We have tried SGD, but it has weakened performance. For example, when using SGD, YOLOv8-N's AP was only 35.8%, lower than the current result (36.8 %), perhaps because some hyperparameters were not set properly. 13 | - For learning rate scheduler, we use linear decay scheduler. 14 | 15 | 16 | ## Train YOLOv8 17 | ### Single GPU 18 | Taking training YOLOv8-S on COCO as the example, 19 | ```Shell 20 | python train.py --cuda -d coco --root path/to/coco -m yolov8_s -bs 16 -size 640 --wp_epoch 3 --max_epoch 500 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 21 | ``` 22 | 23 | ### Multi GPU 24 | Taking training YOLOv8 on COCO as the example, 25 | ```Shell 26 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov8_s -bs 128 -size 640 --wp_epoch 3 --max_epoch 500 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 27 | ``` 28 | 29 | ## Test YOLOv8 30 | Taking testing YOLOv8 on COCO-val as the example, 31 | ```Shell 32 | python test.py --cuda -d coco --root path/to/coco -m yolov8_s --weight path/to/yolov8.pth -size 640 -vt 0.4 --show 33 | ``` 34 | 35 | ## Evaluate YOLOv8 36 | Taking evaluating YOLOv8 on COCO-val as the example, 37 | ```Shell 38 | python eval.py --cuda -d coco-val --root path/to/coco -m yolov8_s --weight path/to/yolov8.pth 39 | ``` 40 | 41 | ## Demo 42 | ### Detect with Image 43 | ```Shell 44 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov8_s --weight path/to/weight -size 640 -vt 0.4 --show 45 | ``` 46 | 47 | ### Detect with Video 48 | ```Shell 49 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov8_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 50 | ``` 51 | 52 | ### Detect with Camera 53 | ```Shell 54 | python demo.py --mode camera --cuda -m yolov8_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 55 | ``` 56 | -------------------------------------------------------------------------------- /models/detectors/yolov8/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolov8 import YOLOv8 9 | 10 | 11 | # build object detector 12 | def build_yolov8(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | 
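# Note: unlike build_yolov4 / build_yolov5 / build_yolov7 above, this builder only
# adjusts BatchNorm eps/momentum below and does not re-initialize any prediction-layer
# biases here.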
print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOv8(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | 38 | # -------------- Build criterion -------------- 39 | criterion = None 40 | if trainable: 41 | # build criterion for training 42 | criterion = build_criterion(cfg, device, num_classes) 43 | 44 | return model, criterion 45 | -------------------------------------------------------------------------------- /models/detectors/yolov8/yolov8_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov8_basic import Conv 5 | 6 | 7 | # Single-level Head 8 | class SingleLevelHead(nn.Module): 9 | def __init__(self, 10 | in_dim :int = 256, 11 | cls_head_dim :int = 256, 12 | reg_head_dim :int = 256, 13 | num_cls_head :int = 2, 14 | num_reg_head :int = 2, 15 | act_type :str = "silu", 16 | norm_type :str = "BN", 17 | depthwise :bool = False): 18 | super().__init__() 19 | # --------- Basic Parameters ---------- 20 | self.in_dim = in_dim 21 | self.num_cls_head = num_cls_head 22 | self.num_reg_head = num_reg_head 23 | self.act_type = act_type 24 | self.norm_type = norm_type 25 | self.depthwise = depthwise 26 | 27 | # --------- Network Parameters ---------- 28 | ## cls head 29 | cls_feats = [] 30 | self.cls_head_dim = cls_head_dim 31 | for i in range(num_cls_head): 32 | if i == 0: 33 | cls_feats.append( 34 | Conv(in_dim, self.cls_head_dim, k=3, p=1, s=1, 35 | act_type=act_type, 36 | norm_type=norm_type, 37 | depthwise=depthwise) 38 | ) 39 | else: 40 | cls_feats.append( 41 | Conv(self.cls_head_dim, self.cls_head_dim, k=3, p=1, s=1, 42 | act_type=act_type, 43 | norm_type=norm_type, 44 | depthwise=depthwise) 45 | ) 46 | ## reg head 47 | reg_feats = [] 48 | self.reg_head_dim = reg_head_dim 49 | for i in range(num_reg_head): 50 | if i == 0: 51 | reg_feats.append( 52 | Conv(in_dim, self.reg_head_dim, k=3, p=1, s=1, 53 | act_type=act_type, 54 | norm_type=norm_type, 55 | depthwise=depthwise) 56 | ) 57 | else: 58 | reg_feats.append( 59 | Conv(self.reg_head_dim, self.reg_head_dim, k=3, p=1, s=1, 60 | act_type=act_type, 61 | norm_type=norm_type, 62 | depthwise=depthwise) 63 | ) 64 | self.cls_feats = nn.Sequential(*cls_feats) 65 | self.reg_feats = nn.Sequential(*reg_feats) 66 | 67 | self.init_weights() 68 | 69 | def init_weights(self): 70 | """Initialize the parameters.""" 71 | for m in self.modules(): 72 | if isinstance(m, torch.nn.Conv2d): 73 | # In order to be consistent with the source code, 74 | # reset the Conv2d initialization parameters 75 | m.reset_parameters() 76 | 77 | def forward(self, x): 78 | """ 79 | in_feats: (Tensor) [B, C, H, W] 80 | """ 81 | cls_feats = self.cls_feats(x) 82 | reg_feats = self.reg_feats(x) 83 | 84 | return cls_feats, reg_feats 85 | 86 | # Multi-level Head 87 | class MultiLevelHead(nn.Module): 88 | def __init__(self, cfg, in_dims, num_levels=3, num_classes=80, reg_max=16): 89 | super().__init__() 90 | ## 
----------- Network Parameters ----------- 91 | self.multi_level_heads = nn.ModuleList( 92 | [SingleLevelHead(in_dim = in_dims[level], 93 | cls_head_dim = max(in_dims[0], min(num_classes, 100)), 94 | reg_head_dim = max(in_dims[0]//4, 16, 4*reg_max), 95 | num_cls_head = cfg['num_cls_head'], 96 | num_reg_head = cfg['num_reg_head'], 97 | act_type = cfg['head_act'], 98 | norm_type = cfg['head_norm'], 99 | depthwise = cfg['head_depthwise']) 100 | for level in range(num_levels) 101 | ]) 102 | # --------- Basic Parameters ---------- 103 | self.in_dims = in_dims 104 | self.cls_head_dim = self.multi_level_heads[0].cls_head_dim 105 | self.reg_head_dim = self.multi_level_heads[0].reg_head_dim 106 | 107 | 108 | def forward(self, feats): 109 | """ 110 | feats: List[(Tensor)] [[B, C, H, W], ...] 111 | """ 112 | cls_feats = [] 113 | reg_feats = [] 114 | for feat, head in zip(feats, self.multi_level_heads): 115 | # ---------------- Pred ---------------- 116 | cls_feat, reg_feat = head(feat) 117 | 118 | cls_feats.append(cls_feat) 119 | reg_feats.append(reg_feat) 120 | 121 | return cls_feats, reg_feats 122 | 123 | 124 | # build detection head 125 | def build_det_head(cfg, in_dims, num_levels=3, num_classes=80, reg_max=16): 126 | if cfg['head'] == 'decoupled_head': 127 | head = MultiLevelHead(cfg, in_dims, num_levels, num_classes, reg_max) 128 | 129 | return head 130 | -------------------------------------------------------------------------------- /models/detectors/yolov8/yolov8_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolov8_basic import Conv 5 | 6 | 7 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 8 | class SPPF(nn.Module): 9 | """ 10 | This code referenced to https://github.com/ultralytics/yolov5 11 | """ 12 | def __init__(self, cfg, in_dim, out_dim, expand_ratio=0.5): 13 | super().__init__() 14 | inter_dim = int(in_dim * expand_ratio) 15 | self.out_dim = out_dim 16 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 17 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 18 | self.m = nn.MaxPool2d(kernel_size=cfg['pooling_size'], stride=1, padding=cfg['pooling_size'] // 2) 19 | 20 | def forward(self, x): 21 | x = self.cv1(x) 22 | y1 = self.m(x) 23 | y2 = self.m(y1) 24 | 25 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 26 | 27 | 28 | # SPPF block with CSP module 29 | class SPPFBlockCSP(nn.Module): 30 | """ 31 | CSP Spatial Pyramid Pooling Block 32 | """ 33 | def __init__(self, cfg, in_dim, out_dim, expand_ratio): 34 | super(SPPFBlockCSP, self).__init__() 35 | inter_dim = int(in_dim * expand_ratio) 36 | self.out_dim = out_dim 37 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 38 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 39 | self.m = nn.Sequential( 40 | Conv(inter_dim, inter_dim, k=3, p=1, 41 | act_type=cfg['neck_act'], norm_type=cfg['neck_norm'], 42 | depthwise=cfg['neck_depthwise']), 43 | SPPF(cfg, inter_dim, inter_dim, expand_ratio=1.0), 44 | Conv(inter_dim, inter_dim, k=3, p=1, 45 | act_type=cfg['neck_act'], norm_type=cfg['neck_norm'], 46 | depthwise=cfg['neck_depthwise']) 47 | ) 48 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm']) 49 | 50 | 51 | def forward(self, x): 52 | x1 = self.cv1(x) 53 | x2 = self.cv2(x) 54 
| x3 = self.m(x2) 55 | y = self.cv3(torch.cat([x1, x3], dim=1)) 56 | 57 | return y 58 | 59 | 60 | def build_neck(cfg, in_dim, out_dim): 61 | model = cfg['neck'] 62 | print('==============================') 63 | print('Neck: {}'.format(model)) 64 | # build neck 65 | if model == 'sppf': 66 | neck = SPPF(cfg, in_dim, out_dim, cfg['neck_expand_ratio']) 67 | elif model == 'csp_sppf': 68 | neck = SPPFBlockCSP(cfg, in_dim, out_dim, cfg['neck_expand_ratio']) 69 | 70 | return neck 71 | -------------------------------------------------------------------------------- /models/detectors/yolox/README.md: -------------------------------------------------------------------------------- 1 | # YOLOX: 2 | 3 | | Model | Batch | Scale | APval
0.5:0.95 | APval
0.5 | FLOPs
(G) | Params
(M) | Weight | 4 | |---------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------| 5 | | YOLOX-S | 8xb8 | 640 | 40.1 | 60.3 | 26.8 | 8.9 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolox_s_coco.pth) | 6 | | YOLOX-M | 8xb8 | 640 | 46.2 | 66.0 | 74.3 | 25.4 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolox_m_coco.pth) | 7 | | YOLOX-L | 8xb8 | 640 | 48.7 | 68.0 | 155.4 | 54.2 | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolox_l_coco.pth) | 8 | | YOLOX-X | 8xb8 | 640 | | | | | | 9 | 10 | - For training, we train YOLOX series with 300 epochs on COCO. 11 | - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation. 12 | - For optimizer, we use SGD with weight decay 0.0005 and base per image lr 0.01 / 64,. 13 | - For learning rate scheduler, we use Cosine decay scheduler. 14 | 15 | ## Train YOLOX 16 | ### Single GPU 17 | Taking training YOLOX-S on COCO as the example, 18 | ```Shell 19 | python train.py --cuda -d coco --root path/to/coco -m yolox_s -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 20 | ``` 21 | 22 | ### Multi GPU 23 | Taking training YOLOX-S on COCO as the example, 24 | ```Shell 25 | python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolox_s -bs 128 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 26 | ``` 27 | 28 | ## Test YOLOX 29 | Taking testing YOLOX-S on COCO-val as the example, 30 | ```Shell 31 | python test.py --cuda -d coco --root path/to/coco -m yolox_s --weight path/to/yolox_s.pth -size 640 -vt 0.4 --show 32 | ``` 33 | 34 | ## Evaluate YOLOX 35 | Taking evaluating YOLOX-S on COCO-val as the example, 36 | ```Shell 37 | python eval.py --cuda -d coco-val --root path/to/coco -m yolox_s --weight path/to/yolox_s.pth 38 | ``` 39 | 40 | ## Demo 41 | ### Detect with Image 42 | ```Shell 43 | python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolox_s --weight path/to/weight -size 640 -vt 0.4 --show 44 | ``` 45 | 46 | ### Detect with Video 47 | ```Shell 48 | python demo.py --mode video --path_to_vid path/to/video --cuda -m yolox_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 49 | ``` 50 | 51 | ### Detect with Camera 52 | ```Shell 53 | python demo.py --mode camera --cuda -m yolox_s --weight path/to/weight -size 640 -vt 0.4 --show --gif 54 | ``` -------------------------------------------------------------------------------- /models/detectors/yolox/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from .loss import build_criterion 8 | from .yolox import YOLOX 9 | 10 | 11 | # build object detector 12 | def build_yolox(args, cfg, device, num_classes=80, trainable=False, deploy=False): 13 | print('==============================') 14 | print('Build {} ...'.format(args.model.upper())) 15 | 16 | print('==============================') 17 | print('Model Configuration: \n', cfg) 18 | 19 | # -------------- Build YOLO -------------- 20 | model = YOLOX(cfg = cfg, 21 | device = device, 22 | num_classes = num_classes, 23 | trainable = trainable, 24 | conf_thresh = args.conf_thresh, 25 | nms_thresh = 
args.nms_thresh, 26 | topk = args.topk, 27 | deploy = deploy, 28 | no_multi_labels = args.no_multi_labels, 29 | nms_class_agnostic = args.nms_class_agnostic 30 | ) 31 | 32 | # -------------- Initialize YOLO -------------- 33 | for m in model.modules(): 34 | if isinstance(m, nn.BatchNorm2d): 35 | m.eps = 1e-3 36 | m.momentum = 0.03 37 | # Init bias 38 | init_prob = 0.01 39 | bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob)) 40 | # obj pred 41 | for obj_pred in model.obj_preds: 42 | b = obj_pred.bias.view(1, -1) 43 | b.data.fill_(bias_value.item()) 44 | obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 45 | # cls pred 46 | for cls_pred in model.cls_preds: 47 | b = cls_pred.bias.view(1, -1) 48 | b.data.fill_(bias_value.item()) 49 | cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 50 | # reg pred 51 | for reg_pred in model.reg_preds: 52 | b = reg_pred.bias.view(-1, ) 53 | b.data.fill_(1.0) 54 | reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 55 | w = reg_pred.weight 56 | w.data.fill_(0.) 57 | reg_pred.weight = torch.nn.Parameter(w, requires_grad=True) 58 | 59 | 60 | # -------------- Build criterion -------------- 61 | criterion = None 62 | if trainable: 63 | # build criterion for training 64 | criterion = build_criterion(args, cfg, device, num_classes) 65 | return model, criterion 66 | -------------------------------------------------------------------------------- /models/detectors/yolox/yolox_backbone.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | try: 5 | from .yolox_basic import Conv, CSPBlock 6 | from .yolox_neck import SPPF 7 | except: 8 | from yolox_basic import Conv, CSPBlock 9 | from yolox_neck import SPPF 10 | 11 | 12 | # CSPDarkNet 13 | class CSPDarkNet(nn.Module): 14 | def __init__(self, depth=1.0, width=1.0, act_type='silu', norm_type='BN', depthwise=False): 15 | super(CSPDarkNet, self).__init__() 16 | self.feat_dims = [round(64 * width), round(128 * width), round(256 * width), round(512 * width), round(1024 * width)] 17 | # P1/2 18 | self.layer_1 = Conv(3, self.feat_dims[0], k=6, p=2, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise) 19 | # P2/4 20 | self.layer_2 = nn.Sequential( 21 | Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 22 | CSPBlock(in_dim = self.feat_dims[1], 23 | out_dim = self.feat_dims[1], 24 | expand_ratio = 0.5, 25 | nblocks = round(3*depth), 26 | shortcut = True, 27 | act_type = act_type, 28 | norm_type = norm_type, 29 | depthwise = depthwise) 30 | ) 31 | # P3/8 32 | self.layer_3 = nn.Sequential( 33 | Conv(self.feat_dims[1], self.feat_dims[2], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 34 | CSPBlock(in_dim = self.feat_dims[2], 35 | out_dim = self.feat_dims[2], 36 | expand_ratio = 0.5, 37 | nblocks = round(9*depth), 38 | shortcut = True, 39 | act_type = act_type, 40 | norm_type = norm_type, 41 | depthwise = depthwise) 42 | ) 43 | # P4/16 44 | self.layer_4 = nn.Sequential( 45 | Conv(self.feat_dims[2], self.feat_dims[3], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 46 | CSPBlock(in_dim = self.feat_dims[3], 47 | out_dim = self.feat_dims[3], 48 | expand_ratio = 0.5, 49 | nblocks = round(9*depth), 50 | shortcut = True, 51 | act_type = act_type, 52 | norm_type = norm_type, 53 | depthwise = depthwise) 54 | ) 55 | # P5/32 56 | self.layer_5 = nn.Sequential( 57 
| Conv(self.feat_dims[3], self.feat_dims[4], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise), 58 | SPPF(self.feat_dims[4], self.feat_dims[4], expand_ratio=0.5), 59 | CSPBlock(in_dim = self.feat_dims[4], 60 | out_dim = self.feat_dims[4], 61 | expand_ratio = 0.5, 62 | nblocks = round(3*depth), 63 | shortcut = True, 64 | act_type = act_type, 65 | norm_type = norm_type, 66 | depthwise = depthwise) 67 | ) 68 | 69 | 70 | def forward(self, x): 71 | c1 = self.layer_1(x) 72 | c2 = self.layer_2(c1) 73 | c3 = self.layer_3(c2) 74 | c4 = self.layer_4(c3) 75 | c5 = self.layer_5(c4) 76 | 77 | outputs = [c3, c4, c5] 78 | 79 | return outputs 80 | 81 | 82 | # ---------------------------- Functions ---------------------------- 83 | ## build CSPDarkNet 84 | def build_backbone(cfg): 85 | # Build backbone 86 | backbone = CSPDarkNet(cfg['depth'], cfg['width'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw']) 87 | feat_dims = backbone.feat_dims[-3:] 88 | 89 | return backbone, feat_dims 90 | 91 | 92 | if __name__ == '__main__': 93 | import time 94 | from thop import profile 95 | cfg = { 96 | 'bk_act': 'lrelu', 97 | 'bk_norm': 'BN', 98 | 'bk_dpw': False, 99 | 'p6_feat': False, 100 | 'p7_feat': False, 101 | 'width': 1.0, 102 | 'depth': 1.0, 103 | } 104 | model, feats = build_backbone(cfg) 105 | x = torch.randn(1, 3, 640, 640) 106 | t0 = time.time() 107 | outputs = model(x) 108 | t1 = time.time() 109 | print('Time: ', t1 - t0) 110 | for out in outputs: 111 | print(out.shape) 112 | 113 | print('==============================') 114 | flops, params = profile(model, inputs=(x, ), verbose=False) 115 | print('==============================') 116 | print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2)) 117 | print('Params : {:.2f} M'.format(params / 1e6)) -------------------------------------------------------------------------------- /models/detectors/yolox/yolox_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolox_basic import Conv 5 | 6 | 7 | class DecoupledHead(nn.Module): 8 | def __init__(self, cfg, in_dim, out_dim, num_classes=80): 9 | super().__init__() 10 | print('==============================') 11 | print('Head: Decoupled Head') 12 | # --------- Basic Parameters ---------- 13 | self.in_dim = in_dim 14 | self.num_cls_head=cfg['num_cls_head'] 15 | self.num_reg_head=cfg['num_reg_head'] 16 | 17 | # --------- Network Parameters ---------- 18 | ## cls head 19 | cls_feats = [] 20 | self.cls_out_dim = max(out_dim, num_classes) 21 | for i in range(cfg['num_cls_head']): 22 | if i == 0: 23 | cls_feats.append( 24 | Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 25 | act_type=cfg['head_act'], 26 | norm_type=cfg['head_norm'], 27 | depthwise=cfg['head_depthwise']) 28 | ) 29 | else: 30 | cls_feats.append( 31 | Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 32 | act_type=cfg['head_act'], 33 | norm_type=cfg['head_norm'], 34 | depthwise=cfg['head_depthwise']) 35 | ) 36 | ## reg head 37 | reg_feats = [] 38 | self.reg_out_dim = max(out_dim, 64) 39 | for i in range(cfg['num_reg_head']): 40 | if i == 0: 41 | reg_feats.append( 42 | Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 43 | act_type=cfg['head_act'], 44 | norm_type=cfg['head_norm'], 45 | depthwise=cfg['head_depthwise']) 46 | ) 47 | else: 48 | reg_feats.append( 49 | Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 50 | act_type=cfg['head_act'], 51 | norm_type=cfg['head_norm'], 52 | depthwise=cfg['head_depthwise']) 53 | ) 54 | 55 | self.cls_feats = 
nn.Sequential(*cls_feats) 56 | self.reg_feats = nn.Sequential(*reg_feats) 57 | 58 | 59 | def forward(self, x): 60 | """ 61 | in_feats: (Tensor) [B, C, H, W] 62 | """ 63 | cls_feats = self.cls_feats(x) 64 | reg_feats = self.reg_feats(x) 65 | 66 | return cls_feats, reg_feats 67 | 68 | 69 | # build detection head 70 | def build_head(cfg, in_dim, out_dim, num_classes=80): 71 | head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 72 | 73 | return head 74 | -------------------------------------------------------------------------------- /models/detectors/yolox/yolox_neck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .yolox_basic import Conv 5 | 6 | 7 | # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher 8 | class SPPF(nn.Module): 9 | """ 10 | This code referenced to https://github.com/ultralytics/yolov5 11 | """ 12 | def __init__(self, in_dim, out_dim, expand_ratio=0.5, pooling_size=5, act_type='silu', norm_type='BN'): 13 | super().__init__() 14 | inter_dim = int(in_dim * expand_ratio) 15 | self.out_dim = out_dim 16 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 17 | self.cv2 = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type) 18 | self.m = nn.MaxPool2d(kernel_size=pooling_size, stride=1, padding=pooling_size // 2) 19 | 20 | def forward(self, x): 21 | x = self.cv1(x) 22 | y1 = self.m(x) 23 | y2 = self.m(y1) 24 | 25 | return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1)) 26 | 27 | 28 | # SPPF block with CSP module 29 | class SPPFBlockCSP(nn.Module): 30 | """ 31 | CSP Spatial Pyramid Pooling Block 32 | """ 33 | def __init__(self, 34 | in_dim, 35 | out_dim, 36 | expand_ratio=0.5, 37 | pooling_size=5, 38 | act_type='silu', 39 | norm_type='BN', 40 | depthwise=False 41 | ): 42 | super(SPPFBlockCSP, self).__init__() 43 | inter_dim = int(in_dim * expand_ratio) 44 | self.out_dim = out_dim 45 | self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 46 | self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type) 47 | self.m = nn.Sequential( 48 | Conv(inter_dim, inter_dim, k=3, p=1, 49 | act_type=act_type, norm_type=norm_type, 50 | depthwise=depthwise), 51 | SPPF(inter_dim, 52 | inter_dim, 53 | expand_ratio=1.0, 54 | pooling_size=pooling_size, 55 | act_type=act_type, 56 | norm_type=norm_type), 57 | Conv(inter_dim, inter_dim, k=3, p=1, 58 | act_type=act_type, norm_type=norm_type, 59 | depthwise=depthwise) 60 | ) 61 | self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=act_type, norm_type=norm_type) 62 | 63 | 64 | def forward(self, x): 65 | x1 = self.cv1(x) 66 | x2 = self.cv2(x) 67 | x3 = self.m(x2) 68 | y = self.cv3(torch.cat([x1, x3], dim=1)) 69 | 70 | return y 71 | 72 | 73 | def build_neck(cfg, in_dim, out_dim): 74 | model = cfg['neck'] 75 | print('==============================') 76 | print('Neck: {}'.format(model)) 77 | # build neck 78 | if model == 'sppf': 79 | neck = SPPF( 80 | in_dim=in_dim, 81 | out_dim=out_dim, 82 | expand_ratio=cfg['expand_ratio'], 83 | pooling_size=cfg['pooling_size'], 84 | act_type=cfg['neck_act'], 85 | norm_type=cfg['neck_norm'] 86 | ) 87 | elif model == 'csp_sppf': 88 | neck = SPPFBlockCSP( 89 | in_dim=in_dim, 90 | out_dim=out_dim, 91 | expand_ratio=cfg['expand_ratio'], 92 | pooling_size=cfg['pooling_size'], 93 | act_type=cfg['neck_act'], 94 | norm_type=cfg['neck_norm'], 95 | depthwise=cfg['neck_depthwise'] 96 | ) 97 | 98 | return neck 99 | 
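Below is a minimal, hypothetical usage sketch for the `build_neck` factory defined above; it is not part of the repository. It assumes the repository root is on `PYTHONPATH` so the package-style import resolves, and the cfg keys simply mirror the ones `build_neck` reads, with illustrative values rather than the project's defaults.

```python
# Hypothetical sketch: build the CSP-SPPF neck and push a dummy C5 feature map through it.
# Assumes this is run from the repository root so the package import resolves.
import torch
from models.detectors.yolox.yolox_neck import build_neck

neck_cfg = {
    'neck': 'csp_sppf',       # or 'sppf' for the plain SPPF variant
    'expand_ratio': 0.5,
    'pooling_size': 5,
    'neck_act': 'silu',
    'neck_norm': 'BN',
    'neck_depthwise': False,  # only read by the 'csp_sppf' branch
}
neck = build_neck(neck_cfg, in_dim=512, out_dim=512)

x = torch.randn(1, 512, 20, 20)   # e.g. a C5 map for a 640x640 input with width=0.5
y = neck(x)
print(y.shape)                    # torch.Size([1, 512, 20, 20]); spatial size is preserved
```

Note that the backbone above already applies the plain `SPPF` inside its last stage, while this factory lets a model config choose either variant through the `neck` key.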
-------------------------------------------------------------------------------- /models/detectors/yolox/yolox_pafpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .yolox_basic import (Conv, build_reduce_layer, build_downsample_layer, build_fpn_block) 6 | 7 | 8 | # YOLO-Style PaFPN 9 | class YoloxPaFPN(nn.Module): 10 | def __init__(self, cfg, in_dims=[256, 512, 1024], out_dim=None): 11 | super(YoloxPaFPN, self).__init__() 12 | # --------------------------- Basic Parameters --------------------------- 13 | self.in_dims = in_dims 14 | c3, c4, c5 = in_dims 15 | width = cfg['width'] 16 | 17 | # --------------------------- Network Parameters --------------------------- 18 | ## top dwon 19 | ### P5 -> P4 20 | self.reduce_layer_1 = build_reduce_layer(cfg, c5, round(512*width)) 21 | self.top_down_layer_1 = build_fpn_block(cfg, c4 + round(512*width), round(512*width)) 22 | 23 | ### P4 -> P3 24 | self.reduce_layer_2 = build_reduce_layer(cfg, round(512*width), round(256*width)) 25 | self.top_down_layer_2 = build_fpn_block(cfg, c3 + round(256*width), round(256*width)) 26 | 27 | ## bottom up 28 | ### P3 -> P4 29 | self.reduce_layer_3 = build_downsample_layer(cfg, round(256*width), round(256*width)) 30 | self.bottom_up_layer_1 = build_fpn_block(cfg, round(256*width) + round(256*width), round(512*width)) 31 | 32 | ### P4 -> P5 33 | self.reduce_layer_4 = build_downsample_layer(cfg, round(512*width), round(512*width)) 34 | self.bottom_up_layer_2 = build_fpn_block(cfg, round(512*width) + round(512*width), round(1024*width)) 35 | 36 | ## output proj layers 37 | if out_dim is not None: 38 | self.out_layers = nn.ModuleList([ 39 | Conv(in_dim, out_dim, k=1, 40 | act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm']) 41 | for in_dim in [round(256*width), round(512*width), round(1024*width)] 42 | ]) 43 | self.out_dim = [out_dim] * 3 44 | else: 45 | self.out_layers = None 46 | self.out_dim = [round(256*width), round(512*width), round(1024*width)] 47 | 48 | 49 | def forward(self, features): 50 | c3, c4, c5 = features 51 | 52 | # Top down 53 | ## P5 -> P4 54 | c6 = self.reduce_layer_1(c5) 55 | c7 = F.interpolate(c6, scale_factor=2.0) 56 | c8 = torch.cat([c7, c4], dim=1) 57 | c9 = self.top_down_layer_1(c8) 58 | ## P4 -> P3 59 | c10 = self.reduce_layer_2(c9) 60 | c11 = F.interpolate(c10, scale_factor=2.0) 61 | c12 = torch.cat([c11, c3], dim=1) 62 | c13 = self.top_down_layer_2(c12) 63 | 64 | # Bottom up 65 | ## p3 -> P4 66 | c14 = self.reduce_layer_3(c13) 67 | c15 = torch.cat([c14, c10], dim=1) 68 | c16 = self.bottom_up_layer_1(c15) 69 | ## P4 -> P5 70 | c17 = self.reduce_layer_4(c16) 71 | c18 = torch.cat([c17, c6], dim=1) 72 | c19 = self.bottom_up_layer_2(c18) 73 | 74 | out_feats = [c13, c16, c19] # [P3, P4, P5] 75 | 76 | # output proj layers 77 | if self.out_layers is not None: 78 | out_feats_proj = [] 79 | for feat, layer in zip(out_feats, self.out_layers): 80 | out_feats_proj.append(layer(feat)) 81 | return out_feats_proj 82 | 83 | return out_feats 84 | 85 | 86 | def build_fpn(cfg, in_dims, out_dim=None): 87 | model = cfg['fpn'] 88 | # build pafpn 89 | if model == 'yolox_pafpn': 90 | fpn_net = YoloxPaFPN(cfg, in_dims, out_dim) 91 | 92 | return fpn_net 93 | -------------------------------------------------------------------------------- /models/trackers/__init__.py: -------------------------------------------------------------------------------- 1 | from .byte_tracker.build import 
build_byte_tracker 2 | 3 | 4 | 5 | def build_tracker(args): 6 | if args.tracker == 'byte_tracker': 7 | return build_byte_tracker(args) 8 | else: 9 | raise NotImplementedError 10 | -------------------------------------------------------------------------------- /models/trackers/byte_tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /models/trackers/byte_tracker/build.py: -------------------------------------------------------------------------------- 1 | from .byte_tracker import ByteTracker 2 | 3 | 4 | def build_byte_tracker(args): 5 | tracker = ByteTracker( 6 | track_thresh=args.track_thresh, 7 | track_buffer=args.track_buffer, 8 | frame_rate=args.fps, 9 | match_thresh=args.match_thresh, 10 | mot20=args.mot20 11 | ) 12 | 13 | return tracker 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | 3 | torchvision 4 | 5 | opencv-python 6 | 7 | thop 8 | 9 | scipy 10 | 11 | matplotlib 12 | 13 | numpy 14 | 15 | imageio 16 | 17 | pycocotools 18 | 19 | onnxsim 20 | 21 | onnxruntime 22 | 23 | openvino 24 | 25 | loguru 26 | 27 | albumentations 28 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/tools/__init__.py -------------------------------------------------------------------------------- /tools/clean_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | 5 | if __name__ == "__main__": 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description='COCO-Dataset') 9 | 10 | # --------------- opt parameters --------------- 11 | parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/COCO/', 12 | help='data root') 13 | parser.add_argument('--image_set', type=str, default='val', 14 | help='augmentation type') 15 | parser.add_argument('--task', type=str, default='det', 16 | help='augmentation type') 17 | 18 | args = parser.parse_args() 19 | 20 | # --------------- load json --------------- 21 | if args.task == 'det': 22 | task_prefix = 'instances_{}2017.json' 23 | 
clean_task_prefix = 'instances_{}2017_clean.json' 24 | elif args.task == 'pos': 25 | task_prefix = 'person_keypoints_{}2017.json' 26 | clean_task_prefix = 'person_keypoints_{}2017_clean.json' 27 | else: 28 | raise NotImplementedError('Unkown task !') 29 | 30 | json_path = os.path.join(args.root, 'annotations', task_prefix.format(args.image_set)) 31 | 32 | clean_json_file = dict() 33 | with open(json_path, 'r') as file: 34 | json_file = json.load(file) 35 | # json_file is a Dict: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories']) 36 | clean_json_file['info'] = json_file['info'] 37 | clean_json_file['licenses'] = json_file['licenses'] 38 | clean_json_file['categories'] = json_file['categories'] 39 | 40 | images_list = json_file['images'] 41 | annots_list = json_file['annotations'] 42 | num_images = len(images_list) 43 | 44 | # -------------- Filter annotations -------------- 45 | print("Processing annotations ...") 46 | valid_image_ids = [] 47 | clean_annots_list = [] 48 | for i, anno in enumerate(annots_list): 49 | if i % 5000 == 0: 50 | print("[{}] / [{}] ...".format(i, len(annots_list))) 51 | x1, y1, bw, bh = anno['bbox'] 52 | if bw > 0 and bh > 0: 53 | clean_annots_list.append(anno) 54 | if anno['image_id'] not in valid_image_ids: 55 | valid_image_ids.append(anno['image_id']) 56 | print("Valid number of images: ", len(valid_image_ids)) 57 | print("Valid number of annots: ", len(clean_annots_list)) 58 | print("Original number of annots: ", len(annots_list)) 59 | 60 | # -------------- Filter images -------------- 61 | print("Processing images ...") 62 | clean_images_list = [] 63 | for i in range(num_images): 64 | if args.image_set == 'train' and i % 5000 == 0: 65 | print("[{}] / [{}] ...".format(i, num_images)) 66 | if args.image_set == 'val' and i % 500 == 0: 67 | print("[{}] / [{}] ...".format(i, num_images)) 68 | 69 | # A single image dict 70 | image_dict = images_list[i] 71 | image_id = image_dict['id'] 72 | 73 | if image_id in valid_image_ids: 74 | clean_images_list.append(image_dict) 75 | 76 | print('Number of images after cleaning: ', len(clean_images_list)) 77 | print('Number of annotations after cleaning: ', len(clean_annots_list)) 78 | 79 | clean_json_file['images'] = clean_images_list 80 | clean_json_file['annotations'] = clean_annots_list 81 | 82 | # --------------- Save filterd json file --------------- 83 | new_json_path = os.path.join(args.root, 'annotations', clean_task_prefix.format(args.image_set)) 84 | with open(new_json_path, 'w') as f: 85 | json.dump(clean_json_file, f) 86 | -------------------------------------------------------------------------------- /tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | import argparse 6 | 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='YOLO-Tutorial') 11 | 12 | # dataset 13 | parser.add_argument('--root', default='/mnt/share/ssd2/dataset/CrowdHuman/', 14 | help='data root') 15 | 16 | return parser.parse_args() 17 | 18 | 19 | def load_func(fpath): 20 | print('fpath', fpath) 21 | assert os.path.exists(fpath) 22 | with open(fpath,'r') as fid: 23 | lines = fid.readlines() 24 | records =[json.loads(line.strip('\n')) for line in lines] 25 | return records 26 | 27 | if __name__ == '__main__': 28 | args = parse_args() 29 | 30 | DATA_PATH = args.root 31 | OUT_PATH = DATA_PATH + 'annotations/' 32 | SPLITS = ['val', 'train'] 33 | DEBUG = False 
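# NOTE: the conversion loop below walks each record of annotation_{split}.odgt, adds one
# COCO 'images' entry per CrowdHuman image (width/height read from the JPEG via PIL), and
# converts every ground-truth full-body box ('fbox') into a COCO 'annotations' entry with
# category_id=1 ('person'). Boxes whose 'extra' field carries ignore == 1 are kept but
# flagged with iscrowd=1 so that COCO-style evaluators treat them as ignore regions.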
34 | 35 | if not os.path.exists(OUT_PATH): 36 | os.mkdir(OUT_PATH) 37 | for split in SPLITS: 38 | data_path = DATA_PATH + split 39 | out_path = OUT_PATH + '{}.json'.format(split) 40 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 41 | ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split) 42 | anns_data = load_func(ann_path) 43 | image_cnt = 0 44 | ann_cnt = 0 45 | video_cnt = 0 46 | for ann_data in anns_data: 47 | image_cnt += 1 48 | file_path = DATA_PATH + 'CrowdHuman_{}/'.format(split) + 'Images/' + '{}.jpg'.format(ann_data['ID']) 49 | im = Image.open(file_path) 50 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 51 | 'id': image_cnt, 52 | 'height': im.size[1], 53 | 'width': im.size[0]} 54 | out['images'].append(image_info) 55 | if split != 'test': 56 | anns = ann_data['gtboxes'] 57 | for i in range(len(anns)): 58 | ann_cnt += 1 59 | fbox = anns[i]['fbox'] 60 | ann = {'id': ann_cnt, 61 | 'category_id': 1, 62 | 'image_id': image_cnt, 63 | 'track_id': -1, 64 | 'bbox_vis': anns[i]['vbox'], 65 | 'bbox': fbox, 66 | 'area': fbox[2] * fbox[3], 67 | 'iscrowd': 1 if 'extra' in anns[i] and \ 68 | 'ignore' in anns[i]['extra'] and \ 69 | anns[i]['extra']['ignore'] == 1 else 0} 70 | out['annotations'].append(ann) 71 | print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) 72 | json.dump(out, open(out_path, 'w')) -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | # Args parameters 2 | MODEL=$1 3 | DATASET=$2 4 | DATASET_ROOT=$3 5 | BATCH_SIZE=$4 6 | WORLD_SIZE=$5 7 | MASTER_PORT=$6 8 | RESUME=$7 9 | 10 | # MODEL setting 11 | IMAGE_SIZE=640 12 | FIND_UNUSED_PARAMS=False 13 | if [[ $MODEL == *"yolov8"* ]]; then 14 | # Epoch setting 15 | MAX_EPOCH=500 16 | WP_EPOCH=3 17 | EVAL_EPOCH=10 18 | NO_AUG_EPOCH=20 19 | elif [[ $MODEL == *"yolox"* ]]; then 20 | # Epoch setting 21 | MAX_EPOCH=300 22 | WP_EPOCH=3 23 | EVAL_EPOCH=10 24 | NO_AUG_EPOCH=20 25 | elif [[ $MODEL == *"yolov7"* ]]; then 26 | # Epoch setting 27 | MAX_EPOCH=300 28 | WP_EPOCH=3 29 | EVAL_EPOCH=10 30 | NO_AUG_EPOCH=20 31 | elif [[ $MODEL == *"yolov5"* ]]; then 32 | # Epoch setting 33 | MAX_EPOCH=300 34 | WP_EPOCH=3 35 | EVAL_EPOCH=10 36 | NO_AUG_EPOCH=20 37 | elif [[ $MODEL == *"yolov4"* ]]; then 38 | # Epoch setting 39 | MAX_EPOCH=300 40 | WP_EPOCH=3 41 | EVAL_EPOCH=10 42 | NO_AUG_EPOCH=20 43 | elif [[ $MODEL == *"yolov3"* ]]; then 44 | # Epoch setting 45 | MAX_EPOCH=300 46 | WP_EPOCH=3 47 | EVAL_EPOCH=10 48 | NO_AUG_EPOCH=20 49 | else 50 | # Epoch setting 51 | MAX_EPOCH=150 52 | WP_EPOCH=3 53 | EVAL_EPOCH=10 54 | NO_AUG_EPOCH=10 55 | fi 56 | 57 | # -------------------------- Train Pipeline -------------------------- 58 | if [ $WORLD_SIZE == 1 ]; then 59 | python train.py \ 60 | --cuda \ 61 | --dataset ${DATASET} \ 62 | --root ${DATASET_ROOT} \ 63 | --model ${MODEL} \ 64 | --batch_size ${BATCH_SIZE} \ 65 | --img_size ${IMAGE_SIZE} \ 66 | --wp_epoch ${WP_EPOCH} \ 67 | --max_epoch ${MAX_EPOCH} \ 68 | --eval_epoch ${EVAL_EPOCH} \ 69 | --no_aug_epoch ${NO_AUG_EPOCH} \ 70 | --resume ${RESUME} \ 71 | --ema \ 72 | --fp16 \ 73 | --find_unused_parameters ${FIND_UNUSED_PARAMS} \ 74 | --multi_scale 75 | elif [[ $WORLD_SIZE -gt 1 && $WORLD_SIZE -le 8 ]]; then 76 | python -m torch.distributed.run --nproc_per_node=${WORLD_SIZE} --master_port ${MASTER_PORT} train.py \ 77 | --cuda \ 78 | -dist \ 79 | --dataset ${DATASET} \ 80 | 
--root ${DATASET_ROOT} \ 81 | --model ${MODEL} \ 82 | --batch_size ${BATCH_SIZE} \ 83 | --img_size ${IMAGE_SIZE} \ 84 | --wp_epoch ${WP_EPOCH} \ 85 | --max_epoch ${MAX_EPOCH} \ 86 | --eval_epoch ${EVAL_EPOCH} \ 87 | --no_aug_epoch ${NO_AUG_EPOCH} \ 88 | --resume ${RESUME} \ 89 | --ema \ 90 | --fp16 \ 91 | --find_unused_parameters ${FIND_UNUSED_PARAMS} \ 92 | --multi_scale \ 93 | --sybn 94 | else 95 | echo "The WORLD_SIZE is set to a value greater than 8, indicating the use of multi-machine \ 96 | multi-card training mode, which is currently unsupported." 97 | exit 1 98 | fi -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/utils/__init__.py -------------------------------------------------------------------------------- /utils/solver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjh0410/RT-ODLab/a17c55d3fddb630ee8055e1dc6d8726e2dae5d88/utils/solver/__init__.py -------------------------------------------------------------------------------- /utils/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | 4 | 5 | # ------------------------- WarmUp LR Scheduler ------------------------- 6 | ## Warmup LR Scheduler 7 | class LinearWarmUpScheduler(object): 8 | def __init__(self, base_lr=0.01, wp_iter=500, warmup_factor=0.00066667): 9 | self.base_lr = base_lr 10 | self.wp_iter = wp_iter 11 | self.warmup_factor = warmup_factor 12 | 13 | 14 | def set_lr(self, optimizer, lr, base_lr): 15 | for param_group in optimizer.param_groups: 16 | init_lr = param_group['initial_lr'] 17 | ratio = init_lr / base_lr 18 | param_group['lr'] = lr * ratio 19 | 20 | 21 | def __call__(self, iter, optimizer): 22 | # warmup 23 | alpha = iter / self.wp_iter 24 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 25 | tmp_lr = self.base_lr * warmup_factor 26 | self.set_lr(optimizer, tmp_lr, self.base_lr) 27 | 28 | ## Build WP LR Scheduler 29 | def build_wp_lr_scheduler(cfg, base_lr=0.01): 30 | print('==============================') 31 | print('WarmUpScheduler: {}'.format(cfg['warmup'])) 32 | print('--base_lr: {}'.format(base_lr)) 33 | print('--warmup_iters: {}'.format(cfg['warmup_iters'])) 34 | print('--warmup_factor: {}'.format(cfg['warmup_factor'])) 35 | 36 | if cfg['warmup'] == 'linear': 37 | wp_lr_scheduler = LinearWarmUpScheduler(base_lr, cfg['warmup_iters'], cfg['warmup_factor']) 38 | 39 | return wp_lr_scheduler 40 | 41 | 42 | # ------------------------- LR Scheduler ------------------------- 43 | def build_lr_scheduler(cfg, optimizer, resume=None): 44 | print('==============================') 45 | print('LR Scheduler: {}'.format(cfg['lr_scheduler'])) 46 | 47 | if cfg['lr_scheduler'] == 'step': 48 | assert 'lr_epoch' in cfg 49 | print('--lr_epoch: {}'.format(cfg['lr_epoch'])) 50 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=cfg['lr_epoch']) 51 | elif cfg['lr_scheduler'] == 'cosine': 52 | pass 53 | 54 | if resume is not None and resume.lower() != "none": 55 | print('keep training: ', resume) 56 | checkpoint = torch.load(resume) 57 | # checkpoint state dict 58 | checkpoint_state_dict = checkpoint.pop("lr_scheduler") 59 | lr_scheduler.load_state_dict(checkpoint_state_dict) 60 | 61 | return 
lr_scheduler 62 | 63 | 64 | def build_lambda_lr_scheduler(cfg, optimizer, epochs): 65 | """Build learning rate scheduler from cfg file.""" 66 | print('==============================') 67 | print('Lr Scheduler: {}'.format(cfg['scheduler'])) 68 | # Cosine LR scheduler 69 | if cfg['scheduler'] == 'cosine': 70 | lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg['lrf'] - 1) + 1 71 | # Linear LR scheduler 72 | elif cfg['scheduler'] == 'linear': 73 | lf = lambda x: (1 - x / epochs) * (1.0 - cfg['lrf']) + cfg['lrf'] 74 | 75 | else: 76 | print('unknown lr scheduler.') 77 | exit(0) 78 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 79 | 80 | return scheduler, lf 81 | -------------------------------------------------------------------------------- /utils/solver/optimizer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def build_optimizer(cfg, model, resume=None): 5 | print('==============================') 6 | print('Optimizer: {}'.format(cfg['optimizer'])) 7 | print('--base lr: {}'.format(cfg['lr0'])) 8 | print('--momentum: {}'.format(cfg['momentum'])) 9 | print('--weight_decay: {}'.format(cfg['weight_decay'])) 10 | 11 | # ------------- Divide model's parameters ------------- 12 | param_dicts = [], [], [] 13 | norm_names = ["norm"] + ["norm{}".format(i) for i in range(10000)] 14 | for n, p in model.named_parameters(): 15 | if p.requires_grad: 16 | if "bias" == n.split(".")[-1]: 17 | param_dicts[0].append(p) # no weight decay for all layers' bias 18 | else: 19 | if n.split(".")[-2] in norm_names: 20 | param_dicts[1].append(p) # no weight decay for all NormLayers' weight 21 | else: 22 | param_dicts[2].append(p) # weight decay for all Non-NormLayers' weight 23 | 24 | # Build optimizer 25 | if cfg['optimizer'] == 'sgd': 26 | optimizer = torch.optim.SGD(param_dicts[0], lr=cfg['lr0'], momentum=cfg['momentum'], weight_decay=0.0) 27 | elif cfg['optimizer'] =='adamw': 28 | optimizer = torch.optim.AdamW(param_dicts[0], lr=cfg['lr0'], weight_decay=0.0) 29 | else: 30 | raise NotImplementedError("Unknown optimizer: {}".format(cfg['optimizer'])) 31 | 32 | # Add param groups 33 | optimizer.add_param_group({"params": param_dicts[1], "weight_decay": 0.0}) 34 | optimizer.add_param_group({"params": param_dicts[2], "weight_decay": cfg['weight_decay']}) 35 | 36 | start_epoch = 0 37 | if resume and resume != 'None': 38 | checkpoint = torch.load(resume) 39 | # checkpoint state dict 40 | try: 41 | checkpoint_state_dict = checkpoint.pop("optimizer") 42 | print('Load optimizer from the checkpoint: ', resume) 43 | optimizer.load_state_dict(checkpoint_state_dict) 44 | start_epoch = checkpoint.pop("epoch") + 1 45 | del checkpoint, checkpoint_state_dict 46 | except: 47 | print("No optimzier in the given checkpoint.") 48 | 49 | return optimizer, start_epoch 50 | --------------------------------------------------------------------------------