├── README.md
├── assets
│   └── architecture.png
├── models
│   ├── __init__.py
│   ├── poolformer.py
│   └── transxnet.py
├── object_detection
│   ├── README.md
│   ├── analysis_tools
│   │   ├── analyze_logs.py
│   │   ├── analyze_results.py
│   │   ├── benchmark.py
│   │   ├── coco_error_analysis.py
│   │   ├── eval_metric.py
│   │   ├── get_flops.py
│   │   ├── robustness_eval.py
│   │   └── test_robustness.py
│   ├── checkpoint.py
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   ├── coco_detection.py
│   │   │   │   └── coco_instance.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   ├── cascade_mask_rcnn_pvtv2_b2_fpn.py
│   │   │   │   ├── cascade_mask_rcnn_r50_fpn.py
│   │   │   │   ├── cascade_rcnn_r50_fpn.py
│   │   │   │   ├── fast_rcnn_r50_fpn.py
│   │   │   │   ├── faster_rcnn_r50_caffe_c4.py
│   │   │   │   ├── faster_rcnn_r50_caffe_dc5.py
│   │   │   │   ├── faster_rcnn_r50_fpn.py
│   │   │   │   ├── mask_rcnn_r50_caffe_c4.py
│   │   │   │   ├── mask_rcnn_r50_fpn.py
│   │   │   │   ├── retinanet_r50_fpn.py
│   │   │   │   ├── rpn_r50_caffe_c4.py
│   │   │   │   ├── rpn_r50_fpn.py
│   │   │   │   └── ssd300.py
│   │   │   └── schedules
│   │   │       ├── schedule_1x.py
│   │   │       ├── schedule_20e.py
│   │   │       └── schedule_2x.py
│   │   ├── mask_rcnn_transx_b_fpn_1x_coco.py
│   │   ├── mask_rcnn_transx_s_fpn_1x_coco.py
│   │   ├── mask_rcnn_transx_t_fpn_1x_coco.py
│   │   ├── retinanet_transx_b_fpn_1x_coco.py
│   │   ├── retinanet_transx_s_fpn_1x_coco.py
│   │   └── retinanet_transx_t_fpn_1x_coco.py
│   ├── dist_test.sh
│   ├── dist_train.sh
│   ├── mmcv_custom
│   │   └── runner
│   │       ├── checkpoint.py
│   │       ├── epoch_based_runner.py
│   │       └── optimizer.py
│   ├── mmdet_custom
│   │   └── apis
│   │       └── train.py
│   ├── test.py
│   ├── train.py
│   └── transxnet.py
├── scripts
│   ├── train_base.sh
│   ├── train_small.sh
│   └── train_tiny.sh
├── semantic_segmentation
│   ├── README.md
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   └── ade20k_sfpn.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   └── fpn_r50.py
│   │   │   └── schedules
│   │   │       ├── schedule_160k.py
│   │   │       ├── schedule_20k.py
│   │   │       ├── schedule_40k.py
│   │   │       └── schedule_80k.py
│   │   ├── sfpn_transxnet_base.py
│   │   ├── sfpn_transxnet_small.py
│   │   └── sfpn_transxnet_tiny.py
│   ├── mmcv_custom
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── checkpoint.cpython-38.pyc
│   │   │   ├── customized_text.cpython-38.pyc
│   │   │   ├── layer_decay_optimizer_constructor.cpython-38.pyc
│   │   │   └── resize_transform.cpython-38.pyc
│   │   ├── apex_runner
│   │   │   └── checkpoint.py
│   │   ├── checkpoint.py
│   │   ├── customized_text.py
│   │   ├── layer_decay_optimizer_constructor.py
│   │   ├── resize_transform.py
│   │   └── train_api.py
│   ├── mmseg_custom
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── align_resize.cpython-37.pyc
│   │   │   └── align_resize.cpython-38.pyc
│   │   └── align_resize.py
│   ├── scripts
│   │   ├── train_sfpn_transxnet_base.sh
│   │   ├── train_sfpn_transxnet_small.sh
│   │   └── train_sfpn_transxnet_tiny.sh
│   ├── test.py
│   ├── train.py
│   └── transxnet.py
├── train.py
└── validate.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [[TNNLS 2025] TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://doi.org/10.1109/TNNLS.2025.3550979)
2 |
3 | > This is an official PyTorch implementation of "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://doi.org/10.1109/TNNLS.2025.3550979)".
4 | >
5 | > 📝 Paper: [Journal Version](https://doi.org/10.1109/TNNLS.2025.3550979) | [arXiv Version](https://arxiv.org/abs/2310.19380)
6 |
7 |
8 | # Introduction
9 | **TransXNet** is a CNN-Transformer hybrid vision backbone that models both global and local dynamics with a Dual Dynamic Token Mixer (D-Mixer), achieving superior performance over both CNN- and Transformer-based models.
10 |
11 |
12 |
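To make the D-Mixer idea concrete, here is a toy sketch of the underlying split-mix principle: half of the channels are mixed by a convolutional (local) branch and the other half by a self-attention (global) branch, after which the two halves are concatenated. This is only an illustration of the concept, not the module from the paper — the actual D-Mixer uses input-dependent dynamic convolutions and overlapping spatial reduction attention rather than the plain depthwise convolution and multi-head attention shown here:
```
import torch
import torch.nn as nn

class ToyDMixer(nn.Module):
    """Toy illustration: half the channels get a local (conv) mixer,
    the other half a global (self-attention) mixer, then concat."""
    def __init__(self, dim, num_heads=4):
        super().__init__()
        self.local = nn.Conv2d(dim // 2, dim // 2, kernel_size=7,
                               padding=3, groups=dim // 2)  # depthwise conv
        self.attn = nn.MultiheadAttention(dim // 2, num_heads, batch_first=True)

    def forward(self, x):                       # x: (B, C, H, W)
        a, b = x.chunk(2, dim=1)                # split channels in half
        a = self.local(a)                       # local dynamics
        B, C, H, W = b.shape
        t = b.flatten(2).transpose(1, 2)        # (B, H*W, C) token sequence
        t, _ = self.attn(t, t, t)               # global dynamics
        b = t.transpose(1, 2).reshape(B, C, H, W)
        return torch.cat([a, b], dim=1)

x = torch.randn(1, 64, 14, 14)
print(ToyDMixer(64)(x).shape)                   # torch.Size([1, 64, 14, 14])
```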
13 |
14 | # Image Classification
15 |
16 | ## 1. Requirements
17 | We strongly recommend using the dependencies below to ensure reproducibility:
18 | ```
19 | # Environments:
20 | cuda==11.6
21 | python==3.8.15
22 | # Packages:
23 | mmcv==1.7.1
24 | timm==0.6.12
25 | torch==1.13.1
26 | torchvision==0.14.1
27 | ```
28 | ## 2. Data Preparation
29 | Prepare [ImageNet](https://image-net.org/) with the following folder structure; you can extract ImageNet using this [script](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4).
30 |
31 | ```
32 | │imagenet/
33 | ├──train/
34 | │  ├── n01440764
35 | │  │   ├── n01440764_10026.JPEG
36 | │  │   ├── n01440764_10027.JPEG
37 | │  │   ├── ......
38 | │  ├── ......
39 | ├──val/
40 | │  ├── n01440764
41 | │  │   ├── ILSVRC2012_val_00000293.JPEG
42 | │  │   ├── ILSVRC2012_val_00002138.JPEG
43 | │  │   ├── ......
44 | │  ├── ......
45 | ```
46 |
47 | ## 3. Main Results on ImageNet with Pretrained Models
48 |
49 | | Models | Input Size | FLOPs (G) | Params (M) | Top-1 Acc. (%) | Download |
50 | |:-----------:|:----------:|:---------:|:----------:|:----------:|:----------:|
51 | | TransXNet-T | 224x224 | 1.8 | 12.8 | 81.6 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) |
52 | | TransXNet-S | 224x224 | 4.5 | 26.9 | 83.8 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) |
53 | | TransXNet-B | 224x224 | 8.3 | 48.0 | 84.6 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) |
54 | | TransXNet-B | 384x384 | 24.2 | 48.0 | 85.5 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b-384.pth.tar) |
55 |
56 | ## 4. Train
57 | To train ```TransXNet``` models on ImageNet-1K with 8 GPUs (single node), run:
58 | ```
59 | bash scripts/train_tiny.sh   # train TransXNet-T
60 | bash scripts/train_small.sh  # train TransXNet-S
61 | bash scripts/train_base.sh   # train TransXNet-B
62 | ```
63 |
64 | ## 5. Validation
65 | To evaluate ```TransXNet``` on ImageNet-1K, run:
66 | ```
67 | MODEL=transxnet_t # transxnet_{t, s, b}
68 | python3 validate.py \
69 | /path/to/imagenet \
70 | --model $MODEL -b 128 \
71 | --pretrained # or --checkpoint /path/to/checkpoint
72 | ```
73 |
74 | # Object Detection and Semantic Segmentation
75 | > [Object Detection](object_detection)
76 | > [Semantic Segmentation](semantic_segmentation)
77 |
78 | # Citation
79 | If you find this project useful for your research, please consider citing:
80 | ```
81 | @article{lou2023transxnet,
82 |   title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition},
83 |   author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu},
84 |   journal={IEEE Transactions on Neural Networks and Learning Systems},
85 |   year={2025}
86 | }
87 | ```
88 |
89 | # Acknowledgment
90 | Our implementation is mainly based on the following codebases. We sincerely thank the authors for their wonderful work.
91 | > [poolformer](https://github.com/sail-sg/poolformer)
92 | > [mmdetection](https://github.com/open-mmlab/mmdetection)
93 | > [mmsegmentation](https://github.com/open-mmlab/mmsegmentation)
94 | > [pytorch-image-models](https://github.com/rwightman/pytorch-image-models)
95 |
96 | # Contact
97 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com.
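As a supplement to the validation command in §5, the sketch below loads a pretrained checkpoint programmatically and runs a dummy forward pass. It assumes the model names (```transxnet_t```, etc.) are registered with ```timm``` when the ```models``` package is imported, as in the poolformer codebase this repository builds on; verify the exact names in ```models/transxnet.py```:
```
import torch
import timm
import models  # noqa: F401  # importing registers transxnet_{t, s, b} with timm

model = timm.create_model('transxnet_t')              # assumed registered name
ckpt = torch.load('transx-t.pth.tar', map_location='cpu')
model.load_state_dict(ckpt.get('state_dict', ckpt))   # handle either checkpoint layout
model.eval()

with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))       # dummy ImageNet-sized input
print(logits.shape)                                   # expected: torch.Size([1, 1000])
```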
98 |
--------------------------------------------------------------------------------
/assets/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/assets/architecture.png
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .poolformer import *
2 | from .transxnet import *
--------------------------------------------------------------------------------
/object_detection/README.md:
--------------------------------------------------------------------------------
1 | # Applying TransXNet to Object Detection and Instance Segmentation
2 |
3 | For details, please refer to "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://arxiv.org/abs/2310.19380)".
4 |
5 | ## 1. Requirements
6 | ```
7 | # Environments:
8 | cuda==11.3
9 | python==3.8.15
10 | # Packages:
11 | mmcv==1.7.1
12 | mmdet==2.28.2
13 | timm==0.6.12
14 | torch==1.12.1
15 | torchvision==0.13.1
16 | ```
17 |
18 |
19 | ## 2. Data Preparation
20 |
21 | Prepare COCO 2017 according to the [guidelines](https://github.com/open-mmlab/mmdetection/blob/2.x/docs/en/1_exist_data_model.md).
22 |
23 | ## 3. Main Results on COCO with Pretrained Models
24 |
25 |
26 | | Method | Backbone | Pretrain | Lr schd | Aug | box AP | mask AP | Config | Download |
27 | |------------|----------|-------------|:-------:|:---:|:------:|:-------:|------------------------------------------------------|----------|
28 | | RetinaNet | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 1x | No | 43.1 | - | [config](configs/retinanet_transx_t_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_tiny.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_tiny.pth) |
29 | | RetinaNet | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 1x | No | 46.4 | - | [config](configs/retinanet_transx_s_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_small.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_small.pth) |
30 | | RetinaNet | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 1x | No | 47.6 | - | [config](configs/retinanet_transx_b_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/reinanet_base.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_base.pth) |
31 | | Mask R-CNN | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 1x | No | 44.5 | 40.7 | [config](configs/mask_rcnn_transx_t_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_tiny.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_tiny.pth) |
32 | | Mask R-CNN | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 1x | No | 47.7 | 43.1 | [config](configs/mask_rcnn_transx_s_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_small.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_small.pth) |
33 | | Mask R-CNN | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 1x | No | 48.8 | 43.8 | [config](configs/mask_rcnn_transx_b_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_base.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_base.pth) |
34 |
35 |
36 | ## 4. Train
37 | To train ``TransXNet-T + RetinaNet`` models on COCO train2017 with 8 GPUs (single node), run:
38 | ```
39 | bash dist_train.sh configs/retinanet_transx_t_fpn_1x_coco.py 8
40 | ```
41 | To train ``TransXNet-T + Mask R-CNN`` models on COCO train2017 with 8 GPUs (single node), run:
42 | ```
43 | bash dist_train.sh configs/mask_rcnn_transx_t_fpn_1x_coco.py 8
44 | ```
45 |
46 | ## 5. Validation
47 | To evaluate ``TransXNet-T + RetinaNet`` models on COCO val2017, run:
48 | ```
49 | bash dist_test.sh configs/retinanet_transx_t_fpn_1x_coco.py /path/to/checkpoint_file 8 --out results.pkl --eval bbox
50 | ```
51 | To evaluate ``TransXNet-T + Mask R-CNN`` models on COCO val2017, run:
52 | ```
53 | bash dist_test.sh configs/mask_rcnn_transx_t_fpn_1x_coco.py /path/to/checkpoint_file 8 --out results.pkl --eval bbox segm
54 | ```
55 |
56 | ## Citation
57 | If you find this project useful for your research, please consider citing:
58 | ```
59 | @article{lou2023transxnet,
60 |   title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition},
61 |   author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu},
62 |   journal={IEEE Transactions on Neural Networks and Learning Systems},
63 |   year={2025}
64 | }
65 | ```
66 |
67 | ## Contact
68 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com.
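As a pointer for readers writing their own configs: a TransXNet detection config composes the shared ``_base_`` files and swaps in the backbone roughly as sketched below. This is an abridged, illustrative reconstruction — the backbone type name, the ``pretrained`` field, and the FPN ``in_channels`` are placeholders to check against ``object_detection/transxnet.py`` and the actual ``configs/retinanet_transx_t_fpn_1x_coco.py``:
```
# Abridged sketch only; field values below are illustrative placeholders.
_base_ = [
    '_base_/models/retinanet_r50_fpn.py',
    '_base_/datasets/coco_detection.py',
    '_base_/schedules/schedule_1x.py',
    '_base_/default_runtime.py',
]
model = dict(
    backbone=dict(
        _delete_=True,               # drop the ResNet-50 backbone defaults
        type='transxnet_t',          # registered by object_detection/transxnet.py
        pretrained='path/to/transx-t.pth.tar'),
    neck=dict(in_channels=[48, 96, 224, 448]))  # placeholder stage widths
```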
69 | -------------------------------------------------------------------------------- /object_detection/analysis_tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import defaultdict 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn as sns 8 | 9 | 10 | def cal_train_time(log_dicts, args): 11 | for i, log_dict in enumerate(log_dicts): 12 | print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}') 13 | all_times = [] 14 | for epoch in log_dict.keys(): 15 | if args.include_outliers: 16 | all_times.append(log_dict[epoch]['time']) 17 | else: 18 | all_times.append(log_dict[epoch]['time'][1:]) 19 | all_times = np.array(all_times) 20 | epoch_ave_time = all_times.mean(-1) 21 | slowest_epoch = epoch_ave_time.argmax() 22 | fastest_epoch = epoch_ave_time.argmin() 23 | std_over_epoch = epoch_ave_time.std() 24 | print(f'slowest epoch {slowest_epoch + 1}, ' 25 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}') 26 | print(f'fastest epoch {fastest_epoch + 1}, ' 27 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}') 28 | print(f'time std over epochs is {std_over_epoch:.4f}') 29 | print(f'average iter time: {np.mean(all_times):.4f} s/iter') 30 | print() 31 | 32 | 33 | def plot_curve(log_dicts, args): 34 | if args.backend is not None: 35 | plt.switch_backend(args.backend) 36 | sns.set_style(args.style) 37 | # if legend is None, use {filename}_{key} as legend 38 | legend = args.legend 39 | if legend is None: 40 | legend = [] 41 | for json_log in args.json_logs: 42 | for metric in args.keys: 43 | legend.append(f'{json_log}_{metric}') 44 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 45 | metrics = args.keys 46 | 47 | num_metrics = len(metrics) 48 | for i, log_dict in enumerate(log_dicts): 49 | epochs = list(log_dict.keys()) 50 | for j, metric in enumerate(metrics): 51 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 52 | if metric not in log_dict[epochs[0]]: 53 | raise KeyError( 54 | f'{args.json_logs[i]} does not contain metric {metric}') 55 | 56 | if 'mAP' in metric: 57 | xs = np.arange(1, max(epochs) + 1) 58 | ys = [] 59 | for epoch in epochs: 60 | ys += log_dict[epoch][metric] 61 | ax = plt.gca() 62 | ax.set_xticks(xs) 63 | plt.xlabel('epoch') 64 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o') 65 | else: 66 | xs = [] 67 | ys = [] 68 | num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1] 69 | for epoch in epochs: 70 | iters = log_dict[epoch]['iter'] 71 | if log_dict[epoch]['mode'][-1] == 'val': 72 | iters = iters[:-1] 73 | xs.append( 74 | np.array(iters) + (epoch - 1) * num_iters_per_epoch) 75 | ys.append(np.array(log_dict[epoch][metric][:len(iters)])) 76 | xs = np.concatenate(xs) 77 | ys = np.concatenate(ys) 78 | plt.xlabel('iter') 79 | plt.plot( 80 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) 81 | plt.legend() 82 | if args.title is not None: 83 | plt.title(args.title) 84 | if args.out is None: 85 | plt.show() 86 | else: 87 | print(f'save curve to: {args.out}') 88 | plt.savefig(args.out) 89 | plt.cla() 90 | 91 | 92 | def add_plot_parser(subparsers): 93 | parser_plt = subparsers.add_parser( 94 | 'plot_curve', help='parser for plotting curves') 95 | parser_plt.add_argument( 96 | 'json_logs', 97 | type=str, 98 | nargs='+', 99 | help='path of train log in json format') 100 | parser_plt.add_argument( 101 | '--keys', 102 | type=str, 103 | nargs='+', 104 | default=['bbox_mAP'], 105 | 
help='the metric that you want to plot') 106 | parser_plt.add_argument('--title', type=str, help='title of figure') 107 | parser_plt.add_argument( 108 | '--legend', 109 | type=str, 110 | nargs='+', 111 | default=None, 112 | help='legend of each plot') 113 | parser_plt.add_argument( 114 | '--backend', type=str, default=None, help='backend of plt') 115 | parser_plt.add_argument( 116 | '--style', type=str, default='dark', help='style of plt') 117 | parser_plt.add_argument('--out', type=str, default=None) 118 | 119 | 120 | def add_time_parser(subparsers): 121 | parser_time = subparsers.add_parser( 122 | 'cal_train_time', 123 | help='parser for computing the average time per training iteration') 124 | parser_time.add_argument( 125 | 'json_logs', 126 | type=str, 127 | nargs='+', 128 | help='path of train log in json format') 129 | parser_time.add_argument( 130 | '--include-outliers', 131 | action='store_true', 132 | help='include the first value of every epoch when computing ' 133 | 'the average time') 134 | 135 | 136 | def parse_args(): 137 | parser = argparse.ArgumentParser(description='Analyze Json Log') 138 | # currently only support plot curve and calculate average train time 139 | subparsers = parser.add_subparsers(dest='task', help='task parser') 140 | add_plot_parser(subparsers) 141 | add_time_parser(subparsers) 142 | args = parser.parse_args() 143 | return args 144 | 145 | 146 | def load_json_logs(json_logs): 147 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 148 | # keys of sub dict is different metrics, e.g. memory, bbox_mAP 149 | # value of sub dict is a list of corresponding values of all iterations 150 | log_dicts = [dict() for _ in json_logs] 151 | for json_log, log_dict in zip(json_logs, log_dicts): 152 | with open(json_log, 'r') as log_file: 153 | for line in log_file: 154 | log = json.loads(line.strip()) 155 | # skip lines without `epoch` field 156 | if 'epoch' not in log: 157 | continue 158 | epoch = log.pop('epoch') 159 | if epoch not in log_dict: 160 | log_dict[epoch] = defaultdict(list) 161 | for k, v in log.items(): 162 | log_dict[epoch][k].append(v) 163 | return log_dicts 164 | 165 | 166 | def main(): 167 | args = parse_args() 168 | 169 | json_logs = args.json_logs 170 | for json_log in json_logs: 171 | assert json_log.endswith('.json') 172 | 173 | log_dicts = load_json_logs(json_logs) 174 | 175 | eval(args.task)(log_dicts, args) 176 | 177 | 178 | if __name__ == '__main__': 179 | main() 180 | -------------------------------------------------------------------------------- /object_detection/analysis_tools/analyze_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | 4 | import mmcv 5 | import numpy as np 6 | from mmcv import Config, DictAction 7 | 8 | from mmdet.core.evaluation import eval_map 9 | from mmdet.core.visualization import imshow_gt_det_bboxes 10 | from mmdet.datasets import build_dataset, get_loading_pipeline 11 | 12 | 13 | def bbox_map_eval(det_result, annotation): 14 | """Evaluate mAP of single image det result. 15 | 16 | Args: 17 | det_result (list[list]): [[cls1_det, cls2_det, ...], ...]. 18 | The outer list indicates images, and the inner list indicates 19 | per-class detected bboxes. 
20 | annotation (dict): Ground truth annotations where keys of 21 | annotations are: 22 | 23 | - bboxes: numpy array of shape (n, 4) 24 | - labels: numpy array of shape (n, ) 25 | - bboxes_ignore (optional): numpy array of shape (k, 4) 26 | - labels_ignore (optional): numpy array of shape (k, ) 27 | 28 | Returns: 29 | float: mAP 30 | """ 31 | 32 | # use only bbox det result 33 | if isinstance(det_result, tuple): 34 | bbox_det_result = [det_result[0]] 35 | else: 36 | bbox_det_result = [det_result] 37 | # mAP 38 | iou_thrs = np.linspace( 39 | .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) 40 | mean_aps = [] 41 | for thr in iou_thrs: 42 | mean_ap, _ = eval_map( 43 | bbox_det_result, [annotation], iou_thr=thr, logger='silent') 44 | mean_aps.append(mean_ap) 45 | return sum(mean_aps) / len(mean_aps) 46 | 47 | 48 | class ResultVisualizer(object): 49 | """Display and save evaluation results. 50 | 51 | Args: 52 | show (bool): Whether to show the image. Default: True 53 | wait_time (float): Value of waitKey param. Default: 0. 54 | score_thr (float): Minimum score of bboxes to be shown. 55 | Default: 0 56 | """ 57 | 58 | def __init__(self, show=False, wait_time=0, score_thr=0): 59 | self.show = show 60 | self.wait_time = wait_time 61 | self.score_thr = score_thr 62 | 63 | def _save_image_gts_results(self, dataset, results, mAPs, out_dir=None): 64 | mmcv.mkdir_or_exist(out_dir) 65 | 66 | for mAP_info in mAPs: 67 | index, mAP = mAP_info 68 | data_info = dataset.prepare_train_img(index) 69 | 70 | # calc save file path 71 | filename = data_info['filename'] 72 | if data_info['img_prefix'] is not None: 73 | filename = osp.join(data_info['img_prefix'], filename) 74 | else: 75 | filename = data_info['filename'] 76 | fname, name = osp.splitext(osp.basename(filename)) 77 | save_filename = fname + '_' + str(round(mAP, 3)) + name 78 | out_file = osp.join(out_dir, save_filename) 79 | imshow_gt_det_bboxes( 80 | data_info['img'], 81 | data_info, 82 | results[index], 83 | dataset.CLASSES, 84 | show=self.show, 85 | score_thr=self.score_thr, 86 | wait_time=self.wait_time, 87 | out_file=out_file) 88 | 89 | def evaluate_and_show(self, 90 | dataset, 91 | results, 92 | topk=20, 93 | show_dir='work_dir', 94 | eval_fn=None): 95 | """Evaluate and show results. 96 | 97 | Args: 98 | dataset (Dataset): A PyTorch dataset. 99 | results (list): Det results from test results pkl file 100 | topk (int): Number of the highest topk and 101 | lowest topk after evaluation index sorting. Default: 20 102 | show_dir (str, optional): The filename to write the image. 
103 | Default: 'work_dir' 104 | eval_fn (callable, optional): Eval function, Default: None 105 | """ 106 | 107 | assert topk > 0 108 | if (topk * 2) > len(dataset): 109 | topk = len(dataset) // 2 110 | 111 | if eval_fn is None: 112 | eval_fn = bbox_map_eval 113 | else: 114 | assert callable(eval_fn) 115 | 116 | prog_bar = mmcv.ProgressBar(len(results)) 117 | _mAPs = {} 118 | for i, (result, ) in enumerate(zip(results)): 119 | # self.dataset[i] should not call directly 120 | # because there is a risk of mismatch 121 | data_info = dataset.prepare_train_img(i) 122 | mAP = eval_fn(result, data_info['ann_info']) 123 | _mAPs[i] = mAP 124 | prog_bar.update() 125 | 126 | # descending select topk image 127 | _mAPs = list(sorted(_mAPs.items(), key=lambda kv: kv[1])) 128 | good_mAPs = _mAPs[-topk:] 129 | bad_mAPs = _mAPs[:topk] 130 | 131 | good_dir = osp.abspath(osp.join(show_dir, 'good')) 132 | bad_dir = osp.abspath(osp.join(show_dir, 'bad')) 133 | self._save_image_gts_results(dataset, results, good_mAPs, good_dir) 134 | self._save_image_gts_results(dataset, results, bad_mAPs, bad_dir) 135 | 136 | 137 | def parse_args(): 138 | parser = argparse.ArgumentParser( 139 | description='MMDet eval image prediction result for each') 140 | parser.add_argument('config', help='test config file path') 141 | parser.add_argument( 142 | 'prediction_path', help='prediction path where test pkl result') 143 | parser.add_argument( 144 | 'show_dir', help='directory where painted images will be saved') 145 | parser.add_argument('--show', action='store_true', help='show results') 146 | parser.add_argument( 147 | '--wait-time', 148 | type=float, 149 | default=0, 150 | help='the interval of show (s), 0 is block') 151 | parser.add_argument( 152 | '--topk', 153 | default=20, 154 | type=int, 155 | help='saved Number of the highest topk ' 156 | 'and lowest topk after index sorting') 157 | parser.add_argument( 158 | '--show-score-thr', 159 | type=float, 160 | default=0, 161 | help='score threshold (default: 0.)') 162 | parser.add_argument( 163 | '--cfg-options', 164 | nargs='+', 165 | action=DictAction, 166 | help='override some settings in the used config, the key-value pair ' 167 | 'in xxx=yyy format will be merged into config file. If the value to ' 168 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 169 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 170 | 'Note that the quotation marks are necessary and that no white space ' 171 | 'is allowed.') 172 | args = parser.parse_args() 173 | return args 174 | 175 | 176 | def main(): 177 | args = parse_args() 178 | 179 | mmcv.check_file_exist(args.prediction_path) 180 | 181 | cfg = Config.fromfile(args.config) 182 | if args.cfg_options is not None: 183 | cfg.merge_from_dict(args.cfg_options) 184 | cfg.data.test.test_mode = True 185 | # import modules from string list. 
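# ('custom_imports' in a config lists extra modules to import -- e.g. a file
# registering a custom backbone such as transxnet.py -- so that the mmdet
# registry can resolve types that are not built in; the import itself is done
# by mmcv.utils.import_modules_from_strings below.)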
186 |     if cfg.get('custom_imports', None):
187 |         from mmcv.utils import import_modules_from_strings
188 |         import_modules_from_strings(**cfg['custom_imports'])
189 |
190 |     cfg.data.test.pop('samples_per_gpu', 0)
191 |     cfg.data.test.pipeline = get_loading_pipeline(cfg.data.train.pipeline)
192 |     dataset = build_dataset(cfg.data.test)
193 |     outputs = mmcv.load(args.prediction_path)
194 |
195 |     result_visualizer = ResultVisualizer(args.show, args.wait_time,
196 |                                          args.show_score_thr)
197 |     result_visualizer.evaluate_and_show(
198 |         dataset, outputs, topk=args.topk, show_dir=args.show_dir)
199 |
200 |
201 | if __name__ == '__main__':
202 |     main()
203 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/benchmark.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 |
4 | import torch
5 | from mmcv import Config, DictAction
6 | from mmcv.cnn import fuse_conv_bn
7 | from mmcv.parallel import MMDataParallel
8 | from mmcv.runner import load_checkpoint, wrap_fp16_model
9 |
10 | from mmdet.datasets import (build_dataloader, build_dataset,
11 |                             replace_ImageToTensor)
12 | from mmdet.models import build_detector
13 |
14 |
15 | def parse_args():
16 |     parser = argparse.ArgumentParser(description='MMDet benchmark a model')
17 |     parser.add_argument('config', help='test config file path')
18 |     parser.add_argument('checkpoint', help='checkpoint file')
19 |     parser.add_argument(
20 |         '--log-interval', type=int, default=50, help='interval of logging')
21 |     parser.add_argument(
22 |         '--fuse-conv-bn',
23 |         action='store_true',
24 |         help='Whether to fuse conv and bn; this will slightly increase '
25 |         'the inference speed')
26 |     parser.add_argument(
27 |         '--cfg-options',
28 |         nargs='+',
29 |         action=DictAction,
30 |         help='override some settings in the used config, the key-value pair '
31 |         'in xxx=yyy format will be merged into config file. If the value to '
32 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
33 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
34 |         'Note that the quotation marks are necessary and that no white space '
35 |         'is allowed.')
36 |     args = parser.parse_args()
37 |     return args
38 |
39 |
40 | def main():
41 |     args = parse_args()
42 |
43 |     cfg = Config.fromfile(args.config)
44 |     if args.cfg_options is not None:
45 |         cfg.merge_from_dict(args.cfg_options)
46 |     # import modules from string list.
47 |     if cfg.get('custom_imports', None):
48 |         from mmcv.utils import import_modules_from_strings
49 |         import_modules_from_strings(**cfg['custom_imports'])
50 |     # set cudnn_benchmark
51 |     if cfg.get('cudnn_benchmark', False):
52 |         torch.backends.cudnn.benchmark = True
53 |     cfg.model.pretrained = None
54 |     cfg.data.test.test_mode = True
55 |
56 |     # build the dataloader
57 |     samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
58 |     if samples_per_gpu > 1:
59 |         # Replace 'ImageToTensor' with 'DefaultFormatBundle'
60 |         cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
61 |     dataset = build_dataset(cfg.data.test)
62 |     data_loader = build_dataloader(
63 |         dataset,
64 |         samples_per_gpu=1,
65 |         workers_per_gpu=cfg.data.workers_per_gpu,
66 |         dist=False,
67 |         shuffle=False)
68 |
69 |     # build the model and load checkpoint
70 |     cfg.model.train_cfg = None
71 |     model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
72 |     fp16_cfg = cfg.get('fp16', None)
73 |     if fp16_cfg is not None:
74 |         wrap_fp16_model(model)
75 |     load_checkpoint(model, args.checkpoint, map_location='cpu')
76 |     if args.fuse_conv_bn:
77 |         model = fuse_conv_bn(model)
78 |
79 |     model = MMDataParallel(model, device_ids=[0])
80 |
81 |     model.eval()
82 |
83 |     # the first several iterations may be very slow so skip them
84 |     num_warmup = 5
85 |     pure_inf_time = 0
86 |
87 |     # benchmark with 2000 images and take the average
88 |     for i, data in enumerate(data_loader):
89 |
90 |         torch.cuda.synchronize()
91 |         start_time = time.perf_counter()
92 |
93 |         with torch.no_grad():
94 |             model(return_loss=False, rescale=True, **data)
95 |
96 |         torch.cuda.synchronize()
97 |         elapsed = time.perf_counter() - start_time
98 |
99 |         if i >= num_warmup:
100 |             pure_inf_time += elapsed
101 |             if (i + 1) % args.log_interval == 0:
102 |                 fps = (i + 1 - num_warmup) / pure_inf_time
103 |                 print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')
104 |
105 |         if (i + 1) == 2000:
106 |             # elapsed is already included in pure_inf_time above
107 |             fps = (i + 1 - num_warmup) / pure_inf_time
108 |             print(f'Overall fps: {fps:.1f} img / s')
109 |             break
110 |
111 |
112 | if __name__ == '__main__':
113 |     main()
114 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/eval_metric.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import mmcv
4 | from mmcv import Config, DictAction
5 |
6 | from mmdet.datasets import build_dataset
7 |
8 |
9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='Evaluate metric of the '
11 |                                      'results saved in pkl format')
12 |     parser.add_argument('config', help='Config of the model')
13 |     parser.add_argument('pkl_results', help='Results in pickle format')
14 |     parser.add_argument(
15 |         '--format-only',
16 |         action='store_true',
17 |         help='Format the output results without performing evaluation. It is '
18 |         'useful when you want to format the result to a specific format and '
19 |         'submit it to the test server')
20 |     parser.add_argument(
21 |         '--eval',
22 |         type=str,
23 |         nargs='+',
24 |         help='Evaluation metrics, which depends on the dataset, e.g., "bbox",'
25 |         ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
26 |     parser.add_argument(
27 |         '--cfg-options',
28 |         nargs='+',
29 |         action=DictAction,
30 |         help='override some settings in the used config, the key-value pair '
31 |         'in xxx=yyy format will be merged into config file. If the value to '
32 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
33 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
34 |         'Note that the quotation marks are necessary and that no white space '
35 |         'is allowed.')
36 |     parser.add_argument(
37 |         '--eval-options',
38 |         nargs='+',
39 |         action=DictAction,
40 |         help='custom options for evaluation, the key-value pair in xxx=yyy '
41 |         'format will be kwargs for dataset.evaluate() function')
42 |     args = parser.parse_args()
43 |     return args
44 |
45 |
46 | def main():
47 |     args = parse_args()
48 |
49 |     cfg = Config.fromfile(args.config)
50 |     assert args.eval or args.format_only, (
51 |         'Please specify at least one operation (eval/format the results) with '
52 |         'the argument "--eval", "--format-only"')
53 |     if args.eval and args.format_only:
54 |         raise ValueError('--eval and --format_only cannot both be specified')
55 |
56 |     if args.cfg_options is not None:
57 |         cfg.merge_from_dict(args.cfg_options)
58 |     # import modules from string list.
59 |     if cfg.get('custom_imports', None):
60 |         from mmcv.utils import import_modules_from_strings
61 |         import_modules_from_strings(**cfg['custom_imports'])
62 |     cfg.data.test.test_mode = True
63 |
64 |     dataset = build_dataset(cfg.data.test)
65 |     outputs = mmcv.load(args.pkl_results)
66 |
67 |     kwargs = {} if args.eval_options is None else args.eval_options
68 |     if args.format_only:
69 |         dataset.format_results(outputs, **kwargs)
70 |     if args.eval:
71 |         eval_kwargs = cfg.get('evaluation', {}).copy()
72 |         # hard-coded way to remove EvalHook args
73 |         for key in [
74 |                 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
75 |                 'rule'
76 |         ]:
77 |             eval_kwargs.pop(key, None)
78 |         eval_kwargs.update(dict(metric=args.eval, **kwargs))
79 |         print(dataset.evaluate(outputs, **eval_kwargs))
80 |
81 |
82 | if __name__ == '__main__':
83 |     main()
84 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/get_flops.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import torch
4 | from mmcv import Config, DictAction
5 |
6 | from mmdet.models import build_detector
7 |
8 | try:
9 |     from mmcv.cnn import get_model_complexity_info
10 | except ImportError:
11 |     raise ImportError('Please upgrade mmcv to >0.6.2')
12 |
13 | sys.path.append(".")  # run from the object_detection/ root
14 | import transxnet  # noqa: F401  # registers the TransXNet backbone with mmdet
15 |
16 | def parse_args():
17 |     parser = argparse.ArgumentParser(description='Get the FLOPs of a detector')
18 |     parser.add_argument('config', help='train config file path')
19 |     parser.add_argument(
20 |         '--shape',
21 |         type=int,
22 |         nargs='+',
23 |         default=[1280, 800],
24 |         help='input image size')
25 |     parser.add_argument(
26 |         '--cfg-options',
27 |         nargs='+',
28 |         action=DictAction,
29 |         help='override some settings in the used config, the key-value pair '
30 |         'in xxx=yyy format will be merged into config file. If the value to '
31 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
32 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
key="[(a,b),(c,d)]" ' 33 | 'Note that the quotation marks are necessary and that no white space ' 34 | 'is allowed.') 35 | args = parser.parse_args() 36 | return args 37 | 38 | 39 | def main(): 40 | 41 | args = parse_args() 42 | 43 | if len(args.shape) == 1: 44 | input_shape = (1, 9, args.shape[0], args.shape[0]) 45 | elif len(args.shape) == 2: 46 | input_shape = (3, ) + tuple(args.shape) 47 | else: 48 | raise ValueError('invalid input shape') 49 | 50 | cfg = Config.fromfile(args.config) 51 | if args.cfg_options is not None: 52 | cfg.merge_from_dict(args.cfg_options) 53 | # import modules from string list. 54 | if cfg.get('custom_imports', None): 55 | from mmcv.utils import import_modules_from_strings 56 | import_modules_from_strings(**cfg['custom_imports']) 57 | 58 | model = build_detector( 59 | cfg.model, 60 | train_cfg=cfg.get('train_cfg'), 61 | test_cfg=cfg.get('test_cfg')) 62 | if torch.cuda.is_available(): 63 | model.cuda() 64 | model.eval() 65 | 66 | if hasattr(model, 'forward_dummy'): 67 | model.forward = model.forward_dummy 68 | else: 69 | raise NotImplementedError( 70 | 'FLOPs counter is currently not currently supported with {}'. 71 | format(model.__class__.__name__)) 72 | 73 | flops, params = get_model_complexity_info(model, input_shape) 74 | split_line = '=' * 30 75 | print(f'{split_line}\nInput shape: {input_shape}\n' 76 | f'Flops: {flops}\nParams: {params}\n{split_line}') 77 | print('!!!Please be cautious if you use the results in papers. ' 78 | 'You may need to check if all ops are supported and verify that the ' 79 | 'flops computation is correct.') 80 | 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /object_detection/analysis_tools/robustness_eval.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from argparse import ArgumentParser 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | 8 | def print_coco_results(results): 9 | 10 | def _print(result, ap=1, iouThr=None, areaRng='all', maxDets=100): 11 | titleStr = 'Average Precision' if ap == 1 else 'Average Recall' 12 | typeStr = '(AP)' if ap == 1 else '(AR)' 13 | iouStr = '0.50:0.95' \ 14 | if iouThr is None else f'{iouThr:0.2f}' 15 | iStr = f' {titleStr:<18} {typeStr} @[ IoU={iouStr:<9} | ' 16 | iStr += f'area={areaRng:>6s} | maxDets={maxDets:>3d} ] = {result:0.3f}' 17 | print(iStr) 18 | 19 | stats = np.zeros((12, )) 20 | stats[0] = _print(results[0], 1) 21 | stats[1] = _print(results[1], 1, iouThr=.5) 22 | stats[2] = _print(results[2], 1, iouThr=.75) 23 | stats[3] = _print(results[3], 1, areaRng='small') 24 | stats[4] = _print(results[4], 1, areaRng='medium') 25 | stats[5] = _print(results[5], 1, areaRng='large') 26 | stats[6] = _print(results[6], 0, maxDets=1) 27 | stats[7] = _print(results[7], 0, maxDets=10) 28 | stats[8] = _print(results[8], 0) 29 | stats[9] = _print(results[9], 0, areaRng='small') 30 | stats[10] = _print(results[10], 0, areaRng='medium') 31 | stats[11] = _print(results[11], 0, areaRng='large') 32 | 33 | 34 | def get_coco_style_results(filename, 35 | task='bbox', 36 | metric=None, 37 | prints='mPC', 38 | aggregate='benchmark'): 39 | 40 | assert aggregate in ['benchmark', 'all'] 41 | 42 | if prints == 'all': 43 | prints = ['P', 'mPC', 'rPC'] 44 | elif isinstance(prints, str): 45 | prints = [prints] 46 | for p in prints: 47 | assert p in ['P', 'mPC', 'rPC'] 48 | 49 | if metric is None: 50 | metrics = [ 51 | 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 
'AR10', 'AR100', 52 | 'ARs', 'ARm', 'ARl' 53 | ] 54 | elif isinstance(metric, list): 55 | metrics = metric 56 | else: 57 | metrics = [metric] 58 | 59 | for metric_name in metrics: 60 | assert metric_name in [ 61 | 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100', 62 | 'ARs', 'ARm', 'ARl' 63 | ] 64 | 65 | eval_output = mmcv.load(filename) 66 | 67 | num_distortions = len(list(eval_output.keys())) 68 | results = np.zeros((num_distortions, 6, len(metrics)), dtype='float32') 69 | 70 | for corr_i, distortion in enumerate(eval_output): 71 | for severity in eval_output[distortion]: 72 | for metric_j, metric_name in enumerate(metrics): 73 | mAP = eval_output[distortion][severity][task][metric_name] 74 | results[corr_i, severity, metric_j] = mAP 75 | 76 | P = results[0, 0, :] 77 | if aggregate == 'benchmark': 78 | mPC = np.mean(results[:15, 1:, :], axis=(0, 1)) 79 | else: 80 | mPC = np.mean(results[:, 1:, :], axis=(0, 1)) 81 | rPC = mPC / P 82 | 83 | print(f'\nmodel: {osp.basename(filename)}') 84 | if metric is None: 85 | if 'P' in prints: 86 | print(f'Performance on Clean Data [P] ({task})') 87 | print_coco_results(P) 88 | if 'mPC' in prints: 89 | print(f'Mean Performance under Corruption [mPC] ({task})') 90 | print_coco_results(mPC) 91 | if 'rPC' in prints: 92 | print(f'Relative Performance under Corruption [rPC] ({task})') 93 | print_coco_results(rPC) 94 | else: 95 | if 'P' in prints: 96 | print(f'Performance on Clean Data [P] ({task})') 97 | for metric_i, metric_name in enumerate(metrics): 98 | print(f'{metric_name:5} = {P[metric_i]:0.3f}') 99 | if 'mPC' in prints: 100 | print(f'Mean Performance under Corruption [mPC] ({task})') 101 | for metric_i, metric_name in enumerate(metrics): 102 | print(f'{metric_name:5} = {mPC[metric_i]:0.3f}') 103 | if 'rPC' in prints: 104 | print(f'Relative Performance under Corruption [rPC] ({task})') 105 | for metric_i, metric_name in enumerate(metrics): 106 | print(f'{metric_name:5} => {rPC[metric_i] * 100:0.1f} %') 107 | 108 | return results 109 | 110 | 111 | def get_voc_style_results(filename, prints='mPC', aggregate='benchmark'): 112 | 113 | assert aggregate in ['benchmark', 'all'] 114 | 115 | if prints == 'all': 116 | prints = ['P', 'mPC', 'rPC'] 117 | elif isinstance(prints, str): 118 | prints = [prints] 119 | for p in prints: 120 | assert p in ['P', 'mPC', 'rPC'] 121 | 122 | eval_output = mmcv.load(filename) 123 | 124 | num_distortions = len(list(eval_output.keys())) 125 | results = np.zeros((num_distortions, 6, 20), dtype='float32') 126 | 127 | for i, distortion in enumerate(eval_output): 128 | for severity in eval_output[distortion]: 129 | mAP = [ 130 | eval_output[distortion][severity][j]['ap'] 131 | for j in range(len(eval_output[distortion][severity])) 132 | ] 133 | results[i, severity, :] = mAP 134 | 135 | P = results[0, 0, :] 136 | if aggregate == 'benchmark': 137 | mPC = np.mean(results[:15, 1:, :], axis=(0, 1)) 138 | else: 139 | mPC = np.mean(results[:, 1:, :], axis=(0, 1)) 140 | rPC = mPC / P 141 | 142 | print(f'\nmodel: {osp.basename(filename)}') 143 | if 'P' in prints: 144 | print(f'Performance on Clean Data [P] in AP50 = {np.mean(P):0.3f}') 145 | if 'mPC' in prints: 146 | print('Mean Performance under Corruption [mPC] in AP50 = ' 147 | f'{np.mean(mPC):0.3f}') 148 | if 'rPC' in prints: 149 | print('Relative Performance under Corruption [rPC] in % = ' 150 | f'{np.mean(rPC) * 100:0.1f}') 151 | 152 | return np.mean(results, axis=2, keepdims=True) 153 | 154 | 155 | def get_results(filename, 156 | dataset='coco', 157 | task='bbox', 158 
| metric=None, 159 | prints='mPC', 160 | aggregate='benchmark'): 161 | assert dataset in ['coco', 'voc', 'cityscapes'] 162 | 163 | if dataset in ['coco', 'cityscapes']: 164 | results = get_coco_style_results( 165 | filename, 166 | task=task, 167 | metric=metric, 168 | prints=prints, 169 | aggregate=aggregate) 170 | elif dataset == 'voc': 171 | if task != 'bbox': 172 | print('Only bbox analysis is supported for Pascal VOC') 173 | print('Will report bbox results\n') 174 | if metric not in [None, ['AP'], ['AP50']]: 175 | print('Only the AP50 metric is supported for Pascal VOC') 176 | print('Will report AP50 metric\n') 177 | results = get_voc_style_results( 178 | filename, prints=prints, aggregate=aggregate) 179 | 180 | return results 181 | 182 | 183 | def get_distortions_from_file(filename): 184 | 185 | eval_output = mmcv.load(filename) 186 | 187 | return get_distortions_from_results(eval_output) 188 | 189 | 190 | def get_distortions_from_results(eval_output): 191 | distortions = [] 192 | for i, distortion in enumerate(eval_output): 193 | distortions.append(distortion.replace('_', ' ')) 194 | return distortions 195 | 196 | 197 | def main(): 198 | parser = ArgumentParser(description='Corruption Result Analysis') 199 | parser.add_argument('filename', help='result file path') 200 | parser.add_argument( 201 | '--dataset', 202 | type=str, 203 | choices=['coco', 'voc', 'cityscapes'], 204 | default='coco', 205 | help='dataset type') 206 | parser.add_argument( 207 | '--task', 208 | type=str, 209 | nargs='+', 210 | choices=['bbox', 'segm'], 211 | default=['bbox'], 212 | help='task to report') 213 | parser.add_argument( 214 | '--metric', 215 | nargs='+', 216 | choices=[ 217 | None, 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 218 | 'AR100', 'ARs', 'ARm', 'ARl' 219 | ], 220 | default=None, 221 | help='metric to report') 222 | parser.add_argument( 223 | '--prints', 224 | type=str, 225 | nargs='+', 226 | choices=['P', 'mPC', 'rPC'], 227 | default='mPC', 228 | help='corruption benchmark metric to print') 229 | parser.add_argument( 230 | '--aggregate', 231 | type=str, 232 | choices=['all', 'benchmark'], 233 | default='benchmark', 234 | help='aggregate all results or only those \ 235 | for benchmark corruptions') 236 | 237 | args = parser.parse_args() 238 | 239 | for task in args.task: 240 | get_results( 241 | args.filename, 242 | dataset=args.dataset, 243 | task=task, 244 | metric=args.metric, 245 | prints=args.prints, 246 | aggregate=args.aggregate) 247 | 248 | 249 | if __name__ == '__main__': 250 | main() 251 | -------------------------------------------------------------------------------- /object_detection/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import io 3 | import os 4 | import os.path as osp 5 | import pkgutil 6 | import re 7 | import time 8 | import warnings 9 | from collections import OrderedDict 10 | from importlib import import_module 11 | from tempfile import TemporaryDirectory 12 | 13 | import torch 14 | import torchvision 15 | from torch.optim import Optimizer 16 | from torch.utils import model_zoo 17 | 18 | import mmcv 19 | from mmcv.parallel import is_module_wrapper 20 | from mmcv.runner.dist_utils import get_dist_info 21 | 22 | ENV_MMCV_HOME = 'MMCV_HOME' 23 | ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' 24 | DEFAULT_CACHE_DIR = '~/.cache' 25 | 26 | 27 | def load_state_dict(module, state_dict, strict=False, logger=None): 28 | """Load state_dict to a module. 
29 | 30 | This method is modified from :meth:`torch.nn.Module.load_state_dict`. 31 | Default value for ``strict`` is set to ``False`` and the message for 32 | param mismatch will be shown even if strict is False. 33 | 34 | Args: 35 | module (Module): Module that receives the state_dict. 36 | state_dict (OrderedDict): Weights. 37 | strict (bool): whether to strictly enforce that the keys 38 | in :attr:`state_dict` match the keys returned by this module's 39 | :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. 40 | logger (:obj:`logging.Logger`, optional): Logger to log the error 41 | message. If not specified, print function will be used. 42 | """ 43 | unexpected_keys = [] 44 | all_missing_keys = [] 45 | err_msg = [] 46 | 47 | metadata = getattr(state_dict, '_metadata', None) 48 | state_dict = state_dict.copy() 49 | if metadata is not None: 50 | state_dict._metadata = metadata 51 | 52 | # use _load_from_state_dict to enable checkpoint version control 53 | def load(module, prefix=''): 54 | # recursively check parallel module in case that the model has a 55 | # complicated structure, e.g., nn.Module(nn.Module(DDP)) 56 | if is_module_wrapper(module): 57 | module = module.module 58 | local_metadata = {} if metadata is None else metadata.get( 59 | prefix[:-1], {}) 60 | module._load_from_state_dict(state_dict, prefix, local_metadata, True, 61 | all_missing_keys, unexpected_keys, 62 | err_msg) 63 | for name, child in module._modules.items(): 64 | if child is not None: 65 | load(child, prefix + name + '.') 66 | 67 | load(module) 68 | load = None # break load->load reference cycle 69 | 70 | # ignore "num_batches_tracked" of BN layers 71 | missing_keys = [ 72 | key for key in all_missing_keys if 'num_batches_tracked' not in key 73 | ] 74 | 75 | if unexpected_keys: 76 | err_msg.append('unexpected key in source ' 77 | f'state_dict: {", ".join(unexpected_keys)}\n') 78 | if missing_keys: 79 | err_msg.append( 80 | f'missing keys in source state_dict: {", ".join(missing_keys)}\n') 81 | 82 | rank, _ = get_dist_info() 83 | if len(err_msg) > 0 and rank == 0: 84 | err_msg.insert( 85 | 0, 'The model and loaded state dict do not match exactly\n') 86 | err_msg = '\n'.join(err_msg) 87 | if strict: 88 | raise RuntimeError(err_msg) 89 | elif logger is not None: 90 | logger.warning(err_msg) 91 | else: 92 | print(err_msg) 93 | 94 | 95 | 96 | 97 | class CheckpointLoader: 98 | """A general checkpoint loader to manage all schemes.""" 99 | 100 | _schemes = {} 101 | 102 | @classmethod 103 | def _register_scheme(cls, prefixes, loader, force=False): 104 | if isinstance(prefixes, str): 105 | prefixes = [prefixes] 106 | else: 107 | assert isinstance(prefixes, (list, tuple)) 108 | for prefix in prefixes: 109 | if (prefix not in cls._schemes) or force: 110 | cls._schemes[prefix] = loader 111 | else: 112 | raise KeyError( 113 | f'{prefix} is already registered as a loader backend, ' 114 | 'add "force=True" if you want to override it') 115 | # sort, longer prefixes take priority 116 | cls._schemes = OrderedDict( 117 | sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) 118 | 119 | @classmethod 120 | def register_scheme(cls, prefixes, loader=None, force=False): 121 | """Register a loader to CheckpointLoader. 122 | 123 | This method can be used as a normal class method or a decorator. 124 | 125 | Args: 126 | prefixes (str or list[str] or tuple[str]): 127 | The prefix of the registered loader. 128 | loader (function, optional): The loader function to be registered. 
129 | When this method is used as a decorator, loader is None. 130 | Defaults to None. 131 | force (bool, optional): Whether to override the loader 132 | if the prefix has already been registered. Defaults to False. 133 | """ 134 | 135 | if loader is not None: 136 | cls._register_scheme(prefixes, loader, force=force) 137 | return 138 | 139 | def _register(loader_cls): 140 | cls._register_scheme(prefixes, loader_cls, force=force) 141 | return loader_cls 142 | 143 | return _register 144 | 145 | @classmethod 146 | def _get_checkpoint_loader(cls, path): 147 | """Finds a loader that supports the given path. Falls back to the local 148 | loader if no other loader is found. 149 | 150 | Args: 151 | path (str): checkpoint path 152 | 153 | Returns: 154 | loader (function): checkpoint loader 155 | """ 156 | 157 | for p in cls._schemes: 158 | if path.startswith(p): 159 | return cls._schemes[p] 160 | 161 | @classmethod 162 | def load_checkpoint(cls, filename, map_location=None, logger=None): 163 | """load checkpoint through URL scheme path. 164 | 165 | Args: 166 | filename (str): checkpoint file name with given prefix 167 | map_location (str, optional): Same as :func:`torch.load`. 168 | Default: None 169 | logger (:mod:`logging.Logger`, optional): The logger for message. 170 | Default: None 171 | 172 | Returns: 173 | dict or OrderedDict: The loaded checkpoint. 174 | """ 175 | 176 | checkpoint_loader = cls._get_checkpoint_loader(filename) 177 | class_name = checkpoint_loader.__name__ 178 | mmcv.print_log( 179 | f'load checkpoint from {class_name[10:]} path: {filename}', logger) 180 | return checkpoint_loader(filename, map_location) 181 | 182 | 183 | 184 | def _load_checkpoint(filename, map_location=None, logger=None): 185 | """Load checkpoint from somewhere (modelzoo, file, url). 186 | 187 | Args: 188 | filename (str): Accept local filepath, URL, ``torchvision://xxx``, 189 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for 190 | details. 191 | map_location (str, optional): Same as :func:`torch.load`. 192 | Default: None. 193 | logger (:mod:`logging.Logger`, optional): The logger for error message. 194 | Default: None 195 | 196 | Returns: 197 | dict or OrderedDict: The loaded checkpoint. It can be either an 198 | OrderedDict storing model weights or a dict containing other 199 | information, which depends on the checkpoint. 200 | """ 201 | return CheckpointLoader.load_checkpoint(filename, map_location, logger) 202 | 203 | 204 | def load_checkpoint(model, 205 | filename, 206 | map_location=None, 207 | strict=False, 208 | logger=None, 209 | revise_keys=[(r'^module\.', '')]): 210 | """Load checkpoint from a file or URI. 211 | 212 | Args: 213 | model (Module): Module to load checkpoint. 214 | filename (str): Accept local filepath, URL, ``torchvision://xxx``, 215 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for 216 | details. 217 | map_location (str): Same as :func:`torch.load`. 218 | strict (bool): Whether to allow different params for the model and 219 | checkpoint. 220 | logger (:mod:`logging.Logger` or None): The logger for error message. 221 | revise_keys (list): A list of customized keywords to modify the 222 | state_dict in checkpoint. Each item is a (pattern, replacement) 223 | pair of the regular expression operations. Default: strip 224 | the prefix 'module.' by [(r'^module\\.', '')]. 225 | 226 | Returns: 227 | dict or OrderedDict: The loaded checkpoint. 
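        Example (illustrative usage)::

            >>> from torchvision.models import resnet18
            >>> model = resnet18()
            >>> # any local path, URL, or torchvision:// / open-mmlab:// prefix works
            >>> checkpoint = load_checkpoint(model, 'checkpoint.pth', map_location='cpu')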
228 | """ 229 | checkpoint = _load_checkpoint(filename, map_location, logger) 230 | # OrderedDict is a subclass of dict 231 | if not isinstance(checkpoint, dict): 232 | raise RuntimeError( 233 | f'No state_dict found in checkpoint file {filename}') 234 | # get state_dict from checkpoint 235 | import pdb; pdb.set_trace() 236 | if 'state_dict' in checkpoint: 237 | state_dict = checkpoint['state_dict'] 238 | elif 'model' in checkpoint: 239 | state_dict = checkpoint['model'] 240 | else: 241 | state_dict = checkpoint 242 | 243 | # strip prefix of state_dict 244 | metadata = getattr(state_dict, '_metadata', OrderedDict()) 245 | for p, r in revise_keys: 246 | state_dict = OrderedDict( 247 | {re.sub(p, r, k): v 248 | for k, v in state_dict.items()}) 249 | # Keep metadata in state_dict 250 | state_dict._metadata = metadata 251 | 252 | # load state_dict 253 | load_state_dict(model, state_dict, strict, logger) 254 | return checkpoint -------------------------------------------------------------------------------- /object_detection/configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = '/mnt/private_dataset/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=4, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + '/annotations/instances_train2017.json', 37 | img_prefix=data_root + '/train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + '/annotations/instances_val2017.json', 42 | img_prefix=data_root + '/val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + '/annotations/instances_val2017.json', 47 | img_prefix=data_root + '/val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(interval=1, metric='bbox') -------------------------------------------------------------------------------- /object_detection/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = '/mnt/private_dataset/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', 
**img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=4, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(metric=['bbox', 'segm']) -------------------------------------------------------------------------------- /object_detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/cascade_mask_rcnn_pvtv2_b2_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | backbone=dict( 5 | type='pvt_v2_b2', 6 | style='pytorch'), 7 | neck=dict( 8 | type='FPN', 9 | in_channels=[64, 128, 320, 512], 10 | out_channels=256, 11 | num_outs=5), 12 | rpn_head=dict( 13 | type='RPNHead', 14 | in_channels=256, 15 | feat_channels=256, 16 | anchor_generator=dict( 17 | type='AnchorGenerator', 18 | scales=[8], 19 | ratios=[0.5, 1.0, 2.0], 20 | strides=[4, 8, 16, 32, 64]), 21 | bbox_coder=dict( 22 | type='DeltaXYWHBBoxCoder', 23 | target_means=[.0, .0, .0, .0], 24 | target_stds=[1.0, 1.0, 1.0, 1.0]), 25 | loss_cls=dict( 26 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 27 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 28 | roi_head=dict( 29 | type='CascadeRoIHead', 30 | num_stages=3, 31 | stage_loss_weights=[1, 0.5, 0.25], 32 | bbox_roi_extractor=dict( 33 | type='SingleRoIExtractor', 34 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 35 | out_channels=256, 36 | featmap_strides=[4, 8, 16, 32]), 37 | bbox_head=[ 38 | dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[0., 0., 0., 0.], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | reg_class_agnostic=True, 49 | loss_cls=dict( 50 | type='CrossEntropyLoss', 51 | use_sigmoid=False, 
52 | loss_weight=1.0), 53 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 54 | loss_weight=1.0)), 55 | dict( 56 | type='Shared2FCBBoxHead', 57 | in_channels=256, 58 | fc_out_channels=1024, 59 | roi_feat_size=7, 60 | num_classes=80, 61 | bbox_coder=dict( 62 | type='DeltaXYWHBBoxCoder', 63 | target_means=[0., 0., 0., 0.], 64 | target_stds=[0.05, 0.05, 0.1, 0.1]), 65 | reg_class_agnostic=True, 66 | loss_cls=dict( 67 | type='CrossEntropyLoss', 68 | use_sigmoid=False, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 71 | loss_weight=1.0)), 72 | dict( 73 | type='Shared2FCBBoxHead', 74 | in_channels=256, 75 | fc_out_channels=1024, 76 | roi_feat_size=7, 77 | num_classes=80, 78 | bbox_coder=dict( 79 | type='DeltaXYWHBBoxCoder', 80 | target_means=[0., 0., 0., 0.], 81 | target_stds=[0.033, 0.033, 0.067, 0.067]), 82 | reg_class_agnostic=True, 83 | loss_cls=dict( 84 | type='CrossEntropyLoss', 85 | use_sigmoid=False, 86 | loss_weight=1.0), 87 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 88 | ], 89 | mask_roi_extractor=dict( 90 | type='SingleRoIExtractor', 91 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 92 | out_channels=256, 93 | featmap_strides=[4, 8, 16, 32]), 94 | mask_head=dict( 95 | type='FCNMaskHead', 96 | num_convs=4, 97 | in_channels=256, 98 | conv_out_channels=256, 99 | num_classes=80, 100 | loss_mask=dict( 101 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 102 | # model training and testing settings 103 | train_cfg=dict( 104 | rpn=dict( 105 | assigner=dict( 106 | type='MaxIoUAssigner', 107 | pos_iou_thr=0.7, 108 | neg_iou_thr=0.3, 109 | min_pos_iou=0.3, 110 | match_low_quality=True, 111 | ignore_iof_thr=-1), 112 | sampler=dict( 113 | type='RandomSampler', 114 | num=256, 115 | pos_fraction=0.5, 116 | neg_pos_ub=-1, 117 | add_gt_as_proposals=False), 118 | allowed_border=0, 119 | pos_weight=-1, 120 | debug=False), 121 | rpn_proposal=dict( 122 | nms_across_levels=False, 123 | nms_pre=2000, 124 | nms_post=2000, 125 | max_per_img=2000, 126 | nms=dict(type='nms', iou_threshold=0.7), 127 | min_bbox_size=0), 128 | rcnn=[ 129 | dict( 130 | assigner=dict( 131 | type='MaxIoUAssigner', 132 | pos_iou_thr=0.5, 133 | neg_iou_thr=0.5, 134 | min_pos_iou=0.5, 135 | match_low_quality=False, 136 | ignore_iof_thr=-1), 137 | sampler=dict( 138 | type='RandomSampler', 139 | num=512, 140 | pos_fraction=0.25, 141 | neg_pos_ub=-1, 142 | add_gt_as_proposals=True), 143 | mask_size=28, 144 | pos_weight=-1, 145 | debug=False), 146 | dict( 147 | assigner=dict( 148 | type='MaxIoUAssigner', 149 | pos_iou_thr=0.6, 150 | neg_iou_thr=0.6, 151 | min_pos_iou=0.6, 152 | match_low_quality=False, 153 | ignore_iof_thr=-1), 154 | sampler=dict( 155 | type='RandomSampler', 156 | num=512, 157 | pos_fraction=0.25, 158 | neg_pos_ub=-1, 159 | add_gt_as_proposals=True), 160 | mask_size=28, 161 | pos_weight=-1, 162 | debug=False), 163 | dict( 164 | assigner=dict( 165 | type='MaxIoUAssigner', 166 | pos_iou_thr=0.7, 167 | neg_iou_thr=0.7, 168 | min_pos_iou=0.7, 169 | match_low_quality=False, 170 | ignore_iof_thr=-1), 171 | sampler=dict( 172 | type='RandomSampler', 173 | num=512, 174 | pos_fraction=0.25, 175 | neg_pos_ub=-1, 176 | add_gt_as_proposals=True), 177 | mask_size=28, 178 | pos_weight=-1, 179 | debug=False) 180 | ]), 181 | test_cfg=dict( 182 | rpn=dict( 183 | nms_across_levels=False, 184 | nms_pre=1000, 185 | nms_post=1000, 186 | max_per_img=1000, 187 | nms=dict(type='nms', iou_threshold=0.7), 188 | min_bbox_size=0), 189 | rcnn=dict( 190 | score_thr=0.05, 
191 | nms=dict(type='nms', iou_threshold=0.5), 192 | max_per_img=100, 193 | mask_thr_binary=0.5))) 194 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ], 96 | mask_roi_extractor=dict( 97 | type='SingleRoIExtractor', 98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 99 | out_channels=256, 100 | featmap_strides=[4, 8, 16, 32]), 101 | mask_head=dict( 102 | type='FCNMaskHead', 103 | num_convs=4, 104 | in_channels=256, 105 | conv_out_channels=256, 106 | num_classes=80, 107 | loss_mask=dict( 108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 109 | # model training and testing settings 110 | train_cfg=dict( 111 | rpn=dict( 112 | assigner=dict( 113 | 
type='MaxIoUAssigner', 114 | pos_iou_thr=0.7, 115 | neg_iou_thr=0.3, 116 | min_pos_iou=0.3, 117 | match_low_quality=True, 118 | ignore_iof_thr=-1), 119 | sampler=dict( 120 | type='RandomSampler', 121 | num=256, 122 | pos_fraction=0.5, 123 | neg_pos_ub=-1, 124 | add_gt_as_proposals=False), 125 | allowed_border=0, 126 | pos_weight=-1, 127 | debug=False), 128 | rpn_proposal=dict( 129 | nms_pre=2000, 130 | max_per_img=2000, 131 | nms=dict(type='nms', iou_threshold=0.7), 132 | min_bbox_size=0), 133 | rcnn=[ 134 | dict( 135 | assigner=dict( 136 | type='MaxIoUAssigner', 137 | pos_iou_thr=0.5, 138 | neg_iou_thr=0.5, 139 | min_pos_iou=0.5, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict( 143 | type='RandomSampler', 144 | num=512, 145 | pos_fraction=0.25, 146 | neg_pos_ub=-1, 147 | add_gt_as_proposals=True), 148 | mask_size=28, 149 | pos_weight=-1, 150 | debug=False), 151 | dict( 152 | assigner=dict( 153 | type='MaxIoUAssigner', 154 | pos_iou_thr=0.6, 155 | neg_iou_thr=0.6, 156 | min_pos_iou=0.6, 157 | match_low_quality=False, 158 | ignore_iof_thr=-1), 159 | sampler=dict( 160 | type='RandomSampler', 161 | num=512, 162 | pos_fraction=0.25, 163 | neg_pos_ub=-1, 164 | add_gt_as_proposals=True), 165 | mask_size=28, 166 | pos_weight=-1, 167 | debug=False), 168 | dict( 169 | assigner=dict( 170 | type='MaxIoUAssigner', 171 | pos_iou_thr=0.7, 172 | neg_iou_thr=0.7, 173 | min_pos_iou=0.7, 174 | match_low_quality=False, 175 | ignore_iof_thr=-1), 176 | sampler=dict( 177 | type='RandomSampler', 178 | num=512, 179 | pos_fraction=0.25, 180 | neg_pos_ub=-1, 181 | add_gt_as_proposals=True), 182 | mask_size=28, 183 | pos_weight=-1, 184 | debug=False) 185 | ]), 186 | test_cfg=dict( 187 | rpn=dict( 188 | nms_pre=1000, 189 | max_per_img=1000, 190 | nms=dict(type='nms', iou_threshold=0.7), 191 | min_bbox_size=0), 192 | rcnn=dict( 193 | score_thr=0.05, 194 | nms=dict(type='nms', iou_threshold=0.5), 195 | max_per_img=100, 196 | mask_thr_binary=0.5))) 197 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | 
fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ]), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=0, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=2000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=[ 121 | dict( 122 | assigner=dict( 123 | type='MaxIoUAssigner', 124 | pos_iou_thr=0.5, 125 | neg_iou_thr=0.5, 126 | min_pos_iou=0.5, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict( 130 | type='RandomSampler', 131 | num=512, 132 | pos_fraction=0.25, 133 | neg_pos_ub=-1, 134 | add_gt_as_proposals=True), 135 | pos_weight=-1, 136 | debug=False), 137 | dict( 138 | assigner=dict( 139 | type='MaxIoUAssigner', 140 | pos_iou_thr=0.6, 141 | neg_iou_thr=0.6, 142 | min_pos_iou=0.6, 143 | match_low_quality=False, 144 | ignore_iof_thr=-1), 145 | sampler=dict( 146 | type='RandomSampler', 147 | num=512, 148 | pos_fraction=0.25, 149 | neg_pos_ub=-1, 150 | add_gt_as_proposals=True), 151 | pos_weight=-1, 152 | debug=False), 153 | dict( 154 | assigner=dict( 155 | type='MaxIoUAssigner', 156 | pos_iou_thr=0.7, 157 | neg_iou_thr=0.7, 158 | min_pos_iou=0.7, 159 | match_low_quality=False, 160 | ignore_iof_thr=-1), 161 | sampler=dict( 162 | type='RandomSampler', 163 | num=512, 164 | pos_fraction=0.25, 165 | neg_pos_ub=-1, 166 | add_gt_as_proposals=True), 167 | pos_weight=-1, 168 | debug=False) 169 | ]), 170 | test_cfg=dict( 171 | rpn=dict( 172 | nms_pre=1000, 173 | max_per_img=1000, 174 | nms=dict(type='nms', iou_threshold=0.7), 175 | min_bbox_size=0), 176 | rcnn=dict( 177 | score_thr=0.05, 178 | nms=dict(type='nms', iou_threshold=0.5), 179 | max_per_img=100))) 180 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/fast_rcnn_r50_fpn.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 
0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | 
neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | 
pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | 
num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | 
pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | 
num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | # model training and testing settings 32 | train_cfg=dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.5, 37 | min_pos_iou=0., 38 | ignore_iof_thr=-1, 39 | gt_max_assign_all=False), 40 | smoothl1_beta=1., 41 | allowed_border=-1, 42 | pos_weight=-1, 43 | neg_pos_ratio=3, 44 | debug=False), 45 | test_cfg=dict( 46 | nms_pre=1000, 47 | nms=dict(type='nms', iou_threshold=0.45), 48 | min_bbox_size=0, 49 | score_thr=0.02, 50 | 
max_per_img=200)) 51 | cudnn_benchmark = True 52 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /object_detection/configs/mask_rcnn_transx_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_b', 15 | drop_path_rate=0.3, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[76, 152, 336, 672], 20 | out_channels=256, 21 | num_outs=5)) 22 | # optimizer 23 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05, 24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 25 | 'relative_position_bias_table': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.)})) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict() ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/configs/mask_rcnn_transx_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_s', 15 | drop_path_rate=0.2, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[64, 128, 320, 512], 20 | out_channels=256, 21 | num_outs=5)) 22 | # optimizer 23 | optimizer = 
dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05, 24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 25 | 'relative_position_bias_table': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.)})) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict() ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/configs/mask_rcnn_transx_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_t', 15 | drop_path_rate=0.1, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[48, 96, 224, 448], 20 | out_channels=256, 21 | num_outs=5)) 22 | # optimizer 23 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05, 24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 25 | 'relative_position_bias_table': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.)})) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/configs/retinanet_transx_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/retinanet_r50_fpn.py', 3 | '_base_/datasets/coco_detection.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_b', 15 | drop_path_rate=0.3, 16 | start_level=1, 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[76, 152, 336, 672], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_input', 24 | num_outs=5)) 25 | # optimizer 26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training 29 | evaluation = dict(save_best='auto') 30 | -------------------------------------------------------------------------------- /object_detection/configs/retinanet_transx_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/retinanet_r50_fpn.py', 3 | '_base_/datasets/coco_detection.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_s', 15 | drop_path_rate=0.2, 16 | start_level=1, 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[64, 128, 320, 512], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_input', 24 | num_outs=5)) 25 | # optimizer 26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict() ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- 
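The six TransXNet detector configs in this directory all follow the same override pattern: they inherit a ResNet-50 base model and rely on mmcv's `_delete_=True` key so that the backbone (and optimizer) dicts replace the inherited ones wholesale instead of being merged into them. A minimal sketch of inspecting the merged result — assuming mmcv is installed and the snippet is run from `object_detection/` so the `_base_` paths resolve:

from mmcv import Config

# Config.fromfile resolves the _base_ inheritance chain and applies
# the _delete_ overrides before returning the merged config.
cfg = Config.fromfile('configs/retinanet_transx_s_fpn_1x_coco.py')

# Because of _delete_=True, no ResNet keys (depth, num_stages, ...) survive:
print(cfg.model.backbone)  # {'pretrained': True, 'type': 'transxnet_s', ...}

# The optimizer dict is likewise replaced outright, swapping SGD for AdamW:
print(cfg.optimizer.type)  # 'AdamW'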
/object_detection/configs/retinanet_transx_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/retinanet_r50_fpn.py', 3 | '_base_/datasets/coco_detection.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_t', 15 | drop_path_rate=0.1, 16 | start_level=1, 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[48, 96, 224, 448], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_input', 24 | num_outs=5)) 25 | # optimizer 26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | CONFIG=$1 3 | CHECKPOINT=$2 4 | GPUS=$3 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} -------------------------------------------------------------------------------- /object_detection/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | CONFIG=$1 3 | GPUS=$2 4 | PORT=${PORT:-29500} 5 | 6 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 7 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 8 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /object_detection/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except ImportError: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict``, 23 | ``optimizer``, and ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() -------------------------------------------------------------------------------- /object_detection/mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | @RUNNERS.register_module() 20 | class EpochBasedRunnerAmp(EpochBasedRunner): 21 | """Epoch-based Runner with AMP support. 22 | 23 | This runner train models epoch by epoch. 24 | """ 25 | 26 | def save_checkpoint(self, 27 | out_dir, 28 | filename_tmpl='epoch_{}.pth', 29 | save_optimizer=True, 30 | meta=None, 31 | create_symlink=True): 32 | """Save the checkpoint. 33 | 34 | Args: 35 | out_dir (str): The directory that checkpoints are saved. 36 | filename_tmpl (str, optional): The checkpoint filename template, 37 | which contains a placeholder for the epoch number. 38 | Defaults to 'epoch_{}.pth'. 39 | save_optimizer (bool, optional): Whether to save the optimizer to 40 | the checkpoint. Defaults to True. 41 | meta (dict, optional): The meta information to be saved in the 42 | checkpoint. Defaults to None. 43 | create_symlink (bool, optional): Whether to create a symlink 44 | "latest.pth" to point to the latest checkpoint. 45 | Defaults to True. 
46 | """ 47 | if meta is None: 48 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 49 | elif isinstance(meta, dict): 50 | meta.update(epoch=self.epoch + 1, iter=self.iter) 51 | else: 52 | raise TypeError( 53 | f'meta should be a dict or None, but got {type(meta)}') 54 | if self.meta is not None: 55 | meta.update(self.meta) 56 | 57 | filename = filename_tmpl.format(self.epoch + 1) 58 | filepath = osp.join(out_dir, filename) 59 | optimizer = self.optimizer if save_optimizer else None 60 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 61 | # in some environments, `os.symlink` is not supported, you may need to 62 | # set `create_symlink` to False 63 | if create_symlink: 64 | dst_file = osp.join(out_dir, 'latest.pth') 65 | if platform.system() != 'Windows': 66 | mmcv.symlink(filename, dst_file) 67 | else: 68 | shutil.copy(filepath, dst_file) 69 | 70 | def resume(self, 71 | checkpoint, 72 | resume_optimizer=True, 73 | map_location='default'): 74 | if map_location == 'default': 75 | if torch.cuda.is_available(): 76 | device_id = torch.cuda.current_device() 77 | checkpoint = self.load_checkpoint( 78 | checkpoint, 79 | map_location=lambda storage, loc: storage.cuda(device_id)) 80 | else: 81 | checkpoint = self.load_checkpoint(checkpoint) 82 | else: 83 | checkpoint = self.load_checkpoint( 84 | checkpoint, map_location=map_location) 85 | 86 | self._epoch = checkpoint['meta']['epoch'] 87 | self._iter = checkpoint['meta']['iter'] 88 | if 'optimizer' in checkpoint and resume_optimizer: 89 | if isinstance(self.optimizer, Optimizer): 90 | self.optimizer.load_state_dict(checkpoint['optimizer']) 91 | elif isinstance(self.optimizer, dict): 92 | for k in self.optimizer.keys(): 93 | self.optimizer[k].load_state_dict( 94 | checkpoint['optimizer'][k]) 95 | else: 96 | raise TypeError( 97 | 'Optimizer should be dict or torch.optim.Optimizer ' 98 | f'but got {type(self.optimizer)}') 99 | 100 | if 'amp' in checkpoint: 101 | apex.amp.load_state_dict(checkpoint['amp']) 102 | self.logger.info('load amp state dict') 103 | 104 | self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) 105 | -------------------------------------------------------------------------------- /object_detection/mmcv_custom/runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | try: 3 | import apex 4 | except: 5 | print('apex is not installed') 6 | 7 | 8 | @HOOKS.register_module() 9 | class DistOptimizerHook(OptimizerHook): 10 | """Optimizer hook for distributed training.""" 11 | 12 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 13 | self.grad_clip = grad_clip 14 | self.coalesce = coalesce 15 | self.bucket_size_mb = bucket_size_mb 16 | self.update_interval = update_interval 17 | self.use_fp16 = use_fp16 18 | 19 | def before_run(self, runner): 20 | runner.optimizer.zero_grad() 21 | 22 | def after_train_iter(self, runner): 23 | runner.outputs['loss'] /= self.update_interval 24 | if self.use_fp16: 25 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 26 | scaled_loss.backward() 27 | else: 28 | runner.outputs['loss'].backward() 29 | if self.every_n_iters(runner, self.update_interval): 30 | if self.grad_clip is not None: 31 | self.clip_grads(runner.model.parameters()) 32 | runner.optimizer.step() 33 | runner.optimizer.zero_grad() -------------------------------------------------------------------------------- 
/object_detection/mmdet_custom/apis/train.py: -------------------------------------------------------------------------------- 1 | import random 2 | import warnings 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, 8 | Fp16OptimizerHook, OptimizerHook, build_optimizer, 9 | build_runner) 10 | from mmcv.utils import build_from_cfg 11 | 12 | from mmdet.core import DistEvalHook, EvalHook 13 | from mmdet.datasets import (build_dataloader, build_dataset, 14 | replace_ImageToTensor) 15 | from mmdet.utils import get_root_logger 16 | try: 17 | import apex 18 | except ImportError: 19 | print('apex is not installed') 20 | 21 | 22 | def set_random_seed(seed, deterministic=False): 23 | """Set random seed. 24 | 25 | Args: 26 | seed (int): Seed to be used. 27 | deterministic (bool): Whether to set the deterministic option for 28 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` 29 | to True and `torch.backends.cudnn.benchmark` to False. 30 | Default: False. 31 | """ 32 | random.seed(seed) 33 | np.random.seed(seed) 34 | torch.manual_seed(seed) 35 | torch.cuda.manual_seed_all(seed) 36 | if deterministic: 37 | torch.backends.cudnn.deterministic = True 38 | torch.backends.cudnn.benchmark = False 39 | 40 | 41 | def train_detector(model, 42 | dataset, 43 | cfg, 44 | distributed=False, 45 | validate=False, 46 | timestamp=None, 47 | meta=None): 48 | logger = get_root_logger(cfg.log_level) 49 | 50 | # prepare data loaders 51 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 52 | if 'imgs_per_gpu' in cfg.data: 53 | logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. ' 54 | 'Please use "samples_per_gpu" instead') 55 | if 'samples_per_gpu' in cfg.data: 56 | logger.warning( 57 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and ' 58 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"' 59 | f'={cfg.data.imgs_per_gpu} is used in this experiment') 60 | else: 61 | logger.warning( 62 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"=' 63 | f'{cfg.data.imgs_per_gpu} in this experiment') 64 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu 65 | 66 | data_loaders = [ 67 | build_dataloader( 68 | ds, 69 | cfg.data.samples_per_gpu, 70 | cfg.data.workers_per_gpu, 71 | # cfg.gpus will be ignored if distributed 72 | len(cfg.gpu_ids), 73 | dist=distributed, 74 | seed=cfg.seed) for ds in dataset 75 | ] 76 | 77 | # build optimizer 78 | optimizer = build_optimizer(model, cfg.optimizer) 79 | 80 | # use apex fp16 optimizer 81 | if cfg.optimizer_config.get("type", None) and cfg.optimizer_config["type"] == "DistOptimizerHook": 82 | if cfg.optimizer_config.get("use_fp16", False): 83 | model, optimizer = apex.amp.initialize( 84 | model.cuda(), optimizer, opt_level="O1") 85 | for m in model.modules(): 86 | if hasattr(m, "fp16_enabled"): 87 | m.fp16_enabled = True 88 | 89 | # put model on gpus 90 | if distributed: 91 | find_unused_parameters = cfg.get('find_unused_parameters', False) 92 | # Sets the `find_unused_parameters` parameter in 93 | # torch.nn.parallel.DistributedDataParallel 94 | model = MMDistributedDataParallel( 95 | model.cuda(), 96 | device_ids=[torch.cuda.current_device()], 97 | broadcast_buffers=False, 98 | find_unused_parameters=find_unused_parameters) 99 | else: 100 | model = MMDataParallel( 101 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) 102 | 103 | if 'runner' not in cfg: 104 | cfg.runner = { 105 | 'type': 
'EpochBasedRunner', 106 | 'max_epochs': cfg.total_epochs 107 | } 108 | warnings.warn( 109 | 'config is now expected to have a `runner` section, ' 110 | 'please set `runner` in your config.', UserWarning) 111 | else: 112 | if 'total_epochs' in cfg: 113 | assert cfg.total_epochs == cfg.runner.max_epochs 114 | 115 | # build runner 116 | runner = build_runner( 117 | cfg.runner, 118 | default_args=dict( 119 | model=model, 120 | optimizer=optimizer, 121 | work_dir=cfg.work_dir, 122 | logger=logger, 123 | meta=meta)) 124 | 125 | # an ugly workaround to make .log and .log.json filenames the same 126 | runner.timestamp = timestamp 127 | 128 | # fp16 setting 129 | fp16_cfg = cfg.get('fp16', None) 130 | if fp16_cfg is not None: 131 | optimizer_config = Fp16OptimizerHook( 132 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed) 133 | elif distributed and 'type' not in cfg.optimizer_config: 134 | optimizer_config = OptimizerHook(**cfg.optimizer_config) 135 | else: 136 | optimizer_config = cfg.optimizer_config 137 | 138 | # register hooks 139 | runner.register_training_hooks(cfg.lr_config, optimizer_config, 140 | cfg.checkpoint_config, cfg.log_config, 141 | cfg.get('momentum_config', None)) 142 | if distributed: 143 | if isinstance(runner, EpochBasedRunner): 144 | runner.register_hook(DistSamplerSeedHook()) 145 | 146 | # register eval hooks 147 | if validate: 148 | # Support batch_size > 1 in validation 149 | val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1) 150 | if val_samples_per_gpu > 1: 151 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 152 | cfg.data.val.pipeline = replace_ImageToTensor( 153 | cfg.data.val.pipeline) 154 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) 155 | val_dataloader = build_dataloader( 156 | val_dataset, 157 | samples_per_gpu=val_samples_per_gpu, 158 | workers_per_gpu=cfg.data.workers_per_gpu, 159 | dist=distributed, 160 | shuffle=False) 161 | eval_cfg = cfg.get('evaluation', {}) 162 | eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' 163 | eval_hook = DistEvalHook if distributed else EvalHook 164 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) 165 | 166 | # user-defined hooks 167 | if cfg.get('custom_hooks', None): 168 | custom_hooks = cfg.custom_hooks 169 | assert isinstance(custom_hooks, list), \ 170 | f'custom_hooks expect list type, but got {type(custom_hooks)}' 171 | for hook_cfg in cfg.custom_hooks: 172 | assert isinstance(hook_cfg, dict), \ 173 | 'Each item in custom_hooks expects dict type, but got ' \ 174 | f'{type(hook_cfg)}' 175 | hook_cfg = hook_cfg.copy() 176 | priority = hook_cfg.pop('priority', 'NORMAL') 177 | hook = build_from_cfg(hook_cfg, HOOKS) 178 | runner.register_hook(hook, priority=priority) 179 | 180 | if cfg.resume_from: 181 | runner.resume(cfg.resume_from) 182 | elif cfg.load_from: 183 | runner.load_checkpoint(cfg.load_from) 184 | runner.run(data_loaders, cfg.workflow) -------------------------------------------------------------------------------- /object_detection/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import os.path as osp 5 | import time 6 | import warnings 7 | 8 | import mmcv 9 | import torch 10 | from mmcv import Config, DictAction 11 | from mmcv.runner import get_dist_info, init_dist 12 | from mmcv.utils import get_git_hash 13 | 14 | from mmdet import __version__ 15 | from mmdet.apis import set_random_seed #, train_detector 16 | from mmdet_custom.apis.train import 
train_detector 17 | from mmdet.datasets import build_dataset 18 | from mmdet.models import build_detector 19 | from mmdet.utils import collect_env, get_root_logger 20 | import mmcv_custom.runner.epoch_based_runner 21 | import mmcv_custom.runner.optimizer 22 | 23 | import transxnet 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser(description='Train a detector') 27 | parser.add_argument('config', help='train config file path') 28 | parser.add_argument('--work-dir', help='the dir to save logs and models') 29 | parser.add_argument( 30 | '--resume-from', help='the checkpoint file to resume from') 31 | parser.add_argument( 32 | '--no-validate', 33 | action='store_true', 34 | help='whether not to evaluate the checkpoint during training') 35 | group_gpus = parser.add_mutually_exclusive_group() 36 | group_gpus.add_argument( 37 | '--gpus', 38 | type=int, 39 | help='number of gpus to use ' 40 | '(only applicable to non-distributed training)') 41 | group_gpus.add_argument( 42 | '--gpu-ids', 43 | type=int, 44 | nargs='+', 45 | help='ids of gpus to use ' 46 | '(only applicable to non-distributed training)') 47 | parser.add_argument('--seed', type=int, default=None, help='random seed') 48 | parser.add_argument( 49 | '--deterministic', 50 | action='store_true', 51 | help='whether to set deterministic options for CUDNN backend.') 52 | parser.add_argument( 53 | '--options', 54 | nargs='+', 55 | action=DictAction, 56 | help='override some settings in the used config, the key-value pair ' 57 | 'in xxx=yyy format will be merged into config file (deprecate), ' 58 | 'change to --cfg-options instead.') 59 | parser.add_argument( 60 | '--cfg-options', 61 | nargs='+', 62 | action=DictAction, 63 | help='override some settings in the used config, the key-value pair ' 64 | 'in xxx=yyy format will be merged into config file. If the value to ' 65 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 66 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 67 | 'Note that the quotation marks are necessary and that no white space ' 68 | 'is allowed.') 69 | parser.add_argument( 70 | '--launcher', 71 | choices=['none', 'pytorch', 'slurm', 'mpi'], 72 | default='none', 73 | help='job launcher') 74 | parser.add_argument('--local_rank', type=int, default=0) 75 | args = parser.parse_args() 76 | if 'LOCAL_RANK' not in os.environ: 77 | os.environ['LOCAL_RANK'] = str(args.local_rank) 78 | 79 | if args.options and args.cfg_options: 80 | raise ValueError( 81 | '--options and --cfg-options cannot be both ' 82 | 'specified, --options is deprecated in favor of --cfg-options') 83 | if args.options: 84 | warnings.warn('--options is deprecated in favor of --cfg-options') 85 | args.cfg_options = args.options 86 | 87 | return args 88 | 89 | 90 | def main(): 91 | args = parse_args() 92 | 93 | cfg = Config.fromfile(args.config) 94 | if args.cfg_options is not None: 95 | cfg.merge_from_dict(args.cfg_options) 96 | # import modules from string list. 
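# For example, a config might declare (hypothetical snippet):
#   custom_imports = dict(imports=['mmdet_custom.apis.train'],
#                         allow_failed_imports=False)
# `import_modules_from_strings(**cfg['custom_imports'])` below then imports
# those modules before the config is consumed, so any registry entries they
# define become available.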
97 | if cfg.get('custom_imports', None): 98 | from mmcv.utils import import_modules_from_strings 99 | import_modules_from_strings(**cfg['custom_imports']) 100 | # set cudnn_benchmark 101 | if cfg.get('cudnn_benchmark', False): 102 | torch.backends.cudnn.benchmark = True 103 | 104 | # work_dir is determined in this priority: CLI > segment in file > filename 105 | if args.work_dir is not None: 106 | # update configs according to CLI args if args.work_dir is not None 107 | cfg.work_dir = args.work_dir 108 | elif cfg.get('work_dir', None) is None: 109 | # use config filename as default work_dir if cfg.work_dir is None 110 | cfg.work_dir = osp.join('./work_dirs', 111 | osp.splitext(osp.basename(args.config))[0]) 112 | if args.resume_from is not None: 113 | cfg.resume_from = args.resume_from 114 | if args.gpu_ids is not None: 115 | cfg.gpu_ids = args.gpu_ids 116 | else: 117 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 118 | 119 | # init distributed env first, since logger depends on the dist info. 120 | if args.launcher == 'none': 121 | distributed = False 122 | else: 123 | distributed = True 124 | init_dist(args.launcher, **cfg.dist_params) 125 | # re-set gpu_ids with distributed training mode 126 | _, world_size = get_dist_info() 127 | cfg.gpu_ids = range(world_size) 128 | 129 | # create work_dir 130 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 131 | # dump config 132 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 133 | # init the logger before other steps 134 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 135 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 136 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 137 | 138 | # init the meta dict to record some important information such as 139 | # environment info and seed, which will be logged 140 | meta = dict() 141 | # log env info 142 | env_info_dict = collect_env() 143 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 144 | dash_line = '-' * 60 + '\n' 145 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 146 | dash_line) 147 | meta['env_info'] = env_info 148 | meta['config'] = cfg.pretty_text 149 | # log some basic info 150 | logger.info(f'Distributed training: {distributed}') 151 | logger.info(f'Config:\n{cfg.pretty_text}') 152 | 153 | # set random seeds 154 | if args.seed is not None: 155 | logger.info(f'Set random seed to {args.seed}, ' 156 | f'deterministic: {args.deterministic}') 157 | set_random_seed(args.seed, deterministic=args.deterministic) 158 | cfg.seed = args.seed 159 | meta['seed'] = args.seed 160 | meta['exp_name'] = osp.basename(args.config) 161 | 162 | model = build_detector( 163 | cfg.model, 164 | train_cfg=cfg.get('train_cfg'), 165 | test_cfg=cfg.get('test_cfg')) 166 | model.init_weights() 167 | 168 | logger.info(model) 169 | 170 | datasets = [build_dataset(cfg.data.train)] 171 | if len(cfg.workflow) == 2: 172 | val_dataset = copy.deepcopy(cfg.data.val) 173 | val_dataset.pipeline = cfg.data.train.pipeline 174 | datasets.append(build_dataset(val_dataset)) 175 | if cfg.checkpoint_config is not None: 176 | # save mmdet version, config file content and class names in 177 | # checkpoints as meta data 178 | cfg.checkpoint_config.meta = dict( 179 | mmdet_version=__version__ + get_git_hash()[:7], 180 | CLASSES=datasets[0].CLASSES) 181 | # add an attribute for visualization convenience 182 | model.CLASSES = datasets[0].CLASSES 183 | train_detector( 184 | model, 185 | datasets, 186 | cfg, 187 | 
distributed=distributed, 188 | validate=(not args.no_validate), 189 | timestamp=timestamp, 190 | meta=meta) 191 | 192 | 193 | if __name__ == '__main__': 194 | torch.cuda.empty_cache() 195 | main() -------------------------------------------------------------------------------- /scripts/train_base.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | train.py \ 5 | /path/to/imagenet/ \ 6 | --batch-size 128 \ 7 | --pin-mem \ 8 | --model transxnet_b \ 9 | --drop-path 0.4 \ 10 | --lr 2e-3 \ 11 | --warmup-epochs 5 \ 12 | --sync-bn \ 13 | --model-ema \ 14 | --model-ema-decay 0.99985 \ 15 | --val-start-epoch 250 \ 16 | --val-freq 50 \ 17 | --native-amp \ 18 | --output /path/to/save-checkpoint/ -------------------------------------------------------------------------------- /scripts/train_small.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | train.py \ 5 | /path/to/imagenet/ \ 6 | --batch-size 128 \ 7 | --pin-mem \ 8 | --model transxnet_s \ 9 | --drop-path 0.2 \ 10 | --lr 2e-3 \ 11 | --warmup-epochs 5 \ 12 | --sync-bn \ 13 | --model-ema \ 14 | --model-ema-decay 0.99985 \ 15 | --val-start-epoch 250 \ 16 | --val-freq 50 \ 17 | --native-amp \ 18 | --output /path/to/save-checkpoint/ -------------------------------------------------------------------------------- /scripts/train_tiny.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | train.py \ 5 | /path/to/imagenet/ \ 6 | --batch-size 128 \ 7 | --pin-mem \ 8 | --model transxnet_t \ 9 | --drop-path 0.1 \ 10 | --lr 2e-3 \ 11 | --warmup-epochs 5 \ 12 | --sync-bn \ 13 | --model-ema \ 14 | --model-ema-decay 0.9998 \ 15 | --val-start-epoch 250 \ 16 | --val-freq 50 \ 17 | --native-amp \ 18 | --output /path/to/save-checkpoint/ -------------------------------------------------------------------------------- /semantic_segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Applying TransXNet to Semantic Segmentation 2 | 3 | For details, please refer to "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://arxiv.org/abs/2310.19380)". 4 | 5 | ## 1. Requirements 6 | 7 | We strongly recommend using our provided dependencies to ensure reproducibility: 8 | ``` 9 | # Environments: 10 | cuda==11.3 11 | python==3.8.15 12 | # Packages: 13 | mmcv==1.7.1 14 | timm==0.6.12 15 | torch==1.12.1 16 | torchvision==0.13.1 17 | mmsegmentation==0.30.0 18 | ``` 19 | 20 | ## 2. Data Preparation 21 | 22 | Prepare ADE20K according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md). 23 | 24 | 25 | ## 3. 
Main Results on ADE20K with Pretrained Models 26 | 27 | | Method | Backbone | Pretrain | Iters | mIoU | Config | Download | 28 | | --- | --- | --- |:---:|:---:| --- | --- | 29 | | Semantic FPN | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 80K | 45.5 | [config](configs/sfpn_transxnet_tiny.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_tiny_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_tiny.pth) | 30 | | Semantic FPN | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 80K | 48.5 | [config](configs/sfpn_transxnet_small.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_small_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_small.pth) | 31 | | Semantic FPN | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 80k | 49.9 | [config](configs/sfpn_transxnet_base.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_base_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_base.pth) | 32 | 33 | 34 | ## 4. Train 35 | To train ``TransXNet + Semantic FPN`` models on ADE20K with 8 gpus (single node), run: 36 | ``` 37 | bash scripts/train_sfpn_transxnet_tiny.sh # train TransXNet-T + Semantic FPN 38 | bash scripts/train_sfpn_transxnet_small.sh # train TransXNet-S + Semantic FPN 39 | bash scripts/train_sfpn_transxnet_base.sh # train TransXNet-B + Semantic FPN 40 | ``` 41 | 42 | ## 5. Validation 43 | To evaluate ``TransXNet + Semantic FPN`` models on ADE20K, run: 44 | ``` 45 | # Take TransXNet-T + Semantic FPN as an example: 46 | python3 -m torch.distributed.launch \ 47 | --nproc_per_node=8 \ 48 | --master_port=$((RANDOM+8888)) \ 49 | test.py \ 50 | configs/sfpn_transxnet_tiny.py \ 51 | path/to/checkpoint \ 52 | --out work_dirs/output.pkl \ 53 | --eval mIoU \ 54 | --launcher pytorch 55 | ``` 56 | 57 | ## Citation 58 | If you find this project useful for your research, please consider citing: 59 | ``` 60 | @article{lou2023transxnet, 61 | title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition}, 62 | author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu}, 63 | journal={IEEE Transactions on Neural Networks and Learning Systems}, 64 | year={2025} 65 | } 66 | ``` 67 | 68 | ## Contact 69 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com. 
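## Appendix: Quick Single-Image Inference (Optional)

For a quick sanity check of a downloaded checkpoint, the standard high-level APIs of the pinned `mmsegmentation==0.30.0` can be used. The sketch below assumes it is run from the `semantic_segmentation` directory (so that `import transxnet` registers the backbones) and uses placeholder paths:

```python
# Minimal inference sketch (placeholder paths; assumes mmsegmentation==0.30.0).
from mmseg.apis import inference_segmentor, init_segmentor

import transxnet  # registers the transxnet_t/s/b backbones with mmseg

config_file = 'configs/sfpn_transxnet_tiny.py'
checkpoint_file = 'path/to/sfpn_transxnet_tiny.pth'       # placeholder
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'path/to/image.jpg')  # placeholder image
# `result` is a list containing one H x W array of class indices (0-149).
```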
70 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/datasets/ade20k_sfpn.py: -------------------------------------------------------------------------------- 1 | # copied from uniformer 2 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/configs/_base_/datasets/ade20k.py 3 | 4 | # dataset settings 5 | dataset_type = 'ADE20KDataset' 6 | data_root = '/mnt/private_dataset/ade/ADEChallengeData2016/' 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | crop_size = (512, 512) 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations', reduce_zero_label=True), 13 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(2048, 512), 27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='AlignResize', keep_ratio=True, size_divisor=32), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | data = dict( 38 | samples_per_gpu=4, 39 | workers_per_gpu=4, 40 | train=dict( 41 | type='RepeatDataset', 42 | times=50, 43 | dataset=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/training', 47 | ann_dir='annotations/training', 48 | pipeline=train_pipeline)), 49 | val=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | data_root=data_root, 58 | img_dir='images/validation', 59 | ann_dir='annotations/validation', 60 | pipeline=test_pipeline)) 61 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='CustomizedTextLoggerHook', by_epoch=False), 7 | # dict(type='TensorboardLoggerHook'), 8 | ]) 9 | # yapf:enable 10 | dist_params = dict(backend='nccl') 11 | log_level = 'INFO' 12 | load_from = None 13 | resume_from = None 14 | workflow = [('train', 1)] 15 | cudnn_benchmark = True 16 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # copied from mmsegmentaion official config 2 | # https://github.com/open-mmlab/mmsegmentation/blob/master/configs/_base_/models/fpn_r50.py 3 | 4 | 5 | # model settings 6 | norm_cfg = dict(type='SyncBN', requires_grad=True) 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained='open-mmlab://resnet50_v1c', 10 | backbone=dict( 11 | type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | 
out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 1, 1), 16 | strides=(1, 2, 2, 2), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | style='pytorch', 20 | contract_dilation=True), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | num_outs=4), 26 | decode_head=dict( 27 | type='FPNHead', 28 | in_channels=[256, 256, 256, 256], 29 | in_index=[0, 1, 2, 3], 30 | feature_strides=[4, 8, 16, 32], 31 | channels=128, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(), 40 | test_cfg=dict(mode='whole')) 41 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU') -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU') -------------------------------------------------------------------------------- /semantic_segmentation/configs/sfpn_transxnet_base.py: -------------------------------------------------------------------------------- 1 
| _base_ = [ 2 | '_base_/models/fpn_r50.py', 3 | '_base_/datasets/ade20k_sfpn.py', 4 | '_base_/default_runtime.py', 5 | ] 6 | 7 | # model.pretrained is actually loaded by backbone, see 8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32 9 | 10 | model=dict( 11 | pretrained=None, 12 | backbone=dict( 13 | _delete_=True, 14 | pretrained=True, 15 | type='transxnet_b', 16 | drop_path_rate=0.3, 17 | ), 18 | neck=dict(in_channels=[76, 152, 336, 672],), 19 | decode_head=dict(num_classes=150)) 20 | 21 | ############## below we strictly follow uniformer #################################### 22 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py 23 | ############################################################################# 24 | gpu_multiples = 2 # we use 8 GPUs instead of the 4 used in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 27 | # learning policy 28 | lr_config = dict( 29 | policy='CosineAnnealing', 30 | warmup='linear', 31 | warmup_iters=1000, 32 | warmup_ratio=1.0 / 10, 33 | min_lr_ratio=1e-8) 34 | # runtime settings 35 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples) 36 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1) 37 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU') 38 | ############################################################################# 39 | 40 | # NOTE: setting this to True conflicts with gradient checkpointing, see 41 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503 42 | find_unused_parameters = False 43 | 44 | # placeholder for compatibility with newer mmseg versions 45 | resume_from = None 46 | device = 'cuda' 47 | 48 | # fp32 training -> 49 | optimizer_config = dict() 50 | 51 | # AMP -> 52 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
53 | # fp16 = dict() -------------------------------------------------------------------------------- /semantic_segmentation/configs/sfpn_transxnet_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/fpn_r50.py', 3 | '_base_/datasets/ade20k_sfpn.py', 4 | '_base_/default_runtime.py', 5 | ] 6 | 7 | # model.pretrained is actually loaded by backbone, see 8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32 9 | 10 | model=dict( 11 | pretrained=None, 12 | backbone=dict( 13 | _delete_=True, 14 | pretrained=True, 15 | type='transxnet_s', 16 | drop_path_rate=0.2, 17 | ), 18 | neck=dict(in_channels=[64, 128, 320, 512]), 19 | decode_head=dict(num_classes=150)) 20 | 21 | ############## below we strictly follow uniformer #################################### 22 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py 23 | ############################################################################# 24 | gpu_multiples=2 # we use 8 GPUs instead of the 4 used in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 27 | # learning policy 28 | lr_config = dict( 29 | policy='CosineAnnealing', 30 | warmup='linear', 31 | warmup_iters=1000, 32 | warmup_ratio=1.0 / 10, 33 | min_lr_ratio=1e-8) 34 | # runtime settings 35 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples) 36 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1) 37 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU') 38 | ############################################################################# 39 | 40 | # NOTE: setting this to True conflicts with gradient checkpointing, see 41 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503 42 | find_unused_parameters = False 43 | 44 | # placeholder for compatibility with newer mmseg versions 45 | resume_from = None 46 | device = 'cuda' 47 | 48 | # fp32 training -> 49 | optimizer_config = dict() 50 | 51 | # AMP -> 52 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
53 | # fp16 = dict() 54 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/sfpn_transxnet_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/fpn_r50.py', 3 | '_base_/datasets/ade20k_sfpn.py', 4 | '_base_/default_runtime.py', 5 | ] 6 | 7 | # model.pretrained is actually loaded by backbone, see 8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32 9 | model=dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_t', 15 | drop_path_rate=0.1, 16 | ), 17 | neck=dict(in_channels=[48, 96, 224, 448]), 18 | decode_head=dict(num_classes=150)) 19 | 20 | ############## below we strictly follow uniformer #################################### 21 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py 22 | ############################################################################# 23 | gpu_multiples=2 # we use 8 GPUs instead of the 4 used in mmsegmentation, so lr*2 and max_iters/2 24 | # optimizer 25 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 26 | # learning policy 27 | lr_config = dict( 28 | policy='CosineAnnealing', 29 | warmup='linear', 30 | warmup_iters=1000, 31 | warmup_ratio=1.0 / 10, 32 | min_lr_ratio=1e-8) 33 | # runtime settings 34 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples) 35 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1) 36 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU') 37 | ############################################################################# 38 | 39 | # NOTE: setting this to True conflicts with gradient checkpointing, see 40 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503 41 | find_unused_parameters = False 42 | 43 | # placeholder for compatibility with newer mmseg versions 44 | resume_from = None 45 | device = 'cuda' 46 | 47 | # fp32 training -> 48 | optimizer_config = dict() 49 | 50 | # AMP -> 51 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 52 | # fp16 = dict() -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 
7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 11 | # from .apex_runner.optimizer import DistOptimizerHook 12 | from .checkpoint import load_checkpoint 13 | from .customized_text import CustomizedTextLoggerHook 14 | from .layer_decay_optimizer_constructor import \ 15 | LearningRateDecayOptimizerConstructor 16 | from .resize_transform import SETR_Resize 17 | 18 | # from .train_api import train_segmentor 19 | 20 | # __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'SETR_Resize', 'DistOptimizerHook', 'train_segmentor', 'CustomizedTextLoggerHook'] 21 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'SETR_Resize', 'CustomizedTextLoggerHook'] 22 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/checkpoint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/checkpoint.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/customized_text.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/customized_text.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/resize_transform.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/resize_transform.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/apex_runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 
21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``, ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint. 31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/customized_text.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | import datetime 10 | from collections import OrderedDict 11 | 12 | import mmcv 13 | import torch 14 | from mmcv.runner import HOOKS, TextLoggerHook 15 | 16 | 17 | @HOOKS.register_module() 18 | class CustomizedTextLoggerHook(TextLoggerHook): 19 | """Customized Text Logger hook. 20 | 21 | This logger prints out both lr and layer_0_lr. 
22 | 23 | """ 24 | 25 | def _log_info(self, log_dict, runner): 26 | # print exp name for users to distinguish experiments 27 | # at every ``interval_exp_name`` iterations and the end of each epoch 28 | if runner.meta is not None and 'exp_name' in runner.meta: 29 | if (self.every_n_iters(runner, self.interval_exp_name)) or ( 30 | self.by_epoch and self.end_of_epoch(runner)): 31 | exp_info = f'Exp name: {runner.meta["exp_name"]}' 32 | runner.logger.info(exp_info) 33 | 34 | if log_dict['mode'] == 'train': 35 | lr_str = {} 36 | for lr_type in ['lr', 'layer_0_lr']: 37 | if isinstance(log_dict[lr_type], dict): 38 | lr_str[lr_type] = [] 39 | for k, val in log_dict[lr_type].items(): 40 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}') 41 | lr_str[lr_type] = ' '.join(lr_str[lr_type]) 42 | else: 43 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}' 44 | 45 | # by epoch: Epoch [4][100/1000] 46 | # by iter: Iter [100/100000] 47 | if self.by_epoch: 48 | log_str = f'Epoch [{log_dict["epoch"]}]' \ 49 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' 50 | else: 51 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' 52 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, ' 53 | 54 | if 'time' in log_dict.keys(): 55 | self.time_sec_tot += (log_dict['time'] * self.interval) 56 | time_sec_avg = self.time_sec_tot / ( 57 | runner.iter - self.start_iter + 1) 58 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) 59 | eta_str = str(datetime.timedelta(seconds=int(eta_sec))) 60 | log_str += f'eta: {eta_str}, ' 61 | log_str += f'time: {log_dict["time"]:.3f}, ' \ 62 | f'data_time: {log_dict["data_time"]:.3f}, ' 63 | # log GPU memory statistics 64 | if torch.cuda.is_available(): 65 | log_str += f'memory: {log_dict["memory"]}, ' 66 | else: 67 | # val/test time 68 | # here 1000 is the length of the val dataloader 69 | # by epoch: Epoch[val] [4][1000] 70 | # by iter: Iter[val] [1000] 71 | if self.by_epoch: 72 | log_str = f'Epoch({log_dict["mode"]}) ' \ 73 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' 74 | else: 75 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' 76 | 77 | log_items = [] 78 | for name, val in log_dict.items(): 79 | # TODO: resolve this hack 80 | # these items are already included in log_str 81 | if name in [ 82 | 'mode', 'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time', 83 | 'memory', 'epoch' 84 | ]: 85 | continue 86 | if isinstance(val, float): 87 | val = f'{val:.4f}' 88 | log_items.append(f'{name}: {val}') 89 | log_str += ', '.join(log_items) 90 | 91 | runner.logger.info(log_str) 92 | 93 | 94 | def log(self, runner): 95 | if 'eval_iter_num' in runner.log_buffer.output: 96 | # this does not modify runner.iter and applies regardless of by_epoch 97 | cur_iter = runner.log_buffer.output.pop('eval_iter_num') 98 | else: 99 | cur_iter = self.get_iter(runner, inner_iter=True) 100 | 101 | log_dict = OrderedDict( 102 | mode=self.get_mode(runner), 103 | epoch=self.get_epoch(runner), 104 | iter=cur_iter) 105 | 106 | # record lr and layer_0_lr 107 | cur_lr = runner.current_lr() 108 | if isinstance(cur_lr, list): 109 | log_dict['layer_0_lr'] = min(cur_lr) 110 | log_dict['lr'] = max(cur_lr) 111 | else: 112 | assert isinstance(cur_lr, dict) 113 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {} 114 | for k, lr_ in cur_lr.items(): 115 | assert isinstance(lr_, list) 116 | log_dict['layer_0_lr'].update({k: min(lr_)}) 117 | log_dict['lr'].update({k: max(lr_)}) 118 | 119 | if 'time' in runner.log_buffer.output: 120 | # log GPU memory statistics 121 | if torch.cuda.is_available(): 122 | log_dict['memory'] = 
self._get_max_memory(runner) 123 | 124 | log_dict = dict(log_dict, **runner.log_buffer.output) 125 | 126 | self._log_info(log_dict, runner) 127 | self._dump_log(log_dict, runner) 128 | return log_dict 129 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | import json 10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 11 | from mmcv.runner import get_dist_info 12 | 13 | 14 | def get_num_layer_layer_wise(var_name, num_max_layer=12): 15 | 16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 17 | return 0 18 | elif var_name.startswith("backbone.downsample_layers"): 19 | stage_id = int(var_name.split('.')[2]) 20 | if stage_id == 0: 21 | layer_id = 0 22 | elif stage_id == 1: 23 | layer_id = 2 24 | elif stage_id == 2: 25 | layer_id = 3 26 | elif stage_id == 3: 27 | layer_id = num_max_layer 28 | return layer_id 29 | elif var_name.startswith("backbone.stages"): 30 | stage_id = int(var_name.split('.')[2]) 31 | block_id = int(var_name.split('.')[3]) 32 | if stage_id == 0: 33 | layer_id = 1 34 | elif stage_id == 1: 35 | layer_id = 2 36 | elif stage_id == 2: 37 | layer_id = 3 + block_id // 3 38 | elif stage_id == 3: 39 | layer_id = num_max_layer 40 | return layer_id 41 | else: 42 | return num_max_layer + 1 43 | 44 | 45 | def get_num_layer_stage_wise(var_name, num_max_layer): 46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 47 | return 0 48 | elif var_name.startswith("backbone.downsample_layers"): 49 | return 0 50 | elif var_name.startswith("backbone.stages"): 51 | stage_id = int(var_name.split('.')[2]) 52 | return stage_id + 1 53 | else: 54 | return num_max_layer - 1 55 | 56 | 57 | @OPTIMIZER_BUILDERS.register_module() 58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): 59 | def add_params(self, params, module, prefix='', is_dcn_module=None): 60 | """Add all parameters of module to the params list. 61 | The parameters of the given module will be added to the list of param 62 | groups, with specific rules defined by paramwise_cfg. 63 | Args: 64 | params (list[dict]): A list of param groups, it will be modified 65 | in place. 66 | module (nn.Module): The module to be added. 67 | prefix (str): The prefix of the module 68 | is_dcn_module (int|float|None): If the current module is a 69 | submodule of DCN, `is_dcn_module` will be passed to 70 | control conv_offset layer's learning rate. Defaults to None. 71 | """ 72 | parameter_groups = {} 73 | print(self.paramwise_cfg) 74 | num_layers = self.paramwise_cfg.get('num_layers') + 2 75 | decay_rate = self.paramwise_cfg.get('decay_rate') 76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise") 77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers)) 78 | weight_decay = self.base_wd 79 | 80 | for name, param in module.named_parameters(): 81 | if not param.requires_grad: 82 | continue # frozen weights 83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 84 | group_name = "no_decay" 85 | this_weight_decay = 0. 
86 | else: 87 | group_name = "decay" 88 | this_weight_decay = weight_decay 89 | 90 | if decay_type == "layer_wise": 91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers')) 92 | elif decay_type == "stage_wise": 93 | layer_id = get_num_layer_stage_wise(name, num_layers) 94 | 95 | group_name = "layer_%d_%s" % (layer_id, group_name) 96 | 97 | if group_name not in parameter_groups: 98 | scale = decay_rate ** (num_layers - layer_id - 1) 99 | 100 | parameter_groups[group_name] = { 101 | "weight_decay": this_weight_decay, 102 | "params": [], 103 | "param_names": [], 104 | "lr_scale": scale, 105 | "group_name": group_name, 106 | "lr": scale * self.base_lr, 107 | } 108 | 109 | parameter_groups[group_name]["params"].append(param) 110 | parameter_groups[group_name]["param_names"].append(name) 111 | rank, _ = get_dist_info() 112 | if rank == 0: 113 | to_display = {} 114 | for key in parameter_groups: 115 | to_display[key] = { 116 | "param_names": parameter_groups[key]["param_names"], 117 | "lr_scale": parameter_groups[key]["lr_scale"], 118 | "lr": parameter_groups[key]["lr"], 119 | "weight_decay": parameter_groups[key]["weight_decay"], 120 | } 121 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 122 | 123 | params.extend(parameter_groups.values()) 124 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/resize_transform.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | from mmseg.datasets.builder import PIPELINES 4 | 5 | 6 | @PIPELINES.register_module() 7 | class SETR_Resize(object): 8 | """Resize images & seg. 9 | 10 | This transform resizes the input image to some scale. If the input dict 11 | contains the key "scale", then the scale in the input dict is used, 12 | otherwise the specified scale in the init method is used. 13 | 14 | ``img_scale`` can either be a tuple (single-scale) or a list of tuple 15 | (multi-scale). There are 3 multiscale modes: 16 | 17 | - ``ratio_range is not None``: randomly sample a ratio from the ratio range 18 | and multiply it with the image scale. 19 | 20 | - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a 21 | scale from the a range. 22 | 23 | - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a 24 | scale from multiple scales. 25 | 26 | Args: 27 | img_scale (tuple or list[tuple]): Images scales for resizing. 28 | multiscale_mode (str): Either "range" or "value". 29 | ratio_range (tuple[float]): (min_ratio, max_ratio) 30 | keep_ratio (bool): Whether to keep the aspect ratio when resizing the 31 | image. 
32 | """ 33 | 34 | def __init__(self, 35 | img_scale=None, 36 | multiscale_mode='range', 37 | ratio_range=None, 38 | keep_ratio=True, 39 | crop_size=None, 40 | setr_multi_scale=False): 41 | 42 | if img_scale is None: 43 | self.img_scale = None 44 | else: 45 | if isinstance(img_scale, list): 46 | self.img_scale = img_scale 47 | else: 48 | self.img_scale = [img_scale] 49 | # assert mmcv.is_list_of(self.img_scale, tuple) 50 | 51 | if ratio_range is not None: 52 | # mode 1: given a scale and a range of image ratio 53 | assert len(self.img_scale) == 1 54 | else: 55 | # mode 2: given multiple scales or a range of scales 56 | assert multiscale_mode in ['value', 'range'] 57 | 58 | self.multiscale_mode = multiscale_mode 59 | self.ratio_range = ratio_range 60 | self.keep_ratio = keep_ratio 61 | self.crop_size = crop_size 62 | self.setr_multi_scale = setr_multi_scale 63 | 64 | @staticmethod 65 | def random_select(img_scales): 66 | """Randomly select an img_scale from given candidates. 67 | 68 | Args: 69 | img_scales (list[tuple]): Images scales for selection. 70 | 71 | Returns: 72 | (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, 73 | where ``img_scale`` is the selected image scale and 74 | ``scale_idx`` is the selected index in the given candidates. 75 | """ 76 | 77 | assert mmcv.is_list_of(img_scales, tuple) 78 | scale_idx = np.random.randint(len(img_scales)) 79 | img_scale = img_scales[scale_idx] 80 | return img_scale, scale_idx 81 | 82 | @staticmethod 83 | def random_sample(img_scales): 84 | """Randomly sample an img_scale when ``multiscale_mode=='range'``. 85 | 86 | Args: 87 | img_scales (list[tuple]): Images scale range for sampling. 88 | There must be two tuples in img_scales, which specify the lower 89 | and uper bound of image scales. 90 | 91 | Returns: 92 | (tuple, None): Returns a tuple ``(img_scale, None)``, where 93 | ``img_scale`` is sampled scale and None is just a placeholder 94 | to be consistent with :func:`random_select`. 95 | """ 96 | 97 | assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 98 | img_scale_long = [max(s) for s in img_scales] 99 | img_scale_short = [min(s) for s in img_scales] 100 | long_edge = np.random.randint( 101 | min(img_scale_long), 102 | max(img_scale_long) + 1) 103 | short_edge = np.random.randint( 104 | min(img_scale_short), 105 | max(img_scale_short) + 1) 106 | img_scale = (long_edge, short_edge) 107 | return img_scale, None 108 | 109 | @staticmethod 110 | def random_sample_ratio(img_scale, ratio_range): 111 | """Randomly sample an img_scale when ``ratio_range`` is specified. 112 | 113 | A ratio will be randomly sampled from the range specified by 114 | ``ratio_range``. Then it would be multiplied with ``img_scale`` to 115 | generate sampled scale. 116 | 117 | Args: 118 | img_scale (tuple): Images scale base to multiply with ratio. 119 | ratio_range (tuple[float]): The minimum and maximum ratio to scale 120 | the ``img_scale``. 121 | 122 | Returns: 123 | (tuple, None): Returns a tuple ``(scale, None)``, where 124 | ``scale`` is sampled ratio multiplied with ``img_scale`` and 125 | None is just a placeholder to be consistent with 126 | :func:`random_select`. 
127 | """ 128 | 129 | assert isinstance(img_scale, tuple) and len(img_scale) == 2 130 | min_ratio, max_ratio = ratio_range 131 | assert min_ratio <= max_ratio 132 | ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio 133 | scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) 134 | return scale, None 135 | 136 | def _random_scale(self, results): 137 | """Randomly sample an img_scale according to ``ratio_range`` and 138 | ``multiscale_mode``. 139 | 140 | If ``ratio_range`` is specified, a ratio will be sampled and be 141 | multiplied with ``img_scale``. 142 | If multiple scales are specified by ``img_scale``, a scale will be 143 | sampled according to ``multiscale_mode``. 144 | Otherwise, single scale will be used. 145 | 146 | Args: 147 | results (dict): Result dict from :obj:`dataset`. 148 | 149 | Returns: 150 | dict: Two new keys 'scale` and 'scale_idx` are added into 151 | ``results``, which would be used by subsequent pipelines. 152 | """ 153 | 154 | if self.ratio_range is not None: 155 | scale, scale_idx = self.random_sample_ratio( 156 | self.img_scale[0], self.ratio_range) 157 | elif len(self.img_scale) == 1: 158 | scale, scale_idx = self.img_scale[0], 0 159 | elif self.multiscale_mode == 'range': 160 | scale, scale_idx = self.random_sample(self.img_scale) 161 | elif self.multiscale_mode == 'value': 162 | scale, scale_idx = self.random_select(self.img_scale) 163 | else: 164 | raise NotImplementedError 165 | 166 | results['scale'] = scale 167 | results['scale_idx'] = scale_idx 168 | 169 | def _resize_img(self, results): 170 | """Resize images with ``results['scale']``.""" 171 | 172 | if self.keep_ratio: 173 | if self.setr_multi_scale: 174 | if min(results['scale']) < self.crop_size[0]: 175 | new_short = self.crop_size[0] 176 | else: 177 | new_short = min(results['scale']) 178 | 179 | h, w = results['img'].shape[:2] 180 | if h > w: 181 | new_h, new_w = new_short * h / w, new_short 182 | else: 183 | new_h, new_w = new_short, new_short * w / h 184 | results['scale'] = (new_h, new_w) 185 | 186 | img, scale_factor = mmcv.imrescale( 187 | results['img'], results['scale'], return_scale=True) 188 | # the w_scale and h_scale has minor difference 189 | # a real fix should be done in the mmcv.imrescale in the future 190 | new_h, new_w = img.shape[:2] 191 | h, w = results['img'].shape[:2] 192 | w_scale = new_w / w 193 | h_scale = new_h / h 194 | else: 195 | img, w_scale, h_scale = mmcv.imresize( 196 | results['img'], results['scale'], return_scale=True) 197 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 198 | dtype=np.float32) 199 | results['img'] = img 200 | results['img_shape'] = img.shape 201 | results['pad_shape'] = img.shape # in case that there is no padding 202 | results['scale_factor'] = scale_factor 203 | results['keep_ratio'] = self.keep_ratio 204 | 205 | def _resize_seg(self, results): 206 | """Resize semantic segmentation map with ``results['scale']``.""" 207 | for key in results.get('seg_fields', []): 208 | if self.keep_ratio: 209 | gt_seg = mmcv.imrescale( 210 | results[key], results['scale'], interpolation='nearest') 211 | else: 212 | gt_seg = mmcv.imresize( 213 | results[key], results['scale'], interpolation='nearest') 214 | results['gt_semantic_seg'] = gt_seg 215 | 216 | def __call__(self, results): 217 | """Call function to resize images, bounding boxes, masks, semantic 218 | segmentation map. 219 | 220 | Args: 221 | results (dict): Result dict from loading pipeline. 
222 | 223 | Returns: 224 | dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', 225 | 'keep_ratio' keys are added into result dict. 226 | """ 227 | 228 | if 'scale' not in results: 229 | self._random_scale(results) 230 | self._resize_img(results) 231 | self._resize_seg(results) 232 | return results 233 | 234 | def __repr__(self): 235 | repr_str = self.__class__.__name__ 236 | repr_str += (f'(img_scale={self.img_scale}, ' 237 | f'multiscale_mode={self.multiscale_mode}, ' 238 | f'ratio_range={self.ratio_range}, ' 239 | f'keep_ratio={self.keep_ratio})') 240 | return repr_str -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/train_api.py: -------------------------------------------------------------------------------- 1 | import random 2 | import warnings 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import build_optimizer, build_runner 8 | from mmseg.core import DistEvalHook, EvalHook 9 | from mmseg.datasets import build_dataloader, build_dataset 10 | from mmseg.utils import get_root_logger 11 | 12 | try: 13 | import apex 14 | except: 15 | print('apex is not installed') 16 | 17 | 18 | def set_random_seed(seed, deterministic=False): 19 | """Set random seed. 20 | 21 | Args: 22 | seed (int): Seed to be used. 23 | deterministic (bool): Whether to set the deterministic option for 24 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` 25 | to True and `torch.backends.cudnn.benchmark` to False. 26 | Default: False. 27 | """ 28 | random.seed(seed) 29 | np.random.seed(seed) 30 | torch.manual_seed(seed) 31 | torch.cuda.manual_seed_all(seed) 32 | if deterministic: 33 | torch.backends.cudnn.deterministic = True 34 | torch.backends.cudnn.benchmark = False 35 | 36 | 37 | def train_segmentor(model, 38 | dataset, 39 | cfg, 40 | distributed=False, 41 | validate=False, 42 | timestamp=None, 43 | meta=None): 44 | """Launch segmentor training.""" 45 | logger = get_root_logger(cfg.log_level) 46 | 47 | # prepare data loaders 48 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 49 | data_loaders = [ 50 | build_dataloader( 51 | ds, 52 | cfg.data.samples_per_gpu, 53 | cfg.data.workers_per_gpu, 54 | # cfg.gpus will be ignored if distributed 55 | len(cfg.gpu_ids), 56 | dist=distributed, 57 | seed=cfg.seed, 58 | drop_last=True) for ds in dataset 59 | ] 60 | 61 | # build optimizer 62 | optimizer = build_optimizer(model, cfg.optimizer) 63 | 64 | # use apex fp16 optimizer 65 | if cfg.optimizer_config.get("type", None) and cfg.optimizer_config["type"] == "DistOptimizerHook": 66 | if cfg.optimizer_config.get("use_fp16", False): 67 | model, optimizer = apex.amp.initialize( 68 | model.cuda(), optimizer, opt_level="O1") 69 | for m in model.modules(): 70 | if hasattr(m, "fp16_enabled"): 71 | m.fp16_enabled = True 72 | 73 | # put model on gpus 74 | if distributed: 75 | find_unused_parameters = cfg.get('find_unused_parameters', False) 76 | # Sets the `find_unused_parameters` parameter in 77 | # torch.nn.parallel.DistributedDataParallel 78 | model = MMDistributedDataParallel( 79 | model.cuda(), 80 | device_ids=[torch.cuda.current_device()], 81 | broadcast_buffers=False, 82 | find_unused_parameters=find_unused_parameters) 83 | else: 84 | model = MMDataParallel( 85 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) 86 | 87 | if cfg.get('runner') is None: 88 | cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} 89 
| warnings.warn( 90 | 'config is now expected to have a `runner` section, ' 91 | 'please set `runner` in your config.', UserWarning) 92 | 93 | runner = build_runner( 94 | cfg.runner, 95 | default_args=dict( 96 | model=model, 97 | batch_processor=None, 98 | optimizer=optimizer, 99 | work_dir=cfg.work_dir, 100 | logger=logger, 101 | meta=meta)) 102 | 103 | # register hooks 104 | runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, 105 | cfg.checkpoint_config, cfg.log_config, 106 | cfg.get('momentum_config', None)) 107 | 108 | # an ugly walkaround to make the .log and .log.json filenames the same 109 | runner.timestamp = timestamp 110 | 111 | # register eval hooks 112 | if validate: 113 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) 114 | val_dataloader = build_dataloader( 115 | val_dataset, 116 | samples_per_gpu=1, 117 | workers_per_gpu=cfg.data.workers_per_gpu, 118 | dist=distributed, 119 | shuffle=False) 120 | eval_cfg = cfg.get('evaluation', {}) 121 | eval_cfg['by_epoch'] = 'IterBasedRunner' not in cfg.runner['type'] 122 | eval_hook = DistEvalHook if distributed else EvalHook 123 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) 124 | 125 | if cfg.resume_from: 126 | runner.resume(cfg.resume_from) 127 | elif cfg.load_from: 128 | runner.load_checkpoint(cfg.load_from) 129 | runner.run(data_loaders, cfg.workflow) 130 | -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .align_resize import AlignResize 3 | 4 | __all__=['AlignResize'] -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-37.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/align_resize.py: -------------------------------------------------------------------------------- 1 | ######### 2 | # copied from uniformer 3 | # 
https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/tools/align_resize.py 4 | ######### 5 | import mmcv 6 | import numpy as np 7 | from mmcv.utils import deprecated_api_warning, is_tuple_of 8 | from mmseg.datasets.builder import PIPELINES 9 | from numpy import random 10 | 11 | 12 | @PIPELINES.register_module() 13 | class AlignResize(object): 14 | """Resize images & segmentation maps, aligning sizes to be divisible by ``size_divisor``. 15 | """ 16 | 17 | def __init__(self, 18 | img_scale=None, 19 | multiscale_mode='range', 20 | ratio_range=None, 21 | keep_ratio=True, 22 | size_divisor=32): 23 | if img_scale is None: 24 | self.img_scale = None 25 | else: 26 | if isinstance(img_scale, list): 27 | self.img_scale = img_scale 28 | else: 29 | self.img_scale = [img_scale] 30 | assert mmcv.is_list_of(self.img_scale, tuple) 31 | 32 | if ratio_range is not None: 33 | # mode 1: given img_scale=None and a range of image ratio 34 | # mode 2: given a scale and a range of image ratio 35 | assert self.img_scale is None or len(self.img_scale) == 1 36 | else: 37 | # mode 3 and 4: given multiple scales or a range of scales 38 | assert multiscale_mode in ['value', 'range'] 39 | 40 | self.multiscale_mode = multiscale_mode 41 | self.ratio_range = ratio_range 42 | self.keep_ratio = keep_ratio 43 | self.size_divisor = size_divisor 44 | 45 | @staticmethod 46 | def random_select(img_scales): 47 | """Randomly select an img_scale from given candidates. 48 | Args: 49 | img_scales (list[tuple]): Image scales for selection. 50 | Returns: 51 | (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, 52 | where ``img_scale`` is the selected image scale and 53 | ``scale_idx`` is the selected index in the given candidates. 54 | """ 55 | 56 | assert mmcv.is_list_of(img_scales, tuple) 57 | scale_idx = np.random.randint(len(img_scales)) 58 | img_scale = img_scales[scale_idx] 59 | return img_scale, scale_idx 60 | 61 | @staticmethod 62 | def random_sample(img_scales): 63 | """Randomly sample an img_scale when ``multiscale_mode=='range'``. 64 | Args: 65 | img_scales (list[tuple]): Image scale range for sampling. 66 | There must be two tuples in img_scales, which specify the lower 67 | and upper bound of image scales. 68 | Returns: 69 | (tuple, None): Returns a tuple ``(img_scale, None)``, where 70 | ``img_scale`` is the sampled scale and None is just a placeholder 71 | to be consistent with :func:`random_select`. 72 | """ 73 | 74 | assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 75 | img_scale_long = [max(s) for s in img_scales] 76 | img_scale_short = [min(s) for s in img_scales] 77 | long_edge = np.random.randint( 78 | min(img_scale_long), 79 | max(img_scale_long) + 1) 80 | short_edge = np.random.randint( 81 | min(img_scale_short), 82 | max(img_scale_short) + 1) 83 | img_scale = (long_edge, short_edge) 84 | return img_scale, None 85 | 86 | @staticmethod 87 | def random_sample_ratio(img_scale, ratio_range): 88 | """Randomly sample an img_scale when ``ratio_range`` is specified. 89 | A ratio will be randomly sampled from the range specified by 90 | ``ratio_range``. Then it is multiplied with ``img_scale`` to 91 | generate the sampled scale. 92 | Args: 93 | img_scale (tuple): Image scale base to multiply with the ratio. 94 | ratio_range (tuple[float]): The minimum and maximum ratio to scale 95 | the ``img_scale``. 96 | Returns: 97 | (tuple, None): Returns a tuple ``(scale, None)``, where 98 | ``scale`` is the sampled ratio multiplied with ``img_scale`` and 99 | None is just a placeholder to be consistent with 100 | :func:`random_select`. 
101 | """ 102 | 103 | assert isinstance(img_scale, tuple) and len(img_scale) == 2 104 | min_ratio, max_ratio = ratio_range 105 | assert min_ratio <= max_ratio 106 | ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio 107 | scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) 108 | return scale, None 109 | 110 | def _random_scale(self, results): 111 | """Randomly sample an img_scale according to ``ratio_range`` and 112 | ``multiscale_mode``. 113 | If ``ratio_range`` is specified, a ratio will be sampled and 114 | multiplied with ``img_scale``. 115 | If multiple scales are specified by ``img_scale``, a scale will be 116 | sampled according to ``multiscale_mode``. 117 | Otherwise, a single scale will be used. 118 | Args: 119 | results (dict): Result dict from :obj:`dataset`. 120 | Returns: 121 | dict: Two new keys ``scale`` and ``scale_idx`` are added into 122 | ``results``, which will be used by subsequent pipelines. 123 | """ 124 | 125 | if self.ratio_range is not None: 126 | if self.img_scale is None: 127 | h, w = results['img'].shape[:2] 128 | scale, scale_idx = self.random_sample_ratio((w, h), 129 | self.ratio_range) 130 | else: 131 | scale, scale_idx = self.random_sample_ratio( 132 | self.img_scale[0], self.ratio_range) 133 | elif len(self.img_scale) == 1: 134 | scale, scale_idx = self.img_scale[0], 0 135 | elif self.multiscale_mode == 'range': 136 | scale, scale_idx = self.random_sample(self.img_scale) 137 | elif self.multiscale_mode == 'value': 138 | scale, scale_idx = self.random_select(self.img_scale) 139 | else: 140 | raise NotImplementedError 141 | 142 | results['scale'] = scale 143 | results['scale_idx'] = scale_idx 144 | 145 | def _align(self, img, size_divisor, interpolation=None): 146 | align_h = int(np.ceil(img.shape[0] / size_divisor)) * size_divisor 147 | align_w = int(np.ceil(img.shape[1] / size_divisor)) * size_divisor 148 | if interpolation is None: 149 | img = mmcv.imresize(img, (align_w, align_h)) 150 | else: 151 | img = mmcv.imresize(img, (align_w, align_h), interpolation=interpolation) 152 | return img 153 | 154 | def _resize_img(self, results): 155 | """Resize images with ``results['scale']``.""" 156 | if self.keep_ratio: 157 | img, scale_factor = mmcv.imrescale( 158 | results['img'], results['scale'], return_scale=True) 159 | #### align #### 160 | img = self._align(img, self.size_divisor) 161 | # w_scale and h_scale have a minor difference 162 | # a real fix should be done in mmcv.imrescale in the future 163 | new_h, new_w = img.shape[:2] 164 | h, w = results['img'].shape[:2] 165 | w_scale = new_w / w 166 | h_scale = new_h / h 167 | else: 168 | img, w_scale, h_scale = mmcv.imresize( 169 | results['img'], results['scale'], return_scale=True) 170 | 171 | h, w = img.shape[:2] 172 | assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \ 173 | int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \ 174 | "img size not aligned. 
h:{} w:{}".format(h, w) 175 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 176 | dtype=np.float32) 177 | results['img'] = img 178 | results['img_shape'] = img.shape 179 | results['pad_shape'] = img.shape # in case there is no padding 180 | results['scale_factor'] = scale_factor 181 | results['keep_ratio'] = self.keep_ratio 182 | 183 | def _resize_seg(self, results): 184 | """Resize semantic segmentation map with ``results['scale']``.""" 185 | for key in results.get('seg_fields', []): 186 | if self.keep_ratio: 187 | gt_seg = mmcv.imrescale( 188 | results[key], results['scale'], interpolation='nearest') 189 | gt_seg = self._align(gt_seg, self.size_divisor, interpolation='nearest') 190 | else: 191 | gt_seg = mmcv.imresize( 192 | results[key], results['scale'], interpolation='nearest') 193 | h, w = gt_seg.shape[:2] 194 | assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \ 195 | int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \ 196 | "gt_seg size not aligned. h:{} w:{}".format(h, w) 197 | results[key] = gt_seg 198 | 199 | def __call__(self, results): 200 | """Call function to resize images and semantic 201 | segmentation maps. 202 | Args: 203 | results (dict): Result dict from loading pipeline. 204 | Returns: 205 | dict: Resized results; 'img_shape', 'pad_shape', 'scale_factor' and 206 | 'keep_ratio' keys are added to the result dict. 207 | """ 208 | 209 | if 'scale' not in results: 210 | self._random_scale(results) 211 | self._resize_img(results) 212 | self._resize_seg(results) 213 | return results 214 | 215 | def __repr__(self): 216 | repr_str = self.__class__.__name__ 217 | repr_str += (f'(img_scale={self.img_scale}, ' 218 | f'multiscale_mode={self.multiscale_mode}, ' 219 | f'ratio_range={self.ratio_range}, ' 220 | f'keep_ratio={self.keep_ratio})') 221 | return repr_str 222 | -------------------------------------------------------------------------------- /semantic_segmentation/scripts/train_sfpn_transxnet_base.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | --master_port=$((RANDOM+10000)) \ 5 | train.py \ 6 | configs/sfpn_transxnet_base.py \ 7 | --work-dir work_dirs/sfpn_transxnet_base/ \ 8 | --launcher pytorch -------------------------------------------------------------------------------- /semantic_segmentation/scripts/train_sfpn_transxnet_small.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | --master_port=$((RANDOM+10000)) \ 5 | train.py \ 6 | configs/sfpn_transxnet_small.py \ 7 | --work-dir work_dirs/sfpn_transxnet_small/ \ 8 | --launcher pytorch -------------------------------------------------------------------------------- /semantic_segmentation/scripts/train_sfpn_transxnet_tiny.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | --master_port=$((RANDOM+10000)) \ 5 | train.py \ 6 | configs/sfpn_transxnet_tiny.py \ 7 | --work-dir work_dirs/sfpn_transxnet_tiny/ \ 8 | --launcher pytorch -------------------------------------------------------------------------------- /semantic_segmentation/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import mmcv 5 | 
import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, 8 | wrap_fp16_model) 9 | from mmcv.utils import DictAction 10 | 11 | from mmseg.apis import multi_gpu_test, single_gpu_test 12 | from mmseg.datasets import build_dataloader, build_dataset 13 | from mmseg.models import build_segmentor 14 | 15 | import mmcv_custom  # noqa: F401, imported for side effects 16 | import mmseg_custom  # noqa: F401, registers the AlignResize pipeline 17 | import transxnet  # noqa: F401, registers the TransXNet backbone 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser( 21 | description='mmseg test (and eval) a model') 22 | parser.add_argument('config', help='test config file path') 23 | parser.add_argument('checkpoint', help='checkpoint file') 24 | parser.add_argument( 25 | '--aug-test', action='store_true', help='use flip and multi-scale augmentation') 26 | parser.add_argument('--out', help='output result file in pickle format') 27 | parser.add_argument( 28 | '--format-only', 29 | action='store_true', 30 | help='Format the output results without performing evaluation. It is ' 31 | 'useful when you want to format the results to a specific format and ' 32 | 'submit them to the test server') 33 | parser.add_argument( 34 | '--eval', 35 | type=str, 36 | nargs='+', 37 | help='evaluation metrics, which depend on the dataset, e.g., "mIoU"' 38 | ' for generic datasets, and "cityscapes" for Cityscapes') 39 | parser.add_argument('--show', action='store_true', help='show results') 40 | parser.add_argument( 41 | '--show-dir', help='directory where painted images will be saved') 42 | parser.add_argument( 43 | '--gpu-collect', 44 | action='store_true', 45 | help='whether to use gpu to collect results.') 46 | parser.add_argument( 47 | '--tmpdir', 48 | help='tmp directory used for collecting results from multiple ' 49 | 'workers, available when gpu_collect is not specified') 50 | parser.add_argument( 51 | '--options', nargs='+', action=DictAction, help='custom options') 52 | parser.add_argument( 53 | '--eval-options', 54 | nargs='+', 55 | action=DictAction, 56 | help='custom options for evaluation') 57 | parser.add_argument( 58 | '--launcher', 59 | choices=['none', 'pytorch', 'slurm', 'mpi'], 60 | default='none', 61 | help='job launcher') 62 | parser.add_argument( 63 | '--opacity', 64 | type=float, 65 | default=0.5, 66 | help='Opacity of painted segmentation map. 
In (0, 1] range.') 67 | parser.add_argument('--local_rank', type=int, default=0) 68 | args = parser.parse_args() 69 | if 'LOCAL_RANK' not in os.environ: 70 | os.environ['LOCAL_RANK'] = str(args.local_rank) 71 | return args 72 | 73 | 74 | def main(): 75 | args = parse_args() 76 | 77 | assert args.out or args.eval or args.format_only or args.show \ 78 | or args.show_dir, \ 79 | ('Please specify at least one operation (save/eval/format/show the ' 80 | 'results) with the argument "--out", "--eval", ' 81 | '"--format-only", "--show" or "--show-dir"') 82 | 83 | if args.eval and args.format_only: 84 | raise ValueError('--eval and --format_only cannot both be specified') 85 | 86 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 87 | raise ValueError('The output file must be a pkl file.') 88 | 89 | cfg = mmcv.Config.fromfile(args.config) 90 | if args.options is not None: 91 | cfg.merge_from_dict(args.options) 92 | # set cudnn_benchmark 93 | if cfg.get('cudnn_benchmark', False): 94 | torch.backends.cudnn.benchmark = True 95 | if args.aug_test: 96 | # hard-coded pipeline index for multi-scale test augmentation 97 | cfg.data.test.pipeline[1].img_ratios = [ 98 | 0.5, 0.75, 1.0, 1.25, 1.5, 1.75 99 | ] 100 | cfg.data.test.pipeline[1].flip = True 101 | cfg.model.pretrained = None 102 | cfg.data.test.test_mode = True 103 | 104 | # init distributed env first, since logger depends on the dist info. 105 | if args.launcher == 'none': 106 | distributed = False 107 | else: 108 | distributed = True 109 | init_dist(args.launcher, **cfg.dist_params) 110 | 111 | # build the dataloader 112 | # TODO: support multiple images per gpu (only minor changes are needed) 113 | dataset = build_dataset(cfg.data.test) 114 | data_loader = build_dataloader( 115 | dataset, 116 | samples_per_gpu=1, 117 | workers_per_gpu=cfg.data.workers_per_gpu, 118 | dist=distributed, 119 | shuffle=False) 120 | 121 | # build the model and load checkpoint 122 | cfg.model.train_cfg = None 123 | model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) 124 | 125 | fp16_cfg = cfg.get('fp16', None) 126 | if fp16_cfg is not None: 127 | wrap_fp16_model(model) 128 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') 129 | model.CLASSES = dataset.CLASSES # checkpoint['meta']['CLASSES'] 130 | model.PALETTE = dataset.PALETTE # checkpoint['meta']['PALETTE'] 131 | 132 | efficient_test = False 133 | if args.eval_options is not None: 134 | efficient_test = args.eval_options.get('efficient_test', False) 135 | 136 | if not distributed: 137 | model = MMDataParallel(model, device_ids=[0]) 138 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir, 139 | efficient_test, args.opacity) 140 | else: 141 | model = MMDistributedDataParallel( 142 | model.cuda(), 143 | device_ids=[torch.cuda.current_device()], 144 | broadcast_buffers=False) 145 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 146 | args.gpu_collect, efficient_test) 147 | 148 | rank, _ = get_dist_info() 149 | if rank == 0: 150 | if args.out: 151 | print(f'\nwriting results to {args.out}') 152 | mmcv.dump(outputs, args.out) 153 | kwargs = {} if args.eval_options is None else args.eval_options 154 | if args.format_only: 155 | dataset.format_results(outputs, **kwargs) 156 | if args.eval: 157 | dataset.evaluate(outputs, args.eval, **kwargs) 158 | 159 | 160 | if __name__ == '__main__': 161 | main() 162 | -------------------------------------------------------------------------------- /semantic_segmentation/train.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import time 4 | import copy 5 | import argparse 6 | 7 | import mmcv 8 | import torch 9 | from mmcv.runner import init_dist 10 | from mmcv.utils import Config, DictAction, get_git_hash 11 | from mmseg import __version__ 12 | from mmseg.apis import set_random_seed, train_segmentor 13 | from mmseg.datasets import build_dataset 14 | from mmseg.models import build_segmentor 15 | from mmseg.utils import collect_env, get_root_logger 16 | 17 | # import custom utils 18 | import mmcv_custom 19 | import mmseg_custom 20 | # import models 21 | import transxnet 22 | 23 | 24 | def parse_args(): 25 | parser = argparse.ArgumentParser(description='Train a segmentor') 26 | parser.add_argument('config', help='train config file path') 27 | parser.add_argument('--work-dir', help='the dir to save logs and models') 28 | parser.add_argument( 29 | '--load-from', help='the checkpoint file to load weights from') 30 | parser.add_argument( 31 | '--resume-from', help='the checkpoint file to resume from') 32 | parser.add_argument( 33 | '--no-validate', 34 | action='store_true', 35 | help='skip checkpoint evaluation during training') 36 | group_gpus = parser.add_mutually_exclusive_group() 37 | group_gpus.add_argument( 38 | '--gpus', 39 | type=int, 40 | help='number of gpus to use ' 41 | '(only applicable to non-distributed training)') 42 | group_gpus.add_argument( 43 | '--gpu-ids', 44 | type=int, 45 | nargs='+', 46 | help='ids of gpus to use ' 47 | '(only applicable to non-distributed training)') 48 | parser.add_argument('--seed', type=int, default=None, help='random seed') 49 | parser.add_argument( 50 | '--deterministic', 51 | action='store_true', 52 | help='whether to set deterministic options for CUDNN backend.') 53 | parser.add_argument( 54 | '--options', nargs='+', action=DictAction, help='custom options') 55 | parser.add_argument( 56 | '--launcher', 57 | choices=['none', 'pytorch', 'slurm', 'mpi'], 58 | default='none', 59 | help='job launcher') 60 | parser.add_argument('--local_rank', type=int, default=0) 61 | args = parser.parse_args() 62 | if 'LOCAL_RANK' not in os.environ: 63 | os.environ['LOCAL_RANK'] = str(args.local_rank) 64 | 65 | return args 66 | 67 | 68 | def main(): 69 | args = parse_args() 70 | 71 | cfg = Config.fromfile(args.config) 72 | if args.options is not None: 73 | cfg.merge_from_dict(args.options) 74 | # set cudnn_benchmark 75 | if cfg.get('cudnn_benchmark', False): 76 | torch.backends.cudnn.benchmark = True 77 | 78 | # work_dir is determined in this priority: CLI > config file > default filename 79 | if args.work_dir is not None: 80 | # update configs according to CLI args if args.work_dir is not None 81 | cfg.work_dir = args.work_dir 82 | elif cfg.get('work_dir', None) is None: 83 | # use config filename as default work_dir if cfg.work_dir is None 84 | cfg.work_dir = osp.join('./work_dirs', 85 | osp.splitext(osp.basename(args.config))[0]) 86 | if args.load_from is not None: 87 | cfg.load_from = args.load_from 88 | if args.resume_from is not None: 89 | cfg.resume_from = args.resume_from 90 | if args.gpu_ids is not None: 91 | cfg.gpu_ids = args.gpu_ids 92 | else: 93 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 94 | 95 | # init distributed env first, since logger depends on the dist info. 
96 | if args.launcher == 'none': 97 | distributed = False 98 | else: 99 | distributed = True 100 | init_dist(args.launcher, **cfg.dist_params) 101 | 102 | # create work_dir 103 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 104 | # dump config 105 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 106 | # init the logger before other steps 107 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 108 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 109 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 110 | 111 | # init the meta dict to record some important information such as 112 | # environment info and seed, which will be logged 113 | meta = dict() 114 | # log env info 115 | env_info_dict = collect_env() 116 | env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()]) 117 | dash_line = '-' * 60 + '\n' 118 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 119 | dash_line) 120 | meta['env_info'] = env_info 121 | 122 | # log some basic info 123 | logger.info(f'Distributed training: {distributed}') 124 | logger.info(f'Config:\n{cfg.pretty_text}') 125 | 126 | # set random seeds 127 | if args.seed is not None: 128 | logger.info(f'Set random seed to {args.seed}, deterministic: ' 129 | f'{args.deterministic}') 130 | set_random_seed(args.seed, deterministic=args.deterministic) 131 | cfg.seed = args.seed 132 | meta['seed'] = args.seed 133 | meta['exp_name'] = osp.basename(args.config) 134 | 135 | model = build_segmentor( 136 | cfg.model, 137 | train_cfg=cfg.get('train_cfg'), 138 | test_cfg=cfg.get('test_cfg')) 139 | 140 | logger.info(model) 141 | 142 | datasets = [build_dataset(cfg.data.train)] 143 | if len(cfg.workflow) == 2: 144 | val_dataset = copy.deepcopy(cfg.data.val) 145 | val_dataset.pipeline = cfg.data.train.pipeline 146 | datasets.append(build_dataset(val_dataset)) 147 | if cfg.checkpoint_config is not None: 148 | # save mmseg version, config file content and class names in 149 | # checkpoints as meta data 150 | cfg.checkpoint_config.meta = dict( 151 | mmseg_version=f'{__version__}+{get_git_hash()[:7]}', 152 | config=cfg.pretty_text, 153 | CLASSES=datasets[0].CLASSES, 154 | PALETTE=datasets[0].PALETTE) 155 | # add an attribute for visualization convenience 156 | model.CLASSES = datasets[0].CLASSES 157 | train_segmentor( 158 | model, 159 | datasets, 160 | cfg, 161 | distributed=distributed, 162 | validate=(not args.no_validate), 163 | timestamp=timestamp, 164 | meta=meta) 165 | 166 | 167 | if __name__ == '__main__': 168 | main() --------------------------------------------------------------------------------