├── README.md
├── assets
│   └── architecture.png
├── models
│   ├── __init__.py
│   ├── poolformer.py
│   └── transxnet.py
├── object_detection
│   ├── README.md
│   ├── analysis_tools
│   │   ├── analyze_logs.py
│   │   ├── analyze_results.py
│   │   ├── benchmark.py
│   │   ├── coco_error_analysis.py
│   │   ├── eval_metric.py
│   │   ├── get_flops.py
│   │   ├── robustness_eval.py
│   │   └── test_robustness.py
│   ├── checkpoint.py
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   ├── coco_detection.py
│   │   │   │   └── coco_instance.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   ├── cascade_mask_rcnn_pvtv2_b2_fpn.py
│   │   │   │   ├── cascade_mask_rcnn_r50_fpn.py
│   │   │   │   ├── cascade_rcnn_r50_fpn.py
│   │   │   │   ├── fast_rcnn_r50_fpn.py
│   │   │   │   ├── faster_rcnn_r50_caffe_c4.py
│   │   │   │   ├── faster_rcnn_r50_caffe_dc5.py
│   │   │   │   ├── faster_rcnn_r50_fpn.py
│   │   │   │   ├── mask_rcnn_r50_caffe_c4.py
│   │   │   │   ├── mask_rcnn_r50_fpn.py
│   │   │   │   ├── retinanet_r50_fpn.py
│   │   │   │   ├── rpn_r50_caffe_c4.py
│   │   │   │   ├── rpn_r50_fpn.py
│   │   │   │   └── ssd300.py
│   │   │   └── schedules
│   │   │       ├── schedule_1x.py
│   │   │       ├── schedule_20e.py
│   │   │       └── schedule_2x.py
│   │   ├── mask_rcnn_transx_b_fpn_1x_coco.py
│   │   ├── mask_rcnn_transx_s_fpn_1x_coco.py
│   │   ├── mask_rcnn_transx_t_fpn_1x_coco.py
│   │   ├── retinanet_transx_b_fpn_1x_coco.py
│   │   ├── retinanet_transx_s_fpn_1x_coco.py
│   │   └── retinanet_transx_t_fpn_1x_coco.py
│   ├── dist_test.sh
│   ├── dist_train.sh
│   ├── mmcv_custom
│   │   └── runner
│   │       ├── checkpoint.py
│   │       ├── epoch_based_runner.py
│   │       └── optimizer.py
│   ├── mmdet_custom
│   │   └── apis
│   │       └── train.py
│   ├── test.py
│   ├── train.py
│   └── transxnet.py
├── scripts
│   ├── train_base.sh
│   ├── train_small.sh
│   └── train_tiny.sh
├── semantic_segmentation
│   ├── README.md
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   └── ade20k_sfpn.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   └── fpn_r50.py
│   │   │   └── schedules
│   │   │       ├── schedule_160k.py
│   │   │       ├── schedule_20k.py
│   │   │       ├── schedule_40k.py
│   │   │       └── schedule_80k.py
│   │   ├── sfpn_transxnet_base.py
│   │   ├── sfpn_transxnet_small.py
│   │   └── sfpn_transxnet_tiny.py
│   ├── mmcv_custom
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── checkpoint.cpython-38.pyc
│   │   │   ├── customized_text.cpython-38.pyc
│   │   │   ├── layer_decay_optimizer_constructor.cpython-38.pyc
│   │   │   └── resize_transform.cpython-38.pyc
│   │   ├── apex_runner
│   │   │   └── checkpoint.py
│   │   ├── checkpoint.py
│   │   ├── customized_text.py
│   │   ├── layer_decay_optimizer_constructor.py
│   │   ├── resize_transform.py
│   │   └── train_api.py
│   ├── mmseg_custom
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── align_resize.cpython-37.pyc
│   │   │   └── align_resize.cpython-38.pyc
│   │   └── align_resize.py
│   ├── scripts
│   │   ├── train_sfpn_transxnet_base.sh
│   │   ├── train_sfpn_transxnet_small.sh
│   │   └── train_sfpn_transxnet_tiny.sh
│   ├── test.py
│   ├── train.py
│   └── transxnet.py
├── train.py
└── validate.py
/README.md:
--------------------------------------------------------------------------------
1 | # [[TNNLS 2025] TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://doi.org/10.1109/TNNLS.2025.3550979)
2 |
3 | > This is an official PyTorch implementation of "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://doi.org/10.1109/TNNLS.2025.3550979)".
4 | >
5 | > 📝 Paper: [Journal Version](https://doi.org/10.1109/TNNLS.2025.3550979) | [arXiv Version](https://arxiv.org/abs/2310.19380)
6 |
7 |
8 | # Introduction
9 | **TransXNet** is a CNN-Transformer hybrid vision backbone that can model both global and local dynamics with a Dual Dynamic Token Mixer (D-Mixer), achieving superior performance over both CNN and Transformer-based models.
10 |
11 | ![TransXNet architecture](assets/architecture.png)
12 |
13 |
14 | # Image Classification
15 |
16 | ## 1. Requirements
17 | We strongly recommend using the dependency versions below to ensure reproducibility:
18 | ```
19 | # Environments:
20 | cuda==11.6
21 | python==3.8.15
22 | # Packages:
23 | mmcv==1.7.1
24 | timm==0.6.12
25 | torch==1.13.1
26 | torchvision==0.14.1
27 | ```
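
The packages above can be installed via pip. A minimal sketch, assuming a CUDA 11.6 machine (pick the torch wheel index that matches your CUDA version):

```
pip install mmcv==1.7.1 timm==0.6.12
pip install torch==1.13.1 torchvision==0.14.1 --extra-index-url https://download.pytorch.org/whl/cu116
```
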
28 | ## 2. Data Preparation
29 | Prepare [ImageNet](https://image-net.org/) with the following folder structure; you can extract ImageNet using this [script](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4).
30 |
31 | ```
32 | │imagenet/
33 | ├──train/
34 | │ ├── n01440764
35 | │ │ ├── n01440764_10026.JPEG
36 | │ │ ├── n01440764_10027.JPEG
37 | │ │ ├── ......
38 | │ ├── ......
39 | ├──val/
40 | │ ├── n01440764
41 | │ │ ├── ILSVRC2012_val_00000293.JPEG
42 | │ │ ├── ILSVRC2012_val_00002138.JPEG
43 | │ │ ├── ......
44 | │ ├── ......
45 | ```
46 |
47 | ## 3. Main Results on ImageNet with Pretrained Models
48 |
49 | | Models | Input Size | FLOPs (G) | Params (M) | Top-1 Acc.(%) | Download |
50 | |:-----------:|:----------:|:---------:|:----------:|:----------:|:----------:|
51 | | TransXNet-T | 224x224 | 1.8 | 12.8 | 81.6 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) |
52 | | TransXNet-S | 224x224 | 4.5 | 26.9 | 83.8 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) |
53 | | TransXNet-B | 224x224 | 8.3 | 48.0 | 84.6 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) |
54 | | TransXNet-B | 384x384 | 24.2 | 48.0 | 85.5 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b-384.pth.tar) |
55 |
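As a quick sanity check, a downloaded checkpoint can be loaded directly from the repository root. A minimal sketch, assuming `models/transxnet.py` registers the `transxnet_{t,s,b}` variants with timm (as the `--model` flag of `validate.py` suggests) and that the released files store either raw weights or a `state_dict`-wrapped checkpoint:

```
import torch
from timm.models import create_model

import models  # noqa: F401  # importing registers the TransXNet variants with timm

model = create_model('transxnet_t')
ckpt = torch.load('transx-t.pth.tar', map_location='cpu')
# unwrap a {'state_dict': ...} checkpoint if necessary
state_dict = ckpt.get('state_dict', ckpt) if isinstance(ckpt, dict) else ckpt
model.load_state_dict(state_dict)
model.eval()
print(model(torch.randn(1, 3, 224, 224)).shape)  # expected: torch.Size([1, 1000])
```
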
56 | ## 4. Train
57 | To train ```TransXNet``` models on ImageNet-1K with 8 GPUs (single node), run:
58 | ```
59 | bash scripts/train_tiny.sh # train TransXNet-T
60 | bash scripts/train_small.sh # train TransXNet-S
61 | bash scripts/train_base.sh # train TransXNet-B
62 | ```
63 |
64 | ## 5. Validation
65 | To evaluate ```TransXNet``` on ImageNet-1K, run:
66 | ```
67 | MODEL=transxnet_t # transxnet_{t, s, b}
68 | python3 validate.py \
69 | /path/to/imagenet \
70 | --model $MODEL -b 128 \
71 | --pretrained # or --checkpoint /path/to/checkpoint
72 | ```
73 |
74 | # Object Detection and Semantic Segmentation
75 | > [Object Detection](object_detection)
76 | > [Semantic Segmentation](semantic_segmentation)
77 |
78 | # Citation
79 | If you find this project useful for your research, please consider citing:
80 | ```
81 | @article{lou2023transxnet,
82 | title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition},
83 | author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu},
84 | journal={IEEE Transactions on Neural Networks and Learning Systems},
85 | year={2025}
86 | }
87 | ```
88 |
89 | # Acknowledgment
90 | Our implementation is mainly based on the following codebases. We sincerely thank the authors for their wonderful work.
91 | > [poolformer](https://github.com/sail-sg/poolformer)
92 | > [mmdetection](https://github.com/open-mmlab/mmdetection)
93 | > [mmsegmentation](https://github.com/open-mmlab/mmsegmentation)
94 | > [pytorch-image-models](https://github.com/rwightman/pytorch-image-models)
95 |
96 | # Contact
97 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com.
98 |
--------------------------------------------------------------------------------
/assets/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/assets/architecture.png
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .poolformer import *
2 | from .transxnet import *
--------------------------------------------------------------------------------
/object_detection/README.md:
--------------------------------------------------------------------------------
1 | # Applying TransXNet to Object Detection and Instance Segmentation
2 |
3 | For details, please refer to "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://arxiv.org/abs/2310.19380)".
4 |
5 | ## 1. Requirements
6 | ```
7 | # Environments:
8 | cuda==11.3
9 | python==3.8.15
10 | # Packages:
11 | mmcv==1.7.1
12 | mmdet==2.28.2
13 | timm==0.6.12
14 | torch==1.12.1
15 | torchvision==0.13.1
16 | ```
17 |
18 |
19 | ## 2. Data Preparation
20 |
21 | Prepare COCO 2017 according to the [guidelines](https://github.com/open-mmlab/mmdetection/blob/2.x/docs/en/1_exist_data_model.md).
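
The configs under `configs/_base_/datasets` expect the standard COCO 2017 layout, with `data_root` pointing at a directory like:

```
coco/
├── annotations/
│   ├── instances_train2017.json
│   └── instances_val2017.json
├── train2017/
└── val2017/
```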
22 |
23 | ## 3. Main Results on COCO with Pretrained Models
24 |
25 |
26 | | Method | Backbone | Pretrain | Lr schd | Aug | box AP | mask AP | Config | Download |
27 | |------------|----------|-------------|:-------:|:---:|:------:|:-------:|------------------------------------------------------|----------|
28 | | RetinaNet | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 1x | No | 43.1 | - | [config](configs/retinanet_transx_t_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_tiny.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_tiny.pth) |
29 | | RetinaNet | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 1x | No | 46.4 | - | [config](configs/retinanet_transx_s_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_small.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_small.pth) |
30 | | RetinaNet | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 1x | No | 47.6 | - | [config](configs/retinanet_transx_b_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/reinanet_base.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_base.pth) |
31 | | Mask R-CNN | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 1x | No | 44.5 | 40.7 | [config](configs/mask_rcnn_transx_t_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_tiny.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_tiny.pth) |
32 | | Mask R-CNN | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 1x | No | 47.7 | 43.1 | [config](configs/mask_rcnn_transx_s_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_small.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_small.pth) |
33 | | Mask R-CNN | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 1x | No | 48.8 | 43.8 | [config](configs/mask_rcnn_transx_b_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_base.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_base.pth) |
34 |
35 |
36 | ## 4. Train
37 | To train ``TransXNet-T + RetinaNet`` on COCO train2017 with 8 GPUs (single node), run:
38 | ```
39 | bash dist_train.sh configs/retinanet_transx_t_fpn_1x_coco.py 8
40 | ```
41 | To train ``TransXNet-T + Mask R-CNN`` on COCO train2017 with 8 GPUs (single node), run:
42 | ```
43 | bash dist_train.sh configs/mask_rcnn_transx_t_fpn_1x_coco.py 8
44 | ```
45 |
46 | ## 5. Validation
47 | To evaluate ``TransXNet-T + RetinaNet`` on COCO val2017, run:
48 | ```
49 | bash dist_test.sh configs/retinanet_transx_t_fpn_1x_coco.py /path/to/checkpoint_file 8 --out results.pkl --eval bbox
50 | ```
51 | To evaluate ``TransXNet-T + Mask R-CNN`` on COCO val2017, run:
52 | ```
53 | bash dist_test.sh configs/mask_rcnn_transx_t_fpn_1x_coco.py /path/to/checkpoint_file 8 --out results.pkl --eval bbox segm
54 | ```
55 |
56 | ## Citation
57 | If you find this project useful for your research, please consider citing:
58 | ```
59 | @article{lou2023transxnet,
60 | title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition},
61 | author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu},
62 | journal={IEEE Transactions on Neural Networks and Learning Systems},
63 | year={2025}
64 | }
65 | ```
66 |
67 | ## Contact
68 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com.
69 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/analyze_logs.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | from collections import defaultdict
4 |
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import seaborn as sns
8 |
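# Typical invocations (the subcommands and flags are defined in parse_args() below):
#   python analyze_logs.py plot_curve log.json --keys bbox_mAP --out curve.png
#   python analyze_logs.py cal_train_time log.json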
9 |
10 | def cal_train_time(log_dicts, args):
11 | for i, log_dict in enumerate(log_dicts):
12 | print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}')
13 | all_times = []
14 | for epoch in log_dict.keys():
15 | if args.include_outliers:
16 | all_times.append(log_dict[epoch]['time'])
17 | else:
18 | all_times.append(log_dict[epoch]['time'][1:])
19 | all_times = np.array(all_times)
20 | epoch_ave_time = all_times.mean(-1)
21 | slowest_epoch = epoch_ave_time.argmax()
22 | fastest_epoch = epoch_ave_time.argmin()
23 | std_over_epoch = epoch_ave_time.std()
24 | print(f'slowest epoch {slowest_epoch + 1}, '
25 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}')
26 | print(f'fastest epoch {fastest_epoch + 1}, '
27 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}')
28 | print(f'time std over epochs is {std_over_epoch:.4f}')
29 | print(f'average iter time: {np.mean(all_times):.4f} s/iter')
30 | print()
31 |
32 |
33 | def plot_curve(log_dicts, args):
34 | if args.backend is not None:
35 | plt.switch_backend(args.backend)
36 | sns.set_style(args.style)
37 | # if legend is None, use {filename}_{key} as legend
38 | legend = args.legend
39 | if legend is None:
40 | legend = []
41 | for json_log in args.json_logs:
42 | for metric in args.keys:
43 | legend.append(f'{json_log}_{metric}')
44 | assert len(legend) == (len(args.json_logs) * len(args.keys))
45 | metrics = args.keys
46 |
47 | num_metrics = len(metrics)
48 | for i, log_dict in enumerate(log_dicts):
49 | epochs = list(log_dict.keys())
50 | for j, metric in enumerate(metrics):
51 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}')
52 | if metric not in log_dict[epochs[0]]:
53 | raise KeyError(
54 | f'{args.json_logs[i]} does not contain metric {metric}')
55 |
56 | if 'mAP' in metric:
57 | xs = np.arange(1, max(epochs) + 1)
58 | ys = []
59 | for epoch in epochs:
60 | ys += log_dict[epoch][metric]
61 | ax = plt.gca()
62 | ax.set_xticks(xs)
63 | plt.xlabel('epoch')
64 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')
65 | else:
66 | xs = []
67 | ys = []
68 | num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1]
69 | for epoch in epochs:
70 | iters = log_dict[epoch]['iter']
71 | if log_dict[epoch]['mode'][-1] == 'val':
72 | iters = iters[:-1]
73 | xs.append(
74 | np.array(iters) + (epoch - 1) * num_iters_per_epoch)
75 | ys.append(np.array(log_dict[epoch][metric][:len(iters)]))
76 | xs = np.concatenate(xs)
77 | ys = np.concatenate(ys)
78 | plt.xlabel('iter')
79 | plt.plot(
80 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
81 | plt.legend()
82 | if args.title is not None:
83 | plt.title(args.title)
84 | if args.out is None:
85 | plt.show()
86 | else:
87 | print(f'save curve to: {args.out}')
88 | plt.savefig(args.out)
89 | plt.cla()
90 |
91 |
92 | def add_plot_parser(subparsers):
93 | parser_plt = subparsers.add_parser(
94 | 'plot_curve', help='parser for plotting curves')
95 | parser_plt.add_argument(
96 | 'json_logs',
97 | type=str,
98 | nargs='+',
99 | help='path of train log in json format')
100 | parser_plt.add_argument(
101 | '--keys',
102 | type=str,
103 | nargs='+',
104 | default=['bbox_mAP'],
105 | help='the metric that you want to plot')
106 | parser_plt.add_argument('--title', type=str, help='title of figure')
107 | parser_plt.add_argument(
108 | '--legend',
109 | type=str,
110 | nargs='+',
111 | default=None,
112 | help='legend of each plot')
113 | parser_plt.add_argument(
114 | '--backend', type=str, default=None, help='backend of plt')
115 | parser_plt.add_argument(
116 | '--style', type=str, default='dark', help='style of plt')
117 | parser_plt.add_argument('--out', type=str, default=None)
118 |
119 |
120 | def add_time_parser(subparsers):
121 | parser_time = subparsers.add_parser(
122 | 'cal_train_time',
123 | help='parser for computing the average time per training iteration')
124 | parser_time.add_argument(
125 | 'json_logs',
126 | type=str,
127 | nargs='+',
128 | help='path of train log in json format')
129 | parser_time.add_argument(
130 | '--include-outliers',
131 | action='store_true',
132 | help='include the first value of every epoch when computing '
133 | 'the average time')
134 |
135 |
136 | def parse_args():
137 | parser = argparse.ArgumentParser(description='Analyze Json Log')
138 | # currently only support plot curve and calculate average train time
139 | subparsers = parser.add_subparsers(dest='task', help='task parser')
140 | add_plot_parser(subparsers)
141 | add_time_parser(subparsers)
142 | args = parser.parse_args()
143 | return args
144 |
145 |
146 | def load_json_logs(json_logs):
147 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict
148 | # keys of sub dict is different metrics, e.g. memory, bbox_mAP
149 | # value of sub dict is a list of corresponding values of all iterations
150 | log_dicts = [dict() for _ in json_logs]
151 | for json_log, log_dict in zip(json_logs, log_dicts):
152 | with open(json_log, 'r') as log_file:
153 | for line in log_file:
154 | log = json.loads(line.strip())
155 | # skip lines without `epoch` field
156 | if 'epoch' not in log:
157 | continue
158 | epoch = log.pop('epoch')
159 | if epoch not in log_dict:
160 | log_dict[epoch] = defaultdict(list)
161 | for k, v in log.items():
162 | log_dict[epoch][k].append(v)
163 | return log_dicts
164 |
165 |
166 | def main():
167 | args = parse_args()
168 |
169 | json_logs = args.json_logs
170 | for json_log in json_logs:
171 | assert json_log.endswith('.json')
172 |
173 | log_dicts = load_json_logs(json_logs)
174 |
175 |     eval(args.task)(log_dicts, args)  # dispatch to plot_curve() or cal_train_time()
176 |
177 |
178 | if __name__ == '__main__':
179 | main()
180 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/analyze_results.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os.path as osp
3 |
4 | import mmcv
5 | import numpy as np
6 | from mmcv import Config, DictAction
7 |
8 | from mmdet.core.evaluation import eval_map
9 | from mmdet.core.visualization import imshow_gt_det_bboxes
10 | from mmdet.datasets import build_dataset, get_loading_pipeline
11 |
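# Typical invocation (arguments are defined in parse_args() below):
#   python analyze_results.py <config.py> <results.pkl> <show_dir> --topk 20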
12 |
13 | def bbox_map_eval(det_result, annotation):
14 | """Evaluate mAP of single image det result.
15 |
16 | Args:
17 | det_result (list[list]): [[cls1_det, cls2_det, ...], ...].
18 | The outer list indicates images, and the inner list indicates
19 | per-class detected bboxes.
20 | annotation (dict): Ground truth annotations where keys of
21 | annotations are:
22 |
23 | - bboxes: numpy array of shape (n, 4)
24 | - labels: numpy array of shape (n, )
25 | - bboxes_ignore (optional): numpy array of shape (k, 4)
26 | - labels_ignore (optional): numpy array of shape (k, )
27 |
28 | Returns:
29 | float: mAP
30 | """
31 |
32 | # use only bbox det result
33 | if isinstance(det_result, tuple):
34 | bbox_det_result = [det_result[0]]
35 | else:
36 | bbox_det_result = [det_result]
37 | # mAP
38 | iou_thrs = np.linspace(
39 | .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
40 | mean_aps = []
41 | for thr in iou_thrs:
42 | mean_ap, _ = eval_map(
43 | bbox_det_result, [annotation], iou_thr=thr, logger='silent')
44 | mean_aps.append(mean_ap)
45 | return sum(mean_aps) / len(mean_aps)
46 |
47 |
48 | class ResultVisualizer(object):
49 | """Display and save evaluation results.
50 |
51 | Args:
52 | show (bool): Whether to show the image. Default: True
53 | wait_time (float): Value of waitKey param. Default: 0.
54 | score_thr (float): Minimum score of bboxes to be shown.
55 | Default: 0
56 | """
57 |
58 | def __init__(self, show=False, wait_time=0, score_thr=0):
59 | self.show = show
60 | self.wait_time = wait_time
61 | self.score_thr = score_thr
62 |
63 | def _save_image_gts_results(self, dataset, results, mAPs, out_dir=None):
64 | mmcv.mkdir_or_exist(out_dir)
65 |
66 | for mAP_info in mAPs:
67 | index, mAP = mAP_info
68 | data_info = dataset.prepare_train_img(index)
69 |
70 | # calc save file path
71 | filename = data_info['filename']
72 | if data_info['img_prefix'] is not None:
73 | filename = osp.join(data_info['img_prefix'], filename)
74 | else:
75 | filename = data_info['filename']
76 | fname, name = osp.splitext(osp.basename(filename))
77 | save_filename = fname + '_' + str(round(mAP, 3)) + name
78 | out_file = osp.join(out_dir, save_filename)
79 | imshow_gt_det_bboxes(
80 | data_info['img'],
81 | data_info,
82 | results[index],
83 | dataset.CLASSES,
84 | show=self.show,
85 | score_thr=self.score_thr,
86 | wait_time=self.wait_time,
87 | out_file=out_file)
88 |
89 | def evaluate_and_show(self,
90 | dataset,
91 | results,
92 | topk=20,
93 | show_dir='work_dir',
94 | eval_fn=None):
95 | """Evaluate and show results.
96 |
97 | Args:
98 | dataset (Dataset): A PyTorch dataset.
99 | results (list): Det results from test results pkl file
100 | topk (int): Number of the highest topk and
101 | lowest topk after evaluation index sorting. Default: 20
102 | show_dir (str, optional): The filename to write the image.
103 | Default: 'work_dir'
104 | eval_fn (callable, optional): Eval function, Default: None
105 | """
106 |
107 | assert topk > 0
108 | if (topk * 2) > len(dataset):
109 | topk = len(dataset) // 2
110 |
111 | if eval_fn is None:
112 | eval_fn = bbox_map_eval
113 | else:
114 | assert callable(eval_fn)
115 |
116 | prog_bar = mmcv.ProgressBar(len(results))
117 | _mAPs = {}
118 |         for i, result in enumerate(results):
119 | # self.dataset[i] should not call directly
120 | # because there is a risk of mismatch
121 | data_info = dataset.prepare_train_img(i)
122 | mAP = eval_fn(result, data_info['ann_info'])
123 | _mAPs[i] = mAP
124 | prog_bar.update()
125 |
126 | # descending select topk image
127 | _mAPs = list(sorted(_mAPs.items(), key=lambda kv: kv[1]))
128 | good_mAPs = _mAPs[-topk:]
129 | bad_mAPs = _mAPs[:topk]
130 |
131 | good_dir = osp.abspath(osp.join(show_dir, 'good'))
132 | bad_dir = osp.abspath(osp.join(show_dir, 'bad'))
133 | self._save_image_gts_results(dataset, results, good_mAPs, good_dir)
134 | self._save_image_gts_results(dataset, results, bad_mAPs, bad_dir)
135 |
136 |
137 | def parse_args():
138 | parser = argparse.ArgumentParser(
139 |         description='MMDet: evaluate the prediction result of each image')
140 | parser.add_argument('config', help='test config file path')
141 | parser.add_argument(
142 |         'prediction_path', help='path of the test result file in pkl format')
143 | parser.add_argument(
144 | 'show_dir', help='directory where painted images will be saved')
145 | parser.add_argument('--show', action='store_true', help='show results')
146 | parser.add_argument(
147 | '--wait-time',
148 | type=float,
149 | default=0,
150 | help='the interval of show (s), 0 is block')
151 | parser.add_argument(
152 | '--topk',
153 | default=20,
154 | type=int,
155 |         help='number of images with the highest and lowest mAP '
156 |         'to save after sorting')
157 | parser.add_argument(
158 | '--show-score-thr',
159 | type=float,
160 | default=0,
161 | help='score threshold (default: 0.)')
162 | parser.add_argument(
163 | '--cfg-options',
164 | nargs='+',
165 | action=DictAction,
166 | help='override some settings in the used config, the key-value pair '
167 | 'in xxx=yyy format will be merged into config file. If the value to '
168 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
169 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
170 | 'Note that the quotation marks are necessary and that no white space '
171 | 'is allowed.')
172 | args = parser.parse_args()
173 | return args
174 |
175 |
176 | def main():
177 | args = parse_args()
178 |
179 | mmcv.check_file_exist(args.prediction_path)
180 |
181 | cfg = Config.fromfile(args.config)
182 | if args.cfg_options is not None:
183 | cfg.merge_from_dict(args.cfg_options)
184 | cfg.data.test.test_mode = True
185 | # import modules from string list.
186 | if cfg.get('custom_imports', None):
187 | from mmcv.utils import import_modules_from_strings
188 | import_modules_from_strings(**cfg['custom_imports'])
189 |
190 | cfg.data.test.pop('samples_per_gpu', 0)
191 | cfg.data.test.pipeline = get_loading_pipeline(cfg.data.train.pipeline)
192 | dataset = build_dataset(cfg.data.test)
193 | outputs = mmcv.load(args.prediction_path)
194 |
195 | result_visualizer = ResultVisualizer(args.show, args.wait_time,
196 | args.show_score_thr)
197 | result_visualizer.evaluate_and_show(
198 | dataset, outputs, topk=args.topk, show_dir=args.show_dir)
199 |
200 |
201 | if __name__ == '__main__':
202 | main()
203 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/benchmark.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 |
4 | import torch
5 | from mmcv import Config, DictAction
6 | from mmcv.cnn import fuse_conv_bn
7 | from mmcv.parallel import MMDataParallel
8 | from mmcv.runner import load_checkpoint, wrap_fp16_model
9 |
10 | from mmdet.datasets import (build_dataloader, build_dataset,
11 | replace_ImageToTensor)
12 | from mmdet.models import build_detector
13 |
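# Typical invocation (measures single-GPU inference speed over 2000 images):
#   python benchmark.py <config.py> <checkpoint.pth> [--fuse-conv-bn]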
14 |
15 | def parse_args():
16 | parser = argparse.ArgumentParser(description='MMDet benchmark a model')
17 | parser.add_argument('config', help='test config file path')
18 | parser.add_argument('checkpoint', help='checkpoint file')
19 | parser.add_argument(
20 |         '--log-interval', default=50, type=int, help='interval of logging')
21 | parser.add_argument(
22 | '--fuse-conv-bn',
23 | action='store_true',
24 |         help='Whether to fuse conv and bn, this will slightly increase '
25 | 'the inference speed')
26 | parser.add_argument(
27 | '--cfg-options',
28 | nargs='+',
29 | action=DictAction,
30 | help='override some settings in the used config, the key-value pair '
31 | 'in xxx=yyy format will be merged into config file. If the value to '
32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
33 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
34 | 'Note that the quotation marks are necessary and that no white space '
35 | 'is allowed.')
36 | args = parser.parse_args()
37 | return args
38 |
39 |
40 | def main():
41 | args = parse_args()
42 |
43 | cfg = Config.fromfile(args.config)
44 | if args.cfg_options is not None:
45 | cfg.merge_from_dict(args.cfg_options)
46 | # import modules from string list.
47 | if cfg.get('custom_imports', None):
48 | from mmcv.utils import import_modules_from_strings
49 | import_modules_from_strings(**cfg['custom_imports'])
50 | # set cudnn_benchmark
51 | if cfg.get('cudnn_benchmark', False):
52 | torch.backends.cudnn.benchmark = True
53 | cfg.model.pretrained = None
54 | cfg.data.test.test_mode = True
55 |
56 | # build the dataloader
57 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
58 | if samples_per_gpu > 1:
59 | # Replace 'ImageToTensor' to 'DefaultFormatBundle'
60 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
61 | dataset = build_dataset(cfg.data.test)
62 | data_loader = build_dataloader(
63 | dataset,
64 | samples_per_gpu=1,
65 | workers_per_gpu=cfg.data.workers_per_gpu,
66 | dist=False,
67 | shuffle=False)
68 |
69 | # build the model and load checkpoint
70 | cfg.model.train_cfg = None
71 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
72 | fp16_cfg = cfg.get('fp16', None)
73 | if fp16_cfg is not None:
74 | wrap_fp16_model(model)
75 | load_checkpoint(model, args.checkpoint, map_location='cpu')
76 | if args.fuse_conv_bn:
77 | model = fuse_conv_bn(model)
78 |
79 | model = MMDataParallel(model, device_ids=[0])
80 |
81 | model.eval()
82 |
83 | # the first several iterations may be very slow so skip them
84 | num_warmup = 5
85 | pure_inf_time = 0
86 |
87 |     # benchmark with 2000 images and take the average
88 | for i, data in enumerate(data_loader):
89 |
90 | torch.cuda.synchronize()
91 | start_time = time.perf_counter()
92 |
93 | with torch.no_grad():
94 | model(return_loss=False, rescale=True, **data)
95 |
96 | torch.cuda.synchronize()
97 | elapsed = time.perf_counter() - start_time
98 |
99 | if i >= num_warmup:
100 | pure_inf_time += elapsed
101 | if (i + 1) % args.log_interval == 0:
102 | fps = (i + 1 - num_warmup) / pure_inf_time
103 | print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')
104 |
105 | if (i + 1) == 2000:
106 | pure_inf_time += elapsed
107 | fps = (i + 1 - num_warmup) / pure_inf_time
108 | print(f'Overall fps: {fps:.1f} img / s')
109 | break
110 |
111 |
112 | if __name__ == '__main__':
113 | main()
114 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/eval_metric.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import mmcv
4 | from mmcv import Config, DictAction
5 |
6 | from mmdet.datasets import build_dataset
7 |
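# Typical invocation (evaluates a results pickle saved via test.py --out):
#   python eval_metric.py <config.py> <results.pkl> --eval bbox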
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(description='Evaluate metric of the '
11 | 'results saved in pkl format')
12 | parser.add_argument('config', help='Config of the model')
13 | parser.add_argument('pkl_results', help='Results in pickle format')
14 | parser.add_argument(
15 | '--format-only',
16 | action='store_true',
17 |         help='Format the output results without performing evaluation. It is '
18 | 'useful when you want to format the result to a specific format and '
19 | 'submit it to the test server')
20 | parser.add_argument(
21 | '--eval',
22 | type=str,
23 | nargs='+',
24 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",'
25 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
26 | parser.add_argument(
27 | '--cfg-options',
28 | nargs='+',
29 | action=DictAction,
30 | help='override some settings in the used config, the key-value pair '
31 | 'in xxx=yyy format will be merged into config file. If the value to '
32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
33 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
34 | 'Note that the quotation marks are necessary and that no white space '
35 | 'is allowed.')
36 | parser.add_argument(
37 | '--eval-options',
38 | nargs='+',
39 | action=DictAction,
40 | help='custom options for evaluation, the key-value pair in xxx=yyy '
41 | 'format will be kwargs for dataset.evaluate() function')
42 | args = parser.parse_args()
43 | return args
44 |
45 |
46 | def main():
47 | args = parse_args()
48 |
49 | cfg = Config.fromfile(args.config)
50 | assert args.eval or args.format_only, (
51 | 'Please specify at least one operation (eval/format the results) with '
52 | 'the argument "--eval", "--format-only"')
53 | if args.eval and args.format_only:
54 | raise ValueError('--eval and --format_only cannot be both specified')
55 |
56 | if args.cfg_options is not None:
57 | cfg.merge_from_dict(args.cfg_options)
58 | # import modules from string list.
59 | if cfg.get('custom_imports', None):
60 | from mmcv.utils import import_modules_from_strings
61 | import_modules_from_strings(**cfg['custom_imports'])
62 | cfg.data.test.test_mode = True
63 |
64 | dataset = build_dataset(cfg.data.test)
65 | outputs = mmcv.load(args.pkl_results)
66 |
67 | kwargs = {} if args.eval_options is None else args.eval_options
68 | if args.format_only:
69 | dataset.format_results(outputs, **kwargs)
70 | if args.eval:
71 | eval_kwargs = cfg.get('evaluation', {}).copy()
72 | # hard-code way to remove EvalHook args
73 | for key in [
74 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
75 | 'rule'
76 | ]:
77 | eval_kwargs.pop(key, None)
78 | eval_kwargs.update(dict(metric=args.eval, **kwargs))
79 | print(dataset.evaluate(outputs, **eval_kwargs))
80 |
81 |
82 | if __name__ == '__main__':
83 | main()
84 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/get_flops.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import torch
4 | from mmcv import Config, DictAction
5 |
6 | from mmdet.models import build_detector
7 |
8 | try:
9 | from mmcv.cnn import get_model_complexity_info
10 | except ImportError:
11 | raise ImportError('Please upgrade mmcv to >0.6.2')
12 |
13 | sys.path.append(".")
14 | import transxnet  # register the local TransXNet backbone
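
# Typical invocation (run from the object_detection/ directory, so the
# sys.path.append('.') above can resolve the local transxnet module):
#   python analysis_tools/get_flops.py <config.py> --shape 1280 800
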
15 |
16 | def parse_args():
17 | parser = argparse.ArgumentParser(description='Train a detector')
18 | parser.add_argument('config', help='train config file path')
19 | parser.add_argument(
20 | '--shape',
21 | type=int,
22 | nargs='+',
23 | default=[1280, 800],
24 | help='input image size')
25 | parser.add_argument(
26 | '--cfg-options',
27 | nargs='+',
28 | action=DictAction,
29 | help='override some settings in the used config, the key-value pair '
30 | 'in xxx=yyy format will be merged into config file. If the value to '
31 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
32 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
33 | 'Note that the quotation marks are necessary and that no white space '
34 | 'is allowed.')
35 | args = parser.parse_args()
36 | return args
37 |
38 |
39 | def main():
40 |
41 | args = parse_args()
42 |
43 | if len(args.shape) == 1:
44 |         input_shape = (3, args.shape[0], args.shape[0])
45 | elif len(args.shape) == 2:
46 | input_shape = (3, ) + tuple(args.shape)
47 | else:
48 | raise ValueError('invalid input shape')
49 |
50 | cfg = Config.fromfile(args.config)
51 | if args.cfg_options is not None:
52 | cfg.merge_from_dict(args.cfg_options)
53 | # import modules from string list.
54 | if cfg.get('custom_imports', None):
55 | from mmcv.utils import import_modules_from_strings
56 | import_modules_from_strings(**cfg['custom_imports'])
57 |
58 | model = build_detector(
59 | cfg.model,
60 | train_cfg=cfg.get('train_cfg'),
61 | test_cfg=cfg.get('test_cfg'))
62 | if torch.cuda.is_available():
63 | model.cuda()
64 | model.eval()
65 |
66 | if hasattr(model, 'forward_dummy'):
67 | model.forward = model.forward_dummy
68 | else:
69 | raise NotImplementedError(
70 |             'FLOPs counter is currently not supported with {}'.
71 | format(model.__class__.__name__))
72 |
73 | flops, params = get_model_complexity_info(model, input_shape)
74 | split_line = '=' * 30
75 | print(f'{split_line}\nInput shape: {input_shape}\n'
76 | f'Flops: {flops}\nParams: {params}\n{split_line}')
77 | print('!!!Please be cautious if you use the results in papers. '
78 | 'You may need to check if all ops are supported and verify that the '
79 | 'flops computation is correct.')
80 |
81 |
82 | if __name__ == '__main__':
83 | main()
84 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/robustness_eval.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | from argparse import ArgumentParser
3 |
4 | import mmcv
5 | import numpy as np
6 |
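# Typical invocation (summarizes corruption results produced by test_robustness.py):
#   python robustness_eval.py <results.pkl> --dataset coco --task bbox --prints mPC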
7 |
8 | def print_coco_results(results):
9 |
10 | def _print(result, ap=1, iouThr=None, areaRng='all', maxDets=100):
11 | titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
12 | typeStr = '(AP)' if ap == 1 else '(AR)'
13 | iouStr = '0.50:0.95' \
14 | if iouThr is None else f'{iouThr:0.2f}'
15 | iStr = f' {titleStr:<18} {typeStr} @[ IoU={iouStr:<9} | '
16 | iStr += f'area={areaRng:>6s} | maxDets={maxDets:>3d} ] = {result:0.3f}'
17 | print(iStr)
18 |
19 |     # _print() only prints each line; no summary array is needed here
20 |     _print(results[0], 1)
21 |     _print(results[1], 1, iouThr=.5)
22 |     _print(results[2], 1, iouThr=.75)
23 |     _print(results[3], 1, areaRng='small')
24 |     _print(results[4], 1, areaRng='medium')
25 |     _print(results[5], 1, areaRng='large')
26 |     _print(results[6], 0, maxDets=1)
27 |     _print(results[7], 0, maxDets=10)
28 |     _print(results[8], 0)
29 |     _print(results[9], 0, areaRng='small')
30 |     _print(results[10], 0, areaRng='medium')
31 |     _print(results[11], 0, areaRng='large')
32 |
33 |
34 | def get_coco_style_results(filename,
35 | task='bbox',
36 | metric=None,
37 | prints='mPC',
38 | aggregate='benchmark'):
39 |
40 | assert aggregate in ['benchmark', 'all']
41 |
42 | if prints == 'all':
43 | prints = ['P', 'mPC', 'rPC']
44 | elif isinstance(prints, str):
45 | prints = [prints]
46 | for p in prints:
47 | assert p in ['P', 'mPC', 'rPC']
48 |
49 | if metric is None:
50 | metrics = [
51 | 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100',
52 | 'ARs', 'ARm', 'ARl'
53 | ]
54 | elif isinstance(metric, list):
55 | metrics = metric
56 | else:
57 | metrics = [metric]
58 |
59 | for metric_name in metrics:
60 | assert metric_name in [
61 | 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100',
62 | 'ARs', 'ARm', 'ARl'
63 | ]
64 |
65 | eval_output = mmcv.load(filename)
66 |
67 | num_distortions = len(list(eval_output.keys()))
68 | results = np.zeros((num_distortions, 6, len(metrics)), dtype='float32')
69 |
70 | for corr_i, distortion in enumerate(eval_output):
71 | for severity in eval_output[distortion]:
72 | for metric_j, metric_name in enumerate(metrics):
73 | mAP = eval_output[distortion][severity][task][metric_name]
74 | results[corr_i, severity, metric_j] = mAP
75 |
76 | P = results[0, 0, :]
77 | if aggregate == 'benchmark':
78 | mPC = np.mean(results[:15, 1:, :], axis=(0, 1))
79 | else:
80 | mPC = np.mean(results[:, 1:, :], axis=(0, 1))
81 | rPC = mPC / P
82 |
83 | print(f'\nmodel: {osp.basename(filename)}')
84 | if metric is None:
85 | if 'P' in prints:
86 | print(f'Performance on Clean Data [P] ({task})')
87 | print_coco_results(P)
88 | if 'mPC' in prints:
89 | print(f'Mean Performance under Corruption [mPC] ({task})')
90 | print_coco_results(mPC)
91 | if 'rPC' in prints:
92 | print(f'Relative Performance under Corruption [rPC] ({task})')
93 | print_coco_results(rPC)
94 | else:
95 | if 'P' in prints:
96 | print(f'Performance on Clean Data [P] ({task})')
97 | for metric_i, metric_name in enumerate(metrics):
98 | print(f'{metric_name:5} = {P[metric_i]:0.3f}')
99 | if 'mPC' in prints:
100 | print(f'Mean Performance under Corruption [mPC] ({task})')
101 | for metric_i, metric_name in enumerate(metrics):
102 | print(f'{metric_name:5} = {mPC[metric_i]:0.3f}')
103 | if 'rPC' in prints:
104 | print(f'Relative Performance under Corruption [rPC] ({task})')
105 | for metric_i, metric_name in enumerate(metrics):
106 | print(f'{metric_name:5} => {rPC[metric_i] * 100:0.1f} %')
107 |
108 | return results
109 |
110 |
111 | def get_voc_style_results(filename, prints='mPC', aggregate='benchmark'):
112 |
113 | assert aggregate in ['benchmark', 'all']
114 |
115 | if prints == 'all':
116 | prints = ['P', 'mPC', 'rPC']
117 | elif isinstance(prints, str):
118 | prints = [prints]
119 | for p in prints:
120 | assert p in ['P', 'mPC', 'rPC']
121 |
122 | eval_output = mmcv.load(filename)
123 |
124 | num_distortions = len(list(eval_output.keys()))
125 | results = np.zeros((num_distortions, 6, 20), dtype='float32')
126 |
127 | for i, distortion in enumerate(eval_output):
128 | for severity in eval_output[distortion]:
129 | mAP = [
130 | eval_output[distortion][severity][j]['ap']
131 | for j in range(len(eval_output[distortion][severity]))
132 | ]
133 | results[i, severity, :] = mAP
134 |
135 | P = results[0, 0, :]
136 | if aggregate == 'benchmark':
137 | mPC = np.mean(results[:15, 1:, :], axis=(0, 1))
138 | else:
139 | mPC = np.mean(results[:, 1:, :], axis=(0, 1))
140 | rPC = mPC / P
141 |
142 | print(f'\nmodel: {osp.basename(filename)}')
143 | if 'P' in prints:
144 | print(f'Performance on Clean Data [P] in AP50 = {np.mean(P):0.3f}')
145 | if 'mPC' in prints:
146 | print('Mean Performance under Corruption [mPC] in AP50 = '
147 | f'{np.mean(mPC):0.3f}')
148 | if 'rPC' in prints:
149 | print('Relative Performance under Corruption [rPC] in % = '
150 | f'{np.mean(rPC) * 100:0.1f}')
151 |
152 | return np.mean(results, axis=2, keepdims=True)
153 |
154 |
155 | def get_results(filename,
156 | dataset='coco',
157 | task='bbox',
158 | metric=None,
159 | prints='mPC',
160 | aggregate='benchmark'):
161 | assert dataset in ['coco', 'voc', 'cityscapes']
162 |
163 | if dataset in ['coco', 'cityscapes']:
164 | results = get_coco_style_results(
165 | filename,
166 | task=task,
167 | metric=metric,
168 | prints=prints,
169 | aggregate=aggregate)
170 | elif dataset == 'voc':
171 | if task != 'bbox':
172 | print('Only bbox analysis is supported for Pascal VOC')
173 | print('Will report bbox results\n')
174 | if metric not in [None, ['AP'], ['AP50']]:
175 | print('Only the AP50 metric is supported for Pascal VOC')
176 | print('Will report AP50 metric\n')
177 | results = get_voc_style_results(
178 | filename, prints=prints, aggregate=aggregate)
179 |
180 | return results
181 |
182 |
183 | def get_distortions_from_file(filename):
184 |
185 | eval_output = mmcv.load(filename)
186 |
187 | return get_distortions_from_results(eval_output)
188 |
189 |
190 | def get_distortions_from_results(eval_output):
191 | distortions = []
192 | for i, distortion in enumerate(eval_output):
193 | distortions.append(distortion.replace('_', ' '))
194 | return distortions
195 |
196 |
197 | def main():
198 | parser = ArgumentParser(description='Corruption Result Analysis')
199 | parser.add_argument('filename', help='result file path')
200 | parser.add_argument(
201 | '--dataset',
202 | type=str,
203 | choices=['coco', 'voc', 'cityscapes'],
204 | default='coco',
205 | help='dataset type')
206 | parser.add_argument(
207 | '--task',
208 | type=str,
209 | nargs='+',
210 | choices=['bbox', 'segm'],
211 | default=['bbox'],
212 | help='task to report')
213 | parser.add_argument(
214 | '--metric',
215 | nargs='+',
216 | choices=[
217 | None, 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10',
218 | 'AR100', 'ARs', 'ARm', 'ARl'
219 | ],
220 | default=None,
221 | help='metric to report')
222 | parser.add_argument(
223 | '--prints',
224 | type=str,
225 | nargs='+',
226 | choices=['P', 'mPC', 'rPC'],
227 | default='mPC',
228 | help='corruption benchmark metric to print')
229 | parser.add_argument(
230 | '--aggregate',
231 | type=str,
232 | choices=['all', 'benchmark'],
233 | default='benchmark',
234 | help='aggregate all results or only those \
235 | for benchmark corruptions')
236 |
237 | args = parser.parse_args()
238 |
239 | for task in args.task:
240 | get_results(
241 | args.filename,
242 | dataset=args.dataset,
243 | task=task,
244 | metric=args.metric,
245 | prints=args.prints,
246 | aggregate=args.aggregate)
247 |
248 |
249 | if __name__ == '__main__':
250 | main()
251 |
--------------------------------------------------------------------------------
/object_detection/checkpoint.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import io
3 | import os
4 | import os.path as osp
5 | import pkgutil
6 | import re
7 | import time
8 | import warnings
9 | from collections import OrderedDict
10 | from importlib import import_module
11 | from tempfile import TemporaryDirectory
12 |
13 | import torch
14 | import torchvision
15 | from torch.optim import Optimizer
16 | from torch.utils import model_zoo
17 |
18 | import mmcv
19 | from mmcv.parallel import is_module_wrapper
20 | from mmcv.runner.dist_utils import get_dist_info
21 |
22 | ENV_MMCV_HOME = 'MMCV_HOME'
23 | ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
24 | DEFAULT_CACHE_DIR = '~/.cache'
25 |
26 |
27 | def load_state_dict(module, state_dict, strict=False, logger=None):
28 | """Load state_dict to a module.
29 |
30 | This method is modified from :meth:`torch.nn.Module.load_state_dict`.
31 | Default value for ``strict`` is set to ``False`` and the message for
32 | param mismatch will be shown even if strict is False.
33 |
34 | Args:
35 | module (Module): Module that receives the state_dict.
36 | state_dict (OrderedDict): Weights.
37 | strict (bool): whether to strictly enforce that the keys
38 | in :attr:`state_dict` match the keys returned by this module's
39 | :meth:`~torch.nn.Module.state_dict` function. Default: ``False``.
40 | logger (:obj:`logging.Logger`, optional): Logger to log the error
41 | message. If not specified, print function will be used.
42 | """
43 | unexpected_keys = []
44 | all_missing_keys = []
45 | err_msg = []
46 |
47 | metadata = getattr(state_dict, '_metadata', None)
48 | state_dict = state_dict.copy()
49 | if metadata is not None:
50 | state_dict._metadata = metadata
51 |
52 | # use _load_from_state_dict to enable checkpoint version control
53 | def load(module, prefix=''):
54 | # recursively check parallel module in case that the model has a
55 | # complicated structure, e.g., nn.Module(nn.Module(DDP))
56 | if is_module_wrapper(module):
57 | module = module.module
58 | local_metadata = {} if metadata is None else metadata.get(
59 | prefix[:-1], {})
60 | module._load_from_state_dict(state_dict, prefix, local_metadata, True,
61 | all_missing_keys, unexpected_keys,
62 | err_msg)
63 | for name, child in module._modules.items():
64 | if child is not None:
65 | load(child, prefix + name + '.')
66 |
67 | load(module)
68 | load = None # break load->load reference cycle
69 |
70 | # ignore "num_batches_tracked" of BN layers
71 | missing_keys = [
72 | key for key in all_missing_keys if 'num_batches_tracked' not in key
73 | ]
74 |
75 | if unexpected_keys:
76 | err_msg.append('unexpected key in source '
77 | f'state_dict: {", ".join(unexpected_keys)}\n')
78 | if missing_keys:
79 | err_msg.append(
80 | f'missing keys in source state_dict: {", ".join(missing_keys)}\n')
81 |
82 | rank, _ = get_dist_info()
83 | if len(err_msg) > 0 and rank == 0:
84 | err_msg.insert(
85 | 0, 'The model and loaded state dict do not match exactly\n')
86 | err_msg = '\n'.join(err_msg)
87 | if strict:
88 | raise RuntimeError(err_msg)
89 | elif logger is not None:
90 | logger.warning(err_msg)
91 | else:
92 | print(err_msg)
93 |
94 |
97 | class CheckpointLoader:
98 | """A general checkpoint loader to manage all schemes."""
99 |
100 | _schemes = {}
101 |
102 | @classmethod
103 | def _register_scheme(cls, prefixes, loader, force=False):
104 | if isinstance(prefixes, str):
105 | prefixes = [prefixes]
106 | else:
107 | assert isinstance(prefixes, (list, tuple))
108 | for prefix in prefixes:
109 | if (prefix not in cls._schemes) or force:
110 | cls._schemes[prefix] = loader
111 | else:
112 | raise KeyError(
113 | f'{prefix} is already registered as a loader backend, '
114 | 'add "force=True" if you want to override it')
115 | # sort, longer prefixes take priority
116 | cls._schemes = OrderedDict(
117 | sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True))
118 |
119 | @classmethod
120 | def register_scheme(cls, prefixes, loader=None, force=False):
121 | """Register a loader to CheckpointLoader.
122 |
123 | This method can be used as a normal class method or a decorator.
124 |
125 | Args:
126 | prefixes (str or list[str] or tuple[str]):
127 | The prefix of the registered loader.
128 | loader (function, optional): The loader function to be registered.
129 | When this method is used as a decorator, loader is None.
130 | Defaults to None.
131 | force (bool, optional): Whether to override the loader
132 | if the prefix has already been registered. Defaults to False.
133 | """
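        # Example (decorator form), mirroring how mmcv registers its own
        # loaders, e.g. an HTTP loader for 'http://' / 'https://' prefixes:
        #     @CheckpointLoader.register_scheme(prefixes=('http://', 'https://'))
        #     def load_from_http(filename, map_location=None):
        #         ...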
134 |
135 | if loader is not None:
136 | cls._register_scheme(prefixes, loader, force=force)
137 | return
138 |
139 | def _register(loader_cls):
140 | cls._register_scheme(prefixes, loader_cls, force=force)
141 | return loader_cls
142 |
143 | return _register
144 |
145 | @classmethod
146 | def _get_checkpoint_loader(cls, path):
147 | """Finds a loader that supports the given path. Falls back to the local
148 | loader if no other loader is found.
149 |
150 | Args:
151 | path (str): checkpoint path
152 |
153 | Returns:
154 | loader (function): checkpoint loader
155 | """
156 |
157 | for p in cls._schemes:
158 | if path.startswith(p):
159 | return cls._schemes[p]
160 |
161 | @classmethod
162 | def load_checkpoint(cls, filename, map_location=None, logger=None):
163 | """load checkpoint through URL scheme path.
164 |
165 | Args:
166 | filename (str): checkpoint file name with given prefix
167 | map_location (str, optional): Same as :func:`torch.load`.
168 | Default: None
169 | logger (:mod:`logging.Logger`, optional): The logger for message.
170 | Default: None
171 |
172 | Returns:
173 | dict or OrderedDict: The loaded checkpoint.
174 | """
175 |
176 | checkpoint_loader = cls._get_checkpoint_loader(filename)
177 | class_name = checkpoint_loader.__name__
178 | mmcv.print_log(
179 | f'load checkpoint from {class_name[10:]} path: {filename}', logger)
180 | return checkpoint_loader(filename, map_location)
181 |
182 |
183 |
184 | def _load_checkpoint(filename, map_location=None, logger=None):
185 | """Load checkpoint from somewhere (modelzoo, file, url).
186 |
187 | Args:
188 | filename (str): Accept local filepath, URL, ``torchvision://xxx``,
189 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
190 | details.
191 | map_location (str, optional): Same as :func:`torch.load`.
192 | Default: None.
193 | logger (:mod:`logging.Logger`, optional): The logger for error message.
194 | Default: None
195 |
196 | Returns:
197 | dict or OrderedDict: The loaded checkpoint. It can be either an
198 | OrderedDict storing model weights or a dict containing other
199 | information, which depends on the checkpoint.
200 | """
201 | return CheckpointLoader.load_checkpoint(filename, map_location, logger)
202 |
203 |
204 | def load_checkpoint(model,
205 | filename,
206 | map_location=None,
207 | strict=False,
208 | logger=None,
209 | revise_keys=[(r'^module\.', '')]):
210 | """Load checkpoint from a file or URI.
211 |
212 | Args:
213 | model (Module): Module to load checkpoint.
214 | filename (str): Accept local filepath, URL, ``torchvision://xxx``,
215 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
216 | details.
217 | map_location (str): Same as :func:`torch.load`.
218 | strict (bool): Whether to allow different params for the model and
219 | checkpoint.
220 | logger (:mod:`logging.Logger` or None): The logger for error message.
221 | revise_keys (list): A list of customized keywords to modify the
222 | state_dict in checkpoint. Each item is a (pattern, replacement)
223 | pair of the regular expression operations. Default: strip
224 | the prefix 'module.' by [(r'^module\\.', '')].
225 |
226 | Returns:
227 | dict or OrderedDict: The loaded checkpoint.
228 | """
229 | checkpoint = _load_checkpoint(filename, map_location, logger)
230 | # OrderedDict is a subclass of dict
231 | if not isinstance(checkpoint, dict):
232 | raise RuntimeError(
233 | f'No state_dict found in checkpoint file {filename}')
234 | # get state_dict from checkpoint
236 | if 'state_dict' in checkpoint:
237 | state_dict = checkpoint['state_dict']
238 | elif 'model' in checkpoint:
239 | state_dict = checkpoint['model']
240 | else:
241 | state_dict = checkpoint
242 |
243 | # strip prefix of state_dict
244 | metadata = getattr(state_dict, '_metadata', OrderedDict())
245 | for p, r in revise_keys:
246 | state_dict = OrderedDict(
247 | {re.sub(p, r, k): v
248 | for k, v in state_dict.items()})
249 | # Keep metadata in state_dict
250 | state_dict._metadata = metadata
251 |
252 | # load state_dict
253 | load_state_dict(model, state_dict, strict, logger)
254 | return checkpoint
--------------------------------------------------------------------------------
/object_detection/configs/_base_/datasets/coco_detection.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CocoDataset'
3 | data_root = '/mnt/private_dataset/coco/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True),
9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 | dict(type='RandomFlip', flip_ratio=0.5),
11 | dict(type='Normalize', **img_norm_cfg),
12 | dict(type='Pad', size_divisor=32),
13 | dict(type='DefaultFormatBundle'),
14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 | dict(type='LoadImageFromFile'),
18 | dict(
19 | type='MultiScaleFlipAug',
20 | img_scale=(1333, 800),
21 | flip=False,
22 | transforms=[
23 | dict(type='Resize', keep_ratio=True),
24 | dict(type='RandomFlip'),
25 | dict(type='Normalize', **img_norm_cfg),
26 | dict(type='Pad', size_divisor=32),
27 | dict(type='ImageToTensor', keys=['img']),
28 | dict(type='Collect', keys=['img']),
29 | ])
30 | ]
31 | data = dict(
32 | samples_per_gpu=2,
33 | workers_per_gpu=4,
34 | train=dict(
35 | type=dataset_type,
36 |         ann_file=data_root + 'annotations/instances_train2017.json',
37 |         img_prefix=data_root + 'train2017/',
38 | pipeline=train_pipeline),
39 | val=dict(
40 | type=dataset_type,
41 |         ann_file=data_root + 'annotations/instances_val2017.json',
42 |         img_prefix=data_root + 'val2017/',
43 | pipeline=test_pipeline),
44 | test=dict(
45 | type=dataset_type,
46 |         ann_file=data_root + 'annotations/instances_val2017.json',
47 |         img_prefix=data_root + 'val2017/',
48 | pipeline=test_pipeline))
49 | evaluation = dict(interval=1, metric='bbox')
--------------------------------------------------------------------------------
/object_detection/configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CocoDataset'
3 | data_root = '/mnt/private_dataset/coco/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 | dict(type='RandomFlip', flip_ratio=0.5),
11 | dict(type='Normalize', **img_norm_cfg),
12 | dict(type='Pad', size_divisor=32),
13 | dict(type='DefaultFormatBundle'),
14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15 | ]
16 | test_pipeline = [
17 | dict(type='LoadImageFromFile'),
18 | dict(
19 | type='MultiScaleFlipAug',
20 | img_scale=(1333, 800),
21 | flip=False,
22 | transforms=[
23 | dict(type='Resize', keep_ratio=True),
24 | dict(type='RandomFlip'),
25 | dict(type='Normalize', **img_norm_cfg),
26 | dict(type='Pad', size_divisor=32),
27 | dict(type='ImageToTensor', keys=['img']),
28 | dict(type='Collect', keys=['img']),
29 | ])
30 | ]
31 | data = dict(
32 | samples_per_gpu=2,
33 | workers_per_gpu=4,
34 | train=dict(
35 | type=dataset_type,
36 | ann_file=data_root + 'annotations/instances_train2017.json',
37 | img_prefix=data_root + 'train2017/',
38 | pipeline=train_pipeline),
39 | val=dict(
40 | type=dataset_type,
41 | ann_file=data_root + 'annotations/instances_val2017.json',
42 | img_prefix=data_root + 'val2017/',
43 | pipeline=test_pipeline),
44 | test=dict(
45 | type=dataset_type,
46 | ann_file=data_root + 'annotations/instances_val2017.json',
47 | img_prefix=data_root + 'val2017/',
48 | pipeline=test_pipeline))
49 | evaluation = dict(metric=['bbox', 'segm'])
--------------------------------------------------------------------------------
/object_detection/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1)
2 | # yapf:disable
3 | log_config = dict(
4 | interval=50,
5 | hooks=[
6 | dict(type='TextLoggerHook'),
7 | # dict(type='TensorboardLoggerHook')
8 | ])
9 | # yapf:enable
10 | custom_hooks = [dict(type='NumClassCheckHook')]
11 |
12 | dist_params = dict(backend='nccl')
13 | log_level = 'INFO'
14 | load_from = None
15 | resume_from = None
16 | workflow = [('train', 1)]
17 |
--------------------------------------------------------------------------------
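`default_runtime.py` ships with TensorBoard logging commented out; enabling it from a derived config is a one-key override (a sketch, using the standard mmcv hook):

```python
_base_ = './default_runtime.py'
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),  # writes scalars under the work_dir
    ])
```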
/object_detection/configs/_base_/models/cascade_mask_rcnn_pvtv2_b2_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='CascadeRCNN',
4 | backbone=dict(
5 | type='pvt_v2_b2',
6 | style='pytorch'),
7 | neck=dict(
8 | type='FPN',
9 | in_channels=[64, 128, 320, 512],
10 | out_channels=256,
11 | num_outs=5),
12 | rpn_head=dict(
13 | type='RPNHead',
14 | in_channels=256,
15 | feat_channels=256,
16 | anchor_generator=dict(
17 | type='AnchorGenerator',
18 | scales=[8],
19 | ratios=[0.5, 1.0, 2.0],
20 | strides=[4, 8, 16, 32, 64]),
21 | bbox_coder=dict(
22 | type='DeltaXYWHBBoxCoder',
23 | target_means=[.0, .0, .0, .0],
24 | target_stds=[1.0, 1.0, 1.0, 1.0]),
25 | loss_cls=dict(
26 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
27 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
28 | roi_head=dict(
29 | type='CascadeRoIHead',
30 | num_stages=3,
31 | stage_loss_weights=[1, 0.5, 0.25],
32 | bbox_roi_extractor=dict(
33 | type='SingleRoIExtractor',
34 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
35 | out_channels=256,
36 | featmap_strides=[4, 8, 16, 32]),
37 | bbox_head=[
38 | dict(
39 | type='Shared2FCBBoxHead',
40 | in_channels=256,
41 | fc_out_channels=1024,
42 | roi_feat_size=7,
43 | num_classes=80,
44 | bbox_coder=dict(
45 | type='DeltaXYWHBBoxCoder',
46 | target_means=[0., 0., 0., 0.],
47 | target_stds=[0.1, 0.1, 0.2, 0.2]),
48 | reg_class_agnostic=True,
49 | loss_cls=dict(
50 | type='CrossEntropyLoss',
51 | use_sigmoid=False,
52 | loss_weight=1.0),
53 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
54 | loss_weight=1.0)),
55 | dict(
56 | type='Shared2FCBBoxHead',
57 | in_channels=256,
58 | fc_out_channels=1024,
59 | roi_feat_size=7,
60 | num_classes=80,
61 | bbox_coder=dict(
62 | type='DeltaXYWHBBoxCoder',
63 | target_means=[0., 0., 0., 0.],
64 | target_stds=[0.05, 0.05, 0.1, 0.1]),
65 | reg_class_agnostic=True,
66 | loss_cls=dict(
67 | type='CrossEntropyLoss',
68 | use_sigmoid=False,
69 | loss_weight=1.0),
70 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
71 | loss_weight=1.0)),
72 | dict(
73 | type='Shared2FCBBoxHead',
74 | in_channels=256,
75 | fc_out_channels=1024,
76 | roi_feat_size=7,
77 | num_classes=80,
78 | bbox_coder=dict(
79 | type='DeltaXYWHBBoxCoder',
80 | target_means=[0., 0., 0., 0.],
81 | target_stds=[0.033, 0.033, 0.067, 0.067]),
82 | reg_class_agnostic=True,
83 | loss_cls=dict(
84 | type='CrossEntropyLoss',
85 | use_sigmoid=False,
86 | loss_weight=1.0),
87 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
88 | ],
89 | mask_roi_extractor=dict(
90 | type='SingleRoIExtractor',
91 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
92 | out_channels=256,
93 | featmap_strides=[4, 8, 16, 32]),
94 | mask_head=dict(
95 | type='FCNMaskHead',
96 | num_convs=4,
97 | in_channels=256,
98 | conv_out_channels=256,
99 | num_classes=80,
100 | loss_mask=dict(
101 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
102 | # model training and testing settings
103 | train_cfg=dict(
104 | rpn=dict(
105 | assigner=dict(
106 | type='MaxIoUAssigner',
107 | pos_iou_thr=0.7,
108 | neg_iou_thr=0.3,
109 | min_pos_iou=0.3,
110 | match_low_quality=True,
111 | ignore_iof_thr=-1),
112 | sampler=dict(
113 | type='RandomSampler',
114 | num=256,
115 | pos_fraction=0.5,
116 | neg_pos_ub=-1,
117 | add_gt_as_proposals=False),
118 | allowed_border=0,
119 | pos_weight=-1,
120 | debug=False),
121 | rpn_proposal=dict(
122 | nms_across_levels=False,
123 | nms_pre=2000,
124 | nms_post=2000,
125 | max_per_img=2000,
126 | nms=dict(type='nms', iou_threshold=0.7),
127 | min_bbox_size=0),
128 | rcnn=[
129 | dict(
130 | assigner=dict(
131 | type='MaxIoUAssigner',
132 | pos_iou_thr=0.5,
133 | neg_iou_thr=0.5,
134 | min_pos_iou=0.5,
135 | match_low_quality=False,
136 | ignore_iof_thr=-1),
137 | sampler=dict(
138 | type='RandomSampler',
139 | num=512,
140 | pos_fraction=0.25,
141 | neg_pos_ub=-1,
142 | add_gt_as_proposals=True),
143 | mask_size=28,
144 | pos_weight=-1,
145 | debug=False),
146 | dict(
147 | assigner=dict(
148 | type='MaxIoUAssigner',
149 | pos_iou_thr=0.6,
150 | neg_iou_thr=0.6,
151 | min_pos_iou=0.6,
152 | match_low_quality=False,
153 | ignore_iof_thr=-1),
154 | sampler=dict(
155 | type='RandomSampler',
156 | num=512,
157 | pos_fraction=0.25,
158 | neg_pos_ub=-1,
159 | add_gt_as_proposals=True),
160 | mask_size=28,
161 | pos_weight=-1,
162 | debug=False),
163 | dict(
164 | assigner=dict(
165 | type='MaxIoUAssigner',
166 | pos_iou_thr=0.7,
167 | neg_iou_thr=0.7,
168 | min_pos_iou=0.7,
169 | match_low_quality=False,
170 | ignore_iof_thr=-1),
171 | sampler=dict(
172 | type='RandomSampler',
173 | num=512,
174 | pos_fraction=0.25,
175 | neg_pos_ub=-1,
176 | add_gt_as_proposals=True),
177 | mask_size=28,
178 | pos_weight=-1,
179 | debug=False)
180 | ]),
181 | test_cfg=dict(
182 | rpn=dict(
183 | nms_across_levels=False,
184 | nms_pre=1000,
185 | nms_post=1000,
186 | max_per_img=1000,
187 | nms=dict(type='nms', iou_threshold=0.7),
188 | min_bbox_size=0),
189 | rcnn=dict(
190 | score_thr=0.05,
191 | nms=dict(type='nms', iou_threshold=0.5),
192 | max_per_img=100,
193 | mask_thr_binary=0.5)))
194 |
--------------------------------------------------------------------------------
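A note on the cascade head above (it applies equally to the R50 variant that follows): the three `bbox_head` stages share one architecture but are trained with progressively stricter IoU thresholds (0.5 → 0.6 → 0.7) and progressively smaller `target_stds`. With zero `target_means` and stds $(\sigma_x, \sigma_y, \sigma_w, \sigma_h)$, `DeltaXYWHBBoxCoder` regresses the standard normalized deltas between a proposal $(x_a, y_a, w_a, h_a)$ and a ground-truth box $(x, y, w, h)$:

$$t_x=\frac{x-x_a}{\sigma_x w_a},\qquad t_y=\frac{y-y_a}{\sigma_y h_a},\qquad t_w=\frac{1}{\sigma_w}\log\frac{w}{w_a},\qquad t_h=\frac{1}{\sigma_h}\log\frac{h}{h_a}.$$

Shrinking the stds from (0.1, 0.1, 0.2, 0.2) to (0.033, 0.033, 0.067, 0.067) rescales the ever-smaller residual offsets of later stages, so each regressor still sees well-conditioned targets.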
/object_detection/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='CascadeRCNN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
35 | roi_head=dict(
36 | type='CascadeRoIHead',
37 | num_stages=3,
38 | stage_loss_weights=[1, 0.5, 0.25],
39 | bbox_roi_extractor=dict(
40 | type='SingleRoIExtractor',
41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
42 | out_channels=256,
43 | featmap_strides=[4, 8, 16, 32]),
44 | bbox_head=[
45 | dict(
46 | type='Shared2FCBBoxHead',
47 | in_channels=256,
48 | fc_out_channels=1024,
49 | roi_feat_size=7,
50 | num_classes=80,
51 | bbox_coder=dict(
52 | type='DeltaXYWHBBoxCoder',
53 | target_means=[0., 0., 0., 0.],
54 | target_stds=[0.1, 0.1, 0.2, 0.2]),
55 | reg_class_agnostic=True,
56 | loss_cls=dict(
57 | type='CrossEntropyLoss',
58 | use_sigmoid=False,
59 | loss_weight=1.0),
60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
61 | loss_weight=1.0)),
62 | dict(
63 | type='Shared2FCBBoxHead',
64 | in_channels=256,
65 | fc_out_channels=1024,
66 | roi_feat_size=7,
67 | num_classes=80,
68 | bbox_coder=dict(
69 | type='DeltaXYWHBBoxCoder',
70 | target_means=[0., 0., 0., 0.],
71 | target_stds=[0.05, 0.05, 0.1, 0.1]),
72 | reg_class_agnostic=True,
73 | loss_cls=dict(
74 | type='CrossEntropyLoss',
75 | use_sigmoid=False,
76 | loss_weight=1.0),
77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
78 | loss_weight=1.0)),
79 | dict(
80 | type='Shared2FCBBoxHead',
81 | in_channels=256,
82 | fc_out_channels=1024,
83 | roi_feat_size=7,
84 | num_classes=80,
85 | bbox_coder=dict(
86 | type='DeltaXYWHBBoxCoder',
87 | target_means=[0., 0., 0., 0.],
88 | target_stds=[0.033, 0.033, 0.067, 0.067]),
89 | reg_class_agnostic=True,
90 | loss_cls=dict(
91 | type='CrossEntropyLoss',
92 | use_sigmoid=False,
93 | loss_weight=1.0),
94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
95 | ],
96 | mask_roi_extractor=dict(
97 | type='SingleRoIExtractor',
98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
99 | out_channels=256,
100 | featmap_strides=[4, 8, 16, 32]),
101 | mask_head=dict(
102 | type='FCNMaskHead',
103 | num_convs=4,
104 | in_channels=256,
105 | conv_out_channels=256,
106 | num_classes=80,
107 | loss_mask=dict(
108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
109 | # model training and testing settings
110 | train_cfg=dict(
111 | rpn=dict(
112 | assigner=dict(
113 | type='MaxIoUAssigner',
114 | pos_iou_thr=0.7,
115 | neg_iou_thr=0.3,
116 | min_pos_iou=0.3,
117 | match_low_quality=True,
118 | ignore_iof_thr=-1),
119 | sampler=dict(
120 | type='RandomSampler',
121 | num=256,
122 | pos_fraction=0.5,
123 | neg_pos_ub=-1,
124 | add_gt_as_proposals=False),
125 | allowed_border=0,
126 | pos_weight=-1,
127 | debug=False),
128 | rpn_proposal=dict(
129 | nms_pre=2000,
130 | max_per_img=2000,
131 | nms=dict(type='nms', iou_threshold=0.7),
132 | min_bbox_size=0),
133 | rcnn=[
134 | dict(
135 | assigner=dict(
136 | type='MaxIoUAssigner',
137 | pos_iou_thr=0.5,
138 | neg_iou_thr=0.5,
139 | min_pos_iou=0.5,
140 | match_low_quality=False,
141 | ignore_iof_thr=-1),
142 | sampler=dict(
143 | type='RandomSampler',
144 | num=512,
145 | pos_fraction=0.25,
146 | neg_pos_ub=-1,
147 | add_gt_as_proposals=True),
148 | mask_size=28,
149 | pos_weight=-1,
150 | debug=False),
151 | dict(
152 | assigner=dict(
153 | type='MaxIoUAssigner',
154 | pos_iou_thr=0.6,
155 | neg_iou_thr=0.6,
156 | min_pos_iou=0.6,
157 | match_low_quality=False,
158 | ignore_iof_thr=-1),
159 | sampler=dict(
160 | type='RandomSampler',
161 | num=512,
162 | pos_fraction=0.25,
163 | neg_pos_ub=-1,
164 | add_gt_as_proposals=True),
165 | mask_size=28,
166 | pos_weight=-1,
167 | debug=False),
168 | dict(
169 | assigner=dict(
170 | type='MaxIoUAssigner',
171 | pos_iou_thr=0.7,
172 | neg_iou_thr=0.7,
173 | min_pos_iou=0.7,
174 | match_low_quality=False,
175 | ignore_iof_thr=-1),
176 | sampler=dict(
177 | type='RandomSampler',
178 | num=512,
179 | pos_fraction=0.25,
180 | neg_pos_ub=-1,
181 | add_gt_as_proposals=True),
182 | mask_size=28,
183 | pos_weight=-1,
184 | debug=False)
185 | ]),
186 | test_cfg=dict(
187 | rpn=dict(
188 | nms_pre=1000,
189 | max_per_img=1000,
190 | nms=dict(type='nms', iou_threshold=0.7),
191 | min_bbox_size=0),
192 | rcnn=dict(
193 | score_thr=0.05,
194 | nms=dict(type='nms', iou_threshold=0.5),
195 | max_per_img=100,
196 | mask_thr_binary=0.5)))
197 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/cascade_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='CascadeRCNN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
35 | roi_head=dict(
36 | type='CascadeRoIHead',
37 | num_stages=3,
38 | stage_loss_weights=[1, 0.5, 0.25],
39 | bbox_roi_extractor=dict(
40 | type='SingleRoIExtractor',
41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
42 | out_channels=256,
43 | featmap_strides=[4, 8, 16, 32]),
44 | bbox_head=[
45 | dict(
46 | type='Shared2FCBBoxHead',
47 | in_channels=256,
48 | fc_out_channels=1024,
49 | roi_feat_size=7,
50 | num_classes=80,
51 | bbox_coder=dict(
52 | type='DeltaXYWHBBoxCoder',
53 | target_means=[0., 0., 0., 0.],
54 | target_stds=[0.1, 0.1, 0.2, 0.2]),
55 | reg_class_agnostic=True,
56 | loss_cls=dict(
57 | type='CrossEntropyLoss',
58 | use_sigmoid=False,
59 | loss_weight=1.0),
60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
61 | loss_weight=1.0)),
62 | dict(
63 | type='Shared2FCBBoxHead',
64 | in_channels=256,
65 | fc_out_channels=1024,
66 | roi_feat_size=7,
67 | num_classes=80,
68 | bbox_coder=dict(
69 | type='DeltaXYWHBBoxCoder',
70 | target_means=[0., 0., 0., 0.],
71 | target_stds=[0.05, 0.05, 0.1, 0.1]),
72 | reg_class_agnostic=True,
73 | loss_cls=dict(
74 | type='CrossEntropyLoss',
75 | use_sigmoid=False,
76 | loss_weight=1.0),
77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
78 | loss_weight=1.0)),
79 | dict(
80 | type='Shared2FCBBoxHead',
81 | in_channels=256,
82 | fc_out_channels=1024,
83 | roi_feat_size=7,
84 | num_classes=80,
85 | bbox_coder=dict(
86 | type='DeltaXYWHBBoxCoder',
87 | target_means=[0., 0., 0., 0.],
88 | target_stds=[0.033, 0.033, 0.067, 0.067]),
89 | reg_class_agnostic=True,
90 | loss_cls=dict(
91 | type='CrossEntropyLoss',
92 | use_sigmoid=False,
93 | loss_weight=1.0),
94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
95 | ]),
96 | # model training and testing settings
97 | train_cfg=dict(
98 | rpn=dict(
99 | assigner=dict(
100 | type='MaxIoUAssigner',
101 | pos_iou_thr=0.7,
102 | neg_iou_thr=0.3,
103 | min_pos_iou=0.3,
104 | match_low_quality=True,
105 | ignore_iof_thr=-1),
106 | sampler=dict(
107 | type='RandomSampler',
108 | num=256,
109 | pos_fraction=0.5,
110 | neg_pos_ub=-1,
111 | add_gt_as_proposals=False),
112 | allowed_border=0,
113 | pos_weight=-1,
114 | debug=False),
115 | rpn_proposal=dict(
116 | nms_pre=2000,
117 | max_per_img=2000,
118 | nms=dict(type='nms', iou_threshold=0.7),
119 | min_bbox_size=0),
120 | rcnn=[
121 | dict(
122 | assigner=dict(
123 | type='MaxIoUAssigner',
124 | pos_iou_thr=0.5,
125 | neg_iou_thr=0.5,
126 | min_pos_iou=0.5,
127 | match_low_quality=False,
128 | ignore_iof_thr=-1),
129 | sampler=dict(
130 | type='RandomSampler',
131 | num=512,
132 | pos_fraction=0.25,
133 | neg_pos_ub=-1,
134 | add_gt_as_proposals=True),
135 | pos_weight=-1,
136 | debug=False),
137 | dict(
138 | assigner=dict(
139 | type='MaxIoUAssigner',
140 | pos_iou_thr=0.6,
141 | neg_iou_thr=0.6,
142 | min_pos_iou=0.6,
143 | match_low_quality=False,
144 | ignore_iof_thr=-1),
145 | sampler=dict(
146 | type='RandomSampler',
147 | num=512,
148 | pos_fraction=0.25,
149 | neg_pos_ub=-1,
150 | add_gt_as_proposals=True),
151 | pos_weight=-1,
152 | debug=False),
153 | dict(
154 | assigner=dict(
155 | type='MaxIoUAssigner',
156 | pos_iou_thr=0.7,
157 | neg_iou_thr=0.7,
158 | min_pos_iou=0.7,
159 | match_low_quality=False,
160 | ignore_iof_thr=-1),
161 | sampler=dict(
162 | type='RandomSampler',
163 | num=512,
164 | pos_fraction=0.25,
165 | neg_pos_ub=-1,
166 | add_gt_as_proposals=True),
167 | pos_weight=-1,
168 | debug=False)
169 | ]),
170 | test_cfg=dict(
171 | rpn=dict(
172 | nms_pre=1000,
173 | max_per_img=1000,
174 | nms=dict(type='nms', iou_threshold=0.7),
175 | min_bbox_size=0),
176 | rcnn=dict(
177 | score_thr=0.05,
178 | nms=dict(type='nms', iou_threshold=0.5),
179 | max_per_img=100)))
180 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/fast_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='FastRCNN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | roi_head=dict(
20 | type='StandardRoIHead',
21 | bbox_roi_extractor=dict(
22 | type='SingleRoIExtractor',
23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
24 | out_channels=256,
25 | featmap_strides=[4, 8, 16, 32]),
26 | bbox_head=dict(
27 | type='Shared2FCBBoxHead',
28 | in_channels=256,
29 | fc_out_channels=1024,
30 | roi_feat_size=7,
31 | num_classes=80,
32 | bbox_coder=dict(
33 | type='DeltaXYWHBBoxCoder',
34 | target_means=[0., 0., 0., 0.],
35 | target_stds=[0.1, 0.1, 0.2, 0.2]),
36 | reg_class_agnostic=False,
37 | loss_cls=dict(
38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
40 | # model training and testing settings
41 | train_cfg=dict(
42 | rcnn=dict(
43 | assigner=dict(
44 | type='MaxIoUAssigner',
45 | pos_iou_thr=0.5,
46 | neg_iou_thr=0.5,
47 | min_pos_iou=0.5,
48 | match_low_quality=False,
49 | ignore_iof_thr=-1),
50 | sampler=dict(
51 | type='RandomSampler',
52 | num=512,
53 | pos_fraction=0.25,
54 | neg_pos_ub=-1,
55 | add_gt_as_proposals=True),
56 | pos_weight=-1,
57 | debug=False)),
58 | test_cfg=dict(
59 | rcnn=dict(
60 | score_thr=0.05,
61 | nms=dict(type='nms', iou_threshold=0.5),
62 | max_per_img=100)))
63 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='BN', requires_grad=False)
3 | model = dict(
4 | type='FasterRCNN',
5 | pretrained='open-mmlab://detectron2/resnet50_caffe',
6 | backbone=dict(
7 | type='ResNet',
8 | depth=50,
9 | num_stages=3,
10 | strides=(1, 2, 2),
11 | dilations=(1, 1, 1),
12 | out_indices=(2, ),
13 | frozen_stages=1,
14 | norm_cfg=norm_cfg,
15 | norm_eval=True,
16 | style='caffe'),
17 | rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=1024,
20 | feat_channels=1024,
21 | anchor_generator=dict(
22 | type='AnchorGenerator',
23 | scales=[2, 4, 8, 16, 32],
24 | ratios=[0.5, 1.0, 2.0],
25 | strides=[16]),
26 | bbox_coder=dict(
27 | type='DeltaXYWHBBoxCoder',
28 | target_means=[.0, .0, .0, .0],
29 | target_stds=[1.0, 1.0, 1.0, 1.0]),
30 | loss_cls=dict(
31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
33 | roi_head=dict(
34 | type='StandardRoIHead',
35 | shared_head=dict(
36 | type='ResLayer',
37 | depth=50,
38 | stage=3,
39 | stride=2,
40 | dilation=1,
41 | style='caffe',
42 | norm_cfg=norm_cfg,
43 | norm_eval=True),
44 | bbox_roi_extractor=dict(
45 | type='SingleRoIExtractor',
46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
47 | out_channels=1024,
48 | featmap_strides=[16]),
49 | bbox_head=dict(
50 | type='BBoxHead',
51 | with_avg_pool=True,
52 | roi_feat_size=7,
53 | in_channels=2048,
54 | num_classes=80,
55 | bbox_coder=dict(
56 | type='DeltaXYWHBBoxCoder',
57 | target_means=[0., 0., 0., 0.],
58 | target_stds=[0.1, 0.1, 0.2, 0.2]),
59 | reg_class_agnostic=False,
60 | loss_cls=dict(
61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
63 | # model training and testing settings
64 | train_cfg=dict(
65 | rpn=dict(
66 | assigner=dict(
67 | type='MaxIoUAssigner',
68 | pos_iou_thr=0.7,
69 | neg_iou_thr=0.3,
70 | min_pos_iou=0.3,
71 | match_low_quality=True,
72 | ignore_iof_thr=-1),
73 | sampler=dict(
74 | type='RandomSampler',
75 | num=256,
76 | pos_fraction=0.5,
77 | neg_pos_ub=-1,
78 | add_gt_as_proposals=False),
79 | allowed_border=0,
80 | pos_weight=-1,
81 | debug=False),
82 | rpn_proposal=dict(
83 | nms_pre=12000,
84 | max_per_img=2000,
85 | nms=dict(type='nms', iou_threshold=0.7),
86 | min_bbox_size=0),
87 | rcnn=dict(
88 | assigner=dict(
89 | type='MaxIoUAssigner',
90 | pos_iou_thr=0.5,
91 | neg_iou_thr=0.5,
92 | min_pos_iou=0.5,
93 | match_low_quality=False,
94 | ignore_iof_thr=-1),
95 | sampler=dict(
96 | type='RandomSampler',
97 | num=512,
98 | pos_fraction=0.25,
99 | neg_pos_ub=-1,
100 | add_gt_as_proposals=True),
101 | pos_weight=-1,
102 | debug=False)),
103 | test_cfg=dict(
104 | rpn=dict(
105 | nms_pre=6000,
106 | max_per_img=1000,
107 | nms=dict(type='nms', iou_threshold=0.7),
108 | min_bbox_size=0),
109 | rcnn=dict(
110 | score_thr=0.05,
111 | nms=dict(type='nms', iou_threshold=0.5),
112 | max_per_img=100)))
113 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='BN', requires_grad=False)
3 | model = dict(
4 | type='FasterRCNN',
5 | pretrained='open-mmlab://detectron2/resnet50_caffe',
6 | backbone=dict(
7 | type='ResNet',
8 | depth=50,
9 | num_stages=4,
10 | strides=(1, 2, 2, 1),
11 | dilations=(1, 1, 1, 2),
12 | out_indices=(3, ),
13 | frozen_stages=1,
14 | norm_cfg=norm_cfg,
15 | norm_eval=True,
16 | style='caffe'),
17 | rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=2048,
20 | feat_channels=2048,
21 | anchor_generator=dict(
22 | type='AnchorGenerator',
23 | scales=[2, 4, 8, 16, 32],
24 | ratios=[0.5, 1.0, 2.0],
25 | strides=[16]),
26 | bbox_coder=dict(
27 | type='DeltaXYWHBBoxCoder',
28 | target_means=[.0, .0, .0, .0],
29 | target_stds=[1.0, 1.0, 1.0, 1.0]),
30 | loss_cls=dict(
31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
33 | roi_head=dict(
34 | type='StandardRoIHead',
35 | bbox_roi_extractor=dict(
36 | type='SingleRoIExtractor',
37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
38 | out_channels=2048,
39 | featmap_strides=[16]),
40 | bbox_head=dict(
41 | type='Shared2FCBBoxHead',
42 | in_channels=2048,
43 | fc_out_channels=1024,
44 | roi_feat_size=7,
45 | num_classes=80,
46 | bbox_coder=dict(
47 | type='DeltaXYWHBBoxCoder',
48 | target_means=[0., 0., 0., 0.],
49 | target_stds=[0.1, 0.1, 0.2, 0.2]),
50 | reg_class_agnostic=False,
51 | loss_cls=dict(
52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
54 | # model training and testing settings
55 | train_cfg=dict(
56 | rpn=dict(
57 | assigner=dict(
58 | type='MaxIoUAssigner',
59 | pos_iou_thr=0.7,
60 | neg_iou_thr=0.3,
61 | min_pos_iou=0.3,
62 | match_low_quality=True,
63 | ignore_iof_thr=-1),
64 | sampler=dict(
65 | type='RandomSampler',
66 | num=256,
67 | pos_fraction=0.5,
68 | neg_pos_ub=-1,
69 | add_gt_as_proposals=False),
70 | allowed_border=0,
71 | pos_weight=-1,
72 | debug=False),
73 | rpn_proposal=dict(
74 | nms_pre=12000,
75 | max_per_img=2000,
76 | nms=dict(type='nms', iou_threshold=0.7),
77 | min_bbox_size=0),
78 | rcnn=dict(
79 | assigner=dict(
80 | type='MaxIoUAssigner',
81 | pos_iou_thr=0.5,
82 | neg_iou_thr=0.5,
83 | min_pos_iou=0.5,
84 | match_low_quality=False,
85 | ignore_iof_thr=-1),
86 | sampler=dict(
87 | type='RandomSampler',
88 | num=512,
89 | pos_fraction=0.25,
90 | neg_pos_ub=-1,
91 | add_gt_as_proposals=True),
92 | pos_weight=-1,
93 | debug=False)),
94 | test_cfg=dict(
95 | rpn=dict(
96 | nms=dict(type='nms', iou_threshold=0.7),
97 | nms_pre=6000,
98 | max_per_img=1000,
99 | min_bbox_size=0),
100 | rcnn=dict(
101 | score_thr=0.05,
102 | nms=dict(type='nms', iou_threshold=0.5),
103 | max_per_img=100)))
104 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/faster_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='FasterRCNN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 | # model training and testing settings
57 | train_cfg=dict(
58 | rpn=dict(
59 | assigner=dict(
60 | type='MaxIoUAssigner',
61 | pos_iou_thr=0.7,
62 | neg_iou_thr=0.3,
63 | min_pos_iou=0.3,
64 | match_low_quality=True,
65 | ignore_iof_thr=-1),
66 | sampler=dict(
67 | type='RandomSampler',
68 | num=256,
69 | pos_fraction=0.5,
70 | neg_pos_ub=-1,
71 | add_gt_as_proposals=False),
72 | allowed_border=-1,
73 | pos_weight=-1,
74 | debug=False),
75 | rpn_proposal=dict(
76 | nms_pre=2000,
77 | max_per_img=1000,
78 | nms=dict(type='nms', iou_threshold=0.7),
79 | min_bbox_size=0),
80 | rcnn=dict(
81 | assigner=dict(
82 | type='MaxIoUAssigner',
83 | pos_iou_thr=0.5,
84 | neg_iou_thr=0.5,
85 | min_pos_iou=0.5,
86 | match_low_quality=False,
87 | ignore_iof_thr=-1),
88 | sampler=dict(
89 | type='RandomSampler',
90 | num=512,
91 | pos_fraction=0.25,
92 | neg_pos_ub=-1,
93 | add_gt_as_proposals=True),
94 | pos_weight=-1,
95 | debug=False)),
96 | test_cfg=dict(
97 | rpn=dict(
98 | nms_pre=1000,
99 | max_per_img=1000,
100 | nms=dict(type='nms', iou_threshold=0.7),
101 | min_bbox_size=0),
102 | rcnn=dict(
103 | score_thr=0.05,
104 | nms=dict(type='nms', iou_threshold=0.5),
105 | max_per_img=100)
106 | # soft-nms is also supported for rcnn testing
107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
108 | ))
109 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | norm_cfg = dict(type='BN', requires_grad=False)
3 | model = dict(
4 | type='MaskRCNN',
5 | pretrained='open-mmlab://detectron2/resnet50_caffe',
6 | backbone=dict(
7 | type='ResNet',
8 | depth=50,
9 | num_stages=3,
10 | strides=(1, 2, 2),
11 | dilations=(1, 1, 1),
12 | out_indices=(2, ),
13 | frozen_stages=1,
14 | norm_cfg=norm_cfg,
15 | norm_eval=True,
16 | style='caffe'),
17 | rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=1024,
20 | feat_channels=1024,
21 | anchor_generator=dict(
22 | type='AnchorGenerator',
23 | scales=[2, 4, 8, 16, 32],
24 | ratios=[0.5, 1.0, 2.0],
25 | strides=[16]),
26 | bbox_coder=dict(
27 | type='DeltaXYWHBBoxCoder',
28 | target_means=[.0, .0, .0, .0],
29 | target_stds=[1.0, 1.0, 1.0, 1.0]),
30 | loss_cls=dict(
31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
33 | roi_head=dict(
34 | type='StandardRoIHead',
35 | shared_head=dict(
36 | type='ResLayer',
37 | depth=50,
38 | stage=3,
39 | stride=2,
40 | dilation=1,
41 | style='caffe',
42 | norm_cfg=norm_cfg,
43 | norm_eval=True),
44 | bbox_roi_extractor=dict(
45 | type='SingleRoIExtractor',
46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
47 | out_channels=1024,
48 | featmap_strides=[16]),
49 | bbox_head=dict(
50 | type='BBoxHead',
51 | with_avg_pool=True,
52 | roi_feat_size=7,
53 | in_channels=2048,
54 | num_classes=80,
55 | bbox_coder=dict(
56 | type='DeltaXYWHBBoxCoder',
57 | target_means=[0., 0., 0., 0.],
58 | target_stds=[0.1, 0.1, 0.2, 0.2]),
59 | reg_class_agnostic=False,
60 | loss_cls=dict(
61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
63 | mask_roi_extractor=None,
64 | mask_head=dict(
65 | type='FCNMaskHead',
66 | num_convs=0,
67 | in_channels=2048,
68 | conv_out_channels=256,
69 | num_classes=80,
70 | loss_mask=dict(
71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
72 | # model training and testing settings
73 | train_cfg=dict(
74 | rpn=dict(
75 | assigner=dict(
76 | type='MaxIoUAssigner',
77 | pos_iou_thr=0.7,
78 | neg_iou_thr=0.3,
79 | min_pos_iou=0.3,
80 | match_low_quality=True,
81 | ignore_iof_thr=-1),
82 | sampler=dict(
83 | type='RandomSampler',
84 | num=256,
85 | pos_fraction=0.5,
86 | neg_pos_ub=-1,
87 | add_gt_as_proposals=False),
88 | allowed_border=0,
89 | pos_weight=-1,
90 | debug=False),
91 | rpn_proposal=dict(
92 | nms_pre=12000,
93 | max_per_img=2000,
94 | nms=dict(type='nms', iou_threshold=0.7),
95 | min_bbox_size=0),
96 | rcnn=dict(
97 | assigner=dict(
98 | type='MaxIoUAssigner',
99 | pos_iou_thr=0.5,
100 | neg_iou_thr=0.5,
101 | min_pos_iou=0.5,
102 | match_low_quality=False,
103 | ignore_iof_thr=-1),
104 | sampler=dict(
105 | type='RandomSampler',
106 | num=512,
107 | pos_fraction=0.25,
108 | neg_pos_ub=-1,
109 | add_gt_as_proposals=True),
110 | mask_size=14,
111 | pos_weight=-1,
112 | debug=False)),
113 | test_cfg=dict(
114 | rpn=dict(
115 | nms_pre=6000,
116 | nms=dict(type='nms', iou_threshold=0.7),
117 | max_per_img=1000,
118 | min_bbox_size=0),
119 | rcnn=dict(
120 | score_thr=0.05,
121 | nms=dict(type='nms', iou_threshold=0.5),
122 | max_per_img=100,
123 | mask_thr_binary=0.5)))
124 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
56 | mask_roi_extractor=dict(
57 | type='SingleRoIExtractor',
58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
59 | out_channels=256,
60 | featmap_strides=[4, 8, 16, 32]),
61 | mask_head=dict(
62 | type='FCNMaskHead',
63 | num_convs=4,
64 | in_channels=256,
65 | conv_out_channels=256,
66 | num_classes=80,
67 | loss_mask=dict(
68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
69 | # model training and testing settings
70 | train_cfg=dict(
71 | rpn=dict(
72 | assigner=dict(
73 | type='MaxIoUAssigner',
74 | pos_iou_thr=0.7,
75 | neg_iou_thr=0.3,
76 | min_pos_iou=0.3,
77 | match_low_quality=True,
78 | ignore_iof_thr=-1),
79 | sampler=dict(
80 | type='RandomSampler',
81 | num=256,
82 | pos_fraction=0.5,
83 | neg_pos_ub=-1,
84 | add_gt_as_proposals=False),
85 | allowed_border=-1,
86 | pos_weight=-1,
87 | debug=False),
88 | rpn_proposal=dict(
89 | nms_pre=2000,
90 | max_per_img=1000,
91 | nms=dict(type='nms', iou_threshold=0.7),
92 | min_bbox_size=0),
93 | rcnn=dict(
94 | assigner=dict(
95 | type='MaxIoUAssigner',
96 | pos_iou_thr=0.5,
97 | neg_iou_thr=0.5,
98 | min_pos_iou=0.5,
99 | match_low_quality=True,
100 | ignore_iof_thr=-1),
101 | sampler=dict(
102 | type='RandomSampler',
103 | num=512,
104 | pos_fraction=0.25,
105 | neg_pos_ub=-1,
106 | add_gt_as_proposals=True),
107 | mask_size=28,
108 | pos_weight=-1,
109 | debug=False)),
110 | test_cfg=dict(
111 | rpn=dict(
112 | nms_pre=1000,
113 | max_per_img=1000,
114 | nms=dict(type='nms', iou_threshold=0.7),
115 | min_bbox_size=0),
116 | rcnn=dict(
117 | score_thr=0.05,
118 | nms=dict(type='nms', iou_threshold=0.5),
119 | max_per_img=100,
120 | mask_thr_binary=0.5)))
121 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RetinaNet',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | start_level=1,
19 | add_extra_convs='on_input',
20 | num_outs=5),
21 | bbox_head=dict(
22 | type='RetinaHead',
23 | num_classes=80,
24 | in_channels=256,
25 | stacked_convs=4,
26 | feat_channels=256,
27 | anchor_generator=dict(
28 | type='AnchorGenerator',
29 | octave_base_scale=4,
30 | scales_per_octave=3,
31 | ratios=[0.5, 1.0, 2.0],
32 | strides=[8, 16, 32, 64, 128]),
33 | bbox_coder=dict(
34 | type='DeltaXYWHBBoxCoder',
35 | target_means=[.0, .0, .0, .0],
36 | target_stds=[1.0, 1.0, 1.0, 1.0]),
37 | loss_cls=dict(
38 | type='FocalLoss',
39 | use_sigmoid=True,
40 | gamma=2.0,
41 | alpha=0.25,
42 | loss_weight=1.0),
43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
44 | # model training and testing settings
45 | train_cfg=dict(
46 | assigner=dict(
47 | type='MaxIoUAssigner',
48 | pos_iou_thr=0.5,
49 | neg_iou_thr=0.4,
50 | min_pos_iou=0,
51 | ignore_iof_thr=-1),
52 | allowed_border=-1,
53 | pos_weight=-1,
54 | debug=False),
55 | test_cfg=dict(
56 | nms_pre=1000,
57 | min_bbox_size=0,
58 | score_thr=0.05,
59 | nms=dict(type='nms', iou_threshold=0.5),
60 | max_per_img=100))
61 |
--------------------------------------------------------------------------------
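The RetinaNet base config keeps the original focal-loss settings: for a dense anchor whose predicted probability of its ground-truth class is $p_t$, the classification loss is

$$\mathrm{FL}(p_t) = -\,\alpha_t\,(1-p_t)^{\gamma}\,\log(p_t), \qquad \gamma = 2.0,$$

where $\alpha_t = \alpha = 0.25$ for foreground anchors and $1-\alpha$ for background. The $(1-p_t)^{\gamma}$ factor down-weights easy, confidently classified anchors, so the single-stage head is not swamped by the vast number of easy negatives.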
/object_detection/configs/_base_/models/rpn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RPN',
4 | pretrained='open-mmlab://detectron2/resnet50_caffe',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=3,
9 | strides=(1, 2, 2),
10 | dilations=(1, 1, 1),
11 | out_indices=(2, ),
12 | frozen_stages=1,
13 | norm_cfg=dict(type='BN', requires_grad=False),
14 | norm_eval=True,
15 | style='caffe'),
16 | neck=None,
17 | rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=1024,
20 | feat_channels=1024,
21 | anchor_generator=dict(
22 | type='AnchorGenerator',
23 | scales=[2, 4, 8, 16, 32],
24 | ratios=[0.5, 1.0, 2.0],
25 | strides=[16]),
26 | bbox_coder=dict(
27 | type='DeltaXYWHBBoxCoder',
28 | target_means=[.0, .0, .0, .0],
29 | target_stds=[1.0, 1.0, 1.0, 1.0]),
30 | loss_cls=dict(
31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
33 | # model training and testing settings
34 | train_cfg=dict(
35 | rpn=dict(
36 | assigner=dict(
37 | type='MaxIoUAssigner',
38 | pos_iou_thr=0.7,
39 | neg_iou_thr=0.3,
40 | min_pos_iou=0.3,
41 | ignore_iof_thr=-1),
42 | sampler=dict(
43 | type='RandomSampler',
44 | num=256,
45 | pos_fraction=0.5,
46 | neg_pos_ub=-1,
47 | add_gt_as_proposals=False),
48 | allowed_border=0,
49 | pos_weight=-1,
50 | debug=False)),
51 | test_cfg=dict(
52 | rpn=dict(
53 | nms_pre=12000,
54 | max_per_img=2000,
55 | nms=dict(type='nms', iou_threshold=0.7),
56 | min_bbox_size=0)))
57 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='RPN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | # model training and testing settings
36 | train_cfg=dict(
37 | rpn=dict(
38 | assigner=dict(
39 | type='MaxIoUAssigner',
40 | pos_iou_thr=0.7,
41 | neg_iou_thr=0.3,
42 | min_pos_iou=0.3,
43 | ignore_iof_thr=-1),
44 | sampler=dict(
45 | type='RandomSampler',
46 | num=256,
47 | pos_fraction=0.5,
48 | neg_pos_ub=-1,
49 | add_gt_as_proposals=False),
50 | allowed_border=0,
51 | pos_weight=-1,
52 | debug=False)),
53 | test_cfg=dict(
54 | rpn=dict(
55 | nms_pre=2000,
56 | max_per_img=1000,
57 | nms=dict(type='nms', iou_threshold=0.7),
58 | min_bbox_size=0)))
59 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | input_size = 300
3 | model = dict(
4 | type='SingleStageDetector',
5 | pretrained='open-mmlab://vgg16_caffe',
6 | backbone=dict(
7 | type='SSDVGG',
8 | input_size=input_size,
9 | depth=16,
10 | with_last_pool=False,
11 | ceil_mode=True,
12 | out_indices=(3, 4),
13 | out_feature_indices=(22, 34),
14 | l2_norm_scale=20),
15 | neck=None,
16 | bbox_head=dict(
17 | type='SSDHead',
18 | in_channels=(512, 1024, 512, 256, 256, 256),
19 | num_classes=80,
20 | anchor_generator=dict(
21 | type='SSDAnchorGenerator',
22 | scale_major=False,
23 | input_size=input_size,
24 | basesize_ratio_range=(0.15, 0.9),
25 | strides=[8, 16, 32, 64, 100, 300],
26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
27 | bbox_coder=dict(
28 | type='DeltaXYWHBBoxCoder',
29 | target_means=[.0, .0, .0, .0],
30 | target_stds=[0.1, 0.1, 0.2, 0.2])),
31 | # model training and testing settings
32 | train_cfg=dict(
33 | assigner=dict(
34 | type='MaxIoUAssigner',
35 | pos_iou_thr=0.5,
36 | neg_iou_thr=0.5,
37 | min_pos_iou=0.,
38 | ignore_iof_thr=-1,
39 | gt_max_assign_all=False),
40 | smoothl1_beta=1.,
41 | allowed_border=-1,
42 | pos_weight=-1,
43 | neg_pos_ratio=3,
44 | debug=False),
45 | test_cfg=dict(
46 | nms_pre=1000,
47 | nms=dict(type='nms', iou_threshold=0.45),
48 | min_bbox_size=0,
49 | score_thr=0.02,
50 | max_per_img=200))
51 | cudnn_benchmark = True
52 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[8, 11])
11 | runner = dict(type='EpochBasedRunner', max_epochs=12)
12 |
--------------------------------------------------------------------------------
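The "1x" schedule is 12 epochs of SGD with a 500-iteration linear warmup and step decay (×0.1) after epochs 8 and 11. A plain-Python sketch of the resulting learning rate (matching mmcv's linear-warmup formula, to the best of our reading):

```python
def lr_at(epoch: int, it: int, base_lr=0.02, warmup_iters=500, warmup_ratio=0.001):
    """LR under the '1x' policy: linear warmup, then /10 at epochs 8 and 11."""
    if it < warmup_iters:  # ramps from base_lr * warmup_ratio up to base_lr
        k = it / warmup_iters
        return base_lr * (warmup_ratio + k * (1 - warmup_ratio))
    return base_lr * 0.1 ** sum(epoch >= s for s in (8, 11))
```

Note that the TransXNet configs below replace this SGD optimizer with AdamW (lr 1e-4) via `_delete_=True`, while inheriting the same step policy.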
/object_detection/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[16, 19])
11 | runner = dict(type='EpochBasedRunner', max_epochs=20)
12 |
--------------------------------------------------------------------------------
/object_detection/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[16, 22])
11 | runner = dict(type='EpochBasedRunner', max_epochs=24)
12 |
--------------------------------------------------------------------------------
/object_detection/configs/mask_rcnn_transx_b_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/mask_rcnn_r50_fpn.py',
3 | '_base_/datasets/coco_instance.py',
4 | '_base_/schedules/schedule_1x.py',
5 | '_base_/default_runtime.py'
6 | ]
7 |
8 |
9 | model = dict(
10 | pretrained=None,
11 | backbone=dict(
12 | _delete_=True,
13 | pretrained=True,
14 | type='transxnet_b',
15 | drop_path_rate=0.3,
16 | ),
17 | neck=dict(
18 | type='FPN',
19 | in_channels=[76, 152, 336, 672],
20 | out_channels=256,
21 | num_outs=5))
22 | # optimizer
23 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05,
24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
25 | 'relative_position_bias_table': dict(decay_mult=0.),
26 | 'norm': dict(decay_mult=0.)}))
27 | optimizer_config = dict(grad_clip=None)
28 | # fp16 = dict() ## AMP Training
29 | evaluation = dict(save_best='auto')
--------------------------------------------------------------------------------
/object_detection/configs/mask_rcnn_transx_s_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/mask_rcnn_r50_fpn.py',
3 | '_base_/datasets/coco_instance.py',
4 | '_base_/schedules/schedule_1x.py',
5 | '_base_/default_runtime.py'
6 | ]
7 |
8 |
9 | model = dict(
10 | pretrained=None,
11 | backbone=dict(
12 | _delete_=True,
13 | pretrained=True,
14 | type='transxnet_s',
15 | drop_path_rate=0.2,
16 | ),
17 | neck=dict(
18 | type='FPN',
19 | in_channels=[64, 128, 320, 512],
20 | out_channels=256,
21 | num_outs=5))
22 | # optimizer
23 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05,
24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
25 | 'relative_position_bias_table': dict(decay_mult=0.),
26 | 'norm': dict(decay_mult=0.)}))
27 | optimizer_config = dict(grad_clip=None)
28 | # fp16 = dict() ## AMP Training
29 | evaluation = dict(save_best='auto')
--------------------------------------------------------------------------------
/object_detection/configs/mask_rcnn_transx_t_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/mask_rcnn_r50_fpn.py',
3 | '_base_/datasets/coco_instance.py',
4 | '_base_/schedules/schedule_1x.py',
5 | '_base_/default_runtime.py'
6 | ]
7 |
8 |
9 | model = dict(
10 | pretrained=None,
11 | backbone=dict(
12 | _delete_=True,
13 | pretrained=True,
14 | type='transxnet_t',
15 | drop_path_rate=0.1,
16 | ),
17 | neck=dict(
18 | type='FPN',
19 | in_channels=[48, 96, 224, 448],
20 | out_channels=256,
21 | num_outs=5))
22 | # optimizer
23 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05,
24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
25 | 'relative_position_bias_table': dict(decay_mult=0.),
26 | 'norm': dict(decay_mult=0.)}))
27 | optimizer_config = dict(grad_clip=None)
28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training
29 | evaluation = dict(save_best='auto')
--------------------------------------------------------------------------------
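The three Mask R-CNN configs above differ only in the backbone variant, its `drop_path_rate`, and the FPN `in_channels`, which must match the four stage widths of each TransXNet variant (tiny: 48/96/224/448, small: 64/128/320/512, base: 76/152/336/672). `_delete_=True` tells the mmcv config system to discard the inherited ResNet backbone keys rather than merge into them; a quick check (assuming mmcv 1.x, run from `object_detection/`):

```python
from mmcv import Config

cfg = Config.fromfile('configs/mask_rcnn_transx_b_fpn_1x_coco.py')
print(cfg.model.backbone.type)     # 'transxnet_b' -- no leftover ResNet keys
print(cfg.model.neck.in_channels)  # [76, 152, 336, 672]
```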
/object_detection/configs/retinanet_transx_b_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/retinanet_r50_fpn.py',
3 | '_base_/datasets/coco_detection.py',
4 | '_base_/schedules/schedule_1x.py',
5 | '_base_/default_runtime.py'
6 | ]
7 |
8 |
9 | model = dict(
10 | pretrained=None,
11 | backbone=dict(
12 | _delete_=True,
13 | pretrained=True,
14 | type='transxnet_b',
15 | drop_path_rate=0.3,
16 |         # start_level=1,  # likely misplaced: start_level is an FPN (neck) argument
17 | ),
18 | neck=dict(
19 | type='FPN',
20 | in_channels=[76, 152, 336, 672],
21 | out_channels=256,
22 | start_level=1,
23 | add_extra_convs='on_input',
24 | num_outs=5))
25 | # optimizer
26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)
27 | optimizer_config = dict(grad_clip=None)
28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training
29 | evaluation = dict(save_best='auto')
30 |
--------------------------------------------------------------------------------
/object_detection/configs/retinanet_transx_s_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/retinanet_r50_fpn.py',
3 | '_base_/datasets/coco_detection.py',
4 | '_base_/schedules/schedule_1x.py',
5 | '_base_/default_runtime.py'
6 | ]
7 |
8 |
9 | model = dict(
10 | pretrained=None,
11 | backbone=dict(
12 | _delete_=True,
13 | pretrained=True,
14 | type='transxnet_s',
15 | drop_path_rate=0.2,
16 |         # start_level=1,  # likely misplaced: start_level is an FPN (neck) argument
17 | ),
18 | neck=dict(
19 | type='FPN',
20 | in_channels=[64, 128, 320, 512],
21 | out_channels=256,
22 | start_level=1,
23 | add_extra_convs='on_input',
24 | num_outs=5))
25 | # optimizer
26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)
27 | optimizer_config = dict(grad_clip=None)
28 | # fp16 = dict() ## AMP Training
29 | evaluation = dict(save_best='auto')
--------------------------------------------------------------------------------
/object_detection/configs/retinanet_transx_t_fpn_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/retinanet_r50_fpn.py',
3 | '_base_/datasets/coco_detection.py',
4 | '_base_/schedules/schedule_1x.py',
5 | '_base_/default_runtime.py'
6 | ]
7 |
8 |
9 | model = dict(
10 | pretrained=None,
11 | backbone=dict(
12 | _delete_=True,
13 | pretrained=True,
14 | type='transxnet_t',
15 | drop_path_rate=0.1,
16 |         # start_level=1,  # likely misplaced: start_level is an FPN (neck) argument
17 | ),
18 | neck=dict(
19 | type='FPN',
20 | in_channels=[48, 96, 224, 448],
21 | out_channels=256,
22 | start_level=1,
23 | add_extra_convs='on_input',
24 | num_outs=5))
25 | # optimizer
26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)
27 | optimizer_config = dict(grad_clip=None)
28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training
29 | evaluation = dict(save_best='auto')
--------------------------------------------------------------------------------
/object_detection/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | CONFIG=$1
3 | CHECKPOINT=$2
4 | GPUS=$3
5 | PORT=${PORT:-29500}
6 |
7 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
8 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
--------------------------------------------------------------------------------
/object_detection/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | CONFIG=$1
3 | GPUS=$2
4 | PORT=${PORT:-29500}
5 |
6 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
7 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
8 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
--------------------------------------------------------------------------------
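Both launcher scripts are thin wrappers around `torch.distributed.launch`: `dist_train.sh` takes a config and a GPU count (e.g. `bash dist_train.sh configs/mask_rcnn_transx_t_fpn_1x_coco.py 8`), `dist_test.sh` additionally takes a checkpoint, and any further arguments (`${@:3}` / `${@:4}`) are forwarded unchanged to `train.py` / `test.py`.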
/object_detection/mmcv_custom/runner/checkpoint.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
2 | import os.path as osp
3 | import time
4 | from tempfile import TemporaryDirectory
5 |
6 | import torch
7 | from torch.optim import Optimizer
8 |
9 | import mmcv
10 | from mmcv.parallel import is_module_wrapper
11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict
12 |
13 | try:
14 | import apex
15 | except ImportError:
16 | print('apex is not installed')
17 |
18 |
19 | def save_checkpoint(model, filename, optimizer=None, meta=None):
20 | """Save checkpoint to file.
21 |
22 |     The checkpoint will have 4 fields: ``meta``, ``state_dict``,
23 |     ``optimizer`` and ``amp``. By default ``meta`` will contain version
24 | and time info.
25 |
26 | Args:
27 | model (Module): Module whose params are to be saved.
28 | filename (str): Checkpoint filename.
29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """
32 | if meta is None:
33 | meta = {}
34 | elif not isinstance(meta, dict):
35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
37 |
38 | if is_module_wrapper(model):
39 | model = model.module
40 |
41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None:
42 | # save class name to the meta
43 | meta.update(CLASSES=model.CLASSES)
44 |
45 | checkpoint = {
46 | 'meta': meta,
47 | 'state_dict': weights_to_cpu(get_state_dict(model))
48 | }
49 | # save optimizer state dict in the checkpoint
50 | if isinstance(optimizer, Optimizer):
51 | checkpoint['optimizer'] = optimizer.state_dict()
52 | elif isinstance(optimizer, dict):
53 | checkpoint['optimizer'] = {}
54 | for name, optim in optimizer.items():
55 | checkpoint['optimizer'][name] = optim.state_dict()
56 |
57 |     # save amp state dict in the checkpoint (skipped if apex was not imported)
58 |     if 'apex' in globals(): checkpoint['amp'] = apex.amp.state_dict()
59 |
60 | if filename.startswith('pavi://'):
61 | try:
62 | from pavi import modelcloud
63 | from pavi.exception import NodeNotFoundError
64 | except ImportError:
65 | raise ImportError(
66 | 'Please install pavi to load checkpoint from modelcloud.')
67 | model_path = filename[7:]
68 | root = modelcloud.Folder()
69 | model_dir, model_name = osp.split(model_path)
70 | try:
71 | model = modelcloud.get(model_dir)
72 | except NodeNotFoundError:
73 | model = root.create_training_model(model_dir)
74 | with TemporaryDirectory() as tmp_dir:
75 | checkpoint_file = osp.join(tmp_dir, model_name)
76 | with open(checkpoint_file, 'wb') as f:
77 | torch.save(checkpoint, f)
78 | f.flush()
79 | model.create_file(checkpoint_file, name=model_name)
80 | else:
81 | mmcv.mkdir_or_exist(osp.dirname(filename))
82 | # immediately flush buffer
83 | with open(filename, 'wb') as f:
84 | torch.save(checkpoint, f)
85 | f.flush()
--------------------------------------------------------------------------------
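A checkpoint written by `save_checkpoint` above is an ordinary `torch.save` dict; a sketch of inspecting one (the filename is illustrative):

```python
import torch

# 'amp' is present only when apex state was saved alongside the weights
ckpt = torch.load('work_dirs/epoch_12.pth', map_location='cpu')
print(sorted(ckpt))                  # e.g. ['amp', 'meta', 'optimizer', 'state_dict']
print(ckpt['meta']['mmcv_version'])  # version and timestamp recorded at save time
```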
/object_detection/mmcv_custom/runner/epoch_based_runner.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
2 | import os.path as osp
3 | import platform
4 | import shutil
5 |
6 | import torch
7 | from torch.optim import Optimizer
8 |
9 | import mmcv
10 | from mmcv.runner import RUNNERS, EpochBasedRunner
11 | from .checkpoint import save_checkpoint
12 |
13 | try:
14 | import apex
15 | except ImportError:
16 | print('apex is not installed')
17 |
18 |
19 | @RUNNERS.register_module()
20 | class EpochBasedRunnerAmp(EpochBasedRunner):
21 | """Epoch-based Runner with AMP support.
22 |
23 |     This runner trains models epoch by epoch.
24 | """
25 |
26 | def save_checkpoint(self,
27 | out_dir,
28 | filename_tmpl='epoch_{}.pth',
29 | save_optimizer=True,
30 | meta=None,
31 | create_symlink=True):
32 | """Save the checkpoint.
33 |
34 | Args:
35 | out_dir (str): The directory that checkpoints are saved.
36 | filename_tmpl (str, optional): The checkpoint filename template,
37 | which contains a placeholder for the epoch number.
38 | Defaults to 'epoch_{}.pth'.
39 | save_optimizer (bool, optional): Whether to save the optimizer to
40 | the checkpoint. Defaults to True.
41 | meta (dict, optional): The meta information to be saved in the
42 | checkpoint. Defaults to None.
43 | create_symlink (bool, optional): Whether to create a symlink
44 | "latest.pth" to point to the latest checkpoint.
45 | Defaults to True.
46 | """
47 | if meta is None:
48 | meta = dict(epoch=self.epoch + 1, iter=self.iter)
49 | elif isinstance(meta, dict):
50 | meta.update(epoch=self.epoch + 1, iter=self.iter)
51 | else:
52 | raise TypeError(
53 | f'meta should be a dict or None, but got {type(meta)}')
54 | if self.meta is not None:
55 | meta.update(self.meta)
56 |
57 | filename = filename_tmpl.format(self.epoch + 1)
58 | filepath = osp.join(out_dir, filename)
59 | optimizer = self.optimizer if save_optimizer else None
60 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
61 |         # in some environments `os.symlink` is not supported, so you may
62 |         # need to set `create_symlink` to False
63 | if create_symlink:
64 | dst_file = osp.join(out_dir, 'latest.pth')
65 | if platform.system() != 'Windows':
66 | mmcv.symlink(filename, dst_file)
67 | else:
68 | shutil.copy(filepath, dst_file)
69 |
70 | def resume(self,
71 | checkpoint,
72 | resume_optimizer=True,
73 | map_location='default'):
74 | if map_location == 'default':
75 | if torch.cuda.is_available():
76 | device_id = torch.cuda.current_device()
77 | checkpoint = self.load_checkpoint(
78 | checkpoint,
79 | map_location=lambda storage, loc: storage.cuda(device_id))
80 | else:
81 | checkpoint = self.load_checkpoint(checkpoint)
82 | else:
83 | checkpoint = self.load_checkpoint(
84 | checkpoint, map_location=map_location)
85 |
86 | self._epoch = checkpoint['meta']['epoch']
87 | self._iter = checkpoint['meta']['iter']
88 | if 'optimizer' in checkpoint and resume_optimizer:
89 | if isinstance(self.optimizer, Optimizer):
90 | self.optimizer.load_state_dict(checkpoint['optimizer'])
91 | elif isinstance(self.optimizer, dict):
92 | for k in self.optimizer.keys():
93 | self.optimizer[k].load_state_dict(
94 | checkpoint['optimizer'][k])
95 | else:
96 | raise TypeError(
97 | 'Optimizer should be dict or torch.optim.Optimizer '
98 | f'but got {type(self.optimizer)}')
99 |
100 | if 'amp' in checkpoint:
101 | apex.amp.load_state_dict(checkpoint['amp'])
102 | self.logger.info('load amp state dict')
103 |
104 | self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter)
105 |
--------------------------------------------------------------------------------
/object_detection/mmcv_custom/runner/optimizer.py:
--------------------------------------------------------------------------------
1 | from mmcv.runner import OptimizerHook, HOOKS
2 | try:
3 | import apex
4 | except ImportError:
5 | print('apex is not installed')
6 |
7 |
8 | @HOOKS.register_module()
9 | class DistOptimizerHook(OptimizerHook):
10 | """Optimizer hook for distributed training."""
11 |
12 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False):
13 | self.grad_clip = grad_clip
14 | self.coalesce = coalesce
15 | self.bucket_size_mb = bucket_size_mb
16 | self.update_interval = update_interval
17 | self.use_fp16 = use_fp16
18 |
19 | def before_run(self, runner):
20 | runner.optimizer.zero_grad()
21 |
22 | def after_train_iter(self, runner):
23 | runner.outputs['loss'] /= self.update_interval
24 | if self.use_fp16:
25 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss:
26 | scaled_loss.backward()
27 | else:
28 | runner.outputs['loss'].backward()
29 | if self.every_n_iters(runner, self.update_interval):
30 | if self.grad_clip is not None:
31 | self.clip_grads(runner.model.parameters())
32 | runner.optimizer.step()
33 | runner.optimizer.zero_grad()
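# Usage sketch (added for illustration): the hook is selected through
# `optimizer_config` in an mmdet config. `update_interval` > 1 turns
# `after_train_iter` into gradient accumulation (the loss is divided by the
# interval and the optimizer only steps every `update_interval` iterations),
# while `use_fp16=True` routes the backward pass through apex AMP loss scaling
# (the model/optimizer are wrapped by `apex.amp.initialize(..., opt_level="O1")`
# in mmdet_custom/apis/train.py):
#
#   optimizer_config = dict(
#       type='DistOptimizerHook',
#       update_interval=2,  # example value; doubles the effective batch size
#       grad_clip=None,
#       use_fp16=True)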
--------------------------------------------------------------------------------
/object_detection/mmdet_custom/apis/train.py:
--------------------------------------------------------------------------------
1 | import random
2 | import warnings
3 |
4 | import numpy as np
5 | import torch
6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
7 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
8 | Fp16OptimizerHook, OptimizerHook, build_optimizer,
9 | build_runner)
10 | from mmcv.utils import build_from_cfg
11 |
12 | from mmdet.core import DistEvalHook, EvalHook
13 | from mmdet.datasets import (build_dataloader, build_dataset,
14 | replace_ImageToTensor)
15 | from mmdet.utils import get_root_logger
16 | try:
17 | import apex
18 | except ImportError:
19 | print('apex is not installed')
20 |
21 |
22 | def set_random_seed(seed, deterministic=False):
23 | """Set random seed.
24 |
25 | Args:
26 | seed (int): Seed to be used.
27 | deterministic (bool): Whether to set the deterministic option for
28 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
29 | to True and `torch.backends.cudnn.benchmark` to False.
30 | Default: False.
31 | """
32 | random.seed(seed)
33 | np.random.seed(seed)
34 | torch.manual_seed(seed)
35 | torch.cuda.manual_seed_all(seed)
36 | if deterministic:
37 | torch.backends.cudnn.deterministic = True
38 | torch.backends.cudnn.benchmark = False
39 |
40 |
41 | def train_detector(model,
42 | dataset,
43 | cfg,
44 | distributed=False,
45 | validate=False,
46 | timestamp=None,
47 | meta=None):
48 | logger = get_root_logger(cfg.log_level)
49 |
50 | # prepare data loaders
51 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
52 | if 'imgs_per_gpu' in cfg.data:
53 | logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
54 | 'Please use "samples_per_gpu" instead')
55 | if 'samples_per_gpu' in cfg.data:
56 | logger.warning(
57 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
58 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
59 | f'={cfg.data.imgs_per_gpu} is used in this experiment')
60 | else:
61 | logger.warning(
62 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"='
63 | f'{cfg.data.imgs_per_gpu} in this experiment')
64 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
65 |
66 | data_loaders = [
67 | build_dataloader(
68 | ds,
69 | cfg.data.samples_per_gpu,
70 | cfg.data.workers_per_gpu,
71 | # cfg.gpus will be ignored if distributed
72 | len(cfg.gpu_ids),
73 | dist=distributed,
74 | seed=cfg.seed) for ds in dataset
75 | ]
76 |
77 | # build optimizer
78 | optimizer = build_optimizer(model, cfg.optimizer)
79 |
80 | # use apex fp16 optimizer
81 | if cfg.optimizer_config.get("type", None) and cfg.optimizer_config["type"] == "DistOptimizerHook":
82 | if cfg.optimizer_config.get("use_fp16", False):
83 | model, optimizer = apex.amp.initialize(
84 | model.cuda(), optimizer, opt_level="O1")
85 | for m in model.modules():
86 | if hasattr(m, "fp16_enabled"):
87 | m.fp16_enabled = True
88 |
89 | # put model on gpus
90 | if distributed:
91 | find_unused_parameters = cfg.get('find_unused_parameters', False)
92 | # Sets the `find_unused_parameters` parameter in
93 | # torch.nn.parallel.DistributedDataParallel
94 | model = MMDistributedDataParallel(
95 | model.cuda(),
96 | device_ids=[torch.cuda.current_device()],
97 | broadcast_buffers=False,
98 | find_unused_parameters=find_unused_parameters)
99 | else:
100 | model = MMDataParallel(
101 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
102 |
103 | if 'runner' not in cfg:
104 | cfg.runner = {
105 | 'type': 'EpochBasedRunner',
106 | 'max_epochs': cfg.total_epochs
107 | }
108 | warnings.warn(
109 | 'config is now expected to have a `runner` section, '
110 | 'please set `runner` in your config.', UserWarning)
111 | else:
112 | if 'total_epochs' in cfg:
113 | assert cfg.total_epochs == cfg.runner.max_epochs
114 |
115 | # build runner
116 | runner = build_runner(
117 | cfg.runner,
118 | default_args=dict(
119 | model=model,
120 | optimizer=optimizer,
121 | work_dir=cfg.work_dir,
122 | logger=logger,
123 | meta=meta))
124 |
125 | # an ugly workaround to make .log and .log.json filenames the same
126 | runner.timestamp = timestamp
127 |
128 | # fp16 setting
129 | fp16_cfg = cfg.get('fp16', None)
130 | if fp16_cfg is not None:
131 | optimizer_config = Fp16OptimizerHook(
132 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
133 | elif distributed and 'type' not in cfg.optimizer_config:
134 | optimizer_config = OptimizerHook(**cfg.optimizer_config)
135 | else:
136 | optimizer_config = cfg.optimizer_config
137 |
138 | # register hooks
139 | runner.register_training_hooks(cfg.lr_config, optimizer_config,
140 | cfg.checkpoint_config, cfg.log_config,
141 | cfg.get('momentum_config', None))
142 | if distributed:
143 | if isinstance(runner, EpochBasedRunner):
144 | runner.register_hook(DistSamplerSeedHook())
145 |
146 | # register eval hooks
147 | if validate:
148 | # Support batch_size > 1 in validation
149 | val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
150 | if val_samples_per_gpu > 1:
151 | # Replace 'ImageToTensor' to 'DefaultFormatBundle'
152 | cfg.data.val.pipeline = replace_ImageToTensor(
153 | cfg.data.val.pipeline)
154 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
155 | val_dataloader = build_dataloader(
156 | val_dataset,
157 | samples_per_gpu=val_samples_per_gpu,
158 | workers_per_gpu=cfg.data.workers_per_gpu,
159 | dist=distributed,
160 | shuffle=False)
161 | eval_cfg = cfg.get('evaluation', {})
162 | eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
163 | eval_hook = DistEvalHook if distributed else EvalHook
164 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
165 |
166 | # user-defined hooks
167 | if cfg.get('custom_hooks', None):
168 | custom_hooks = cfg.custom_hooks
169 | assert isinstance(custom_hooks, list), \
170 | f'custom_hooks expect list type, but got {type(custom_hooks)}'
171 | for hook_cfg in cfg.custom_hooks:
172 | assert isinstance(hook_cfg, dict), \
173 | 'Each item in custom_hooks expects dict type, but got ' \
174 | f'{type(hook_cfg)}'
175 | hook_cfg = hook_cfg.copy()
176 | priority = hook_cfg.pop('priority', 'NORMAL')
177 | hook = build_from_cfg(hook_cfg, HOOKS)
178 | runner.register_hook(hook, priority=priority)
179 |
180 | if cfg.resume_from:
181 | runner.resume(cfg.resume_from)
182 | elif cfg.load_from:
183 | runner.load_checkpoint(cfg.load_from)
184 | runner.run(data_loaders, cfg.workflow)
--------------------------------------------------------------------------------
/object_detection/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import os
4 | import os.path as osp
5 | import time
6 | import warnings
7 |
8 | import mmcv
9 | import torch
10 | from mmcv import Config, DictAction
11 | from mmcv.runner import get_dist_info, init_dist
12 | from mmcv.utils import get_git_hash
13 |
14 | from mmdet import __version__
15 | from mmdet.apis import set_random_seed #, train_detector
16 | from mmdet_custom.apis.train import train_detector
17 | from mmdet.datasets import build_dataset
18 | from mmdet.models import build_detector
19 | from mmdet.utils import collect_env, get_root_logger
20 | import mmcv_custom.runner.epoch_based_runner
21 | import mmcv_custom.runner.optimizer
22 |
23 | import transxnet
24 |
25 | def parse_args():
26 | parser = argparse.ArgumentParser(description='Train a detector')
27 | parser.add_argument('config', help='train config file path')
28 | parser.add_argument('--work-dir', help='the dir to save logs and models')
29 | parser.add_argument(
30 | '--resume-from', help='the checkpoint file to resume from')
31 | parser.add_argument(
32 | '--no-validate',
33 | action='store_true',
34 | help='whether not to evaluate the checkpoint during training')
35 | group_gpus = parser.add_mutually_exclusive_group()
36 | group_gpus.add_argument(
37 | '--gpus',
38 | type=int,
39 | help='number of gpus to use '
40 | '(only applicable to non-distributed training)')
41 | group_gpus.add_argument(
42 | '--gpu-ids',
43 | type=int,
44 | nargs='+',
45 | help='ids of gpus to use '
46 | '(only applicable to non-distributed training)')
47 | parser.add_argument('--seed', type=int, default=None, help='random seed')
48 | parser.add_argument(
49 | '--deterministic',
50 | action='store_true',
51 | help='whether to set deterministic options for CUDNN backend.')
52 | parser.add_argument(
53 | '--options',
54 | nargs='+',
55 | action=DictAction,
56 | help='override some settings in the used config, the key-value pair '
57 | 'in xxx=yyy format will be merged into config file (deprecate), '
58 | 'change to --cfg-options instead.')
59 | parser.add_argument(
60 | '--cfg-options',
61 | nargs='+',
62 | action=DictAction,
63 | help='override some settings in the used config, the key-value pair '
64 | 'in xxx=yyy format will be merged into config file. If the value to '
65 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
66 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
67 | 'Note that the quotation marks are necessary and that no white space '
68 | 'is allowed.')
69 | parser.add_argument(
70 | '--launcher',
71 | choices=['none', 'pytorch', 'slurm', 'mpi'],
72 | default='none',
73 | help='job launcher')
74 | parser.add_argument('--local_rank', type=int, default=0)
75 | args = parser.parse_args()
76 | if 'LOCAL_RANK' not in os.environ:
77 | os.environ['LOCAL_RANK'] = str(args.local_rank)
78 |
79 | if args.options and args.cfg_options:
80 | raise ValueError(
81 | '--options and --cfg-options cannot be both '
82 | 'specified, --options is deprecated in favor of --cfg-options')
83 | if args.options:
84 | warnings.warn('--options is deprecated in favor of --cfg-options')
85 | args.cfg_options = args.options
86 |
87 | return args
88 |
89 |
90 | def main():
91 | args = parse_args()
92 |
93 | cfg = Config.fromfile(args.config)
94 | if args.cfg_options is not None:
95 | cfg.merge_from_dict(args.cfg_options)
96 | # import modules from string list.
97 | if cfg.get('custom_imports', None):
98 | from mmcv.utils import import_modules_from_strings
99 | import_modules_from_strings(**cfg['custom_imports'])
100 | # set cudnn_benchmark
101 | if cfg.get('cudnn_benchmark', False):
102 | torch.backends.cudnn.benchmark = True
103 |
104 | # work_dir is determined in this priority: CLI > config file > filename
105 | if args.work_dir is not None:
106 | # update configs according to CLI args if args.work_dir is not None
107 | cfg.work_dir = args.work_dir
108 | elif cfg.get('work_dir', None) is None:
109 | # use config filename as default work_dir if cfg.work_dir is None
110 | cfg.work_dir = osp.join('./work_dirs',
111 | osp.splitext(osp.basename(args.config))[0])
112 | if args.resume_from is not None:
113 | cfg.resume_from = args.resume_from
114 | if args.gpu_ids is not None:
115 | cfg.gpu_ids = args.gpu_ids
116 | else:
117 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
118 |
119 | # init distributed env first, since logger depends on the dist info.
120 | if args.launcher == 'none':
121 | distributed = False
122 | else:
123 | distributed = True
124 | init_dist(args.launcher, **cfg.dist_params)
125 | # re-set gpu_ids with distributed training mode
126 | _, world_size = get_dist_info()
127 | cfg.gpu_ids = range(world_size)
128 |
129 | # create work_dir
130 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
131 | # dump config
132 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
133 | # init the logger before other steps
134 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
135 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
136 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
137 |
138 | # init the meta dict to record some important information such as
139 | # environment info and seed, which will be logged
140 | meta = dict()
141 | # log env info
142 | env_info_dict = collect_env()
143 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
144 | dash_line = '-' * 60 + '\n'
145 | logger.info('Environment info:\n' + dash_line + env_info + '\n' +
146 | dash_line)
147 | meta['env_info'] = env_info
148 | meta['config'] = cfg.pretty_text
149 | # log some basic info
150 | logger.info(f'Distributed training: {distributed}')
151 | logger.info(f'Config:\n{cfg.pretty_text}')
152 |
153 | # set random seeds
154 | if args.seed is not None:
155 | logger.info(f'Set random seed to {args.seed}, '
156 | f'deterministic: {args.deterministic}')
157 | set_random_seed(args.seed, deterministic=args.deterministic)
158 | cfg.seed = args.seed
159 | meta['seed'] = args.seed
160 | meta['exp_name'] = osp.basename(args.config)
161 |
162 | model = build_detector(
163 | cfg.model,
164 | train_cfg=cfg.get('train_cfg'),
165 | test_cfg=cfg.get('test_cfg'))
166 | model.init_weights()
167 |
168 | logger.info(model)
169 |
170 | datasets = [build_dataset(cfg.data.train)]
171 | if len(cfg.workflow) == 2:
172 | val_dataset = copy.deepcopy(cfg.data.val)
173 | val_dataset.pipeline = cfg.data.train.pipeline
174 | datasets.append(build_dataset(val_dataset))
175 | if cfg.checkpoint_config is not None:
176 | # save mmdet version, config file content and class names in
177 | # checkpoints as meta data
178 | cfg.checkpoint_config.meta = dict(
179 | mmdet_version=__version__ + get_git_hash()[:7],
180 | CLASSES=datasets[0].CLASSES)
181 | # add an attribute for visualization convenience
182 | model.CLASSES = datasets[0].CLASSES
183 | train_detector(
184 | model,
185 | datasets,
186 | cfg,
187 | distributed=distributed,
188 | validate=(not args.no_validate),
189 | timestamp=timestamp,
190 | meta=meta)
191 |
192 |
193 | if __name__ == '__main__':
194 | torch.cuda.empty_cache()
195 | main()
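# Usage sketch (added for illustration; see also dist_train.sh): an 8-GPU
# distributed run with one of the provided configs would look like
#
#   python3 -m torch.distributed.launch --nproc_per_node=8 \
#       train.py configs/mask_rcnn_transx_t_fpn_1x_coco.py --launcher pytorch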
--------------------------------------------------------------------------------
/scripts/train_base.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 -m torch.distributed.launch \
3 | --nproc_per_node=8 \
4 | train.py \
5 | /path/to/imagenet/ \
6 | --batch-size 128 \
7 | --pin-mem \
8 | --model transxnet_b \
9 | --drop-path 0.4 \
10 | --lr 2e-3 \
11 | --warmup-epochs 5 \
12 | --sync-bn \
13 | --model-ema \
14 | --model-ema-decay 0.99985 \
15 | --val-start-epoch 250 \
16 | --val-freq 50 \
17 | --native-amp \
18 | --output /path/to/save-checkpoint/
--------------------------------------------------------------------------------
/scripts/train_small.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 -m torch.distributed.launch \
3 | --nproc_per_node=8 \
4 | train.py \
5 | /path/to/imagenet/ \
6 | --batch-size 128 \
7 | --pin-mem \
8 | --model transxnet_s \
9 | --drop-path 0.2 \
10 | --lr 2e-3 \
11 | --warmup-epochs 5 \
12 | --sync-bn \
13 | --model-ema \
14 | --model-ema-decay 0.99985 \
15 | --val-start-epoch 250 \
16 | --val-freq 50 \
17 | --native-amp \
18 | --output /path/to/save-checkpoint/
--------------------------------------------------------------------------------
/scripts/train_tiny.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 -m torch.distributed.launch \
3 | --nproc_per_node=8 \
4 | train.py \
5 | /path/to/imagenet/ \
6 | --batch-size 128 \
7 | --pin-mem \
8 | --model transxnet_t \
9 | --drop-path 0.1 \
10 | --lr 2e-3 \
11 | --warmup-epochs 5 \
12 | --sync-bn \
13 | --model-ema \
14 | --model-ema-decay 0.9998 \
15 | --val-start-epoch 250 \
16 | --val-freq 50 \
17 | --native-amp \
18 | --output /path/to/save-checkpoint/
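# Note (added for clarity): the three scripts differ only in the model name,
# the drop-path rate (0.4/0.2/0.1 for base/small/tiny), and the EMA decay
# (0.99985 for base/small, 0.9998 for tiny); with 8 processes at
# --batch-size 128 each, the global batch size is 8 * 128 = 1024.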
--------------------------------------------------------------------------------
/semantic_segmentation/README.md:
--------------------------------------------------------------------------------
1 | # Applying TransXNet to Semantic Segmentation
2 |
3 | For details, please refer to "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://arxiv.org/abs/2310.19380)".
4 |
5 | ## 1. Requirements
6 |
7 | We highly recommend using the dependency versions below to ensure reproducibility:
8 | ```
9 | # Environments:
10 | cuda==11.3
11 | python==3.8.15
12 | # Packages:
13 | mmcv==1.7.1
14 | timm==0.6.12
15 | torch==1.12.1
16 | torchvision==0.13.1
17 | mmsegmentation==0.30.0
18 | ```
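
A minimal setup along these lines should work; the exact `mmcv-full` wheel index below is an assumption that depends on your CUDA/PyTorch combination, so double-check it against the MMCV installation guide:
```
# assuming a CUDA 11.3 build of PyTorch 1.12.1
pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113
pip install mmcv-full==1.7.1 -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.12/index.html
pip install timm==0.6.12 mmsegmentation==0.30.0
```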
19 |
20 | ## 2. Data Preparation
21 |
22 | Prepare ADE20K according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md).
23 |
24 |
25 | ## 3. Main Results on ADE20K with Pretrained Models
26 |
27 | | Method | Backbone | Pretrain | Iters | mIoU | Config | Download |
28 | | --- | --- | --- |:---:|:---:| --- | --- |
29 | | Semantic FPN | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 80K | 45.5 | [config](configs/sfpn_transxnet_tiny.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_tiny_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_tiny.pth) |
30 | | Semantic FPN | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 80K | 48.5 | [config](configs/sfpn_transxnet_small.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_small_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_small.pth) |
31 | | Semantic FPN | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 80K | 49.9 | [config](configs/sfpn_transxnet_base.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_base_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_base.pth) |
32 |
33 |
34 | ## 4. Train
35 | To train ``TransXNet + Semantic FPN`` models on ADE20K with 8 GPUs (single node), run:
36 | ```
37 | bash scripts/train_sfpn_transxnet_tiny.sh # train TransXNet-T + Semantic FPN
38 | bash scripts/train_sfpn_transxnet_small.sh # train TransXNet-S + Semantic FPN
39 | bash scripts/train_sfpn_transxnet_base.sh # train TransXNet-B + Semantic FPN
40 | ```
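
Each script presumably wraps the usual `torch.distributed.launch` entry point; a single run would look roughly like:
```
python3 -m torch.distributed.launch \
    --nproc_per_node=8 \
    --master_port=$((RANDOM+8888)) \
    train.py \
    configs/sfpn_transxnet_tiny.py \
    --launcher pytorch
```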
41 |
42 | ## 5. Validation
43 | To evaluate ``TransXNet + Semantic FPN`` models on ADE20K, run:
44 | ```
45 | # Take TransXNet-T + Semantic FPN as an example:
46 | python3 -m torch.distributed.launch \
47 | --nproc_per_node=8 \
48 | --master_port=$((RANDOM+8888)) \
49 | test.py \
50 | configs/sfpn_transxnet_tiny.py \
51 | path/to/checkpoint \
52 | --out work_dirs/output.pkl \
53 | --eval mIoU \
54 | --launcher pytorch
55 | ```
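
For a quick single-GPU check (a non-distributed sketch of the same evaluation), the launcher can be dropped:
```
python3 test.py \
    configs/sfpn_transxnet_tiny.py \
    path/to/checkpoint \
    --eval mIoU
```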
56 |
57 | ## Citation
58 | If you find this project useful for your research, please consider citing:
59 | ```
60 | @article{lou2023transxnet,
61 | title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition},
62 | author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu},
63 | journal={IEEE Transactions on Neural Networks and Learning Systems},
64 | year={2025}
65 | }
66 | ```
67 |
68 | ## Contact
69 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com.
70 |
--------------------------------------------------------------------------------
/semantic_segmentation/configs/_base_/datasets/ade20k_sfpn.py:
--------------------------------------------------------------------------------
1 | # copied from uniformer
2 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/configs/_base_/datasets/ade20k.py
3 |
4 | # dataset settings
5 | dataset_type = 'ADE20KDataset'
6 | data_root = '/mnt/private_dataset/ade/ADEChallengeData2016/'
7 | img_norm_cfg = dict(
8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
9 | crop_size = (512, 512)
10 | train_pipeline = [
11 | dict(type='LoadImageFromFile'),
12 | dict(type='LoadAnnotations', reduce_zero_label=True),
13 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
15 | dict(type='RandomFlip', prob=0.5),
16 | dict(type='PhotoMetricDistortion'),
17 | dict(type='Normalize', **img_norm_cfg),
18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
19 | dict(type='DefaultFormatBundle'),
20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']),
21 | ]
22 | test_pipeline = [
23 | dict(type='LoadImageFromFile'),
24 | dict(
25 | type='MultiScaleFlipAug',
26 | img_scale=(2048, 512),
27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
28 | flip=False,
29 | transforms=[
30 | dict(type='AlignResize', keep_ratio=True, size_divisor=32),
31 | dict(type='RandomFlip'),
32 | dict(type='Normalize', **img_norm_cfg),
33 | dict(type='ImageToTensor', keys=['img']),
34 | dict(type='Collect', keys=['img']),
35 | ])
36 | ]
37 | data = dict(
38 | samples_per_gpu=4,
39 | workers_per_gpu=4,
40 | train=dict(
41 | type='RepeatDataset',
42 | times=50,
43 | dataset=dict(
44 | type=dataset_type,
45 | data_root=data_root,
46 | img_dir='images/training',
47 | ann_dir='annotations/training',
48 | pipeline=train_pipeline)),
49 | val=dict(
50 | type=dataset_type,
51 | data_root=data_root,
52 | img_dir='images/validation',
53 | ann_dir='annotations/validation',
54 | pipeline=test_pipeline),
55 | test=dict(
56 | type=dataset_type,
57 | data_root=data_root,
58 | img_dir='images/validation',
59 | ann_dir='annotations/validation',
60 | pipeline=test_pipeline))
61 |
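# Note (added for clarity): RepeatDataset with times=50 only repeats the file
# list so that dataloader "epochs" restart far less often; it does not add
# data. With the IterBasedRunner used by these configs, the training length
# is still fixed by max_iters.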
--------------------------------------------------------------------------------
/semantic_segmentation/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | # yapf:disable
2 | log_config = dict(
3 | interval=50,
4 | hooks=[
5 | dict(type='TextLoggerHook', by_epoch=False),
6 | # dict(type='CustomizedTextLoggerHook', by_epoch=False),
7 | # dict(type='TensorboardLoggerHook'),
8 | ])
9 | # yapf:enable
10 | dist_params = dict(backend='nccl')
11 | log_level = 'INFO'
12 | load_from = None
13 | resume_from = None
14 | workflow = [('train', 1)]
15 | cudnn_benchmark = True
16 |
--------------------------------------------------------------------------------
/semantic_segmentation/configs/_base_/models/fpn_r50.py:
--------------------------------------------------------------------------------
1 | # copied from the mmsegmentation official config
2 | # https://github.com/open-mmlab/mmsegmentation/blob/master/configs/_base_/models/fpn_r50.py
3 |
4 |
5 | # model settings
6 | norm_cfg = dict(type='SyncBN', requires_grad=True)
7 | model = dict(
8 | type='EncoderDecoder',
9 | pretrained='open-mmlab://resnet50_v1c',
10 | backbone=dict(
11 | type='ResNetV1c',
12 | depth=50,
13 | num_stages=4,
14 | out_indices=(0, 1, 2, 3),
15 | dilations=(1, 1, 1, 1),
16 | strides=(1, 2, 2, 2),
17 | norm_cfg=norm_cfg,
18 | norm_eval=False,
19 | style='pytorch',
20 | contract_dilation=True),
21 | neck=dict(
22 | type='FPN',
23 | in_channels=[256, 512, 1024, 2048],
24 | out_channels=256,
25 | num_outs=4),
26 | decode_head=dict(
27 | type='FPNHead',
28 | in_channels=[256, 256, 256, 256],
29 | in_index=[0, 1, 2, 3],
30 | feature_strides=[4, 8, 16, 32],
31 | channels=128,
32 | dropout_ratio=0.1,
33 | num_classes=19,
34 | norm_cfg=norm_cfg,
35 | align_corners=False,
36 | loss_decode=dict(
37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
38 | # model training and testing settings
39 | train_cfg=dict(),
40 | test_cfg=dict(mode='whole'))
41 |
--------------------------------------------------------------------------------
/semantic_segmentation/configs/_base_/schedules/schedule_160k.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
3 | optimizer_config = dict()
4 | # learning policy
5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
6 | # runtime settings
7 | runner = dict(type='IterBasedRunner', max_iters=160000)
8 | checkpoint_config = dict(by_epoch=False, interval=16000)
9 | evaluation = dict(interval=16000, metric='mIoU')
--------------------------------------------------------------------------------
/semantic_segmentation/configs/_base_/schedules/schedule_20k.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
3 | optimizer_config = dict()
4 | # learning policy
5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
6 | # runtime settings
7 | runner = dict(type='IterBasedRunner', max_iters=20000)
8 | checkpoint_config = dict(by_epoch=False, interval=2000)
9 | evaluation = dict(interval=2000, metric='mIoU')
10 |
--------------------------------------------------------------------------------
/semantic_segmentation/configs/_base_/schedules/schedule_40k.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
3 | optimizer_config = dict()
4 | # learning policy
5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
6 | # runtime settings
7 | runner = dict(type='IterBasedRunner', max_iters=40000)
8 | checkpoint_config = dict(by_epoch=False, interval=4000)
9 | evaluation = dict(interval=4000, metric='mIoU')
10 |
--------------------------------------------------------------------------------
/semantic_segmentation/configs/_base_/schedules/schedule_80k.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
3 | optimizer_config = dict()
4 | # learning policy
5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
6 | # runtime settings
7 | runner = dict(type='IterBasedRunner', max_iters=80000)
8 | checkpoint_config = dict(by_epoch=False, interval=8000)
9 | evaluation = dict(interval=8000, metric='mIoU')
--------------------------------------------------------------------------------
/semantic_segmentation/configs/sfpn_transxnet_base.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/fpn_r50.py',
3 | '_base_/datasets/ade20k_sfpn.py',
4 | '_base_/default_runtime.py',
5 | ]
6 |
7 | # model.pretrained is actually loaded by backbone, see
8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32
9 |
10 | model=dict(
11 | pretrained=None,
12 | backbone=dict(
13 | _delete_=True,
14 | pretrained=True,
15 | type='transxnet_b',
16 | drop_path_rate=0.3,
17 | ),
18 | neck=dict(in_channels=[76, 152, 336, 672],),
19 | decode_head=dict(num_classes=150))
20 |
21 | ############## below we strictly follow uniformer ####################################
22 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py
23 | #############################################################################
24 | gpu_multiples = 2  # we use 8 GPUs instead of mmsegmentation's default 4, so lr*2 and max_iters/2
25 | # optimizer
26 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001)
27 | # learning policy
28 | lr_config = dict(
29 | policy='CosineAnnealing',
30 | warmup='linear',
31 | warmup_iters=1000,
32 | warmup_ratio=1.0 / 10,
33 | min_lr_ratio=1e-8)
34 | # runtime settings
35 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples)
36 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1)
37 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU')
38 | #############################################################################
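# Worked example of the scaling above (added for clarity): the dataset config
# sets samples_per_gpu=4, so 8 GPUs process 4 * 8 = 32 images/iter versus 16
# for a 4-GPU setup at the same per-GPU batch. Doubling the lr (1e-4 -> 2e-4)
# and halving max_iters (160k -> 80k) keeps the total number of images seen
# constant: 16 * 160k = 32 * 80k = 2.56M.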
39 |
40 | # NOTE: setting this to True conflicts with gradient checkpointing, see
41 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503
42 | find_unused_parameters = False
43 |
44 | # placeholder for compatibility with newer mmseg versions
45 | resume_from = None
46 | device = 'cuda'
47 |
48 | # fp32 training ->
49 | optimizer_config = dict()
50 |
51 | # AMP ->
52 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
53 | # fp16 = dict()
--------------------------------------------------------------------------------
/semantic_segmentation/configs/sfpn_transxnet_small.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/fpn_r50.py',
3 | '_base_/datasets/ade20k_sfpn.py',
4 | '_base_/default_runtime.py',
5 | ]
6 |
7 | # model.pretrained is actually loaded by backbone, see
8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32
9 |
10 | model=dict(
11 | pretrained=None,
12 | backbone=dict(
13 | _delete_=True,
14 | pretrained=True,
15 | type='transxnet_s',
16 | drop_path_rate=0.2,
17 | ),
18 | neck=dict(in_channels=[64, 128, 320, 512]),
19 | decode_head=dict(num_classes=150))
20 |
21 | ############## below we strictly follow uniformer ####################################
22 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py
23 | #############################################################################
24 | gpu_multiples = 2  # we use 8 GPUs instead of mmsegmentation's default 4, so lr*2 and max_iters/2
25 | # optimizer
26 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001)
27 | # learning policy
28 | lr_config = dict(
29 | policy='CosineAnnealing',
30 | warmup='linear',
31 | warmup_iters=1000,
32 | warmup_ratio=1.0 / 10,
33 | min_lr_ratio=1e-8)
34 | # runtime settings
35 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples)
36 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1)
37 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU')
38 | #############################################################################
39 |
40 | # NOTE: setting this to True conflicts with gradient checkpointing, see
41 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503
42 | find_unused_parameters = False
43 |
44 | # placeholder for compatibility with newer mmseg versions
45 | resume_from = None
46 | device = 'cuda'
47 |
48 | # fp32 training ->
49 | optimizer_config = dict()
50 |
51 | # AMP ->
52 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
53 | # fp16 = dict()
54 |
--------------------------------------------------------------------------------
/semantic_segmentation/configs/sfpn_transxnet_tiny.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '_base_/models/fpn_r50.py',
3 | '_base_/datasets/ade20k_sfpn.py',
4 | '_base_/default_runtime.py',
5 | ]
6 |
7 | # model.pretrained is actually loaded by backbone, see
8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32
9 | model=dict(
10 | pretrained=None,
11 | backbone=dict(
12 | _delete_=True,
13 | pretrained=True,
14 | type='transxnet_t',
15 | drop_path_rate=0.1,
16 | ),
17 | neck=dict(in_channels=[48, 96, 224, 448]),
18 | decode_head=dict(num_classes=150))
19 |
20 | ############## below we strictly follow uniformer ####################################
21 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py
22 | #############################################################################
23 | gpu_multiples = 2  # we use 8 GPUs instead of mmsegmentation's default 4, so lr*2 and max_iters/2
24 | # optimizer
25 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001)
26 | # learning policy
27 | lr_config = dict(
28 | policy='CosineAnnealing',
29 | warmup='linear',
30 | warmup_iters=1000,
31 | warmup_ratio=1.0 / 10,
32 | min_lr_ratio=1e-8)
33 | # runtime settings
34 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples)
35 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1)
36 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU')
37 | #############################################################################
38 |
39 | # NOTE: setting this to True conflicts with gradient checkpointing, see
40 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503
41 | find_unused_parameters = False
42 |
43 | # placeholder for compatibility with newer mmseg versions
44 | resume_from = None
45 | device = 'cuda'
46 |
47 | # fp32 training ->
48 | optimizer_config = dict()
49 |
50 | # AMP ->
51 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
52 | # fp16 = dict()
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # All rights reserved.
4 |
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | # -*- coding: utf-8 -*-
10 |
11 | # from .apex_runner.optimizer import DistOptimizerHook
12 | from .checkpoint import load_checkpoint
13 | from .customized_text import CustomizedTextLoggerHook
14 | from .layer_decay_optimizer_constructor import \
15 | LearningRateDecayOptimizerConstructor
16 | from .resize_transform import SETR_Resize
17 |
18 | # from .train_api import train_segmentor
19 |
20 | # __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'SETR_Resize', 'DistOptimizerHook', 'train_segmentor', 'CustomizedTextLoggerHook']
21 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'SETR_Resize', 'CustomizedTextLoggerHook']
22 |
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/__pycache__/checkpoint.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/checkpoint.cpython-38.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/__pycache__/customized_text.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/customized_text.cpython-38.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-38.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/__pycache__/resize_transform.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/resize_transform.cpython-38.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/apex_runner/checkpoint.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
2 | import os.path as osp
3 | import time
4 | from tempfile import TemporaryDirectory
5 |
6 | import torch
7 | from torch.optim import Optimizer
8 |
9 | import mmcv
10 | from mmcv.parallel import is_module_wrapper
11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict
12 |
13 | try:
14 | import apex
15 | except ImportError:
16 | print('apex is not installed')
17 |
18 |
19 | def save_checkpoint(model, filename, optimizer=None, meta=None):
20 | """Save checkpoint to file.
21 |
22 | The checkpoint can contain up to 4 fields: ``meta``, ``state_dict``,
23 | ``optimizer``, and ``amp``. By default ``meta`` will contain version
24 | and time info.
25 |
26 | Args:
27 | model (Module): Module whose params are to be saved.
28 | filename (str): Checkpoint filename.
29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """
32 | if meta is None:
33 | meta = {}
34 | elif not isinstance(meta, dict):
35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
37 |
38 | if is_module_wrapper(model):
39 | model = model.module
40 |
41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None:
42 | # save class name to the meta
43 | meta.update(CLASSES=model.CLASSES)
44 |
45 | checkpoint = {
46 | 'meta': meta,
47 | 'state_dict': weights_to_cpu(get_state_dict(model))
48 | }
49 | # save optimizer state dict in the checkpoint
50 | if isinstance(optimizer, Optimizer):
51 | checkpoint['optimizer'] = optimizer.state_dict()
52 | elif isinstance(optimizer, dict):
53 | checkpoint['optimizer'] = {}
54 | for name, optim in optimizer.items():
55 | checkpoint['optimizer'][name] = optim.state_dict()
56 |
57 | # save amp state dict in the checkpoint
58 | # checkpoint['amp'] = apex.amp.state_dict()
59 |
60 | if filename.startswith('pavi://'):
61 | try:
62 | from pavi import modelcloud
63 | from pavi.exception import NodeNotFoundError
64 | except ImportError:
65 | raise ImportError(
66 | 'Please install pavi to load checkpoint from modelcloud.')
67 | model_path = filename[7:]
68 | root = modelcloud.Folder()
69 | model_dir, model_name = osp.split(model_path)
70 | try:
71 | model = modelcloud.get(model_dir)
72 | except NodeNotFoundError:
73 | model = root.create_training_model(model_dir)
74 | with TemporaryDirectory() as tmp_dir:
75 | checkpoint_file = osp.join(tmp_dir, model_name)
76 | with open(checkpoint_file, 'wb') as f:
77 | torch.save(checkpoint, f)
78 | f.flush()
79 | model.create_file(checkpoint_file, name=model_name)
80 | else:
81 | mmcv.mkdir_or_exist(osp.dirname(filename))
82 | # immediately flush buffer
83 | with open(filename, 'wb') as f:
84 | torch.save(checkpoint, f)
85 | f.flush()
86 |
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/customized_text.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # All rights reserved.
4 |
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | import datetime
10 | from collections import OrderedDict
11 |
12 | import mmcv
13 | import torch
14 | from mmcv.runner import HOOKS, TextLoggerHook
15 |
16 |
17 | @HOOKS.register_module()
18 | class CustomizedTextLoggerHook(TextLoggerHook):
19 | """Customized Text Logger hook.
20 |
21 | This logger prints out both lr and layer_0_lr.
22 |
23 | """
24 |
25 | def _log_info(self, log_dict, runner):
26 | # print exp name for users to distinguish experiments
27 | # at every ``interval_exp_name`` iterations and the end of each epoch
28 | if runner.meta is not None and 'exp_name' in runner.meta:
29 | if (self.every_n_iters(runner, self.interval_exp_name)) or (
30 | self.by_epoch and self.end_of_epoch(runner)):
31 | exp_info = f'Exp name: {runner.meta["exp_name"]}'
32 | runner.logger.info(exp_info)
33 |
34 | if log_dict['mode'] == 'train':
35 | lr_str = {}
36 | for lr_type in ['lr', 'layer_0_lr']:
37 | if isinstance(log_dict[lr_type], dict):
38 | lr_str[lr_type] = []
39 | for k, val in log_dict[lr_type].items():
40 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}')
41 | lr_str[lr_type] = ' '.join(lr_str[lr_type])
42 | else:
43 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}'
44 |
45 | # by epoch: Epoch [4][100/1000]
46 | # by iter: Iter [100/100000]
47 | if self.by_epoch:
48 | log_str = f'Epoch [{log_dict["epoch"]}]' \
49 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
50 | else:
51 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
52 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, '
53 |
54 | if 'time' in log_dict.keys():
55 | self.time_sec_tot += (log_dict['time'] * self.interval)
56 | time_sec_avg = self.time_sec_tot / (
57 | runner.iter - self.start_iter + 1)
58 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
59 | eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
60 | log_str += f'eta: {eta_str}, '
61 | log_str += f'time: {log_dict["time"]:.3f}, ' \
62 | f'data_time: {log_dict["data_time"]:.3f}, '
63 | # statistic memory
64 | if torch.cuda.is_available():
65 | log_str += f'memory: {log_dict["memory"]}, '
66 | else:
67 | # val/test time
68 | # here 1000 is the length of the val dataloader
69 | # by epoch: Epoch[val] [4][1000]
70 | # by iter: Iter[val] [1000]
71 | if self.by_epoch:
72 | log_str = f'Epoch({log_dict["mode"]}) ' \
73 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
74 | else:
75 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'
76 |
77 | log_items = []
78 | for name, val in log_dict.items():
79 | # TODO: resolve this hack
80 | # these items have been in log_str
81 | if name in [
82 | 'mode', 'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time',
83 | 'memory', 'epoch'
84 | ]:
85 | continue
86 | if isinstance(val, float):
87 | val = f'{val:.4f}'
88 | log_items.append(f'{name}: {val}')
89 | log_str += ', '.join(log_items)
90 |
91 | runner.logger.info(log_str)
92 |
93 |
94 | def log(self, runner):
95 | if 'eval_iter_num' in runner.log_buffer.output:
96 | # this doesn't modify runner.iter and is regardless of by_epoch
97 | cur_iter = runner.log_buffer.output.pop('eval_iter_num')
98 | else:
99 | cur_iter = self.get_iter(runner, inner_iter=True)
100 |
101 | log_dict = OrderedDict(
102 | mode=self.get_mode(runner),
103 | epoch=self.get_epoch(runner),
104 | iter=cur_iter)
105 |
106 | # record lr and layer_0_lr
107 | cur_lr = runner.current_lr()
108 | if isinstance(cur_lr, list):
109 | log_dict['layer_0_lr'] = min(cur_lr)
110 | log_dict['lr'] = max(cur_lr)
111 | else:
112 | assert isinstance(cur_lr, dict)
113 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {}
114 | for k, lr_ in cur_lr.items():
115 | assert isinstance(lr_, list)
116 | log_dict['layer_0_lr'].update({k: min(lr_)})
117 | log_dict['lr'].update({k: max(lr_)})
118 |
119 | if 'time' in runner.log_buffer.output:
120 | # statistic memory
121 | if torch.cuda.is_available():
122 | log_dict['memory'] = self._get_max_memory(runner)
123 |
124 | log_dict = dict(log_dict, **runner.log_buffer.output)
125 |
126 | self._log_info(log_dict, runner)
127 | self._dump_log(log_dict, runner)
128 | return log_dict
129 |
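# Usage sketch (added for illustration): the hook is enabled through
# `log_config`, as already hinted by the commented-out entry in
# configs/_base_/default_runtime.py:
#
#   log_config = dict(
#       interval=50,
#       hooks=[dict(type='CustomizedTextLoggerHook', by_epoch=False)])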
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/layer_decay_optimizer_constructor.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # All rights reserved.
4 |
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | import json
10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor
11 | from mmcv.runner import get_dist_info
12 |
13 |
14 | def get_num_layer_layer_wise(var_name, num_max_layer=12):
15 |
16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"):
17 | return 0
18 | elif var_name.startswith("backbone.downsample_layers"):
19 | stage_id = int(var_name.split('.')[2])
20 | if stage_id == 0:
21 | layer_id = 0
22 | elif stage_id == 1:
23 | layer_id = 2
24 | elif stage_id == 2:
25 | layer_id = 3
26 | elif stage_id == 3:
27 | layer_id = num_max_layer
28 | return layer_id
29 | elif var_name.startswith("backbone.stages"):
30 | stage_id = int(var_name.split('.')[2])
31 | block_id = int(var_name.split('.')[3])
32 | if stage_id == 0:
33 | layer_id = 1
34 | elif stage_id == 1:
35 | layer_id = 2
36 | elif stage_id == 2:
37 | layer_id = 3 + block_id // 3
38 | elif stage_id == 3:
39 | layer_id = num_max_layer
40 | return layer_id
41 | else:
42 | return num_max_layer + 1
43 |
44 |
45 | def get_num_layer_stage_wise(var_name, num_max_layer):
46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"):
47 | return 0
48 | elif var_name.startswith("backbone.downsample_layers"):
49 | return 0
50 | elif var_name.startswith("backbone.stages"):
51 | stage_id = int(var_name.split('.')[2])
52 | return stage_id + 1
53 | else:
54 | return num_max_layer - 1
55 |
56 |
57 | @OPTIMIZER_BUILDERS.register_module()
58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor):
59 | def add_params(self, params, module, prefix='', is_dcn_module=None):
60 | """Add all parameters of module to the params list.
61 | The parameters of the given module will be added to the list of param
62 | groups, with specific rules defined by paramwise_cfg.
63 | Args:
64 | params (list[dict]): A list of param groups, it will be modified
65 | in place.
66 | module (nn.Module): The module to be added.
67 | prefix (str): The prefix of the module
68 | is_dcn_module (int|float|None): If the current module is a
69 | submodule of DCN, `is_dcn_module` will be passed to
70 | control conv_offset layer's learning rate. Defaults to None.
71 | """
72 | parameter_groups = {}
73 | print(self.paramwise_cfg)
74 | num_layers = self.paramwise_cfg.get('num_layers') + 2
75 | decay_rate = self.paramwise_cfg.get('decay_rate')
76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise")
77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers))
78 | weight_decay = self.base_wd
79 |
80 | for name, param in module.named_parameters():
81 | if not param.requires_grad:
82 | continue # frozen weights
83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'):
84 | group_name = "no_decay"
85 | this_weight_decay = 0.
86 | else:
87 | group_name = "decay"
88 | this_weight_decay = weight_decay
89 |
90 | if decay_type == "layer_wise":
91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers'))
92 | elif decay_type == "stage_wise":
93 | layer_id = get_num_layer_stage_wise(name, num_layers)
94 |
95 | group_name = "layer_%d_%s" % (layer_id, group_name)
96 |
97 | if group_name not in parameter_groups:
98 | scale = decay_rate ** (num_layers - layer_id - 1)
99 |
100 | parameter_groups[group_name] = {
101 | "weight_decay": this_weight_decay,
102 | "params": [],
103 | "param_names": [],
104 | "lr_scale": scale,
105 | "group_name": group_name,
106 | "lr": scale * self.base_lr,
107 | }
108 |
109 | parameter_groups[group_name]["params"].append(param)
110 | parameter_groups[group_name]["param_names"].append(name)
111 | rank, _ = get_dist_info()
112 | if rank == 0:
113 | to_display = {}
114 | for key in parameter_groups:
115 | to_display[key] = {
116 | "param_names": parameter_groups[key]["param_names"],
117 | "lr_scale": parameter_groups[key]["lr_scale"],
118 | "lr": parameter_groups[key]["lr"],
119 | "weight_decay": parameter_groups[key]["weight_decay"],
120 | }
121 | print("Param groups = %s" % json.dumps(to_display, indent=2))
122 |
123 | params.extend(parameter_groups.values())
124 |
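# Worked example (added for clarity; the numbers are illustrative): with
# paramwise_cfg num_layers=12 and decay_rate=0.9, add_params uses
# num_layers = 12 + 2 = 14 and scale = decay_rate ** (num_layers - layer_id - 1),
# so layer_id=0 (e.g. backbone.pos_embed) gets 0.9**13 ~= 0.25 * base_lr while
# parameters outside the backbone (layer_id = 13) keep the full base_lr.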
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/resize_transform.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | import numpy as np
3 | from mmseg.datasets.builder import PIPELINES
4 |
5 |
6 | @PIPELINES.register_module()
7 | class SETR_Resize(object):
8 | """Resize images & seg.
9 |
10 | This transform resizes the input image to some scale. If the input dict
11 | contains the key "scale", then the scale in the input dict is used,
12 | otherwise the specified scale in the init method is used.
13 |
14 | ``img_scale`` can either be a tuple (single-scale) or a list of tuple
15 | (multi-scale). There are 3 multiscale modes:
16 |
17 | - ``ratio_range is not None``: randomly sample a ratio from the ratio range
18 | and multiply it with the image scale.
19 |
20 | - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a
21 | scale from a range.
22 |
23 | - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a
24 | scale from multiple scales.
25 |
26 | Args:
27 | img_scale (tuple or list[tuple]): Images scales for resizing.
28 | multiscale_mode (str): Either "range" or "value".
29 | ratio_range (tuple[float]): (min_ratio, max_ratio)
30 | keep_ratio (bool): Whether to keep the aspect ratio when resizing the
31 | image.
32 | """
33 |
34 | def __init__(self,
35 | img_scale=None,
36 | multiscale_mode='range',
37 | ratio_range=None,
38 | keep_ratio=True,
39 | crop_size=None,
40 | setr_multi_scale=False):
41 |
42 | if img_scale is None:
43 | self.img_scale = None
44 | else:
45 | if isinstance(img_scale, list):
46 | self.img_scale = img_scale
47 | else:
48 | self.img_scale = [img_scale]
49 | # assert mmcv.is_list_of(self.img_scale, tuple)
50 |
51 | if ratio_range is not None:
52 | # mode 1: given a scale and a range of image ratio
53 | assert len(self.img_scale) == 1
54 | else:
55 | # mode 2: given multiple scales or a range of scales
56 | assert multiscale_mode in ['value', 'range']
57 |
58 | self.multiscale_mode = multiscale_mode
59 | self.ratio_range = ratio_range
60 | self.keep_ratio = keep_ratio
61 | self.crop_size = crop_size
62 | self.setr_multi_scale = setr_multi_scale
63 |
64 | @staticmethod
65 | def random_select(img_scales):
66 | """Randomly select an img_scale from given candidates.
67 |
68 | Args:
69 | img_scales (list[tuple]): Images scales for selection.
70 |
71 | Returns:
72 | (tuple, int): Returns a tuple ``(img_scale, scale_idx)``,
73 | where ``img_scale`` is the selected image scale and
74 | ``scale_idx`` is the selected index in the given candidates.
75 | """
76 |
77 | assert mmcv.is_list_of(img_scales, tuple)
78 | scale_idx = np.random.randint(len(img_scales))
79 | img_scale = img_scales[scale_idx]
80 | return img_scale, scale_idx
81 |
82 | @staticmethod
83 | def random_sample(img_scales):
84 | """Randomly sample an img_scale when ``multiscale_mode=='range'``.
85 |
86 | Args:
87 | img_scales (list[tuple]): Images scale range for sampling.
88 | There must be two tuples in img_scales, which specify the lower
89 | and upper bound of image scales.
90 |
91 | Returns:
92 | (tuple, None): Returns a tuple ``(img_scale, None)``, where
93 | ``img_scale`` is sampled scale and None is just a placeholder
94 | to be consistent with :func:`random_select`.
95 | """
96 |
97 | assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
98 | img_scale_long = [max(s) for s in img_scales]
99 | img_scale_short = [min(s) for s in img_scales]
100 | long_edge = np.random.randint(
101 | min(img_scale_long),
102 | max(img_scale_long) + 1)
103 | short_edge = np.random.randint(
104 | min(img_scale_short),
105 | max(img_scale_short) + 1)
106 | img_scale = (long_edge, short_edge)
107 | return img_scale, None
108 |
109 | @staticmethod
110 | def random_sample_ratio(img_scale, ratio_range):
111 | """Randomly sample an img_scale when ``ratio_range`` is specified.
112 |
113 | A ratio will be randomly sampled from the range specified by
114 | ``ratio_range``. Then it would be multiplied with ``img_scale`` to
115 | generate sampled scale.
116 |
117 | Args:
118 | img_scale (tuple): Images scale base to multiply with ratio.
119 | ratio_range (tuple[float]): The minimum and maximum ratio to scale
120 | the ``img_scale``.
121 |
122 | Returns:
123 | (tuple, None): Returns a tuple ``(scale, None)``, where
124 | ``scale`` is sampled ratio multiplied with ``img_scale`` and
125 | None is just a placeholder to be consistent with
126 | :func:`random_select`.
127 | """
128 |
129 | assert isinstance(img_scale, tuple) and len(img_scale) == 2
130 | min_ratio, max_ratio = ratio_range
131 | assert min_ratio <= max_ratio
132 | ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
133 | scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
134 | return scale, None
135 |
136 | def _random_scale(self, results):
137 | """Randomly sample an img_scale according to ``ratio_range`` and
138 | ``multiscale_mode``.
139 |
140 | If ``ratio_range`` is specified, a ratio will be sampled and be
141 | multiplied with ``img_scale``.
142 | If multiple scales are specified by ``img_scale``, a scale will be
143 | sampled according to ``multiscale_mode``.
144 | Otherwise, single scale will be used.
145 |
146 | Args:
147 | results (dict): Result dict from :obj:`dataset`.
148 |
149 | Returns:
150 | dict: Two new keys ``scale`` and ``scale_idx`` are added into
151 | ``results``, which would be used by subsequent pipelines.
152 | """
153 |
154 | if self.ratio_range is not None:
155 | scale, scale_idx = self.random_sample_ratio(
156 | self.img_scale[0], self.ratio_range)
157 | elif len(self.img_scale) == 1:
158 | scale, scale_idx = self.img_scale[0], 0
159 | elif self.multiscale_mode == 'range':
160 | scale, scale_idx = self.random_sample(self.img_scale)
161 | elif self.multiscale_mode == 'value':
162 | scale, scale_idx = self.random_select(self.img_scale)
163 | else:
164 | raise NotImplementedError
165 |
166 | results['scale'] = scale
167 | results['scale_idx'] = scale_idx
168 |
169 | def _resize_img(self, results):
170 | """Resize images with ``results['scale']``."""
171 |
172 | if self.keep_ratio:
173 | if self.setr_multi_scale:
174 | if min(results['scale']) < self.crop_size[0]:
175 | new_short = self.crop_size[0]
176 | else:
177 | new_short = min(results['scale'])
178 |
179 | h, w = results['img'].shape[:2]
180 | if h > w:
181 | new_h, new_w = new_short * h / w, new_short
182 | else:
183 | new_h, new_w = new_short, new_short * w / h
184 | results['scale'] = (new_h, new_w)
185 |
186 | img, scale_factor = mmcv.imrescale(
187 | results['img'], results['scale'], return_scale=True)
188 | # w_scale and h_scale have a minor difference
189 | # a real fix should be done in the mmcv.imrescale in the future
190 | new_h, new_w = img.shape[:2]
191 | h, w = results['img'].shape[:2]
192 | w_scale = new_w / w
193 | h_scale = new_h / h
194 | else:
195 | img, w_scale, h_scale = mmcv.imresize(
196 | results['img'], results['scale'], return_scale=True)
197 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
198 | dtype=np.float32)
199 | results['img'] = img
200 | results['img_shape'] = img.shape
201 | results['pad_shape'] = img.shape # in case that there is no padding
202 | results['scale_factor'] = scale_factor
203 | results['keep_ratio'] = self.keep_ratio
204 |
205 | def _resize_seg(self, results):
206 | """Resize semantic segmentation map with ``results['scale']``."""
207 | for key in results.get('seg_fields', []):
208 | if self.keep_ratio:
209 | gt_seg = mmcv.imrescale(
210 | results[key], results['scale'], interpolation='nearest')
211 | else:
212 | gt_seg = mmcv.imresize(
213 | results[key], results['scale'], interpolation='nearest')
214 |             results[key] = gt_seg
215 |
216 | def __call__(self, results):
217 | """Call function to resize images, bounding boxes, masks, semantic
218 | segmentation map.
219 |
220 | Args:
221 | results (dict): Result dict from loading pipeline.
222 |
223 | Returns:
224 | dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor',
225 | 'keep_ratio' keys are added into result dict.
226 | """
227 |
228 | if 'scale' not in results:
229 | self._random_scale(results)
230 | self._resize_img(results)
231 | self._resize_seg(results)
232 | return results
233 |
234 | def __repr__(self):
235 | repr_str = self.__class__.__name__
236 | repr_str += (f'(img_scale={self.img_scale}, '
237 | f'multiscale_mode={self.multiscale_mode}, '
238 | f'ratio_range={self.ratio_range}, '
239 | f'keep_ratio={self.keep_ratio})')
240 | return repr_str
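241 | 
242 | # Example (illustrative): with a typical ADE20K-style setting of
243 | # img_scale=(2048, 512) and ratio_range=(0.5, 2.0), ``_random_scale``
244 | # calls random_sample_ratio((2048, 512), (0.5, 2.0)), which draws a
245 | # uniform ratio r in [0.5, 2.0) and returns ((int(2048*r), int(512*r)),
246 | # None); for r = 1.5 the sampled scale would be (3072, 768).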
--------------------------------------------------------------------------------
/semantic_segmentation/mmcv_custom/train_api.py:
--------------------------------------------------------------------------------
1 | import random
2 | import warnings
3 |
4 | import numpy as np
5 | import torch
6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
7 | from mmcv.runner import build_optimizer, build_runner
8 | from mmseg.core import DistEvalHook, EvalHook
9 | from mmseg.datasets import build_dataloader, build_dataset
10 | from mmseg.utils import get_root_logger
11 |
12 | try:
13 | import apex
14 | except ImportError:
15 | print('apex is not installed')
16 |
17 |
18 | def set_random_seed(seed, deterministic=False):
19 | """Set random seed.
20 |
21 | Args:
22 | seed (int): Seed to be used.
23 | deterministic (bool): Whether to set the deterministic option for
24 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
25 | to True and `torch.backends.cudnn.benchmark` to False.
26 | Default: False.
27 | """
28 | random.seed(seed)
29 | np.random.seed(seed)
30 | torch.manual_seed(seed)
31 | torch.cuda.manual_seed_all(seed)
32 | if deterministic:
33 | torch.backends.cudnn.deterministic = True
34 | torch.backends.cudnn.benchmark = False
35 |
36 |
37 | def train_segmentor(model,
38 | dataset,
39 | cfg,
40 | distributed=False,
41 | validate=False,
42 | timestamp=None,
43 | meta=None):
44 | """Launch segmentor training."""
45 | logger = get_root_logger(cfg.log_level)
46 |
47 | # prepare data loaders
48 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
49 | data_loaders = [
50 | build_dataloader(
51 | ds,
52 | cfg.data.samples_per_gpu,
53 | cfg.data.workers_per_gpu,
54 | # cfg.gpus will be ignored if distributed
55 | len(cfg.gpu_ids),
56 | dist=distributed,
57 | seed=cfg.seed,
58 | drop_last=True) for ds in dataset
59 | ]
60 |
61 | # build optimizer
62 | optimizer = build_optimizer(model, cfg.optimizer)
63 |
64 | # use apex fp16 optimizer
65 |     if cfg.optimizer_config.get("type") == "DistOptimizerHook":
66 | if cfg.optimizer_config.get("use_fp16", False):
67 | model, optimizer = apex.amp.initialize(
68 | model.cuda(), optimizer, opt_level="O1")
69 | for m in model.modules():
70 | if hasattr(m, "fp16_enabled"):
71 | m.fp16_enabled = True
72 |
73 | # put model on gpus
74 | if distributed:
75 | find_unused_parameters = cfg.get('find_unused_parameters', False)
76 | # Sets the `find_unused_parameters` parameter in
77 | # torch.nn.parallel.DistributedDataParallel
78 | model = MMDistributedDataParallel(
79 | model.cuda(),
80 | device_ids=[torch.cuda.current_device()],
81 | broadcast_buffers=False,
82 | find_unused_parameters=find_unused_parameters)
83 | else:
84 | model = MMDataParallel(
85 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
86 |
87 | if cfg.get('runner') is None:
88 | cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
89 | warnings.warn(
90 | 'config is now expected to have a `runner` section, '
91 | 'please set `runner` in your config.', UserWarning)
92 |
93 | runner = build_runner(
94 | cfg.runner,
95 | default_args=dict(
96 | model=model,
97 | batch_processor=None,
98 | optimizer=optimizer,
99 | work_dir=cfg.work_dir,
100 | logger=logger,
101 | meta=meta))
102 |
103 | # register hooks
104 | runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
105 | cfg.checkpoint_config, cfg.log_config,
106 | cfg.get('momentum_config', None))
107 |
108 |     # an ugly workaround to make the .log and .log.json filenames the same
109 | runner.timestamp = timestamp
110 |
111 | # register eval hooks
112 | if validate:
113 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
114 | val_dataloader = build_dataloader(
115 | val_dataset,
116 | samples_per_gpu=1,
117 | workers_per_gpu=cfg.data.workers_per_gpu,
118 | dist=distributed,
119 | shuffle=False)
120 | eval_cfg = cfg.get('evaluation', {})
121 | eval_cfg['by_epoch'] = 'IterBasedRunner' not in cfg.runner['type']
122 | eval_hook = DistEvalHook if distributed else EvalHook
123 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
124 |
125 | if cfg.resume_from:
126 | runner.resume(cfg.resume_from)
127 | elif cfg.load_from:
128 | runner.load_checkpoint(cfg.load_from)
129 | runner.run(data_loaders, cfg.workflow)
130 |
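131 | # Example (illustrative; this mirrors the driver in
132 | # semantic_segmentation/train.py):
133 | #
134 | #   model = build_segmentor(cfg.model, train_cfg=cfg.get('train_cfg'),
135 | #                           test_cfg=cfg.get('test_cfg'))
136 | #   datasets = [build_dataset(cfg.data.train)]
137 | #   train_segmentor(model, datasets, cfg, distributed=distributed,
138 | #                   validate=True, timestamp=timestamp, meta=meta)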
--------------------------------------------------------------------------------
/semantic_segmentation/mmseg_custom/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .align_resize import AlignResize
3 |
4 | __all__ = ['AlignResize']
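5 | 
6 | # Importing this package runs align_resize.py, whose
7 | # @PIPELINES.register_module() decorator registers AlignResize with
8 | # mmseg, so configs can then reference dict(type='AlignResize', ...).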
--------------------------------------------------------------------------------
/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-37.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-38.pyc
--------------------------------------------------------------------------------
/semantic_segmentation/mmseg_custom/align_resize.py:
--------------------------------------------------------------------------------
1 | #########
2 | # copied from uniformer
3 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/tools/align_resize.py
4 | #########
5 | import mmcv
6 | import numpy as np
7 | from mmcv.utils import deprecated_api_warning, is_tuple_of
8 | from mmseg.datasets.builder import PIPELINES
9 | from numpy import random
10 |
11 |
12 | @PIPELINES.register_module()
13 | class AlignResize(object):
14 | """Resize images & seg. Align
15 | """
16 |
17 | def __init__(self,
18 | img_scale=None,
19 | multiscale_mode='range',
20 | ratio_range=None,
21 | keep_ratio=True,
22 | size_divisor=32):
23 | if img_scale is None:
24 | self.img_scale = None
25 | else:
26 | if isinstance(img_scale, list):
27 | self.img_scale = img_scale
28 | else:
29 | self.img_scale = [img_scale]
30 | assert mmcv.is_list_of(self.img_scale, tuple)
31 |
32 | if ratio_range is not None:
33 | # mode 1: given img_scale=None and a range of image ratio
34 | # mode 2: given a scale and a range of image ratio
35 | assert self.img_scale is None or len(self.img_scale) == 1
36 | else:
37 | # mode 3 and 4: given multiple scales or a range of scales
38 | assert multiscale_mode in ['value', 'range']
39 |
40 | self.multiscale_mode = multiscale_mode
41 | self.ratio_range = ratio_range
42 | self.keep_ratio = keep_ratio
43 | self.size_divisor = size_divisor
44 |
45 | @staticmethod
46 | def random_select(img_scales):
47 | """Randomly select an img_scale from given candidates.
48 | Args:
49 |             img_scales (list[tuple]): Image scales for selection.
50 |         Returns:
51 |             (tuple, int): Returns a tuple ``(img_scale, scale_idx)``,
52 | where ``img_scale`` is the selected image scale and
53 | ``scale_idx`` is the selected index in the given candidates.
54 | """
55 |
56 | assert mmcv.is_list_of(img_scales, tuple)
57 | scale_idx = np.random.randint(len(img_scales))
58 | img_scale = img_scales[scale_idx]
59 | return img_scale, scale_idx
60 |
61 | @staticmethod
62 | def random_sample(img_scales):
63 | """Randomly sample an img_scale when ``multiscale_mode=='range'``.
64 | Args:
65 |             img_scales (list[tuple]): Image scale range for sampling.
66 |                 There must be two tuples in img_scales, which specify the
67 |                 lower and upper bound of image scales.
68 | Returns:
69 | (tuple, None): Returns a tuple ``(img_scale, None)``, where
70 | ``img_scale`` is sampled scale and None is just a placeholder
71 | to be consistent with :func:`random_select`.
72 | """
73 |
74 | assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2
75 | img_scale_long = [max(s) for s in img_scales]
76 | img_scale_short = [min(s) for s in img_scales]
77 | long_edge = np.random.randint(
78 | min(img_scale_long),
79 | max(img_scale_long) + 1)
80 | short_edge = np.random.randint(
81 | min(img_scale_short),
82 | max(img_scale_short) + 1)
83 | img_scale = (long_edge, short_edge)
84 | return img_scale, None
85 |
86 | @staticmethod
87 | def random_sample_ratio(img_scale, ratio_range):
88 | """Randomly sample an img_scale when ``ratio_range`` is specified.
89 |         A ratio will be randomly sampled from the range specified by
90 |         ``ratio_range``, then multiplied with ``img_scale`` to generate the
91 |         sampled scale.
92 |         Args:
93 |             img_scale (tuple): Image scale base to multiply with the ratio.
94 |             ratio_range (tuple[float]): The minimum and maximum ratio to scale
95 |                 the ``img_scale``.
96 |         Returns:
97 |             (tuple, None): Returns a tuple ``(scale, None)``, where ``scale``
98 |                 is the sampled ratio multiplied with ``img_scale``, and
99 |                 ``None`` is a placeholder to be consistent with
100 | :func:`random_select`.
101 | """
102 |
103 | assert isinstance(img_scale, tuple) and len(img_scale) == 2
104 | min_ratio, max_ratio = ratio_range
105 | assert min_ratio <= max_ratio
106 | ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio
107 | scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio)
108 | return scale, None
109 |
110 | def _random_scale(self, results):
111 | """Randomly sample an img_scale according to ``ratio_range`` and
112 | ``multiscale_mode``.
113 |         If ``ratio_range`` is specified, a ratio will be sampled and
114 |         multiplied with ``img_scale``.
115 |         If multiple scales are specified by ``img_scale``, a scale will be
116 |         sampled according to ``multiscale_mode``.
117 |         Otherwise, a single scale will be used.
118 | Args:
119 | results (dict): Result dict from :obj:`dataset`.
120 | Returns:
121 |             dict: Two new keys ``scale`` and ``scale_idx`` are added into
122 |                 ``results``, which would be used by subsequent pipelines.
123 | """
124 |
125 | if self.ratio_range is not None:
126 | if self.img_scale is None:
127 | h, w = results['img'].shape[:2]
128 | scale, scale_idx = self.random_sample_ratio((w, h),
129 | self.ratio_range)
130 | else:
131 | scale, scale_idx = self.random_sample_ratio(
132 | self.img_scale[0], self.ratio_range)
133 | elif len(self.img_scale) == 1:
134 | scale, scale_idx = self.img_scale[0], 0
135 | elif self.multiscale_mode == 'range':
136 | scale, scale_idx = self.random_sample(self.img_scale)
137 | elif self.multiscale_mode == 'value':
138 | scale, scale_idx = self.random_select(self.img_scale)
139 | else:
140 | raise NotImplementedError
141 |
142 | results['scale'] = scale
143 | results['scale_idx'] = scale_idx
144 |
145 |     def _align(self, img, size_divisor, interpolation=None):  # resize so H and W become multiples of size_divisor
146 | align_h = int(np.ceil(img.shape[0] / size_divisor)) * size_divisor
147 | align_w = int(np.ceil(img.shape[1] / size_divisor)) * size_divisor
148 |         if interpolation is None:
149 | img = mmcv.imresize(img, (align_w, align_h))
150 | else:
151 | img = mmcv.imresize(img, (align_w, align_h), interpolation=interpolation)
152 | return img
153 |
154 | def _resize_img(self, results):
155 | """Resize images with ``results['scale']``."""
156 | if self.keep_ratio:
157 | img, scale_factor = mmcv.imrescale(
158 | results['img'], results['scale'], return_scale=True)
159 | #### align ####
160 | img = self._align(img, self.size_divisor)
161 |             # w_scale and h_scale have a minor difference;
162 |             # a real fix should be done in mmcv.imrescale in the future
163 | new_h, new_w = img.shape[:2]
164 | h, w = results['img'].shape[:2]
165 | w_scale = new_w / w
166 | h_scale = new_h / h
167 | else:
168 | img, w_scale, h_scale = mmcv.imresize(
169 | results['img'], results['scale'], return_scale=True)
170 |
171 | h, w = img.shape[:2]
172 | assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \
173 | int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \
174 | "img size not align. h:{} w:{}".format(h,w)
175 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale],
176 | dtype=np.float32)
177 | results['img'] = img
178 | results['img_shape'] = img.shape
179 |         results['pad_shape'] = img.shape # in case there is no padding
180 | results['scale_factor'] = scale_factor
181 | results['keep_ratio'] = self.keep_ratio
182 |
183 | def _resize_seg(self, results):
184 | """Resize semantic segmentation map with ``results['scale']``."""
185 | for key in results.get('seg_fields', []):
186 | if self.keep_ratio:
187 | gt_seg = mmcv.imrescale(
188 | results[key], results['scale'], interpolation='nearest')
189 | gt_seg = self._align(gt_seg, self.size_divisor, interpolation='nearest')
190 | else:
191 | gt_seg = mmcv.imresize(
192 | results[key], results['scale'], interpolation='nearest')
193 | h, w = gt_seg.shape[:2]
194 | assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \
195 | int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \
196 | "gt_seg size not align. h:{} w:{}".format(h, w)
197 | results[key] = gt_seg
198 |
199 | def __call__(self, results):
200 | """Call function to resize images, bounding boxes, masks, semantic
201 | segmentation map.
202 | Args:
203 | results (dict): Result dict from loading pipeline.
204 | Returns:
205 | dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor',
206 | 'keep_ratio' keys are added into result dict.
207 | """
208 |
209 | if 'scale' not in results:
210 | self._random_scale(results)
211 | self._resize_img(results)
212 | self._resize_seg(results)
213 | return results
214 |
215 | def __repr__(self):
216 | repr_str = self.__class__.__name__
217 | repr_str += (f'(img_scale={self.img_scale}, '
218 | f'multiscale_mode={self.multiscale_mode}, '
219 | f'ratio_range={self.ratio_range}, '
220 | f'keep_ratio={self.keep_ratio})')
221 | return repr_str
222 |
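223 | # Example (illustrative pipeline entry; requires importing mmseg_custom
224 | # first so the transform is registered):
225 | #
226 | #   dict(type='AlignResize', keep_ratio=True, size_divisor=32)
227 | #
228 | # With keep_ratio=True, the image is first rescaled to results['scale'],
229 | # then H and W are resized up to the next multiples of 32.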
--------------------------------------------------------------------------------
/semantic_segmentation/scripts/train_sfpn_transxnet_base.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 -m torch.distributed.launch \
3 | --nproc_per_node=8 \
4 | --master_port=$((RANDOM+10000)) \
5 | train.py \
6 | configs/sfpn_transxnet_base.py \
7 | --work-dir work_dirs/sfpn_transxnet_base/ \
8 | --launcher pytorch
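9 | # Note: torch.distributed.launch is deprecated on newer PyTorch releases;
10 | # an equivalent launch (illustrative) would be:
11 | #   torchrun --nproc_per_node=8 train.py configs/sfpn_transxnet_base.py \
12 | #     --work-dir work_dirs/sfpn_transxnet_base/ --launcher pytorch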
--------------------------------------------------------------------------------
/semantic_segmentation/scripts/train_sfpn_transxnet_small.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 -m torch.distributed.launch \
3 | --nproc_per_node=8 \
4 | --master_port=$((RANDOM+10000)) \
5 | train.py \
6 | configs/sfpn_transxnet_small.py \
7 | --work-dir work_dirs/sfpn_transxnet_small/ \
8 | --launcher pytorch
--------------------------------------------------------------------------------
/semantic_segmentation/scripts/train_sfpn_transxnet_tiny.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python3 -m torch.distributed.launch \
3 | --nproc_per_node=8 \
4 | --master_port=$((RANDOM+10000)) \
5 | train.py \
6 | configs/sfpn_transxnet_tiny.py \
7 | --work-dir work_dirs/sfpn_transxnet_tiny/ \
8 | --launcher pytorch
--------------------------------------------------------------------------------
/semantic_segmentation/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import mmcv
5 | import torch
6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
7 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,
8 | wrap_fp16_model)
9 | from mmcv.utils import DictAction
10 |
11 | from mmseg.apis import multi_gpu_test, single_gpu_test
12 | from mmseg.datasets import build_dataloader, build_dataset
13 | from mmseg.models import build_segmentor
14 |
15 | import mmcv_custom
16 | import mmseg_custom
17 | import transxnet
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(
21 | description='mmseg test (and eval) a model')
22 | parser.add_argument('config', help='test config file path')
23 | parser.add_argument('checkpoint', help='checkpoint file')
24 | parser.add_argument(
25 | '--aug-test', action='store_true', help='Use Flip and Multi scale aug')
26 | parser.add_argument('--out', help='output result file in pickle format')
27 | parser.add_argument(
28 | '--format-only',
29 | action='store_true',
30 |         help='Format the output results without performing evaluation. It is'
31 | 'useful when you want to format the result to a specific format and '
32 | 'submit it to the test server')
33 | parser.add_argument(
34 | '--eval',
35 | type=str,
36 | nargs='+',
37 | help='evaluation metrics, which depends on the dataset, e.g., "mIoU"'
38 | ' for generic datasets, and "cityscapes" for Cityscapes')
39 | parser.add_argument('--show', action='store_true', help='show results')
40 | parser.add_argument(
41 | '--show-dir', help='directory where painted images will be saved')
42 | parser.add_argument(
43 | '--gpu-collect',
44 | action='store_true',
45 | help='whether to use gpu to collect results.')
46 | parser.add_argument(
47 | '--tmpdir',
48 | help='tmp directory used for collecting results from multiple '
49 | 'workers, available when gpu_collect is not specified')
50 | parser.add_argument(
51 | '--options', nargs='+', action=DictAction, help='custom options')
52 | parser.add_argument(
53 | '--eval-options',
54 | nargs='+',
55 | action=DictAction,
56 | help='custom options for evaluation')
57 | parser.add_argument(
58 | '--launcher',
59 | choices=['none', 'pytorch', 'slurm', 'mpi'],
60 | default='none',
61 | help='job launcher')
62 | parser.add_argument(
63 | '--opacity',
64 | type=float,
65 | default=0.5,
66 | help='Opacity of painted segmentation map. In (0, 1] range.')
67 | parser.add_argument('--local_rank', type=int, default=0)
68 | args = parser.parse_args()
69 | if 'LOCAL_RANK' not in os.environ:
70 | os.environ['LOCAL_RANK'] = str(args.local_rank)
71 | return args
72 |
73 |
74 | def main():
75 | args = parse_args()
76 |
77 | assert args.out or args.eval or args.format_only or args.show \
78 | or args.show_dir, \
79 |         ('Please specify at least one operation (save/eval/format/show '
80 |          'the results) with the argument "--out", "--eval", '
81 |          '"--format-only", "--show" or "--show-dir"')
82 |
83 | if args.eval and args.format_only:
84 | raise ValueError('--eval and --format_only cannot be both specified')
85 |
86 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
87 | raise ValueError('The output file must be a pkl file.')
88 |
89 | cfg = mmcv.Config.fromfile(args.config)
90 | if args.options is not None:
91 | cfg.merge_from_dict(args.options)
92 | # set cudnn_benchmark
93 | if cfg.get('cudnn_benchmark', False):
94 | torch.backends.cudnn.benchmark = True
95 | if args.aug_test:
96 |         # hard-coded index of MultiScaleFlipAug in the test pipeline
97 | cfg.data.test.pipeline[1].img_ratios = [
98 | 0.5, 0.75, 1.0, 1.25, 1.5, 1.75
99 | ]
100 | cfg.data.test.pipeline[1].flip = True
101 | cfg.model.pretrained = None
102 | cfg.data.test.test_mode = True
103 |
104 | # init distributed env first, since logger depends on the dist info.
105 | if args.launcher == 'none':
106 | distributed = False
107 | else:
108 | distributed = True
109 | init_dist(args.launcher, **cfg.dist_params)
110 |
111 | # build the dataloader
112 | # TODO: support multiple images per gpu (only minor changes are needed)
113 | dataset = build_dataset(cfg.data.test)
114 | data_loader = build_dataloader(
115 | dataset,
116 | samples_per_gpu=1,
117 | workers_per_gpu=cfg.data.workers_per_gpu,
118 | dist=distributed,
119 | shuffle=False)
120 |
121 | # build the model and load checkpoint
122 | cfg.model.train_cfg = None
123 | model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg'))
124 |
125 | fp16_cfg = cfg.get('fp16', None)
126 | if fp16_cfg is not None:
127 | wrap_fp16_model(model)
128 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
129 | model.CLASSES = dataset.CLASSES # checkpoint['meta']['CLASSES']
130 | model.PALETTE = dataset.PALETTE # checkpoint['meta']['PALETTE']
131 |
132 | efficient_test = False
133 | if args.eval_options is not None:
134 | efficient_test = args.eval_options.get('efficient_test', False)
135 |
136 | if not distributed:
137 | model = MMDataParallel(model, device_ids=[0])
138 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
139 | efficient_test, args.opacity)
140 | else:
141 | model = MMDistributedDataParallel(
142 | model.cuda(),
143 | device_ids=[torch.cuda.current_device()],
144 | broadcast_buffers=False)
145 | outputs = multi_gpu_test(model, data_loader, args.tmpdir,
146 | args.gpu_collect, efficient_test)
147 |
148 | rank, _ = get_dist_info()
149 | if rank == 0:
150 | if args.out:
151 | print(f'\nwriting results to {args.out}')
152 | mmcv.dump(outputs, args.out)
153 | kwargs = {} if args.eval_options is None else args.eval_options
154 | if args.format_only:
155 | dataset.format_results(outputs, **kwargs)
156 | if args.eval:
157 | dataset.evaluate(outputs, args.eval, **kwargs)
158 |
159 |
160 | if __name__ == '__main__':
161 | main()
162 |
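163 | # Example (illustrative): single-GPU evaluation of a trained checkpoint:
164 | #   python3 test.py configs/sfpn_transxnet_tiny.py \
165 | #       work_dirs/sfpn_transxnet_tiny/latest.pth --eval mIoU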
--------------------------------------------------------------------------------
/semantic_segmentation/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | import time
4 | import copy
5 | import argparse
6 |
7 | import mmcv
8 | import torch
9 | from mmcv.runner import init_dist
10 | from mmcv.utils import Config, DictAction, get_git_hash
11 | from mmseg import __version__
12 | from mmseg.apis import set_random_seed, train_segmentor
13 | from mmseg.datasets import build_dataset
14 | from mmseg.models import build_segmentor
15 | from mmseg.utils import collect_env, get_root_logger
16 |
17 | # import custom utils
18 | import mmcv_custom
19 | import mmseg_custom
20 | # import models
21 | import transxnet
22 |
23 |
24 | def parse_args():
25 | parser = argparse.ArgumentParser(description='Train a segmentor')
26 | parser.add_argument('config', help='train config file path')
27 | parser.add_argument('--work-dir', help='the dir to save logs and models')
28 | parser.add_argument(
29 | '--load-from', help='the checkpoint file to load weights from')
30 | parser.add_argument(
31 | '--resume-from', help='the checkpoint file to resume from')
32 | parser.add_argument(
33 | '--no-validate',
34 | action='store_true',
35 | help='whether not to evaluate the checkpoint during training')
36 | group_gpus = parser.add_mutually_exclusive_group()
37 | group_gpus.add_argument(
38 | '--gpus',
39 | type=int,
40 | help='number of gpus to use '
41 | '(only applicable to non-distributed training)')
42 | group_gpus.add_argument(
43 | '--gpu-ids',
44 | type=int,
45 | nargs='+',
46 | help='ids of gpus to use '
47 | '(only applicable to non-distributed training)')
48 | parser.add_argument('--seed', type=int, default=None, help='random seed')
49 | parser.add_argument(
50 | '--deterministic',
51 | action='store_true',
52 | help='whether to set deterministic options for CUDNN backend.')
53 | parser.add_argument(
54 | '--options', nargs='+', action=DictAction, help='custom options')
55 | parser.add_argument(
56 | '--launcher',
57 | choices=['none', 'pytorch', 'slurm', 'mpi'],
58 | default='none',
59 | help='job launcher')
60 | parser.add_argument('--local_rank', type=int, default=0)
61 | args = parser.parse_args()
62 | if 'LOCAL_RANK' not in os.environ:
63 | os.environ['LOCAL_RANK'] = str(args.local_rank)
64 |
65 | return args
66 |
67 |
68 | def main():
69 | args = parse_args()
70 |
71 | cfg = Config.fromfile(args.config)
72 | if args.options is not None:
73 | cfg.merge_from_dict(args.options)
74 | # set cudnn_benchmark
75 | if cfg.get('cudnn_benchmark', False):
76 | torch.backends.cudnn.benchmark = True
77 |
78 |     # work_dir is determined in this priority: CLI > config file > filename
79 | if args.work_dir is not None:
80 | # update configs according to CLI args if args.work_dir is not None
81 | cfg.work_dir = args.work_dir
82 | elif cfg.get('work_dir', None) is None:
83 | # use config filename as default work_dir if cfg.work_dir is None
84 | cfg.work_dir = osp.join('./work_dirs',
85 | osp.splitext(osp.basename(args.config))[0])
86 | if args.load_from is not None:
87 | cfg.load_from = args.load_from
88 | if args.resume_from is not None:
89 | cfg.resume_from = args.resume_from
90 | if args.gpu_ids is not None:
91 | cfg.gpu_ids = args.gpu_ids
92 | else:
93 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
94 |
95 | # init distributed env first, since logger depends on the dist info.
96 | if args.launcher == 'none':
97 | distributed = False
98 | else:
99 | distributed = True
100 | init_dist(args.launcher, **cfg.dist_params)
101 |
102 | # create work_dir
103 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
104 | # dump config
105 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
106 | # init the logger before other steps
107 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
108 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
109 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
110 |
111 | # init the meta dict to record some important information such as
112 | # environment info and seed, which will be logged
113 | meta = dict()
114 | # log env info
115 | env_info_dict = collect_env()
116 | env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
117 | dash_line = '-' * 60 + '\n'
118 | logger.info('Environment info:\n' + dash_line + env_info + '\n' +
119 | dash_line)
120 | meta['env_info'] = env_info
121 |
122 | # log some basic info
123 | logger.info(f'Distributed training: {distributed}')
124 | logger.info(f'Config:\n{cfg.pretty_text}')
125 |
126 | # set random seeds
127 | if args.seed is not None:
128 | logger.info(f'Set random seed to {args.seed}, deterministic: '
129 | f'{args.deterministic}')
130 | set_random_seed(args.seed, deterministic=args.deterministic)
131 | cfg.seed = args.seed
132 | meta['seed'] = args.seed
133 | meta['exp_name'] = osp.basename(args.config)
134 |
135 | model = build_segmentor(
136 | cfg.model,
137 | train_cfg=cfg.get('train_cfg'),
138 | test_cfg=cfg.get('test_cfg'))
139 |
140 | logger.info(model)
141 |
142 | datasets = [build_dataset(cfg.data.train)]
143 | if len(cfg.workflow) == 2:
144 | val_dataset = copy.deepcopy(cfg.data.val)
145 | val_dataset.pipeline = cfg.data.train.pipeline
146 | datasets.append(build_dataset(val_dataset))
147 | if cfg.checkpoint_config is not None:
148 | # save mmseg version, config file content and class names in
149 | # checkpoints as meta data
150 | cfg.checkpoint_config.meta = dict(
151 | mmseg_version=f'{__version__}+{get_git_hash()[:7]}',
152 | config=cfg.pretty_text,
153 | CLASSES=datasets[0].CLASSES,
154 | PALETTE=datasets[0].PALETTE)
155 | # add an attribute for visualization convenience
156 | model.CLASSES = datasets[0].CLASSES
157 | train_segmentor(
158 | model,
159 | datasets,
160 | cfg,
161 | distributed=distributed,
162 | validate=(not args.no_validate),
163 | timestamp=timestamp,
164 | meta=meta)
165 |
166 |
167 | if __name__ == '__main__':
168 | main()
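169 | 
170 | # Example (illustrative): non-distributed single-GPU debug run:
171 | #   python3 train.py configs/sfpn_transxnet_tiny.py --gpus 1 \
172 | #       --work-dir work_dirs/debug --seed 0 --deterministic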
--------------------------------------------------------------------------------