├── README.md
├── assets
│   └── architecture.png
├── models
│   ├── __init__.py
│   ├── poolformer.py
│   └── transxnet.py
├── object_detection
│   ├── README.md
│   ├── analysis_tools
│   │   ├── analyze_logs.py
│   │   ├── analyze_results.py
│   │   ├── benchmark.py
│   │   ├── coco_error_analysis.py
│   │   ├── eval_metric.py
│   │   ├── get_flops.py
│   │   ├── robustness_eval.py
│   │   └── test_robustness.py
│   ├── checkpoint.py
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   ├── coco_detection.py
│   │   │   │   └── coco_instance.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   ├── cascade_mask_rcnn_pvtv2_b2_fpn.py
│   │   │   │   ├── cascade_mask_rcnn_r50_fpn.py
│   │   │   │   ├── cascade_rcnn_r50_fpn.py
│   │   │   │   ├── fast_rcnn_r50_fpn.py
│   │   │   │   ├── faster_rcnn_r50_caffe_c4.py
│   │   │   │   ├── faster_rcnn_r50_caffe_dc5.py
│   │   │   │   ├── faster_rcnn_r50_fpn.py
│   │   │   │   ├── mask_rcnn_r50_caffe_c4.py
│   │   │   │   ├── mask_rcnn_r50_fpn.py
│   │   │   │   ├── retinanet_r50_fpn.py
│   │   │   │   ├── rpn_r50_caffe_c4.py
│   │   │   │   ├── rpn_r50_fpn.py
│   │   │   │   └── ssd300.py
│   │   │   └── schedules
│   │   │       ├── schedule_1x.py
│   │   │       ├── schedule_20e.py
│   │   │       └── schedule_2x.py
│   │   ├── mask_rcnn_transx_b_fpn_1x_coco.py
│   │   ├── mask_rcnn_transx_s_fpn_1x_coco.py
│   │   ├── mask_rcnn_transx_t_fpn_1x_coco.py
│   │   ├── retinanet_transx_b_fpn_1x_coco.py
│   │   ├── retinanet_transx_s_fpn_1x_coco.py
│   │   └── retinanet_transx_t_fpn_1x_coco.py
│   ├── dist_test.sh
│   ├── dist_train.sh
│   ├── mmcv_custom
│   │   └── runner
│   │       ├── checkpoint.py
│   │       ├── epoch_based_runner.py
│   │       └── optimizer.py
│   ├── mmdet_custom
│   │   └── apis
│   │       └── train.py
│   ├── test.py
│   ├── train.py
│   └── transxnet.py
├── scripts
│   ├── train_base.sh
│   ├── train_small.sh
│   └── train_tiny.sh
├── semantic_segmentation
│   ├── README.md
│   ├── configs
│   │   ├── _base_
│   │   │   ├── datasets
│   │   │   │   └── ade20k_sfpn.py
│   │   │   ├── default_runtime.py
│   │   │   ├── models
│   │   │   │   └── fpn_r50.py
│   │   │   └── schedules
│   │   │       ├── schedule_160k.py
│   │   │       ├── schedule_20k.py
│   │   │       ├── schedule_40k.py
│   │   │       └── schedule_80k.py
│   │   ├── sfpn_transxnet_base.py
│   │   ├── sfpn_transxnet_small.py
│   │   └── sfpn_transxnet_tiny.py
│   ├── mmcv_custom
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── checkpoint.cpython-38.pyc
│   │   │   ├── customized_text.cpython-38.pyc
│   │   │   ├── layer_decay_optimizer_constructor.cpython-38.pyc
│   │   │   └── resize_transform.cpython-38.pyc
│   │   ├── apex_runner
│   │   │   └── checkpoint.py
│   │   ├── checkpoint.py
│   │   ├── customized_text.py
│   │   ├── layer_decay_optimizer_constructor.py
│   │   ├── resize_transform.py
│   │   └── train_api.py
│   ├── mmseg_custom
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── align_resize.cpython-37.pyc
│   │   │   └── align_resize.cpython-38.pyc
│   │   └── align_resize.py
│   ├── scripts
│   │   ├── train_sfpn_transxnet_base.sh
│   │   ├── train_sfpn_transxnet_small.sh
│   │   └── train_sfpn_transxnet_tiny.sh
│   ├── test.py
│   ├── train.py
│   └── transxnet.py
├── train.py
└── validate.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [[TNNLS 2025] TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://doi.org/10.1109/TNNLS.2025.3550979)
2 |
3 | > This is an official PyTorch implementation of "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://doi.org/10.1109/TNNLS.2025.3550979)".
4 | >
5 | > 📝 Paper: [Journal Version](https://doi.org/10.1109/TNNLS.2025.3550979) | [arXiv Version](https://arxiv.org/abs/2310.19380)
6 |
7 |
8 | # Introduction
9 | **TransXNet** is a CNN-Transformer hybrid vision backbone that models both global and local dynamics with a Dual Dynamic Token Mixer (D-Mixer), achieving superior performance over both CNN- and Transformer-based models.
10 |
11 |
12 |
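To make the D-Mixer idea concrete, here is a toy sketch of the underlying split-mix principle: half of the channels are mixed by a convolutional (local) branch and the other half by a self-attention (global) branch, after which the two halves are concatenated. This is only an illustration of the concept, not the module from the paper — the actual D-Mixer uses input-dependent dynamic convolutions and overlapping spatial reduction attention rather than the plain depthwise convolution and multi-head attention shown here:
```
import torch
import torch.nn as nn

class ToyDMixer(nn.Module):
    """Toy illustration: half the channels get a local (conv) mixer,
    the other half a global (self-attention) mixer, then concat."""
    def __init__(self, dim, num_heads=4):
        super().__init__()
        self.local = nn.Conv2d(dim // 2, dim // 2, kernel_size=7,
                               padding=3, groups=dim // 2)  # depthwise conv
        self.attn = nn.MultiheadAttention(dim // 2, num_heads, batch_first=True)

    def forward(self, x):                       # x: (B, C, H, W)
        a, b = x.chunk(2, dim=1)                # split channels in half
        a = self.local(a)                       # local dynamics
        B, C, H, W = b.shape
        t = b.flatten(2).transpose(1, 2)        # (B, H*W, C) token sequence
        t, _ = self.attn(t, t, t)               # global dynamics
        b = t.transpose(1, 2).reshape(B, C, H, W)
        return torch.cat([a, b], dim=1)

x = torch.randn(1, 64, 14, 14)
print(ToyDMixer(64)(x).shape)                   # torch.Size([1, 64, 14, 14])
```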
13 |
14 | # Image Classification
15 |
16 | ## 1. Requirements
17 | We strongly recommend using the dependencies below to ensure reproducibility:
18 | ```
19 | # Environments:
20 | cuda==11.6
21 | python==3.8.15
22 | # Packages:
23 | mmcv==1.7.1
24 | timm==0.6.12
25 | torch==1.13.1
26 | torchvision==0.14.1
27 | ```
28 | ## 2. Data Preparation
29 | Prepare [ImageNet](https://image-net.org/) with the following folder structure; you can extract ImageNet using this [script](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4).
30 |
31 | ```
32 | │imagenet/
33 | ├──train/
34 | │  ├── n01440764
35 | │  │   ├── n01440764_10026.JPEG
36 | │  │   ├── n01440764_10027.JPEG
37 | │  │   ├── ......
38 | │  ├── ......
39 | ├──val/
40 | │  ├── n01440764
41 | │  │   ├── ILSVRC2012_val_00000293.JPEG
42 | │  │   ├── ILSVRC2012_val_00002138.JPEG
43 | │  │   ├── ......
44 | │  ├── ......
45 | ```
46 |
47 | ## 3. Main Results on ImageNet with Pretrained Models
48 |
49 | | Models | Input Size | FLOPs (G) | Params (M) | Top-1 Acc. (%) | Download |
50 | |:-----------:|:----------:|:---------:|:----------:|:----------:|:----------:|
51 | | TransXNet-T | 224x224 | 1.8 | 12.8 | 81.6 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) |
52 | | TransXNet-S | 224x224 | 4.5 | 26.9 | 83.8 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) |
53 | | TransXNet-B | 224x224 | 8.3 | 48.0 | 84.6 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) |
54 | | TransXNet-B | 384x384 | 24.2 | 48.0 | 85.5 | [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b-384.pth.tar) |
55 |
56 | ## 4. Train
57 | To train ```TransXNet``` models on ImageNet-1K with 8 GPUs (single node), run:
58 | ```
59 | bash scripts/train_tiny.sh   # train TransXNet-T
60 | bash scripts/train_small.sh  # train TransXNet-S
61 | bash scripts/train_base.sh   # train TransXNet-B
62 | ```
63 |
64 | ## 5. Validation
65 | To evaluate ```TransXNet``` on ImageNet-1K, run:
66 | ```
67 | MODEL=transxnet_t # transxnet_{t, s, b}
68 | python3 validate.py \
69 | /path/to/imagenet \
70 | --model $MODEL -b 128 \
71 | --pretrained # or --checkpoint /path/to/checkpoint
72 | ```
73 |
74 | # Object Detection and Semantic Segmentation
75 | > [Object Detection](object_detection)
76 | > [Semantic Segmentation](semantic_segmentation)
77 |
78 | # Citation
79 | If you find this project useful for your research, please consider citing:
80 | ```
81 | @article{lou2023transxnet,
82 |   title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition},
83 |   author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu},
84 |   journal={IEEE Transactions on Neural Networks and Learning Systems},
85 |   year={2025}
86 | }
87 | ```
88 |
89 | # Acknowledgment
90 | Our implementation is mainly based on the following codebases. We sincerely thank the authors for their wonderful work.
91 | > [poolformer](https://github.com/sail-sg/poolformer)
92 | > [mmdetection](https://github.com/open-mmlab/mmdetection)
93 | > [mmsegmentation](https://github.com/open-mmlab/mmsegmentation)
94 | > [pytorch-image-models](https://github.com/rwightman/pytorch-image-models)
95 |
96 | # Contact
97 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com.
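As a supplement to the validation command in §5, the sketch below loads a pretrained checkpoint programmatically and runs a dummy forward pass. It assumes the model names (```transxnet_t```, etc.) are registered with ```timm``` when the ```models``` package is imported, as in the poolformer codebase this repository builds on; verify the exact names in ```models/transxnet.py```:
```
import torch
import timm
import models  # noqa: F401  # importing registers transxnet_{t, s, b} with timm

model = timm.create_model('transxnet_t')              # assumed registered name
ckpt = torch.load('transx-t.pth.tar', map_location='cpu')
model.load_state_dict(ckpt.get('state_dict', ckpt))   # handle either checkpoint layout
model.eval()

with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))       # dummy ImageNet-sized input
print(logits.shape)                                   # expected: torch.Size([1, 1000])
```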
98 |
--------------------------------------------------------------------------------
/assets/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/assets/architecture.png
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .poolformer import *
2 | from .transxnet import *
--------------------------------------------------------------------------------
/object_detection/README.md:
--------------------------------------------------------------------------------
1 | # Applying TransXNet to Object Detection and Instance Segmentation
2 |
3 | For details, please refer to "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://arxiv.org/abs/2310.19380)".
4 |
5 | ## 1. Requirements
6 | ```
7 | # Environments:
8 | cuda==11.3
9 | python==3.8.15
10 | # Packages:
11 | mmcv==1.7.1
12 | mmdet==2.28.2
13 | timm==0.6.12
14 | torch==1.12.1
15 | torchvision==0.13.1
16 | ```
17 |
18 |
19 | ## 2. Data Preparation
20 |
21 | Prepare COCO 2017 according to the [guidelines](https://github.com/open-mmlab/mmdetection/blob/2.x/docs/en/1_exist_data_model.md).
22 |
23 | ## 3. Main Results on COCO with Pretrained Models
24 |
25 |
26 | | Method | Backbone | Pretrain | Lr schd | Aug | box AP | mask AP | Config | Download |
27 | |------------|----------|-------------|:-------:|:---:|:------:|:-------:|------------------------------------------------------|----------|
28 | | RetinaNet | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 1x | No | 43.1 | - | [config](configs/retinanet_transx_t_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_tiny.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_tiny.pth) |
29 | | RetinaNet | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 1x | No | 46.4 | - | [config](configs/retinanet_transx_s_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_small.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_small.pth) |
30 | | RetinaNet | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 1x | No | 47.6 | - | [config](configs/retinanet_transx_b_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/reinanet_base.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/retinanet_base.pth) |
31 | | Mask R-CNN | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 1x | No | 44.5 | 40.7 | [config](configs/mask_rcnn_transx_t_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_tiny.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_tiny.pth) |
32 | | Mask R-CNN | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 1x | No | 47.7 | 43.1 | [config](configs/mask_rcnn_transx_s_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_small.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_small.pth) |
33 | | Mask R-CNN | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 1x | No | 48.8 | 43.8 | [config](configs/mask_rcnn_transx_b_fpn_1x_coco.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_base.log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/maskrcnn_base.pth) |
34 |
35 |
36 | ## 4. Train
37 | To train ``TransXNet-T + RetinaNet`` models on COCO train2017 with 8 GPUs (single node), run:
38 | ```
39 | bash dist_train.sh configs/retinanet_transx_t_fpn_1x_coco.py 8
40 | ```
41 | To train ``TransXNet-T + Mask R-CNN`` models on COCO train2017 with 8 GPUs (single node), run:
42 | ```
43 | bash dist_train.sh configs/mask_rcnn_transx_t_fpn_1x_coco.py 8
44 | ```
45 |
46 | ## 5. Validation
47 | To evaluate ``TransXNet-T + RetinaNet`` models on COCO val2017, run:
48 | ```
49 | bash dist_test.sh configs/retinanet_transx_t_fpn_1x_coco.py /path/to/checkpoint_file 8 --out results.pkl --eval bbox
50 | ```
51 | To evaluate ``TransXNet-T + Mask R-CNN`` models on COCO val2017, run:
52 | ```
53 | bash dist_test.sh configs/mask_rcnn_transx_t_fpn_1x_coco.py /path/to/checkpoint_file 8 --out results.pkl --eval bbox segm
54 | ```
55 |
56 | ## Citation
57 | If you find this project useful for your research, please consider citing:
58 | ```
59 | @article{lou2023transxnet,
60 |   title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition},
61 |   author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu},
62 |   journal={IEEE Transactions on Neural Networks and Learning Systems},
63 |   year={2025}
64 | }
65 | ```
66 |
67 | ## Contact
68 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com.
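As a pointer for readers writing their own configs: a TransXNet detection config composes the shared ``_base_`` files and swaps in the backbone roughly as sketched below. This is an abridged, illustrative reconstruction — the backbone type name, the ``pretrained`` field, and the FPN ``in_channels`` are placeholders to check against ``object_detection/transxnet.py`` and the actual ``configs/retinanet_transx_t_fpn_1x_coco.py``:
```
# Abridged sketch only; field values below are illustrative placeholders.
_base_ = [
    '_base_/models/retinanet_r50_fpn.py',
    '_base_/datasets/coco_detection.py',
    '_base_/schedules/schedule_1x.py',
    '_base_/default_runtime.py',
]
model = dict(
    backbone=dict(
        _delete_=True,               # drop the ResNet-50 backbone defaults
        type='transxnet_t',          # registered by object_detection/transxnet.py
        pretrained='path/to/transx-t.pth.tar'),
    neck=dict(in_channels=[48, 96, 224, 448]))  # placeholder stage widths
```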
69 | -------------------------------------------------------------------------------- /object_detection/analysis_tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from collections import defaultdict 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import seaborn as sns 8 | 9 | 10 | def cal_train_time(log_dicts, args): 11 | for i, log_dict in enumerate(log_dicts): 12 | print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}') 13 | all_times = [] 14 | for epoch in log_dict.keys(): 15 | if args.include_outliers: 16 | all_times.append(log_dict[epoch]['time']) 17 | else: 18 | all_times.append(log_dict[epoch]['time'][1:]) 19 | all_times = np.array(all_times) 20 | epoch_ave_time = all_times.mean(-1) 21 | slowest_epoch = epoch_ave_time.argmax() 22 | fastest_epoch = epoch_ave_time.argmin() 23 | std_over_epoch = epoch_ave_time.std() 24 | print(f'slowest epoch {slowest_epoch + 1}, ' 25 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}') 26 | print(f'fastest epoch {fastest_epoch + 1}, ' 27 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}') 28 | print(f'time std over epochs is {std_over_epoch:.4f}') 29 | print(f'average iter time: {np.mean(all_times):.4f} s/iter') 30 | print() 31 | 32 | 33 | def plot_curve(log_dicts, args): 34 | if args.backend is not None: 35 | plt.switch_backend(args.backend) 36 | sns.set_style(args.style) 37 | # if legend is None, use {filename}_{key} as legend 38 | legend = args.legend 39 | if legend is None: 40 | legend = [] 41 | for json_log in args.json_logs: 42 | for metric in args.keys: 43 | legend.append(f'{json_log}_{metric}') 44 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 45 | metrics = args.keys 46 | 47 | num_metrics = len(metrics) 48 | for i, log_dict in enumerate(log_dicts): 49 | epochs = list(log_dict.keys()) 50 | for j, metric in enumerate(metrics): 51 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 52 | if metric not in log_dict[epochs[0]]: 53 | raise KeyError( 54 | f'{args.json_logs[i]} does not contain metric {metric}') 55 | 56 | if 'mAP' in metric: 57 | xs = np.arange(1, max(epochs) + 1) 58 | ys = [] 59 | for epoch in epochs: 60 | ys += log_dict[epoch][metric] 61 | ax = plt.gca() 62 | ax.set_xticks(xs) 63 | plt.xlabel('epoch') 64 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o') 65 | else: 66 | xs = [] 67 | ys = [] 68 | num_iters_per_epoch = log_dict[epochs[0]]['iter'][-1] 69 | for epoch in epochs: 70 | iters = log_dict[epoch]['iter'] 71 | if log_dict[epoch]['mode'][-1] == 'val': 72 | iters = iters[:-1] 73 | xs.append( 74 | np.array(iters) + (epoch - 1) * num_iters_per_epoch) 75 | ys.append(np.array(log_dict[epoch][metric][:len(iters)])) 76 | xs = np.concatenate(xs) 77 | ys = np.concatenate(ys) 78 | plt.xlabel('iter') 79 | plt.plot( 80 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) 81 | plt.legend() 82 | if args.title is not None: 83 | plt.title(args.title) 84 | if args.out is None: 85 | plt.show() 86 | else: 87 | print(f'save curve to: {args.out}') 88 | plt.savefig(args.out) 89 | plt.cla() 90 | 91 | 92 | def add_plot_parser(subparsers): 93 | parser_plt = subparsers.add_parser( 94 | 'plot_curve', help='parser for plotting curves') 95 | parser_plt.add_argument( 96 | 'json_logs', 97 | type=str, 98 | nargs='+', 99 | help='path of train log in json format') 100 | parser_plt.add_argument( 101 | '--keys', 102 | type=str, 103 | nargs='+', 104 | default=['bbox_mAP'], 105 | 
help='the metric that you want to plot') 106 | parser_plt.add_argument('--title', type=str, help='title of figure') 107 | parser_plt.add_argument( 108 | '--legend', 109 | type=str, 110 | nargs='+', 111 | default=None, 112 | help='legend of each plot') 113 | parser_plt.add_argument( 114 | '--backend', type=str, default=None, help='backend of plt') 115 | parser_plt.add_argument( 116 | '--style', type=str, default='dark', help='style of plt') 117 | parser_plt.add_argument('--out', type=str, default=None) 118 | 119 | 120 | def add_time_parser(subparsers): 121 | parser_time = subparsers.add_parser( 122 | 'cal_train_time', 123 | help='parser for computing the average time per training iteration') 124 | parser_time.add_argument( 125 | 'json_logs', 126 | type=str, 127 | nargs='+', 128 | help='path of train log in json format') 129 | parser_time.add_argument( 130 | '--include-outliers', 131 | action='store_true', 132 | help='include the first value of every epoch when computing ' 133 | 'the average time') 134 | 135 | 136 | def parse_args(): 137 | parser = argparse.ArgumentParser(description='Analyze Json Log') 138 | # currently only support plot curve and calculate average train time 139 | subparsers = parser.add_subparsers(dest='task', help='task parser') 140 | add_plot_parser(subparsers) 141 | add_time_parser(subparsers) 142 | args = parser.parse_args() 143 | return args 144 | 145 | 146 | def load_json_logs(json_logs): 147 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 148 | # keys of sub dict is different metrics, e.g. memory, bbox_mAP 149 | # value of sub dict is a list of corresponding values of all iterations 150 | log_dicts = [dict() for _ in json_logs] 151 | for json_log, log_dict in zip(json_logs, log_dicts): 152 | with open(json_log, 'r') as log_file: 153 | for line in log_file: 154 | log = json.loads(line.strip()) 155 | # skip lines without `epoch` field 156 | if 'epoch' not in log: 157 | continue 158 | epoch = log.pop('epoch') 159 | if epoch not in log_dict: 160 | log_dict[epoch] = defaultdict(list) 161 | for k, v in log.items(): 162 | log_dict[epoch][k].append(v) 163 | return log_dicts 164 | 165 | 166 | def main(): 167 | args = parse_args() 168 | 169 | json_logs = args.json_logs 170 | for json_log in json_logs: 171 | assert json_log.endswith('.json') 172 | 173 | log_dicts = load_json_logs(json_logs) 174 | 175 | eval(args.task)(log_dicts, args) 176 | 177 | 178 | if __name__ == '__main__': 179 | main() 180 | -------------------------------------------------------------------------------- /object_detection/analysis_tools/analyze_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | 4 | import mmcv 5 | import numpy as np 6 | from mmcv import Config, DictAction 7 | 8 | from mmdet.core.evaluation import eval_map 9 | from mmdet.core.visualization import imshow_gt_det_bboxes 10 | from mmdet.datasets import build_dataset, get_loading_pipeline 11 | 12 | 13 | def bbox_map_eval(det_result, annotation): 14 | """Evaluate mAP of single image det result. 15 | 16 | Args: 17 | det_result (list[list]): [[cls1_det, cls2_det, ...], ...]. 18 | The outer list indicates images, and the inner list indicates 19 | per-class detected bboxes. 
20 | annotation (dict): Ground truth annotations where keys of 21 | annotations are: 22 | 23 | - bboxes: numpy array of shape (n, 4) 24 | - labels: numpy array of shape (n, ) 25 | - bboxes_ignore (optional): numpy array of shape (k, 4) 26 | - labels_ignore (optional): numpy array of shape (k, ) 27 | 28 | Returns: 29 | float: mAP 30 | """ 31 | 32 | # use only bbox det result 33 | if isinstance(det_result, tuple): 34 | bbox_det_result = [det_result[0]] 35 | else: 36 | bbox_det_result = [det_result] 37 | # mAP 38 | iou_thrs = np.linspace( 39 | .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) 40 | mean_aps = [] 41 | for thr in iou_thrs: 42 | mean_ap, _ = eval_map( 43 | bbox_det_result, [annotation], iou_thr=thr, logger='silent') 44 | mean_aps.append(mean_ap) 45 | return sum(mean_aps) / len(mean_aps) 46 | 47 | 48 | class ResultVisualizer(object): 49 | """Display and save evaluation results. 50 | 51 | Args: 52 | show (bool): Whether to show the image. Default: True 53 | wait_time (float): Value of waitKey param. Default: 0. 54 | score_thr (float): Minimum score of bboxes to be shown. 55 | Default: 0 56 | """ 57 | 58 | def __init__(self, show=False, wait_time=0, score_thr=0): 59 | self.show = show 60 | self.wait_time = wait_time 61 | self.score_thr = score_thr 62 | 63 | def _save_image_gts_results(self, dataset, results, mAPs, out_dir=None): 64 | mmcv.mkdir_or_exist(out_dir) 65 | 66 | for mAP_info in mAPs: 67 | index, mAP = mAP_info 68 | data_info = dataset.prepare_train_img(index) 69 | 70 | # calc save file path 71 | filename = data_info['filename'] 72 | if data_info['img_prefix'] is not None: 73 | filename = osp.join(data_info['img_prefix'], filename) 74 | else: 75 | filename = data_info['filename'] 76 | fname, name = osp.splitext(osp.basename(filename)) 77 | save_filename = fname + '_' + str(round(mAP, 3)) + name 78 | out_file = osp.join(out_dir, save_filename) 79 | imshow_gt_det_bboxes( 80 | data_info['img'], 81 | data_info, 82 | results[index], 83 | dataset.CLASSES, 84 | show=self.show, 85 | score_thr=self.score_thr, 86 | wait_time=self.wait_time, 87 | out_file=out_file) 88 | 89 | def evaluate_and_show(self, 90 | dataset, 91 | results, 92 | topk=20, 93 | show_dir='work_dir', 94 | eval_fn=None): 95 | """Evaluate and show results. 96 | 97 | Args: 98 | dataset (Dataset): A PyTorch dataset. 99 | results (list): Det results from test results pkl file 100 | topk (int): Number of the highest topk and 101 | lowest topk after evaluation index sorting. Default: 20 102 | show_dir (str, optional): The filename to write the image. 
103 | Default: 'work_dir' 104 | eval_fn (callable, optional): Eval function, Default: None 105 | """ 106 | 107 | assert topk > 0 108 | if (topk * 2) > len(dataset): 109 | topk = len(dataset) // 2 110 | 111 | if eval_fn is None: 112 | eval_fn = bbox_map_eval 113 | else: 114 | assert callable(eval_fn) 115 | 116 | prog_bar = mmcv.ProgressBar(len(results)) 117 | _mAPs = {} 118 | for i, (result, ) in enumerate(zip(results)): 119 | # self.dataset[i] should not call directly 120 | # because there is a risk of mismatch 121 | data_info = dataset.prepare_train_img(i) 122 | mAP = eval_fn(result, data_info['ann_info']) 123 | _mAPs[i] = mAP 124 | prog_bar.update() 125 | 126 | # descending select topk image 127 | _mAPs = list(sorted(_mAPs.items(), key=lambda kv: kv[1])) 128 | good_mAPs = _mAPs[-topk:] 129 | bad_mAPs = _mAPs[:topk] 130 | 131 | good_dir = osp.abspath(osp.join(show_dir, 'good')) 132 | bad_dir = osp.abspath(osp.join(show_dir, 'bad')) 133 | self._save_image_gts_results(dataset, results, good_mAPs, good_dir) 134 | self._save_image_gts_results(dataset, results, bad_mAPs, bad_dir) 135 | 136 | 137 | def parse_args(): 138 | parser = argparse.ArgumentParser( 139 | description='MMDet eval image prediction result for each') 140 | parser.add_argument('config', help='test config file path') 141 | parser.add_argument( 142 | 'prediction_path', help='prediction path where test pkl result') 143 | parser.add_argument( 144 | 'show_dir', help='directory where painted images will be saved') 145 | parser.add_argument('--show', action='store_true', help='show results') 146 | parser.add_argument( 147 | '--wait-time', 148 | type=float, 149 | default=0, 150 | help='the interval of show (s), 0 is block') 151 | parser.add_argument( 152 | '--topk', 153 | default=20, 154 | type=int, 155 | help='saved Number of the highest topk ' 156 | 'and lowest topk after index sorting') 157 | parser.add_argument( 158 | '--show-score-thr', 159 | type=float, 160 | default=0, 161 | help='score threshold (default: 0.)') 162 | parser.add_argument( 163 | '--cfg-options', 164 | nargs='+', 165 | action=DictAction, 166 | help='override some settings in the used config, the key-value pair ' 167 | 'in xxx=yyy format will be merged into config file. If the value to ' 168 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 169 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 170 | 'Note that the quotation marks are necessary and that no white space ' 171 | 'is allowed.') 172 | args = parser.parse_args() 173 | return args 174 | 175 | 176 | def main(): 177 | args = parse_args() 178 | 179 | mmcv.check_file_exist(args.prediction_path) 180 | 181 | cfg = Config.fromfile(args.config) 182 | if args.cfg_options is not None: 183 | cfg.merge_from_dict(args.cfg_options) 184 | cfg.data.test.test_mode = True 185 | # import modules from string list. 
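# ('custom_imports' in a config lists extra modules to import -- e.g. a file
# registering a custom backbone such as transxnet.py -- so that the mmdet
# registry can resolve types that are not built in; the import itself is done
# by mmcv.utils.import_modules_from_strings below.)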
186 |     if cfg.get('custom_imports', None):
187 |         from mmcv.utils import import_modules_from_strings
188 |         import_modules_from_strings(**cfg['custom_imports'])
189 |
190 |     cfg.data.test.pop('samples_per_gpu', 0)
191 |     cfg.data.test.pipeline = get_loading_pipeline(cfg.data.train.pipeline)
192 |     dataset = build_dataset(cfg.data.test)
193 |     outputs = mmcv.load(args.prediction_path)
194 |
195 |     result_visualizer = ResultVisualizer(args.show, args.wait_time,
196 |                                          args.show_score_thr)
197 |     result_visualizer.evaluate_and_show(
198 |         dataset, outputs, topk=args.topk, show_dir=args.show_dir)
199 |
200 |
201 | if __name__ == '__main__':
202 |     main()
203 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/benchmark.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 |
4 | import torch
5 | from mmcv import Config, DictAction
6 | from mmcv.cnn import fuse_conv_bn
7 | from mmcv.parallel import MMDataParallel
8 | from mmcv.runner import load_checkpoint, wrap_fp16_model
9 |
10 | from mmdet.datasets import (build_dataloader, build_dataset,
11 |                             replace_ImageToTensor)
12 | from mmdet.models import build_detector
13 |
14 |
15 | def parse_args():
16 |     parser = argparse.ArgumentParser(description='MMDet benchmark a model')
17 |     parser.add_argument('config', help='test config file path')
18 |     parser.add_argument('checkpoint', help='checkpoint file')
19 |     parser.add_argument(
20 |         '--log-interval', type=int, default=50, help='interval of logging')
21 |     parser.add_argument(
22 |         '--fuse-conv-bn',
23 |         action='store_true',
24 |         help='Whether to fuse conv and bn; this will slightly increase '
25 |         'the inference speed')
26 |     parser.add_argument(
27 |         '--cfg-options',
28 |         nargs='+',
29 |         action=DictAction,
30 |         help='override some settings in the used config, the key-value pair '
31 |         'in xxx=yyy format will be merged into config file. If the value to '
32 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
33 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
34 |         'Note that the quotation marks are necessary and that no white space '
35 |         'is allowed.')
36 |     args = parser.parse_args()
37 |     return args
38 |
39 |
40 | def main():
41 |     args = parse_args()
42 |
43 |     cfg = Config.fromfile(args.config)
44 |     if args.cfg_options is not None:
45 |         cfg.merge_from_dict(args.cfg_options)
46 |     # import modules from string list.
47 |     if cfg.get('custom_imports', None):
48 |         from mmcv.utils import import_modules_from_strings
49 |         import_modules_from_strings(**cfg['custom_imports'])
50 |     # set cudnn_benchmark
51 |     if cfg.get('cudnn_benchmark', False):
52 |         torch.backends.cudnn.benchmark = True
53 |     cfg.model.pretrained = None
54 |     cfg.data.test.test_mode = True
55 |
56 |     # build the dataloader
57 |     samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
58 |     if samples_per_gpu > 1:
59 |         # Replace 'ImageToTensor' with 'DefaultFormatBundle'
60 |         cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
61 |     dataset = build_dataset(cfg.data.test)
62 |     data_loader = build_dataloader(
63 |         dataset,
64 |         samples_per_gpu=1,
65 |         workers_per_gpu=cfg.data.workers_per_gpu,
66 |         dist=False,
67 |         shuffle=False)
68 |
69 |     # build the model and load checkpoint
70 |     cfg.model.train_cfg = None
71 |     model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
72 |     fp16_cfg = cfg.get('fp16', None)
73 |     if fp16_cfg is not None:
74 |         wrap_fp16_model(model)
75 |     load_checkpoint(model, args.checkpoint, map_location='cpu')
76 |     if args.fuse_conv_bn:
77 |         model = fuse_conv_bn(model)
78 |
79 |     model = MMDataParallel(model, device_ids=[0])
80 |
81 |     model.eval()
82 |
83 |     # the first several iterations may be very slow so skip them
84 |     num_warmup = 5
85 |     pure_inf_time = 0
86 |
87 |     # benchmark with 2000 images and take the average
88 |     for i, data in enumerate(data_loader):
89 |
90 |         torch.cuda.synchronize()
91 |         start_time = time.perf_counter()
92 |
93 |         with torch.no_grad():
94 |             model(return_loss=False, rescale=True, **data)
95 |
96 |         torch.cuda.synchronize()
97 |         elapsed = time.perf_counter() - start_time
98 |
99 |         if i >= num_warmup:
100 |             pure_inf_time += elapsed
101 |             if (i + 1) % args.log_interval == 0:
102 |                 fps = (i + 1 - num_warmup) / pure_inf_time
103 |                 print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')
104 |
105 |         if (i + 1) == 2000:
106 |             # elapsed is already included in pure_inf_time above
107 |             fps = (i + 1 - num_warmup) / pure_inf_time
108 |             print(f'Overall fps: {fps:.1f} img / s')
109 |             break
110 |
111 |
112 | if __name__ == '__main__':
113 |     main()
114 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/eval_metric.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import mmcv
4 | from mmcv import Config, DictAction
5 |
6 | from mmdet.datasets import build_dataset
7 |
8 |
9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='Evaluate metric of the '
11 |                                      'results saved in pkl format')
12 |     parser.add_argument('config', help='Config of the model')
13 |     parser.add_argument('pkl_results', help='Results in pickle format')
14 |     parser.add_argument(
15 |         '--format-only',
16 |         action='store_true',
17 |         help='Format the output results without performing evaluation. It is '
18 |         'useful when you want to format the result to a specific format and '
19 |         'submit it to the test server')
20 |     parser.add_argument(
21 |         '--eval',
22 |         type=str,
23 |         nargs='+',
24 |         help='Evaluation metrics, which depends on the dataset, e.g., "bbox",'
25 |         ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
26 |     parser.add_argument(
27 |         '--cfg-options',
28 |         nargs='+',
29 |         action=DictAction,
30 |         help='override some settings in the used config, the key-value pair '
31 |         'in xxx=yyy format will be merged into config file. If the value to '
32 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
33 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
34 |         'Note that the quotation marks are necessary and that no white space '
35 |         'is allowed.')
36 |     parser.add_argument(
37 |         '--eval-options',
38 |         nargs='+',
39 |         action=DictAction,
40 |         help='custom options for evaluation, the key-value pair in xxx=yyy '
41 |         'format will be kwargs for dataset.evaluate() function')
42 |     args = parser.parse_args()
43 |     return args
44 |
45 |
46 | def main():
47 |     args = parse_args()
48 |
49 |     cfg = Config.fromfile(args.config)
50 |     assert args.eval or args.format_only, (
51 |         'Please specify at least one operation (eval/format the results) with '
52 |         'the argument "--eval", "--format-only"')
53 |     if args.eval and args.format_only:
54 |         raise ValueError('--eval and --format_only cannot both be specified')
55 |
56 |     if args.cfg_options is not None:
57 |         cfg.merge_from_dict(args.cfg_options)
58 |     # import modules from string list.
59 |     if cfg.get('custom_imports', None):
60 |         from mmcv.utils import import_modules_from_strings
61 |         import_modules_from_strings(**cfg['custom_imports'])
62 |     cfg.data.test.test_mode = True
63 |
64 |     dataset = build_dataset(cfg.data.test)
65 |     outputs = mmcv.load(args.pkl_results)
66 |
67 |     kwargs = {} if args.eval_options is None else args.eval_options
68 |     if args.format_only:
69 |         dataset.format_results(outputs, **kwargs)
70 |     if args.eval:
71 |         eval_kwargs = cfg.get('evaluation', {}).copy()
72 |         # hard-coded way to remove EvalHook args
73 |         for key in [
74 |                 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
75 |                 'rule'
76 |         ]:
77 |             eval_kwargs.pop(key, None)
78 |         eval_kwargs.update(dict(metric=args.eval, **kwargs))
79 |         print(dataset.evaluate(outputs, **eval_kwargs))
80 |
81 |
82 | if __name__ == '__main__':
83 |     main()
84 |
--------------------------------------------------------------------------------
/object_detection/analysis_tools/get_flops.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | import torch
4 | from mmcv import Config, DictAction
5 |
6 | from mmdet.models import build_detector
7 |
8 | try:
9 |     from mmcv.cnn import get_model_complexity_info
10 | except ImportError:
11 |     raise ImportError('Please upgrade mmcv to >0.6.2')
12 |
13 | sys.path.append(".")  # run from the object_detection/ root
14 | import transxnet  # noqa: F401  # registers the TransXNet backbone with mmdet
15 |
16 | def parse_args():
17 |     parser = argparse.ArgumentParser(description='Get the FLOPs of a detector')
18 |     parser.add_argument('config', help='train config file path')
19 |     parser.add_argument(
20 |         '--shape',
21 |         type=int,
22 |         nargs='+',
23 |         default=[1280, 800],
24 |         help='input image size')
25 |     parser.add_argument(
26 |         '--cfg-options',
27 |         nargs='+',
28 |         action=DictAction,
29 |         help='override some settings in the used config, the key-value pair '
30 |         'in xxx=yyy format will be merged into config file. If the value to '
31 |         'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
32 |         'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
key="[(a,b),(c,d)]" ' 33 | 'Note that the quotation marks are necessary and that no white space ' 34 | 'is allowed.') 35 | args = parser.parse_args() 36 | return args 37 | 38 | 39 | def main(): 40 | 41 | args = parse_args() 42 | 43 | if len(args.shape) == 1: 44 | input_shape = (1, 9, args.shape[0], args.shape[0]) 45 | elif len(args.shape) == 2: 46 | input_shape = (3, ) + tuple(args.shape) 47 | else: 48 | raise ValueError('invalid input shape') 49 | 50 | cfg = Config.fromfile(args.config) 51 | if args.cfg_options is not None: 52 | cfg.merge_from_dict(args.cfg_options) 53 | # import modules from string list. 54 | if cfg.get('custom_imports', None): 55 | from mmcv.utils import import_modules_from_strings 56 | import_modules_from_strings(**cfg['custom_imports']) 57 | 58 | model = build_detector( 59 | cfg.model, 60 | train_cfg=cfg.get('train_cfg'), 61 | test_cfg=cfg.get('test_cfg')) 62 | if torch.cuda.is_available(): 63 | model.cuda() 64 | model.eval() 65 | 66 | if hasattr(model, 'forward_dummy'): 67 | model.forward = model.forward_dummy 68 | else: 69 | raise NotImplementedError( 70 | 'FLOPs counter is currently not currently supported with {}'. 71 | format(model.__class__.__name__)) 72 | 73 | flops, params = get_model_complexity_info(model, input_shape) 74 | split_line = '=' * 30 75 | print(f'{split_line}\nInput shape: {input_shape}\n' 76 | f'Flops: {flops}\nParams: {params}\n{split_line}') 77 | print('!!!Please be cautious if you use the results in papers. ' 78 | 'You may need to check if all ops are supported and verify that the ' 79 | 'flops computation is correct.') 80 | 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /object_detection/analysis_tools/robustness_eval.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | from argparse import ArgumentParser 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | 8 | def print_coco_results(results): 9 | 10 | def _print(result, ap=1, iouThr=None, areaRng='all', maxDets=100): 11 | titleStr = 'Average Precision' if ap == 1 else 'Average Recall' 12 | typeStr = '(AP)' if ap == 1 else '(AR)' 13 | iouStr = '0.50:0.95' \ 14 | if iouThr is None else f'{iouThr:0.2f}' 15 | iStr = f' {titleStr:<18} {typeStr} @[ IoU={iouStr:<9} | ' 16 | iStr += f'area={areaRng:>6s} | maxDets={maxDets:>3d} ] = {result:0.3f}' 17 | print(iStr) 18 | 19 | stats = np.zeros((12, )) 20 | stats[0] = _print(results[0], 1) 21 | stats[1] = _print(results[1], 1, iouThr=.5) 22 | stats[2] = _print(results[2], 1, iouThr=.75) 23 | stats[3] = _print(results[3], 1, areaRng='small') 24 | stats[4] = _print(results[4], 1, areaRng='medium') 25 | stats[5] = _print(results[5], 1, areaRng='large') 26 | stats[6] = _print(results[6], 0, maxDets=1) 27 | stats[7] = _print(results[7], 0, maxDets=10) 28 | stats[8] = _print(results[8], 0) 29 | stats[9] = _print(results[9], 0, areaRng='small') 30 | stats[10] = _print(results[10], 0, areaRng='medium') 31 | stats[11] = _print(results[11], 0, areaRng='large') 32 | 33 | 34 | def get_coco_style_results(filename, 35 | task='bbox', 36 | metric=None, 37 | prints='mPC', 38 | aggregate='benchmark'): 39 | 40 | assert aggregate in ['benchmark', 'all'] 41 | 42 | if prints == 'all': 43 | prints = ['P', 'mPC', 'rPC'] 44 | elif isinstance(prints, str): 45 | prints = [prints] 46 | for p in prints: 47 | assert p in ['P', 'mPC', 'rPC'] 48 | 49 | if metric is None: 50 | metrics = [ 51 | 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 
'AR10', 'AR100', 52 | 'ARs', 'ARm', 'ARl' 53 | ] 54 | elif isinstance(metric, list): 55 | metrics = metric 56 | else: 57 | metrics = [metric] 58 | 59 | for metric_name in metrics: 60 | assert metric_name in [ 61 | 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100', 62 | 'ARs', 'ARm', 'ARl' 63 | ] 64 | 65 | eval_output = mmcv.load(filename) 66 | 67 | num_distortions = len(list(eval_output.keys())) 68 | results = np.zeros((num_distortions, 6, len(metrics)), dtype='float32') 69 | 70 | for corr_i, distortion in enumerate(eval_output): 71 | for severity in eval_output[distortion]: 72 | for metric_j, metric_name in enumerate(metrics): 73 | mAP = eval_output[distortion][severity][task][metric_name] 74 | results[corr_i, severity, metric_j] = mAP 75 | 76 | P = results[0, 0, :] 77 | if aggregate == 'benchmark': 78 | mPC = np.mean(results[:15, 1:, :], axis=(0, 1)) 79 | else: 80 | mPC = np.mean(results[:, 1:, :], axis=(0, 1)) 81 | rPC = mPC / P 82 | 83 | print(f'\nmodel: {osp.basename(filename)}') 84 | if metric is None: 85 | if 'P' in prints: 86 | print(f'Performance on Clean Data [P] ({task})') 87 | print_coco_results(P) 88 | if 'mPC' in prints: 89 | print(f'Mean Performance under Corruption [mPC] ({task})') 90 | print_coco_results(mPC) 91 | if 'rPC' in prints: 92 | print(f'Relative Performance under Corruption [rPC] ({task})') 93 | print_coco_results(rPC) 94 | else: 95 | if 'P' in prints: 96 | print(f'Performance on Clean Data [P] ({task})') 97 | for metric_i, metric_name in enumerate(metrics): 98 | print(f'{metric_name:5} = {P[metric_i]:0.3f}') 99 | if 'mPC' in prints: 100 | print(f'Mean Performance under Corruption [mPC] ({task})') 101 | for metric_i, metric_name in enumerate(metrics): 102 | print(f'{metric_name:5} = {mPC[metric_i]:0.3f}') 103 | if 'rPC' in prints: 104 | print(f'Relative Performance under Corruption [rPC] ({task})') 105 | for metric_i, metric_name in enumerate(metrics): 106 | print(f'{metric_name:5} => {rPC[metric_i] * 100:0.1f} %') 107 | 108 | return results 109 | 110 | 111 | def get_voc_style_results(filename, prints='mPC', aggregate='benchmark'): 112 | 113 | assert aggregate in ['benchmark', 'all'] 114 | 115 | if prints == 'all': 116 | prints = ['P', 'mPC', 'rPC'] 117 | elif isinstance(prints, str): 118 | prints = [prints] 119 | for p in prints: 120 | assert p in ['P', 'mPC', 'rPC'] 121 | 122 | eval_output = mmcv.load(filename) 123 | 124 | num_distortions = len(list(eval_output.keys())) 125 | results = np.zeros((num_distortions, 6, 20), dtype='float32') 126 | 127 | for i, distortion in enumerate(eval_output): 128 | for severity in eval_output[distortion]: 129 | mAP = [ 130 | eval_output[distortion][severity][j]['ap'] 131 | for j in range(len(eval_output[distortion][severity])) 132 | ] 133 | results[i, severity, :] = mAP 134 | 135 | P = results[0, 0, :] 136 | if aggregate == 'benchmark': 137 | mPC = np.mean(results[:15, 1:, :], axis=(0, 1)) 138 | else: 139 | mPC = np.mean(results[:, 1:, :], axis=(0, 1)) 140 | rPC = mPC / P 141 | 142 | print(f'\nmodel: {osp.basename(filename)}') 143 | if 'P' in prints: 144 | print(f'Performance on Clean Data [P] in AP50 = {np.mean(P):0.3f}') 145 | if 'mPC' in prints: 146 | print('Mean Performance under Corruption [mPC] in AP50 = ' 147 | f'{np.mean(mPC):0.3f}') 148 | if 'rPC' in prints: 149 | print('Relative Performance under Corruption [rPC] in % = ' 150 | f'{np.mean(rPC) * 100:0.1f}') 151 | 152 | return np.mean(results, axis=2, keepdims=True) 153 | 154 | 155 | def get_results(filename, 156 | dataset='coco', 157 | task='bbox', 158 
| metric=None, 159 | prints='mPC', 160 | aggregate='benchmark'): 161 | assert dataset in ['coco', 'voc', 'cityscapes'] 162 | 163 | if dataset in ['coco', 'cityscapes']: 164 | results = get_coco_style_results( 165 | filename, 166 | task=task, 167 | metric=metric, 168 | prints=prints, 169 | aggregate=aggregate) 170 | elif dataset == 'voc': 171 | if task != 'bbox': 172 | print('Only bbox analysis is supported for Pascal VOC') 173 | print('Will report bbox results\n') 174 | if metric not in [None, ['AP'], ['AP50']]: 175 | print('Only the AP50 metric is supported for Pascal VOC') 176 | print('Will report AP50 metric\n') 177 | results = get_voc_style_results( 178 | filename, prints=prints, aggregate=aggregate) 179 | 180 | return results 181 | 182 | 183 | def get_distortions_from_file(filename): 184 | 185 | eval_output = mmcv.load(filename) 186 | 187 | return get_distortions_from_results(eval_output) 188 | 189 | 190 | def get_distortions_from_results(eval_output): 191 | distortions = [] 192 | for i, distortion in enumerate(eval_output): 193 | distortions.append(distortion.replace('_', ' ')) 194 | return distortions 195 | 196 | 197 | def main(): 198 | parser = ArgumentParser(description='Corruption Result Analysis') 199 | parser.add_argument('filename', help='result file path') 200 | parser.add_argument( 201 | '--dataset', 202 | type=str, 203 | choices=['coco', 'voc', 'cityscapes'], 204 | default='coco', 205 | help='dataset type') 206 | parser.add_argument( 207 | '--task', 208 | type=str, 209 | nargs='+', 210 | choices=['bbox', 'segm'], 211 | default=['bbox'], 212 | help='task to report') 213 | parser.add_argument( 214 | '--metric', 215 | nargs='+', 216 | choices=[ 217 | None, 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 218 | 'AR100', 'ARs', 'ARm', 'ARl' 219 | ], 220 | default=None, 221 | help='metric to report') 222 | parser.add_argument( 223 | '--prints', 224 | type=str, 225 | nargs='+', 226 | choices=['P', 'mPC', 'rPC'], 227 | default='mPC', 228 | help='corruption benchmark metric to print') 229 | parser.add_argument( 230 | '--aggregate', 231 | type=str, 232 | choices=['all', 'benchmark'], 233 | default='benchmark', 234 | help='aggregate all results or only those \ 235 | for benchmark corruptions') 236 | 237 | args = parser.parse_args() 238 | 239 | for task in args.task: 240 | get_results( 241 | args.filename, 242 | dataset=args.dataset, 243 | task=task, 244 | metric=args.metric, 245 | prints=args.prints, 246 | aggregate=args.aggregate) 247 | 248 | 249 | if __name__ == '__main__': 250 | main() 251 | -------------------------------------------------------------------------------- /object_detection/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import io 3 | import os 4 | import os.path as osp 5 | import pkgutil 6 | import re 7 | import time 8 | import warnings 9 | from collections import OrderedDict 10 | from importlib import import_module 11 | from tempfile import TemporaryDirectory 12 | 13 | import torch 14 | import torchvision 15 | from torch.optim import Optimizer 16 | from torch.utils import model_zoo 17 | 18 | import mmcv 19 | from mmcv.parallel import is_module_wrapper 20 | from mmcv.runner.dist_utils import get_dist_info 21 | 22 | ENV_MMCV_HOME = 'MMCV_HOME' 23 | ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME' 24 | DEFAULT_CACHE_DIR = '~/.cache' 25 | 26 | 27 | def load_state_dict(module, state_dict, strict=False, logger=None): 28 | """Load state_dict to a module. 
29 | 30 | This method is modified from :meth:`torch.nn.Module.load_state_dict`. 31 | Default value for ``strict`` is set to ``False`` and the message for 32 | param mismatch will be shown even if strict is False. 33 | 34 | Args: 35 | module (Module): Module that receives the state_dict. 36 | state_dict (OrderedDict): Weights. 37 | strict (bool): whether to strictly enforce that the keys 38 | in :attr:`state_dict` match the keys returned by this module's 39 | :meth:`~torch.nn.Module.state_dict` function. Default: ``False``. 40 | logger (:obj:`logging.Logger`, optional): Logger to log the error 41 | message. If not specified, print function will be used. 42 | """ 43 | unexpected_keys = [] 44 | all_missing_keys = [] 45 | err_msg = [] 46 | 47 | metadata = getattr(state_dict, '_metadata', None) 48 | state_dict = state_dict.copy() 49 | if metadata is not None: 50 | state_dict._metadata = metadata 51 | 52 | # use _load_from_state_dict to enable checkpoint version control 53 | def load(module, prefix=''): 54 | # recursively check parallel module in case that the model has a 55 | # complicated structure, e.g., nn.Module(nn.Module(DDP)) 56 | if is_module_wrapper(module): 57 | module = module.module 58 | local_metadata = {} if metadata is None else metadata.get( 59 | prefix[:-1], {}) 60 | module._load_from_state_dict(state_dict, prefix, local_metadata, True, 61 | all_missing_keys, unexpected_keys, 62 | err_msg) 63 | for name, child in module._modules.items(): 64 | if child is not None: 65 | load(child, prefix + name + '.') 66 | 67 | load(module) 68 | load = None # break load->load reference cycle 69 | 70 | # ignore "num_batches_tracked" of BN layers 71 | missing_keys = [ 72 | key for key in all_missing_keys if 'num_batches_tracked' not in key 73 | ] 74 | 75 | if unexpected_keys: 76 | err_msg.append('unexpected key in source ' 77 | f'state_dict: {", ".join(unexpected_keys)}\n') 78 | if missing_keys: 79 | err_msg.append( 80 | f'missing keys in source state_dict: {", ".join(missing_keys)}\n') 81 | 82 | rank, _ = get_dist_info() 83 | if len(err_msg) > 0 and rank == 0: 84 | err_msg.insert( 85 | 0, 'The model and loaded state dict do not match exactly\n') 86 | err_msg = '\n'.join(err_msg) 87 | if strict: 88 | raise RuntimeError(err_msg) 89 | elif logger is not None: 90 | logger.warning(err_msg) 91 | else: 92 | print(err_msg) 93 | 94 | 95 | 96 | 97 | class CheckpointLoader: 98 | """A general checkpoint loader to manage all schemes.""" 99 | 100 | _schemes = {} 101 | 102 | @classmethod 103 | def _register_scheme(cls, prefixes, loader, force=False): 104 | if isinstance(prefixes, str): 105 | prefixes = [prefixes] 106 | else: 107 | assert isinstance(prefixes, (list, tuple)) 108 | for prefix in prefixes: 109 | if (prefix not in cls._schemes) or force: 110 | cls._schemes[prefix] = loader 111 | else: 112 | raise KeyError( 113 | f'{prefix} is already registered as a loader backend, ' 114 | 'add "force=True" if you want to override it') 115 | # sort, longer prefixes take priority 116 | cls._schemes = OrderedDict( 117 | sorted(cls._schemes.items(), key=lambda t: t[0], reverse=True)) 118 | 119 | @classmethod 120 | def register_scheme(cls, prefixes, loader=None, force=False): 121 | """Register a loader to CheckpointLoader. 122 | 123 | This method can be used as a normal class method or a decorator. 124 | 125 | Args: 126 | prefixes (str or list[str] or tuple[str]): 127 | The prefix of the registered loader. 128 | loader (function, optional): The loader function to be registered. 
129 | When this method is used as a decorator, loader is None. 130 | Defaults to None. 131 | force (bool, optional): Whether to override the loader 132 | if the prefix has already been registered. Defaults to False. 133 | """ 134 | 135 | if loader is not None: 136 | cls._register_scheme(prefixes, loader, force=force) 137 | return 138 | 139 | def _register(loader_cls): 140 | cls._register_scheme(prefixes, loader_cls, force=force) 141 | return loader_cls 142 | 143 | return _register 144 | 145 | @classmethod 146 | def _get_checkpoint_loader(cls, path): 147 | """Finds a loader that supports the given path. Falls back to the local 148 | loader if no other loader is found. 149 | 150 | Args: 151 | path (str): checkpoint path 152 | 153 | Returns: 154 | loader (function): checkpoint loader 155 | """ 156 | 157 | for p in cls._schemes: 158 | if path.startswith(p): 159 | return cls._schemes[p] 160 | 161 | @classmethod 162 | def load_checkpoint(cls, filename, map_location=None, logger=None): 163 | """load checkpoint through URL scheme path. 164 | 165 | Args: 166 | filename (str): checkpoint file name with given prefix 167 | map_location (str, optional): Same as :func:`torch.load`. 168 | Default: None 169 | logger (:mod:`logging.Logger`, optional): The logger for message. 170 | Default: None 171 | 172 | Returns: 173 | dict or OrderedDict: The loaded checkpoint. 174 | """ 175 | 176 | checkpoint_loader = cls._get_checkpoint_loader(filename) 177 | class_name = checkpoint_loader.__name__ 178 | mmcv.print_log( 179 | f'load checkpoint from {class_name[10:]} path: {filename}', logger) 180 | return checkpoint_loader(filename, map_location) 181 | 182 | 183 | 184 | def _load_checkpoint(filename, map_location=None, logger=None): 185 | """Load checkpoint from somewhere (modelzoo, file, url). 186 | 187 | Args: 188 | filename (str): Accept local filepath, URL, ``torchvision://xxx``, 189 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for 190 | details. 191 | map_location (str, optional): Same as :func:`torch.load`. 192 | Default: None. 193 | logger (:mod:`logging.Logger`, optional): The logger for error message. 194 | Default: None 195 | 196 | Returns: 197 | dict or OrderedDict: The loaded checkpoint. It can be either an 198 | OrderedDict storing model weights or a dict containing other 199 | information, which depends on the checkpoint. 200 | """ 201 | return CheckpointLoader.load_checkpoint(filename, map_location, logger) 202 | 203 | 204 | def load_checkpoint(model, 205 | filename, 206 | map_location=None, 207 | strict=False, 208 | logger=None, 209 | revise_keys=[(r'^module\.', '')]): 210 | """Load checkpoint from a file or URI. 211 | 212 | Args: 213 | model (Module): Module to load checkpoint. 214 | filename (str): Accept local filepath, URL, ``torchvision://xxx``, 215 | ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for 216 | details. 217 | map_location (str): Same as :func:`torch.load`. 218 | strict (bool): Whether to allow different params for the model and 219 | checkpoint. 220 | logger (:mod:`logging.Logger` or None): The logger for error message. 221 | revise_keys (list): A list of customized keywords to modify the 222 | state_dict in checkpoint. Each item is a (pattern, replacement) 223 | pair of the regular expression operations. Default: strip 224 | the prefix 'module.' by [(r'^module\\.', '')]. 225 | 226 | Returns: 227 | dict or OrderedDict: The loaded checkpoint. 
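        Example (illustrative usage)::

            >>> from torchvision.models import resnet18
            >>> model = resnet18()
            >>> # any local path, URL, or torchvision:// / open-mmlab:// prefix works
            >>> checkpoint = load_checkpoint(model, 'checkpoint.pth', map_location='cpu')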
228 | """ 229 | checkpoint = _load_checkpoint(filename, map_location, logger) 230 | # OrderedDict is a subclass of dict 231 | if not isinstance(checkpoint, dict): 232 | raise RuntimeError( 233 | f'No state_dict found in checkpoint file {filename}') 234 | # get state_dict from checkpoint 235 | import pdb; pdb.set_trace() 236 | if 'state_dict' in checkpoint: 237 | state_dict = checkpoint['state_dict'] 238 | elif 'model' in checkpoint: 239 | state_dict = checkpoint['model'] 240 | else: 241 | state_dict = checkpoint 242 | 243 | # strip prefix of state_dict 244 | metadata = getattr(state_dict, '_metadata', OrderedDict()) 245 | for p, r in revise_keys: 246 | state_dict = OrderedDict( 247 | {re.sub(p, r, k): v 248 | for k, v in state_dict.items()}) 249 | # Keep metadata in state_dict 250 | state_dict._metadata = metadata 251 | 252 | # load state_dict 253 | load_state_dict(model, state_dict, strict, logger) 254 | return checkpoint -------------------------------------------------------------------------------- /object_detection/configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = '/mnt/private_dataset/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=4, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + '/annotations/instances_train2017.json', 37 | img_prefix=data_root + '/train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + '/annotations/instances_val2017.json', 42 | img_prefix=data_root + '/val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + '/annotations/instances_val2017.json', 47 | img_prefix=data_root + '/val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(interval=1, metric='bbox') -------------------------------------------------------------------------------- /object_detection/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = '/mnt/private_dataset/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', 
**img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=4, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(metric=['bbox', 'segm']) -------------------------------------------------------------------------------- /object_detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/cascade_mask_rcnn_pvtv2_b2_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | backbone=dict( 5 | type='pvt_v2_b2', 6 | style='pytorch'), 7 | neck=dict( 8 | type='FPN', 9 | in_channels=[64, 128, 320, 512], 10 | out_channels=256, 11 | num_outs=5), 12 | rpn_head=dict( 13 | type='RPNHead', 14 | in_channels=256, 15 | feat_channels=256, 16 | anchor_generator=dict( 17 | type='AnchorGenerator', 18 | scales=[8], 19 | ratios=[0.5, 1.0, 2.0], 20 | strides=[4, 8, 16, 32, 64]), 21 | bbox_coder=dict( 22 | type='DeltaXYWHBBoxCoder', 23 | target_means=[.0, .0, .0, .0], 24 | target_stds=[1.0, 1.0, 1.0, 1.0]), 25 | loss_cls=dict( 26 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 27 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 28 | roi_head=dict( 29 | type='CascadeRoIHead', 30 | num_stages=3, 31 | stage_loss_weights=[1, 0.5, 0.25], 32 | bbox_roi_extractor=dict( 33 | type='SingleRoIExtractor', 34 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 35 | out_channels=256, 36 | featmap_strides=[4, 8, 16, 32]), 37 | bbox_head=[ 38 | dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[0., 0., 0., 0.], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | reg_class_agnostic=True, 49 | loss_cls=dict( 50 | type='CrossEntropyLoss', 51 | use_sigmoid=False, 
52 | loss_weight=1.0), 53 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 54 | loss_weight=1.0)), 55 | dict( 56 | type='Shared2FCBBoxHead', 57 | in_channels=256, 58 | fc_out_channels=1024, 59 | roi_feat_size=7, 60 | num_classes=80, 61 | bbox_coder=dict( 62 | type='DeltaXYWHBBoxCoder', 63 | target_means=[0., 0., 0., 0.], 64 | target_stds=[0.05, 0.05, 0.1, 0.1]), 65 | reg_class_agnostic=True, 66 | loss_cls=dict( 67 | type='CrossEntropyLoss', 68 | use_sigmoid=False, 69 | loss_weight=1.0), 70 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 71 | loss_weight=1.0)), 72 | dict( 73 | type='Shared2FCBBoxHead', 74 | in_channels=256, 75 | fc_out_channels=1024, 76 | roi_feat_size=7, 77 | num_classes=80, 78 | bbox_coder=dict( 79 | type='DeltaXYWHBBoxCoder', 80 | target_means=[0., 0., 0., 0.], 81 | target_stds=[0.033, 0.033, 0.067, 0.067]), 82 | reg_class_agnostic=True, 83 | loss_cls=dict( 84 | type='CrossEntropyLoss', 85 | use_sigmoid=False, 86 | loss_weight=1.0), 87 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 88 | ], 89 | mask_roi_extractor=dict( 90 | type='SingleRoIExtractor', 91 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 92 | out_channels=256, 93 | featmap_strides=[4, 8, 16, 32]), 94 | mask_head=dict( 95 | type='FCNMaskHead', 96 | num_convs=4, 97 | in_channels=256, 98 | conv_out_channels=256, 99 | num_classes=80, 100 | loss_mask=dict( 101 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 102 | # model training and testing settings 103 | train_cfg=dict( 104 | rpn=dict( 105 | assigner=dict( 106 | type='MaxIoUAssigner', 107 | pos_iou_thr=0.7, 108 | neg_iou_thr=0.3, 109 | min_pos_iou=0.3, 110 | match_low_quality=True, 111 | ignore_iof_thr=-1), 112 | sampler=dict( 113 | type='RandomSampler', 114 | num=256, 115 | pos_fraction=0.5, 116 | neg_pos_ub=-1, 117 | add_gt_as_proposals=False), 118 | allowed_border=0, 119 | pos_weight=-1, 120 | debug=False), 121 | rpn_proposal=dict( 122 | nms_across_levels=False, 123 | nms_pre=2000, 124 | nms_post=2000, 125 | max_per_img=2000, 126 | nms=dict(type='nms', iou_threshold=0.7), 127 | min_bbox_size=0), 128 | rcnn=[ 129 | dict( 130 | assigner=dict( 131 | type='MaxIoUAssigner', 132 | pos_iou_thr=0.5, 133 | neg_iou_thr=0.5, 134 | min_pos_iou=0.5, 135 | match_low_quality=False, 136 | ignore_iof_thr=-1), 137 | sampler=dict( 138 | type='RandomSampler', 139 | num=512, 140 | pos_fraction=0.25, 141 | neg_pos_ub=-1, 142 | add_gt_as_proposals=True), 143 | mask_size=28, 144 | pos_weight=-1, 145 | debug=False), 146 | dict( 147 | assigner=dict( 148 | type='MaxIoUAssigner', 149 | pos_iou_thr=0.6, 150 | neg_iou_thr=0.6, 151 | min_pos_iou=0.6, 152 | match_low_quality=False, 153 | ignore_iof_thr=-1), 154 | sampler=dict( 155 | type='RandomSampler', 156 | num=512, 157 | pos_fraction=0.25, 158 | neg_pos_ub=-1, 159 | add_gt_as_proposals=True), 160 | mask_size=28, 161 | pos_weight=-1, 162 | debug=False), 163 | dict( 164 | assigner=dict( 165 | type='MaxIoUAssigner', 166 | pos_iou_thr=0.7, 167 | neg_iou_thr=0.7, 168 | min_pos_iou=0.7, 169 | match_low_quality=False, 170 | ignore_iof_thr=-1), 171 | sampler=dict( 172 | type='RandomSampler', 173 | num=512, 174 | pos_fraction=0.25, 175 | neg_pos_ub=-1, 176 | add_gt_as_proposals=True), 177 | mask_size=28, 178 | pos_weight=-1, 179 | debug=False) 180 | ]), 181 | test_cfg=dict( 182 | rpn=dict( 183 | nms_across_levels=False, 184 | nms_pre=1000, 185 | nms_post=1000, 186 | max_per_img=1000, 187 | nms=dict(type='nms', iou_threshold=0.7), 188 | min_bbox_size=0), 189 | rcnn=dict( 190 | score_thr=0.05, 
191 | nms=dict(type='nms', iou_threshold=0.5), 192 | max_per_img=100, 193 | mask_thr_binary=0.5))) 194 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ], 96 | mask_roi_extractor=dict( 97 | type='SingleRoIExtractor', 98 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 99 | out_channels=256, 100 | featmap_strides=[4, 8, 16, 32]), 101 | mask_head=dict( 102 | type='FCNMaskHead', 103 | num_convs=4, 104 | in_channels=256, 105 | conv_out_channels=256, 106 | num_classes=80, 107 | loss_mask=dict( 108 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 109 | # model training and testing settings 110 | train_cfg=dict( 111 | rpn=dict( 112 | assigner=dict( 113 | 
type='MaxIoUAssigner', 114 | pos_iou_thr=0.7, 115 | neg_iou_thr=0.3, 116 | min_pos_iou=0.3, 117 | match_low_quality=True, 118 | ignore_iof_thr=-1), 119 | sampler=dict( 120 | type='RandomSampler', 121 | num=256, 122 | pos_fraction=0.5, 123 | neg_pos_ub=-1, 124 | add_gt_as_proposals=False), 125 | allowed_border=0, 126 | pos_weight=-1, 127 | debug=False), 128 | rpn_proposal=dict( 129 | nms_pre=2000, 130 | max_per_img=2000, 131 | nms=dict(type='nms', iou_threshold=0.7), 132 | min_bbox_size=0), 133 | rcnn=[ 134 | dict( 135 | assigner=dict( 136 | type='MaxIoUAssigner', 137 | pos_iou_thr=0.5, 138 | neg_iou_thr=0.5, 139 | min_pos_iou=0.5, 140 | match_low_quality=False, 141 | ignore_iof_thr=-1), 142 | sampler=dict( 143 | type='RandomSampler', 144 | num=512, 145 | pos_fraction=0.25, 146 | neg_pos_ub=-1, 147 | add_gt_as_proposals=True), 148 | mask_size=28, 149 | pos_weight=-1, 150 | debug=False), 151 | dict( 152 | assigner=dict( 153 | type='MaxIoUAssigner', 154 | pos_iou_thr=0.6, 155 | neg_iou_thr=0.6, 156 | min_pos_iou=0.6, 157 | match_low_quality=False, 158 | ignore_iof_thr=-1), 159 | sampler=dict( 160 | type='RandomSampler', 161 | num=512, 162 | pos_fraction=0.25, 163 | neg_pos_ub=-1, 164 | add_gt_as_proposals=True), 165 | mask_size=28, 166 | pos_weight=-1, 167 | debug=False), 168 | dict( 169 | assigner=dict( 170 | type='MaxIoUAssigner', 171 | pos_iou_thr=0.7, 172 | neg_iou_thr=0.7, 173 | min_pos_iou=0.7, 174 | match_low_quality=False, 175 | ignore_iof_thr=-1), 176 | sampler=dict( 177 | type='RandomSampler', 178 | num=512, 179 | pos_fraction=0.25, 180 | neg_pos_ub=-1, 181 | add_gt_as_proposals=True), 182 | mask_size=28, 183 | pos_weight=-1, 184 | debug=False) 185 | ]), 186 | test_cfg=dict( 187 | rpn=dict( 188 | nms_pre=1000, 189 | max_per_img=1000, 190 | nms=dict(type='nms', iou_threshold=0.7), 191 | min_bbox_size=0), 192 | rcnn=dict( 193 | score_thr=0.05, 194 | nms=dict(type='nms', iou_threshold=0.5), 195 | max_per_img=100, 196 | mask_thr_binary=0.5))) 197 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | 
fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ]), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=0, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=2000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=[ 121 | dict( 122 | assigner=dict( 123 | type='MaxIoUAssigner', 124 | pos_iou_thr=0.5, 125 | neg_iou_thr=0.5, 126 | min_pos_iou=0.5, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict( 130 | type='RandomSampler', 131 | num=512, 132 | pos_fraction=0.25, 133 | neg_pos_ub=-1, 134 | add_gt_as_proposals=True), 135 | pos_weight=-1, 136 | debug=False), 137 | dict( 138 | assigner=dict( 139 | type='MaxIoUAssigner', 140 | pos_iou_thr=0.6, 141 | neg_iou_thr=0.6, 142 | min_pos_iou=0.6, 143 | match_low_quality=False, 144 | ignore_iof_thr=-1), 145 | sampler=dict( 146 | type='RandomSampler', 147 | num=512, 148 | pos_fraction=0.25, 149 | neg_pos_ub=-1, 150 | add_gt_as_proposals=True), 151 | pos_weight=-1, 152 | debug=False), 153 | dict( 154 | assigner=dict( 155 | type='MaxIoUAssigner', 156 | pos_iou_thr=0.7, 157 | neg_iou_thr=0.7, 158 | min_pos_iou=0.7, 159 | match_low_quality=False, 160 | ignore_iof_thr=-1), 161 | sampler=dict( 162 | type='RandomSampler', 163 | num=512, 164 | pos_fraction=0.25, 165 | neg_pos_ub=-1, 166 | add_gt_as_proposals=True), 167 | pos_weight=-1, 168 | debug=False) 169 | ]), 170 | test_cfg=dict( 171 | rpn=dict( 172 | nms_pre=1000, 173 | max_per_img=1000, 174 | nms=dict(type='nms', iou_threshold=0.7), 175 | min_bbox_size=0), 176 | rcnn=dict( 177 | score_thr=0.05, 178 | nms=dict(type='nms', iou_threshold=0.5), 179 | max_per_img=100))) 180 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/fast_rcnn_r50_fpn.py: 
-------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 
0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | 
neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | 
pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | 
num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | 
pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | 
num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | # model training and testing settings 32 | train_cfg=dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.5, 37 | min_pos_iou=0., 38 | ignore_iof_thr=-1, 39 | gt_max_assign_all=False), 40 | smoothl1_beta=1., 41 | allowed_border=-1, 42 | pos_weight=-1, 43 | neg_pos_ratio=3, 44 | debug=False), 45 | test_cfg=dict( 46 | nms_pre=1000, 47 | nms=dict(type='nms', iou_threshold=0.45), 48 | min_bbox_size=0, 49 | score_thr=0.02, 50 | 
max_per_img=200)) 51 | cudnn_benchmark = True 52 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /object_detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /object_detection/configs/mask_rcnn_transx_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_b', 15 | drop_path_rate=0.3, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[76, 152, 336, 672], 20 | out_channels=256, 21 | num_outs=5)) 22 | # optimizer 23 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05, 24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 25 | 'relative_position_bias_table': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.)})) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict() ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/configs/mask_rcnn_transx_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_s', 15 | drop_path_rate=0.2, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[64, 128, 320, 512], 20 | out_channels=256, 21 | num_outs=5)) 22 | # optimizer 23 | optimizer = 
dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05, 24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 25 | 'relative_position_bias_table': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.)})) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict() ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/configs/mask_rcnn_transx_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_t', 15 | drop_path_rate=0.1, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[48, 96, 224, 448], 20 | out_channels=256, 21 | num_outs=5)) 22 | # optimizer 23 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.05, 24 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 25 | 'relative_position_bias_table': dict(decay_mult=0.), 26 | 'norm': dict(decay_mult=0.)})) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/configs/retinanet_transx_b_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/retinanet_r50_fpn.py', 3 | '_base_/datasets/coco_detection.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_b', 15 | drop_path_rate=0.3, 16 | start_level=1, 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[76, 152, 336, 672], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_input', 24 | num_outs=5)) 25 | # optimizer 26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training 29 | evaluation = dict(save_best='auto') 30 | -------------------------------------------------------------------------------- /object_detection/configs/retinanet_transx_s_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/retinanet_r50_fpn.py', 3 | '_base_/datasets/coco_detection.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_s', 15 | drop_path_rate=0.2, 16 | start_level=1, 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[64, 128, 320, 512], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_input', 24 | num_outs=5)) 25 | # optimizer 26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict() ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- 
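The six TransXNet detector configs in this directory all follow the same override pattern: they inherit a ResNet-50 base model and rely on mmcv's `_delete_=True` key so that the backbone (and optimizer) dicts replace the inherited ones wholesale instead of being merged into them. A minimal sketch of inspecting the merged result — assuming mmcv is installed and the snippet is run from `object_detection/` so the `_base_` paths resolve:

from mmcv import Config

# Config.fromfile resolves the _base_ inheritance chain and applies
# the _delete_ overrides before returning the merged config.
cfg = Config.fromfile('configs/retinanet_transx_s_fpn_1x_coco.py')

# Because of _delete_=True, no ResNet keys (depth, num_stages, ...) survive:
print(cfg.model.backbone)  # {'pretrained': True, 'type': 'transxnet_s', ...}

# The optimizer dict is likewise replaced outright, swapping SGD for AdamW:
print(cfg.optimizer.type)  # 'AdamW'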
/object_detection/configs/retinanet_transx_t_fpn_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/retinanet_r50_fpn.py', 3 | '_base_/datasets/coco_detection.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | 8 | 9 | model = dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_t', 15 | drop_path_rate=0.1, 16 | start_level=1, 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[48, 96, 224, 448], 21 | out_channels=256, 22 | start_level=1, 23 | add_extra_convs='on_input', 24 | num_outs=5)) 25 | # optimizer 26 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) 27 | optimizer_config = dict(grad_clip=None) 28 | # fp16 = dict(loss_scale=dict(init_scale=512)) ## AMP Training 29 | evaluation = dict(save_best='auto') -------------------------------------------------------------------------------- /object_detection/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | CONFIG=$1 3 | CHECKPOINT=$2 4 | GPUS=$3 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} -------------------------------------------------------------------------------- /object_detection/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | CONFIG=$1 3 | GPUS=$2 4 | PORT=${PORT:-29500} 5 | 6 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 7 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 8 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /object_detection/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except ImportError: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict``, 23 | ``optimizer``, and ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() -------------------------------------------------------------------------------- /object_detection/mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | @RUNNERS.register_module() 20 | class EpochBasedRunnerAmp(EpochBasedRunner): 21 | """Epoch-based Runner with AMP support. 22 | 23 | This runner train models epoch by epoch. 24 | """ 25 | 26 | def save_checkpoint(self, 27 | out_dir, 28 | filename_tmpl='epoch_{}.pth', 29 | save_optimizer=True, 30 | meta=None, 31 | create_symlink=True): 32 | """Save the checkpoint. 33 | 34 | Args: 35 | out_dir (str): The directory that checkpoints are saved. 36 | filename_tmpl (str, optional): The checkpoint filename template, 37 | which contains a placeholder for the epoch number. 38 | Defaults to 'epoch_{}.pth'. 39 | save_optimizer (bool, optional): Whether to save the optimizer to 40 | the checkpoint. Defaults to True. 41 | meta (dict, optional): The meta information to be saved in the 42 | checkpoint. Defaults to None. 43 | create_symlink (bool, optional): Whether to create a symlink 44 | "latest.pth" to point to the latest checkpoint. 45 | Defaults to True. 
46 | """ 47 | if meta is None: 48 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 49 | elif isinstance(meta, dict): 50 | meta.update(epoch=self.epoch + 1, iter=self.iter) 51 | else: 52 | raise TypeError( 53 | f'meta should be a dict or None, but got {type(meta)}') 54 | if self.meta is not None: 55 | meta.update(self.meta) 56 | 57 | filename = filename_tmpl.format(self.epoch + 1) 58 | filepath = osp.join(out_dir, filename) 59 | optimizer = self.optimizer if save_optimizer else None 60 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 61 | # in some environments, `os.symlink` is not supported, you may need to 62 | # set `create_symlink` to False 63 | if create_symlink: 64 | dst_file = osp.join(out_dir, 'latest.pth') 65 | if platform.system() != 'Windows': 66 | mmcv.symlink(filename, dst_file) 67 | else: 68 | shutil.copy(filepath, dst_file) 69 | 70 | def resume(self, 71 | checkpoint, 72 | resume_optimizer=True, 73 | map_location='default'): 74 | if map_location == 'default': 75 | if torch.cuda.is_available(): 76 | device_id = torch.cuda.current_device() 77 | checkpoint = self.load_checkpoint( 78 | checkpoint, 79 | map_location=lambda storage, loc: storage.cuda(device_id)) 80 | else: 81 | checkpoint = self.load_checkpoint(checkpoint) 82 | else: 83 | checkpoint = self.load_checkpoint( 84 | checkpoint, map_location=map_location) 85 | 86 | self._epoch = checkpoint['meta']['epoch'] 87 | self._iter = checkpoint['meta']['iter'] 88 | if 'optimizer' in checkpoint and resume_optimizer: 89 | if isinstance(self.optimizer, Optimizer): 90 | self.optimizer.load_state_dict(checkpoint['optimizer']) 91 | elif isinstance(self.optimizer, dict): 92 | for k in self.optimizer.keys(): 93 | self.optimizer[k].load_state_dict( 94 | checkpoint['optimizer'][k]) 95 | else: 96 | raise TypeError( 97 | 'Optimizer should be dict or torch.optim.Optimizer ' 98 | f'but got {type(self.optimizer)}') 99 | 100 | if 'amp' in checkpoint: 101 | apex.amp.load_state_dict(checkpoint['amp']) 102 | self.logger.info('load amp state dict') 103 | 104 | self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) 105 | -------------------------------------------------------------------------------- /object_detection/mmcv_custom/runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | try: 3 | import apex 4 | except: 5 | print('apex is not installed') 6 | 7 | 8 | @HOOKS.register_module() 9 | class DistOptimizerHook(OptimizerHook): 10 | """Optimizer hook for distributed training.""" 11 | 12 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 13 | self.grad_clip = grad_clip 14 | self.coalesce = coalesce 15 | self.bucket_size_mb = bucket_size_mb 16 | self.update_interval = update_interval 17 | self.use_fp16 = use_fp16 18 | 19 | def before_run(self, runner): 20 | runner.optimizer.zero_grad() 21 | 22 | def after_train_iter(self, runner): 23 | runner.outputs['loss'] /= self.update_interval 24 | if self.use_fp16: 25 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 26 | scaled_loss.backward() 27 | else: 28 | runner.outputs['loss'].backward() 29 | if self.every_n_iters(runner, self.update_interval): 30 | if self.grad_clip is not None: 31 | self.clip_grads(runner.model.parameters()) 32 | runner.optimizer.step() 33 | runner.optimizer.zero_grad() -------------------------------------------------------------------------------- 
/object_detection/mmdet_custom/apis/train.py: -------------------------------------------------------------------------------- 1 | import random 2 | import warnings 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, 8 | Fp16OptimizerHook, OptimizerHook, build_optimizer, 9 | build_runner) 10 | from mmcv.utils import build_from_cfg 11 | 12 | from mmdet.core import DistEvalHook, EvalHook 13 | from mmdet.datasets import (build_dataloader, build_dataset, 14 | replace_ImageToTensor) 15 | from mmdet.utils import get_root_logger 16 | try: 17 | import apex 18 | except ImportError: 19 | print('apex is not installed') 20 | 21 | 22 | def set_random_seed(seed, deterministic=False): 23 | """Set random seed. 24 | 25 | Args: 26 | seed (int): Seed to be used. 27 | deterministic (bool): Whether to set the deterministic option for 28 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` 29 | to True and `torch.backends.cudnn.benchmark` to False. 30 | Default: False. 31 | """ 32 | random.seed(seed) 33 | np.random.seed(seed) 34 | torch.manual_seed(seed) 35 | torch.cuda.manual_seed_all(seed) 36 | if deterministic: 37 | torch.backends.cudnn.deterministic = True 38 | torch.backends.cudnn.benchmark = False 39 | 40 | 41 | def train_detector(model, 42 | dataset, 43 | cfg, 44 | distributed=False, 45 | validate=False, 46 | timestamp=None, 47 | meta=None): 48 | logger = get_root_logger(cfg.log_level) 49 | 50 | # prepare data loaders 51 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 52 | if 'imgs_per_gpu' in cfg.data: 53 | logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. ' 54 | 'Please use "samples_per_gpu" instead') 55 | if 'samples_per_gpu' in cfg.data: 56 | logger.warning( 57 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and ' 58 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"' 59 | f'={cfg.data.imgs_per_gpu} is used in this experiment') 60 | else: 61 | logger.warning( 62 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"=' 63 | f'{cfg.data.imgs_per_gpu} in this experiment') 64 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu 65 | 66 | data_loaders = [ 67 | build_dataloader( 68 | ds, 69 | cfg.data.samples_per_gpu, 70 | cfg.data.workers_per_gpu, 71 | # cfg.gpus will be ignored if distributed 72 | len(cfg.gpu_ids), 73 | dist=distributed, 74 | seed=cfg.seed) for ds in dataset 75 | ] 76 | 77 | # build optimizer 78 | optimizer = build_optimizer(model, cfg.optimizer) 79 | 80 | # use apex fp16 optimizer 81 | if cfg.optimizer_config.get("type", None) and cfg.optimizer_config["type"] == "DistOptimizerHook": 82 | if cfg.optimizer_config.get("use_fp16", False): 83 | model, optimizer = apex.amp.initialize( 84 | model.cuda(), optimizer, opt_level="O1") 85 | for m in model.modules(): 86 | if hasattr(m, "fp16_enabled"): 87 | m.fp16_enabled = True 88 | 89 | # put model on gpus 90 | if distributed: 91 | find_unused_parameters = cfg.get('find_unused_parameters', False) 92 | # Sets the `find_unused_parameters` parameter in 93 | # torch.nn.parallel.DistributedDataParallel 94 | model = MMDistributedDataParallel( 95 | model.cuda(), 96 | device_ids=[torch.cuda.current_device()], 97 | broadcast_buffers=False, 98 | find_unused_parameters=find_unused_parameters) 99 | else: 100 | model = MMDataParallel( 101 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) 102 | 103 | if 'runner' not in cfg: 104 | cfg.runner = { 105 | 'type': 
'EpochBasedRunner', 106 | 'max_epochs': cfg.total_epochs 107 | } 108 | warnings.warn( 109 | 'config is now expected to have a `runner` section, ' 110 | 'please set `runner` in your config.', UserWarning) 111 | else: 112 | if 'total_epochs' in cfg: 113 | assert cfg.total_epochs == cfg.runner.max_epochs 114 | 115 | # build runner 116 | runner = build_runner( 117 | cfg.runner, 118 | default_args=dict( 119 | model=model, 120 | optimizer=optimizer, 121 | work_dir=cfg.work_dir, 122 | logger=logger, 123 | meta=meta)) 124 | 125 | # an ugly workaround to make .log and .log.json filenames the same 126 | runner.timestamp = timestamp 127 | 128 | # fp16 setting 129 | fp16_cfg = cfg.get('fp16', None) 130 | if fp16_cfg is not None: 131 | optimizer_config = Fp16OptimizerHook( 132 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed) 133 | elif distributed and 'type' not in cfg.optimizer_config: 134 | optimizer_config = OptimizerHook(**cfg.optimizer_config) 135 | else: 136 | optimizer_config = cfg.optimizer_config 137 | 138 | # register hooks 139 | runner.register_training_hooks(cfg.lr_config, optimizer_config, 140 | cfg.checkpoint_config, cfg.log_config, 141 | cfg.get('momentum_config', None)) 142 | if distributed: 143 | if isinstance(runner, EpochBasedRunner): 144 | runner.register_hook(DistSamplerSeedHook()) 145 | 146 | # register eval hooks 147 | if validate: 148 | # Support batch_size > 1 in validation 149 | val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1) 150 | if val_samples_per_gpu > 1: 151 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 152 | cfg.data.val.pipeline = replace_ImageToTensor( 153 | cfg.data.val.pipeline) 154 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) 155 | val_dataloader = build_dataloader( 156 | val_dataset, 157 | samples_per_gpu=val_samples_per_gpu, 158 | workers_per_gpu=cfg.data.workers_per_gpu, 159 | dist=distributed, 160 | shuffle=False) 161 | eval_cfg = cfg.get('evaluation', {}) 162 | eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' 163 | eval_hook = DistEvalHook if distributed else EvalHook 164 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) 165 | 166 | # user-defined hooks 167 | if cfg.get('custom_hooks', None): 168 | custom_hooks = cfg.custom_hooks 169 | assert isinstance(custom_hooks, list), \ 170 | f'custom_hooks expect list type, but got {type(custom_hooks)}' 171 | for hook_cfg in cfg.custom_hooks: 172 | assert isinstance(hook_cfg, dict), \ 173 | 'Each item in custom_hooks expects dict type, but got ' \ 174 | f'{type(hook_cfg)}' 175 | hook_cfg = hook_cfg.copy() 176 | priority = hook_cfg.pop('priority', 'NORMAL') 177 | hook = build_from_cfg(hook_cfg, HOOKS) 178 | runner.register_hook(hook, priority=priority) 179 | 180 | if cfg.resume_from: 181 | runner.resume(cfg.resume_from) 182 | elif cfg.load_from: 183 | runner.load_checkpoint(cfg.load_from) 184 | runner.run(data_loaders, cfg.workflow) -------------------------------------------------------------------------------- /object_detection/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import os.path as osp 5 | import time 6 | import warnings 7 | 8 | import mmcv 9 | import torch 10 | from mmcv import Config, DictAction 11 | from mmcv.runner import get_dist_info, init_dist 12 | from mmcv.utils import get_git_hash 13 | 14 | from mmdet import __version__ 15 | from mmdet.apis import set_random_seed #, train_detector 16 | from mmdet_custom.apis.train import 
train_detector 17 | from mmdet.datasets import build_dataset 18 | from mmdet.models import build_detector 19 | from mmdet.utils import collect_env, get_root_logger 20 | import mmcv_custom.runner.epoch_based_runner 21 | import mmcv_custom.runner.optimizer 22 | 23 | import transxnet 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser(description='Train a detector') 27 | parser.add_argument('config', help='train config file path') 28 | parser.add_argument('--work-dir', help='the dir to save logs and models') 29 | parser.add_argument( 30 | '--resume-from', help='the checkpoint file to resume from') 31 | parser.add_argument( 32 | '--no-validate', 33 | action='store_true', 34 | help='whether not to evaluate the checkpoint during training') 35 | group_gpus = parser.add_mutually_exclusive_group() 36 | group_gpus.add_argument( 37 | '--gpus', 38 | type=int, 39 | help='number of gpus to use ' 40 | '(only applicable to non-distributed training)') 41 | group_gpus.add_argument( 42 | '--gpu-ids', 43 | type=int, 44 | nargs='+', 45 | help='ids of gpus to use ' 46 | '(only applicable to non-distributed training)') 47 | parser.add_argument('--seed', type=int, default=None, help='random seed') 48 | parser.add_argument( 49 | '--deterministic', 50 | action='store_true', 51 | help='whether to set deterministic options for CUDNN backend.') 52 | parser.add_argument( 53 | '--options', 54 | nargs='+', 55 | action=DictAction, 56 | help='override some settings in the used config, the key-value pair ' 57 | 'in xxx=yyy format will be merged into config file (deprecate), ' 58 | 'change to --cfg-options instead.') 59 | parser.add_argument( 60 | '--cfg-options', 61 | nargs='+', 62 | action=DictAction, 63 | help='override some settings in the used config, the key-value pair ' 64 | 'in xxx=yyy format will be merged into config file. If the value to ' 65 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 66 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 67 | 'Note that the quotation marks are necessary and that no white space ' 68 | 'is allowed.') 69 | parser.add_argument( 70 | '--launcher', 71 | choices=['none', 'pytorch', 'slurm', 'mpi'], 72 | default='none', 73 | help='job launcher') 74 | parser.add_argument('--local_rank', type=int, default=0) 75 | args = parser.parse_args() 76 | if 'LOCAL_RANK' not in os.environ: 77 | os.environ['LOCAL_RANK'] = str(args.local_rank) 78 | 79 | if args.options and args.cfg_options: 80 | raise ValueError( 81 | '--options and --cfg-options cannot be both ' 82 | 'specified, --options is deprecated in favor of --cfg-options') 83 | if args.options: 84 | warnings.warn('--options is deprecated in favor of --cfg-options') 85 | args.cfg_options = args.options 86 | 87 | return args 88 | 89 | 90 | def main(): 91 | args = parse_args() 92 | 93 | cfg = Config.fromfile(args.config) 94 | if args.cfg_options is not None: 95 | cfg.merge_from_dict(args.cfg_options) 96 | # import modules from string list. 
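# For example, a config might declare (hypothetical snippet):
#   custom_imports = dict(imports=['mmdet_custom.apis.train'],
#                         allow_failed_imports=False)
# `import_modules_from_strings(**cfg['custom_imports'])` below then imports
# those modules before the config is consumed, so any registry entries they
# define become available.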
97 | if cfg.get('custom_imports', None): 98 | from mmcv.utils import import_modules_from_strings 99 | import_modules_from_strings(**cfg['custom_imports']) 100 | # set cudnn_benchmark 101 | if cfg.get('cudnn_benchmark', False): 102 | torch.backends.cudnn.benchmark = True 103 | 104 | # work_dir is determined in this priority: CLI > segment in file > filename 105 | if args.work_dir is not None: 106 | # update configs according to CLI args if args.work_dir is not None 107 | cfg.work_dir = args.work_dir 108 | elif cfg.get('work_dir', None) is None: 109 | # use config filename as default work_dir if cfg.work_dir is None 110 | cfg.work_dir = osp.join('./work_dirs', 111 | osp.splitext(osp.basename(args.config))[0]) 112 | if args.resume_from is not None: 113 | cfg.resume_from = args.resume_from 114 | if args.gpu_ids is not None: 115 | cfg.gpu_ids = args.gpu_ids 116 | else: 117 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 118 | 119 | # init distributed env first, since logger depends on the dist info. 120 | if args.launcher == 'none': 121 | distributed = False 122 | else: 123 | distributed = True 124 | init_dist(args.launcher, **cfg.dist_params) 125 | # re-set gpu_ids with distributed training mode 126 | _, world_size = get_dist_info() 127 | cfg.gpu_ids = range(world_size) 128 | 129 | # create work_dir 130 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 131 | # dump config 132 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 133 | # init the logger before other steps 134 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 135 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 136 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 137 | 138 | # init the meta dict to record some important information such as 139 | # environment info and seed, which will be logged 140 | meta = dict() 141 | # log env info 142 | env_info_dict = collect_env() 143 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 144 | dash_line = '-' * 60 + '\n' 145 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 146 | dash_line) 147 | meta['env_info'] = env_info 148 | meta['config'] = cfg.pretty_text 149 | # log some basic info 150 | logger.info(f'Distributed training: {distributed}') 151 | logger.info(f'Config:\n{cfg.pretty_text}') 152 | 153 | # set random seeds 154 | if args.seed is not None: 155 | logger.info(f'Set random seed to {args.seed}, ' 156 | f'deterministic: {args.deterministic}') 157 | set_random_seed(args.seed, deterministic=args.deterministic) 158 | cfg.seed = args.seed 159 | meta['seed'] = args.seed 160 | meta['exp_name'] = osp.basename(args.config) 161 | 162 | model = build_detector( 163 | cfg.model, 164 | train_cfg=cfg.get('train_cfg'), 165 | test_cfg=cfg.get('test_cfg')) 166 | model.init_weights() 167 | 168 | logger.info(model) 169 | 170 | datasets = [build_dataset(cfg.data.train)] 171 | if len(cfg.workflow) == 2: 172 | val_dataset = copy.deepcopy(cfg.data.val) 173 | val_dataset.pipeline = cfg.data.train.pipeline 174 | datasets.append(build_dataset(val_dataset)) 175 | if cfg.checkpoint_config is not None: 176 | # save mmdet version, config file content and class names in 177 | # checkpoints as meta data 178 | cfg.checkpoint_config.meta = dict( 179 | mmdet_version=__version__ + get_git_hash()[:7], 180 | CLASSES=datasets[0].CLASSES) 181 | # add an attribute for visualization convenience 182 | model.CLASSES = datasets[0].CLASSES 183 | train_detector( 184 | model, 185 | datasets, 186 | cfg, 187 | 
distributed=distributed, 188 | validate=(not args.no_validate), 189 | timestamp=timestamp, 190 | meta=meta) 191 | 192 | 193 | if __name__ == '__main__': 194 | torch.cuda.empty_cache() 195 | main() -------------------------------------------------------------------------------- /scripts/train_base.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | train.py \ 5 | /path/to/imagenet/ \ 6 | --batch-size 128 \ 7 | --pin-mem \ 8 | --model transxnet_b \ 9 | --drop-path 0.4 \ 10 | --lr 2e-3 \ 11 | --warmup-epochs 5 \ 12 | --sync-bn \ 13 | --model-ema \ 14 | --model-ema-decay 0.99985 \ 15 | --val-start-epoch 250 \ 16 | --val-freq 50 \ 17 | --native-amp \ 18 | --output /path/to/save-checkpoint/ -------------------------------------------------------------------------------- /scripts/train_small.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | train.py \ 5 | /path/to/imagenet/ \ 6 | --batch-size 128 \ 7 | --pin-mem \ 8 | --model transxnet_s \ 9 | --drop-path 0.2 \ 10 | --lr 2e-3 \ 11 | --warmup-epochs 5 \ 12 | --sync-bn \ 13 | --model-ema \ 14 | --model-ema-decay 0.99985 \ 15 | --val-start-epoch 250 \ 16 | --val-freq 50 \ 17 | --native-amp \ 18 | --output /path/to/save-checkpoint/ -------------------------------------------------------------------------------- /scripts/train_tiny.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | train.py \ 5 | /path/to/imagenet/ \ 6 | --batch-size 128 \ 7 | --pin-mem \ 8 | --model transxnet_t \ 9 | --drop-path 0.1 \ 10 | --lr 2e-3 \ 11 | --warmup-epochs 5 \ 12 | --sync-bn \ 13 | --model-ema \ 14 | --model-ema-decay 0.9998 \ 15 | --val-start-epoch 250 \ 16 | --val-freq 50 \ 17 | --native-amp \ 18 | --output /path/to/save-checkpoint/ -------------------------------------------------------------------------------- /semantic_segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Applying TransXNet to Semantic Segmentation 2 | 3 | For details, please refer to "[TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition](https://arxiv.org/abs/2310.19380)". 4 | 5 | ## 1. Requirements 6 | 7 | We strongly recommend using our provided dependencies to ensure reproducibility: 8 | ``` 9 | # Environments: 10 | cuda==11.3 11 | python==3.8.15 12 | # Packages: 13 | mmcv==1.7.1 14 | timm==0.6.12 15 | torch==1.12.1 16 | torchvision==0.13.1 17 | mmsegmentation==0.30.0 18 | ``` 19 | 20 | ## 2. Data Preparation 21 | 22 | Prepare ADE20K according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/main/docs/en/user_guides/2_dataset_prepare.md). 23 | 24 | 25 | ## 3. 
Main Results on ADE20K with Pretrained Models 26 | 27 | | Method | Backbone | Pretrain | Iters | mIoU | Config | Download | 28 | | --- | --- | --- |:---:|:---:| --- | --- | 29 | | Semantic FPN | TransXNet-T | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-t.pth.tar) | 80K | 45.5 | [config](configs/sfpn_transxnet_tiny.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_tiny_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_tiny.pth) | 30 | | Semantic FPN | TransXNet-S | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-s.pth.tar) | 80K | 48.5 | [config](configs/sfpn_transxnet_small.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_small_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_small.pth) | 31 | | Semantic FPN | TransXNet-B | [ImageNet-1K](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/transx-b.pth.tar) | 80k | 49.9 | [config](configs/sfpn_transxnet_base.py) | [log](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_base_log.json) & [model](https://github.com/LMMMEng/TransXNet/releases/download/v1.0/sfpn_transxnet_base.pth) | 32 | 33 | 34 | ## 4. Train 35 | To train ``TransXNet + Semantic FPN`` models on ADE20K with 8 gpus (single node), run: 36 | ``` 37 | bash scripts/train_sfpn_transxnet_tiny.sh # train TransXNet-T + Semantic FPN 38 | bash scripts/train_sfpn_transxnet_small.sh # train TransXNet-S + Semantic FPN 39 | bash scripts/train_sfpn_transxnet_base.sh # train TransXNet-B + Semantic FPN 40 | ``` 41 | 42 | ## 5. Validation 43 | To evaluate ``TransXNet + Semantic FPN`` models on ADE20K, run: 44 | ``` 45 | # Take TransXNet-T + Semantic FPN as an example: 46 | python3 -m torch.distributed.launch \ 47 | --nproc_per_node=8 \ 48 | --master_port=$((RANDOM+8888)) \ 49 | test.py \ 50 | configs/sfpn_transxnet_tiny.py \ 51 | path/to/checkpoint \ 52 | --out work_dirs/output.pkl \ 53 | --eval mIoU \ 54 | --launcher pytorch 55 | ``` 56 | 57 | ## Citation 58 | If you find this project useful for your research, please consider citing: 59 | ``` 60 | @article{lou2023transxnet, 61 | title={TransXNet: Learning Both Global and Local Dynamics with a Dual Dynamic Token Mixer for Visual Recognition}, 62 | author={Meng Lou and Shu Zhang and Hong-Yu Zhou and Sibei Yang and Chuan Wu and Yizhou Yu}, 63 | journal={IEEE Transactions on Neural Networks and Learning Systems}, 64 | year={2025} 65 | } 66 | ``` 67 | 68 | ## Contact 69 | If you have any questions, please feel free to [create issues](https://github.com/LMMMEng/TransXNet/issues) or contact me at lmzmm.0921@gmail.com. 
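## Appendix: Quick Single-Image Inference (Optional)

For a quick sanity check of a downloaded checkpoint, the standard high-level APIs of the pinned `mmsegmentation==0.30.0` can be used. The sketch below assumes it is run from the `semantic_segmentation` directory (so that `import transxnet` registers the backbones) and uses placeholder paths:

```python
# Minimal inference sketch (placeholder paths; assumes mmsegmentation==0.30.0).
from mmseg.apis import inference_segmentor, init_segmentor

import transxnet  # registers the transxnet_t/s/b backbones with mmseg

config_file = 'configs/sfpn_transxnet_tiny.py'
checkpoint_file = 'path/to/sfpn_transxnet_tiny.pth'       # placeholder
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
result = inference_segmentor(model, 'path/to/image.jpg')  # placeholder image
# `result` is a list containing one H x W array of class indices (0-149).
```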
70 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/datasets/ade20k_sfpn.py: -------------------------------------------------------------------------------- 1 | # copied from uniformer 2 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/configs/_base_/datasets/ade20k.py 3 | 4 | # dataset settings 5 | dataset_type = 'ADE20KDataset' 6 | data_root = '/mnt/private_dataset/ade/ADEChallengeData2016/' 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | crop_size = (512, 512) 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations', reduce_zero_label=True), 13 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(2048, 512), 27 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 28 | flip=False, 29 | transforms=[ 30 | dict(type='AlignResize', keep_ratio=True, size_divisor=32), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='ImageToTensor', keys=['img']), 34 | dict(type='Collect', keys=['img']), 35 | ]) 36 | ] 37 | data = dict( 38 | samples_per_gpu=4, 39 | workers_per_gpu=4, 40 | train=dict( 41 | type='RepeatDataset', 42 | times=50, 43 | dataset=dict( 44 | type=dataset_type, 45 | data_root=data_root, 46 | img_dir='images/training', 47 | ann_dir='annotations/training', 48 | pipeline=train_pipeline)), 49 | val=dict( 50 | type=dataset_type, 51 | data_root=data_root, 52 | img_dir='images/validation', 53 | ann_dir='annotations/validation', 54 | pipeline=test_pipeline), 55 | test=dict( 56 | type=dataset_type, 57 | data_root=data_root, 58 | img_dir='images/validation', 59 | ann_dir='annotations/validation', 60 | pipeline=test_pipeline)) 61 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='CustomizedTextLoggerHook', by_epoch=False), 7 | # dict(type='TensorboardLoggerHook'), 8 | ]) 9 | # yapf:enable 10 | dist_params = dict(backend='nccl') 11 | log_level = 'INFO' 12 | load_from = None 13 | resume_from = None 14 | workflow = [('train', 1)] 15 | cudnn_benchmark = True 16 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # copied from mmsegmentaion official config 2 | # https://github.com/open-mmlab/mmsegmentation/blob/master/configs/_base_/models/fpn_r50.py 3 | 4 | 5 | # model settings 6 | norm_cfg = dict(type='SyncBN', requires_grad=True) 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained='open-mmlab://resnet50_v1c', 10 | backbone=dict( 11 | type='ResNetV1c', 12 | depth=50, 13 | num_stages=4, 14 | 
out_indices=(0, 1, 2, 3), 15 | dilations=(1, 1, 1, 1), 16 | strides=(1, 2, 2, 2), 17 | norm_cfg=norm_cfg, 18 | norm_eval=False, 19 | style='pytorch', 20 | contract_dilation=True), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | num_outs=4), 26 | decode_head=dict( 27 | type='FPNHead', 28 | in_channels=[256, 256, 256, 256], 29 | in_index=[0, 1, 2, 3], 30 | feature_strides=[4, 8, 16, 32], 31 | channels=128, 32 | dropout_ratio=0.1, 33 | num_classes=19, 34 | norm_cfg=norm_cfg, 35 | align_corners=False, 36 | loss_decode=dict( 37 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 38 | # model training and testing settings 39 | train_cfg=dict(), 40 | test_cfg=dict(mode='whole')) 41 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU') -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU') -------------------------------------------------------------------------------- /semantic_segmentation/configs/sfpn_transxnet_base.py: -------------------------------------------------------------------------------- 1 
| _base_ = [ 2 | '_base_/models/fpn_r50.py', 3 | '_base_/datasets/ade20k_sfpn.py', 4 | '_base_/default_runtime.py', 5 | ] 6 | 7 | # model.pretrained is actually loaded by backbone, see 8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32 9 | 10 | model=dict( 11 | pretrained=None, 12 | backbone=dict( 13 | _delete_=True, 14 | pretrained=True, 15 | type='transxnet_b', 16 | drop_path_rate=0.3, 17 | ), 18 | neck=dict(in_channels=[76, 152, 336, 672],), 19 | decode_head=dict(num_classes=150)) 20 | 21 | ############## below we strictly follow uniformer #################################### 22 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py 23 | ############################################################################# 24 | gpu_multiples = 2 # we use 8 GPUs instead of the 4 used in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 27 | # learning policy 28 | lr_config = dict( 29 | policy='CosineAnnealing', 30 | warmup='linear', 31 | warmup_iters=1000, 32 | warmup_ratio=1.0 / 10, 33 | min_lr_ratio=1e-8) 34 | # runtime settings 35 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples) 36 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1) 37 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU') 38 | ############################################################################# 39 | 40 | # NOTE: setting this to True conflicts with gradient checkpointing, see 41 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503 42 | find_unused_parameters = False 43 | 44 | # placeholder for compatibility with newer mmseg versions 45 | resume_from = None 46 | device = 'cuda' 47 | 48 | # fp32 training -> 49 | optimizer_config = dict() 50 | 51 | # AMP -> 52 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
53 | # fp16 = dict() -------------------------------------------------------------------------------- /semantic_segmentation/configs/sfpn_transxnet_small.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/fpn_r50.py', 3 | '_base_/datasets/ade20k_sfpn.py', 4 | '_base_/default_runtime.py', 5 | ] 6 | 7 | # model.pretrained is actually loaded by backbone, see 8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32 9 | 10 | model=dict( 11 | pretrained=None, 12 | backbone=dict( 13 | _delete_=True, 14 | pretrained=True, 15 | type='transxnet_s', 16 | drop_path_rate=0.2, 17 | ), 18 | neck=dict(in_channels=[64, 128, 320, 512]), 19 | decode_head=dict(num_classes=150)) 20 | 21 | ############## below we strictly follow uniformer #################################### 22 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py 23 | ############################################################################# 24 | gpu_multiples=2 # we use 8 GPUs instead of the 4 used in mmsegmentation, so lr*2 and max_iters/2 25 | # optimizer 26 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 27 | # learning policy 28 | lr_config = dict( 29 | policy='CosineAnnealing', 30 | warmup='linear', 31 | warmup_iters=1000, 32 | warmup_ratio=1.0 / 10, 33 | min_lr_ratio=1e-8) 34 | # runtime settings 35 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples) 36 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1) 37 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU') 38 | ############################################################################# 39 | 40 | # NOTE: setting this to True conflicts with gradient checkpointing, see 41 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503 42 | find_unused_parameters = False 43 | 44 | # placeholder for compatibility with newer mmseg versions 45 | resume_from = None 46 | device = 'cuda' 47 | 48 | # fp32 training -> 49 | optimizer_config = dict() 50 | 51 | # AMP -> 52 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
53 | # fp16 = dict() 54 | -------------------------------------------------------------------------------- /semantic_segmentation/configs/sfpn_transxnet_tiny.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/fpn_r50.py', 3 | '_base_/datasets/ade20k_sfpn.py', 4 | '_base_/default_runtime.py', 5 | ] 6 | 7 | # model.pretrained is actually loaded by backbone, see 8 | # https://github.com/open-mmlab/mmsegmentation/blob/186572a3ce64ac9b6b37e66d58c76515000c3280/mmseg/models/segmentors/encoder_decoder.py#L32 9 | model=dict( 10 | pretrained=None, 11 | backbone=dict( 12 | _delete_=True, 13 | pretrained=True, 14 | type='transxnet_t', 15 | drop_path_rate=0.1, 16 | ), 17 | neck=dict(in_channels=[48, 96, 224, 448]), 18 | decode_head=dict(num_classes=150)) 19 | 20 | ############## below we strictly follow uniformer #################################### 21 | # https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/fpn_seg/exp/fpn_global_small/config.py 22 | ############################################################################# 23 | gpu_multiples=2 # we use 8 GPUs instead of the 4 used in mmsegmentation, so lr*2 and max_iters/2 24 | # optimizer 25 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 26 | # learning policy 27 | lr_config = dict( 28 | policy='CosineAnnealing', 29 | warmup='linear', 30 | warmup_iters=1000, 31 | warmup_ratio=1.0 / 10, 32 | min_lr_ratio=1e-8) 33 | # runtime settings 34 | runner = dict(type='IterBasedRunner', max_iters=160000//gpu_multiples) 35 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples, max_keep_ckpts=1) 36 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU', save_best='mIoU') 37 | ############################################################################# 38 | 39 | # NOTE: setting this to True conflicts with gradient checkpointing, see 40 | # https://github.com/allenai/longformer/issues/63#issuecomment-648861503 41 | find_unused_parameters = False 42 | 43 | # placeholder for compatibility with newer mmseg versions 44 | resume_from = None 45 | device = 'cuda' 46 | 47 | # fp32 training -> 48 | optimizer_config = dict() 49 | 50 | # AMP -> 51 | # optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 52 | # fp16 = dict() -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 
7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 11 | # from .apex_runner.optimizer import DistOptimizerHook 12 | from .checkpoint import load_checkpoint 13 | from .customized_text import CustomizedTextLoggerHook 14 | from .layer_decay_optimizer_constructor import \ 15 | LearningRateDecayOptimizerConstructor 16 | from .resize_transform import SETR_Resize 17 | 18 | # from .train_api import train_segmentor 19 | 20 | # __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'SETR_Resize', 'DistOptimizerHook', 'train_segmentor', 'CustomizedTextLoggerHook'] 21 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'SETR_Resize', 'CustomizedTextLoggerHook'] 22 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/checkpoint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/checkpoint.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/customized_text.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/customized_text.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/layer_decay_optimizer_constructor.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/__pycache__/resize_transform.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmcv_custom/__pycache__/resize_transform.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/apex_runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 
21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``, ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint. 31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/customized_text.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | import datetime 10 | from collections import OrderedDict 11 | 12 | import mmcv 13 | import torch 14 | from mmcv.runner import HOOKS, TextLoggerHook 15 | 16 | 17 | @HOOKS.register_module() 18 | class CustomizedTextLoggerHook(TextLoggerHook): 19 | """Customized Text Logger hook. 20 | 21 | This logger prints out both lr and layer_0_lr. 
22 | 23 | """ 24 | 25 | def _log_info(self, log_dict, runner): 26 | # print exp name for users to distinguish experiments 27 | # at every ``interval_exp_name`` iterations and the end of each epoch 28 | if runner.meta is not None and 'exp_name' in runner.meta: 29 | if (self.every_n_iters(runner, self.interval_exp_name)) or ( 30 | self.by_epoch and self.end_of_epoch(runner)): 31 | exp_info = f'Exp name: {runner.meta["exp_name"]}' 32 | runner.logger.info(exp_info) 33 | 34 | if log_dict['mode'] == 'train': 35 | lr_str = {} 36 | for lr_type in ['lr', 'layer_0_lr']: 37 | if isinstance(log_dict[lr_type], dict): 38 | lr_str[lr_type] = [] 39 | for k, val in log_dict[lr_type].items(): 40 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}') 41 | lr_str[lr_type] = ' '.join(lr_str[lr_type]) 42 | else: 43 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}' 44 | 45 | # by epoch: Epoch [4][100/1000] 46 | # by iter: Iter [100/100000] 47 | if self.by_epoch: 48 | log_str = f'Epoch [{log_dict["epoch"]}]' \ 49 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' 50 | else: 51 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' 52 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, ' 53 | 54 | if 'time' in log_dict.keys(): 55 | self.time_sec_tot += (log_dict['time'] * self.interval) 56 | time_sec_avg = self.time_sec_tot / ( 57 | runner.iter - self.start_iter + 1) 58 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) 59 | eta_str = str(datetime.timedelta(seconds=int(eta_sec))) 60 | log_str += f'eta: {eta_str}, ' 61 | log_str += f'time: {log_dict["time"]:.3f}, ' \ 62 | f'data_time: {log_dict["data_time"]:.3f}, ' 63 | # log GPU memory statistics 64 | if torch.cuda.is_available(): 65 | log_str += f'memory: {log_dict["memory"]}, ' 66 | else: 67 | # val/test time 68 | # here 1000 is the length of the val dataloader 69 | # by epoch: Epoch[val] [4][1000] 70 | # by iter: Iter[val] [1000] 71 | if self.by_epoch: 72 | log_str = f'Epoch({log_dict["mode"]}) ' \ 73 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' 74 | else: 75 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' 76 | 77 | log_items = [] 78 | for name, val in log_dict.items(): 79 | # TODO: resolve this hack 80 | # these items are already included in log_str 81 | if name in [ 82 | 'mode', 'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time', 83 | 'memory', 'epoch' 84 | ]: 85 | continue 86 | if isinstance(val, float): 87 | val = f'{val:.4f}' 88 | log_items.append(f'{name}: {val}') 89 | log_str += ', '.join(log_items) 90 | 91 | runner.logger.info(log_str) 92 | 93 | 94 | def log(self, runner): 95 | if 'eval_iter_num' in runner.log_buffer.output: 96 | # this does not modify runner.iter and applies regardless of by_epoch 97 | cur_iter = runner.log_buffer.output.pop('eval_iter_num') 98 | else: 99 | cur_iter = self.get_iter(runner, inner_iter=True) 100 | 101 | log_dict = OrderedDict( 102 | mode=self.get_mode(runner), 103 | epoch=self.get_epoch(runner), 104 | iter=cur_iter) 105 | 106 | # record lr and layer_0_lr 107 | cur_lr = runner.current_lr() 108 | if isinstance(cur_lr, list): 109 | log_dict['layer_0_lr'] = min(cur_lr) 110 | log_dict['lr'] = max(cur_lr) 111 | else: 112 | assert isinstance(cur_lr, dict) 113 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {} 114 | for k, lr_ in cur_lr.items(): 115 | assert isinstance(lr_, list) 116 | log_dict['layer_0_lr'].update({k: min(lr_)}) 117 | log_dict['lr'].update({k: max(lr_)}) 118 | 119 | if 'time' in runner.log_buffer.output: 120 | # log GPU memory statistics 121 | if torch.cuda.is_available(): 122 | log_dict['memory'] = 
self._get_max_memory(runner) 123 | 124 | log_dict = dict(log_dict, **runner.log_buffer.output) 125 | 126 | self._log_info(log_dict, runner) 127 | self._dump_log(log_dict, runner) 128 | return log_dict 129 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | import json 10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 11 | from mmcv.runner import get_dist_info 12 | 13 | 14 | def get_num_layer_layer_wise(var_name, num_max_layer=12): 15 | 16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 17 | return 0 18 | elif var_name.startswith("backbone.downsample_layers"): 19 | stage_id = int(var_name.split('.')[2]) 20 | if stage_id == 0: 21 | layer_id = 0 22 | elif stage_id == 1: 23 | layer_id = 2 24 | elif stage_id == 2: 25 | layer_id = 3 26 | elif stage_id == 3: 27 | layer_id = num_max_layer 28 | return layer_id 29 | elif var_name.startswith("backbone.stages"): 30 | stage_id = int(var_name.split('.')[2]) 31 | block_id = int(var_name.split('.')[3]) 32 | if stage_id == 0: 33 | layer_id = 1 34 | elif stage_id == 1: 35 | layer_id = 2 36 | elif stage_id == 2: 37 | layer_id = 3 + block_id // 3 38 | elif stage_id == 3: 39 | layer_id = num_max_layer 40 | return layer_id 41 | else: 42 | return num_max_layer + 1 43 | 44 | 45 | def get_num_layer_stage_wise(var_name, num_max_layer): 46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 47 | return 0 48 | elif var_name.startswith("backbone.downsample_layers"): 49 | return 0 50 | elif var_name.startswith("backbone.stages"): 51 | stage_id = int(var_name.split('.')[2]) 52 | return stage_id + 1 53 | else: 54 | return num_max_layer - 1 55 | 56 | 57 | @OPTIMIZER_BUILDERS.register_module() 58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): 59 | def add_params(self, params, module, prefix='', is_dcn_module=None): 60 | """Add all parameters of module to the params list. 61 | The parameters of the given module will be added to the list of param 62 | groups, with specific rules defined by paramwise_cfg. 63 | Args: 64 | params (list[dict]): A list of param groups, it will be modified 65 | in place. 66 | module (nn.Module): The module to be added. 67 | prefix (str): The prefix of the module 68 | is_dcn_module (int|float|None): If the current module is a 69 | submodule of DCN, `is_dcn_module` will be passed to 70 | control conv_offset layer's learning rate. Defaults to None. 71 | """ 72 | parameter_groups = {} 73 | print(self.paramwise_cfg) 74 | num_layers = self.paramwise_cfg.get('num_layers') + 2 75 | decay_rate = self.paramwise_cfg.get('decay_rate') 76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise") 77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers)) 78 | weight_decay = self.base_wd 79 | 80 | for name, param in module.named_parameters(): 81 | if not param.requires_grad: 82 | continue # frozen weights 83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 84 | group_name = "no_decay" 85 | this_weight_decay = 0. 
86 | else: 87 | group_name = "decay" 88 | this_weight_decay = weight_decay 89 | 90 | if decay_type == "layer_wise": 91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers')) 92 | elif decay_type == "stage_wise": 93 | layer_id = get_num_layer_stage_wise(name, num_layers) 94 | 95 | group_name = "layer_%d_%s" % (layer_id, group_name) 96 | 97 | if group_name not in parameter_groups: 98 | scale = decay_rate ** (num_layers - layer_id - 1) 99 | 100 | parameter_groups[group_name] = { 101 | "weight_decay": this_weight_decay, 102 | "params": [], 103 | "param_names": [], 104 | "lr_scale": scale, 105 | "group_name": group_name, 106 | "lr": scale * self.base_lr, 107 | } 108 | 109 | parameter_groups[group_name]["params"].append(param) 110 | parameter_groups[group_name]["param_names"].append(name) 111 | rank, _ = get_dist_info() 112 | if rank == 0: 113 | to_display = {} 114 | for key in parameter_groups: 115 | to_display[key] = { 116 | "param_names": parameter_groups[key]["param_names"], 117 | "lr_scale": parameter_groups[key]["lr_scale"], 118 | "lr": parameter_groups[key]["lr"], 119 | "weight_decay": parameter_groups[key]["weight_decay"], 120 | } 121 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 122 | 123 | params.extend(parameter_groups.values()) 124 | -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/resize_transform.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | from mmseg.datasets.builder import PIPELINES 4 | 5 | 6 | @PIPELINES.register_module() 7 | class SETR_Resize(object): 8 | """Resize images & seg. 9 | 10 | This transform resizes the input image to some scale. If the input dict 11 | contains the key "scale", then the scale in the input dict is used, 12 | otherwise the specified scale in the init method is used. 13 | 14 | ``img_scale`` can either be a tuple (single-scale) or a list of tuple 15 | (multi-scale). There are 3 multiscale modes: 16 | 17 | - ``ratio_range is not None``: randomly sample a ratio from the ratio range 18 | and multiply it with the image scale. 19 | 20 | - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a 21 | scale from the a range. 22 | 23 | - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a 24 | scale from multiple scales. 25 | 26 | Args: 27 | img_scale (tuple or list[tuple]): Images scales for resizing. 28 | multiscale_mode (str): Either "range" or "value". 29 | ratio_range (tuple[float]): (min_ratio, max_ratio) 30 | keep_ratio (bool): Whether to keep the aspect ratio when resizing the 31 | image. 
32 | """ 33 | 34 | def __init__(self, 35 | img_scale=None, 36 | multiscale_mode='range', 37 | ratio_range=None, 38 | keep_ratio=True, 39 | crop_size=None, 40 | setr_multi_scale=False): 41 | 42 | if img_scale is None: 43 | self.img_scale = None 44 | else: 45 | if isinstance(img_scale, list): 46 | self.img_scale = img_scale 47 | else: 48 | self.img_scale = [img_scale] 49 | # assert mmcv.is_list_of(self.img_scale, tuple) 50 | 51 | if ratio_range is not None: 52 | # mode 1: given a scale and a range of image ratio 53 | assert len(self.img_scale) == 1 54 | else: 55 | # mode 2: given multiple scales or a range of scales 56 | assert multiscale_mode in ['value', 'range'] 57 | 58 | self.multiscale_mode = multiscale_mode 59 | self.ratio_range = ratio_range 60 | self.keep_ratio = keep_ratio 61 | self.crop_size = crop_size 62 | self.setr_multi_scale = setr_multi_scale 63 | 64 | @staticmethod 65 | def random_select(img_scales): 66 | """Randomly select an img_scale from given candidates. 67 | 68 | Args: 69 | img_scales (list[tuple]): Images scales for selection. 70 | 71 | Returns: 72 | (tuple, int): Returns a tuple ``(img_scale, scale_dix)``, 73 | where ``img_scale`` is the selected image scale and 74 | ``scale_idx`` is the selected index in the given candidates. 75 | """ 76 | 77 | assert mmcv.is_list_of(img_scales, tuple) 78 | scale_idx = np.random.randint(len(img_scales)) 79 | img_scale = img_scales[scale_idx] 80 | return img_scale, scale_idx 81 | 82 | @staticmethod 83 | def random_sample(img_scales): 84 | """Randomly sample an img_scale when ``multiscale_mode=='range'``. 85 | 86 | Args: 87 | img_scales (list[tuple]): Images scale range for sampling. 88 | There must be two tuples in img_scales, which specify the lower 89 | and uper bound of image scales. 90 | 91 | Returns: 92 | (tuple, None): Returns a tuple ``(img_scale, None)``, where 93 | ``img_scale`` is sampled scale and None is just a placeholder 94 | to be consistent with :func:`random_select`. 95 | """ 96 | 97 | assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 98 | img_scale_long = [max(s) for s in img_scales] 99 | img_scale_short = [min(s) for s in img_scales] 100 | long_edge = np.random.randint( 101 | min(img_scale_long), 102 | max(img_scale_long) + 1) 103 | short_edge = np.random.randint( 104 | min(img_scale_short), 105 | max(img_scale_short) + 1) 106 | img_scale = (long_edge, short_edge) 107 | return img_scale, None 108 | 109 | @staticmethod 110 | def random_sample_ratio(img_scale, ratio_range): 111 | """Randomly sample an img_scale when ``ratio_range`` is specified. 112 | 113 | A ratio will be randomly sampled from the range specified by 114 | ``ratio_range``. Then it would be multiplied with ``img_scale`` to 115 | generate sampled scale. 116 | 117 | Args: 118 | img_scale (tuple): Images scale base to multiply with ratio. 119 | ratio_range (tuple[float]): The minimum and maximum ratio to scale 120 | the ``img_scale``. 121 | 122 | Returns: 123 | (tuple, None): Returns a tuple ``(scale, None)``, where 124 | ``scale`` is sampled ratio multiplied with ``img_scale`` and 125 | None is just a placeholder to be consistent with 126 | :func:`random_select`. 
127 | """ 128 | 129 | assert isinstance(img_scale, tuple) and len(img_scale) == 2 130 | min_ratio, max_ratio = ratio_range 131 | assert min_ratio <= max_ratio 132 | ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio 133 | scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) 134 | return scale, None 135 | 136 | def _random_scale(self, results): 137 | """Randomly sample an img_scale according to ``ratio_range`` and 138 | ``multiscale_mode``. 139 | 140 | If ``ratio_range`` is specified, a ratio will be sampled and be 141 | multiplied with ``img_scale``. 142 | If multiple scales are specified by ``img_scale``, a scale will be 143 | sampled according to ``multiscale_mode``. 144 | Otherwise, single scale will be used. 145 | 146 | Args: 147 | results (dict): Result dict from :obj:`dataset`. 148 | 149 | Returns: 150 | dict: Two new keys 'scale` and 'scale_idx` are added into 151 | ``results``, which would be used by subsequent pipelines. 152 | """ 153 | 154 | if self.ratio_range is not None: 155 | scale, scale_idx = self.random_sample_ratio( 156 | self.img_scale[0], self.ratio_range) 157 | elif len(self.img_scale) == 1: 158 | scale, scale_idx = self.img_scale[0], 0 159 | elif self.multiscale_mode == 'range': 160 | scale, scale_idx = self.random_sample(self.img_scale) 161 | elif self.multiscale_mode == 'value': 162 | scale, scale_idx = self.random_select(self.img_scale) 163 | else: 164 | raise NotImplementedError 165 | 166 | results['scale'] = scale 167 | results['scale_idx'] = scale_idx 168 | 169 | def _resize_img(self, results): 170 | """Resize images with ``results['scale']``.""" 171 | 172 | if self.keep_ratio: 173 | if self.setr_multi_scale: 174 | if min(results['scale']) < self.crop_size[0]: 175 | new_short = self.crop_size[0] 176 | else: 177 | new_short = min(results['scale']) 178 | 179 | h, w = results['img'].shape[:2] 180 | if h > w: 181 | new_h, new_w = new_short * h / w, new_short 182 | else: 183 | new_h, new_w = new_short, new_short * w / h 184 | results['scale'] = (new_h, new_w) 185 | 186 | img, scale_factor = mmcv.imrescale( 187 | results['img'], results['scale'], return_scale=True) 188 | # the w_scale and h_scale has minor difference 189 | # a real fix should be done in the mmcv.imrescale in the future 190 | new_h, new_w = img.shape[:2] 191 | h, w = results['img'].shape[:2] 192 | w_scale = new_w / w 193 | h_scale = new_h / h 194 | else: 195 | img, w_scale, h_scale = mmcv.imresize( 196 | results['img'], results['scale'], return_scale=True) 197 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 198 | dtype=np.float32) 199 | results['img'] = img 200 | results['img_shape'] = img.shape 201 | results['pad_shape'] = img.shape # in case that there is no padding 202 | results['scale_factor'] = scale_factor 203 | results['keep_ratio'] = self.keep_ratio 204 | 205 | def _resize_seg(self, results): 206 | """Resize semantic segmentation map with ``results['scale']``.""" 207 | for key in results.get('seg_fields', []): 208 | if self.keep_ratio: 209 | gt_seg = mmcv.imrescale( 210 | results[key], results['scale'], interpolation='nearest') 211 | else: 212 | gt_seg = mmcv.imresize( 213 | results[key], results['scale'], interpolation='nearest') 214 | results['gt_semantic_seg'] = gt_seg 215 | 216 | def __call__(self, results): 217 | """Call function to resize images, bounding boxes, masks, semantic 218 | segmentation map. 219 | 220 | Args: 221 | results (dict): Result dict from loading pipeline. 
222 | 223 | Returns: 224 | dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', 225 | 'keep_ratio' keys are added into result dict. 226 | """ 227 | 228 | if 'scale' not in results: 229 | self._random_scale(results) 230 | self._resize_img(results) 231 | self._resize_seg(results) 232 | return results 233 | 234 | def __repr__(self): 235 | repr_str = self.__class__.__name__ 236 | repr_str += (f'(img_scale={self.img_scale}, ' 237 | f'multiscale_mode={self.multiscale_mode}, ' 238 | f'ratio_range={self.ratio_range}, ' 239 | f'keep_ratio={self.keep_ratio})') 240 | return repr_str -------------------------------------------------------------------------------- /semantic_segmentation/mmcv_custom/train_api.py: -------------------------------------------------------------------------------- 1 | import random 2 | import warnings 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import build_optimizer, build_runner 8 | from mmseg.core import DistEvalHook, EvalHook 9 | from mmseg.datasets import build_dataloader, build_dataset 10 | from mmseg.utils import get_root_logger 11 | 12 | try: 13 | import apex 14 | except: 15 | print('apex is not installed') 16 | 17 | 18 | def set_random_seed(seed, deterministic=False): 19 | """Set random seed. 20 | 21 | Args: 22 | seed (int): Seed to be used. 23 | deterministic (bool): Whether to set the deterministic option for 24 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` 25 | to True and `torch.backends.cudnn.benchmark` to False. 26 | Default: False. 27 | """ 28 | random.seed(seed) 29 | np.random.seed(seed) 30 | torch.manual_seed(seed) 31 | torch.cuda.manual_seed_all(seed) 32 | if deterministic: 33 | torch.backends.cudnn.deterministic = True 34 | torch.backends.cudnn.benchmark = False 35 | 36 | 37 | def train_segmentor(model, 38 | dataset, 39 | cfg, 40 | distributed=False, 41 | validate=False, 42 | timestamp=None, 43 | meta=None): 44 | """Launch segmentor training.""" 45 | logger = get_root_logger(cfg.log_level) 46 | 47 | # prepare data loaders 48 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 49 | data_loaders = [ 50 | build_dataloader( 51 | ds, 52 | cfg.data.samples_per_gpu, 53 | cfg.data.workers_per_gpu, 54 | # cfg.gpus will be ignored if distributed 55 | len(cfg.gpu_ids), 56 | dist=distributed, 57 | seed=cfg.seed, 58 | drop_last=True) for ds in dataset 59 | ] 60 | 61 | # build optimizer 62 | optimizer = build_optimizer(model, cfg.optimizer) 63 | 64 | # use apex fp16 optimizer 65 | if cfg.optimizer_config.get("type", None) and cfg.optimizer_config["type"] == "DistOptimizerHook": 66 | if cfg.optimizer_config.get("use_fp16", False): 67 | model, optimizer = apex.amp.initialize( 68 | model.cuda(), optimizer, opt_level="O1") 69 | for m in model.modules(): 70 | if hasattr(m, "fp16_enabled"): 71 | m.fp16_enabled = True 72 | 73 | # put model on gpus 74 | if distributed: 75 | find_unused_parameters = cfg.get('find_unused_parameters', False) 76 | # Sets the `find_unused_parameters` parameter in 77 | # torch.nn.parallel.DistributedDataParallel 78 | model = MMDistributedDataParallel( 79 | model.cuda(), 80 | device_ids=[torch.cuda.current_device()], 81 | broadcast_buffers=False, 82 | find_unused_parameters=find_unused_parameters) 83 | else: 84 | model = MMDataParallel( 85 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) 86 | 87 | if cfg.get('runner') is None: 88 | cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters} 89 
| warnings.warn( 90 | 'config is now expected to have a `runner` section, ' 91 | 'please set `runner` in your config.', UserWarning) 92 | 93 | runner = build_runner( 94 | cfg.runner, 95 | default_args=dict( 96 | model=model, 97 | batch_processor=None, 98 | optimizer=optimizer, 99 | work_dir=cfg.work_dir, 100 | logger=logger, 101 | meta=meta)) 102 | 103 | # register hooks 104 | runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, 105 | cfg.checkpoint_config, cfg.log_config, 106 | cfg.get('momentum_config', None)) 107 | 108 | # an ugly walkaround to make the .log and .log.json filenames the same 109 | runner.timestamp = timestamp 110 | 111 | # register eval hooks 112 | if validate: 113 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) 114 | val_dataloader = build_dataloader( 115 | val_dataset, 116 | samples_per_gpu=1, 117 | workers_per_gpu=cfg.data.workers_per_gpu, 118 | dist=distributed, 119 | shuffle=False) 120 | eval_cfg = cfg.get('evaluation', {}) 121 | eval_cfg['by_epoch'] = 'IterBasedRunner' not in cfg.runner['type'] 122 | eval_hook = DistEvalHook if distributed else EvalHook 123 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) 124 | 125 | if cfg.resume_from: 126 | runner.resume(cfg.resume_from) 127 | elif cfg.load_from: 128 | runner.load_checkpoint(cfg.load_from) 129 | runner.run(data_loaders, cfg.workflow) 130 | -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .align_resize import AlignResize 3 | 4 | __all__=['AlignResize'] -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-37.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LMMMEng/TransXNet/e4826f4894ce2d8c37b5dd1bb15d2f176c24d6e1/semantic_segmentation/mmseg_custom/__pycache__/align_resize.cpython-38.pyc -------------------------------------------------------------------------------- /semantic_segmentation/mmseg_custom/align_resize.py: -------------------------------------------------------------------------------- 1 | ######### 2 | # copied from uniformer 3 | # 
https://github.com/Sense-X/UniFormer/blob/main/semantic_segmentation/tools/align_resize.py 4 | ######### 5 | import mmcv 6 | import numpy as np 7 | from mmcv.utils import deprecated_api_warning, is_tuple_of 8 | from mmseg.datasets.builder import PIPELINES 9 | from numpy import random 10 | 11 | 12 | @PIPELINES.register_module() 13 | class AlignResize(object): 14 | """Resize images & segmentation maps, aligning sizes to be divisible by ``size_divisor``. 15 | """ 16 | 17 | def __init__(self, 18 | img_scale=None, 19 | multiscale_mode='range', 20 | ratio_range=None, 21 | keep_ratio=True, 22 | size_divisor=32): 23 | if img_scale is None: 24 | self.img_scale = None 25 | else: 26 | if isinstance(img_scale, list): 27 | self.img_scale = img_scale 28 | else: 29 | self.img_scale = [img_scale] 30 | assert mmcv.is_list_of(self.img_scale, tuple) 31 | 32 | if ratio_range is not None: 33 | # mode 1: given img_scale=None and a range of image ratio 34 | # mode 2: given a scale and a range of image ratio 35 | assert self.img_scale is None or len(self.img_scale) == 1 36 | else: 37 | # mode 3 and 4: given multiple scales or a range of scales 38 | assert multiscale_mode in ['value', 'range'] 39 | 40 | self.multiscale_mode = multiscale_mode 41 | self.ratio_range = ratio_range 42 | self.keep_ratio = keep_ratio 43 | self.size_divisor = size_divisor 44 | 45 | @staticmethod 46 | def random_select(img_scales): 47 | """Randomly select an img_scale from given candidates. 48 | Args: 49 | img_scales (list[tuple]): Image scales for selection. 50 | Returns: 51 | (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, 52 | where ``img_scale`` is the selected image scale and 53 | ``scale_idx`` is the selected index in the given candidates. 54 | """ 55 | 56 | assert mmcv.is_list_of(img_scales, tuple) 57 | scale_idx = np.random.randint(len(img_scales)) 58 | img_scale = img_scales[scale_idx] 59 | return img_scale, scale_idx 60 | 61 | @staticmethod 62 | def random_sample(img_scales): 63 | """Randomly sample an img_scale when ``multiscale_mode=='range'``. 64 | Args: 65 | img_scales (list[tuple]): Image scale range for sampling. 66 | There must be two tuples in img_scales, which specify the lower 67 | and upper bound of image scales. 68 | Returns: 69 | (tuple, None): Returns a tuple ``(img_scale, None)``, where 70 | ``img_scale`` is the sampled scale and None is just a placeholder 71 | to be consistent with :func:`random_select`. 72 | """ 73 | 74 | assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 75 | img_scale_long = [max(s) for s in img_scales] 76 | img_scale_short = [min(s) for s in img_scales] 77 | long_edge = np.random.randint( 78 | min(img_scale_long), 79 | max(img_scale_long) + 1) 80 | short_edge = np.random.randint( 81 | min(img_scale_short), 82 | max(img_scale_short) + 1) 83 | img_scale = (long_edge, short_edge) 84 | return img_scale, None 85 | 86 | @staticmethod 87 | def random_sample_ratio(img_scale, ratio_range): 88 | """Randomly sample an img_scale when ``ratio_range`` is specified. 89 | A ratio will be randomly sampled from the range specified by 90 | ``ratio_range``. Then it is multiplied with ``img_scale`` to 91 | generate the sampled scale. 92 | Args: 93 | img_scale (tuple): Image scale base to multiply with the ratio. 94 | ratio_range (tuple[float]): The minimum and maximum ratio to scale 95 | the ``img_scale``. 96 | Returns: 97 | (tuple, None): Returns a tuple ``(scale, None)``, where 98 | ``scale`` is the sampled ratio multiplied with ``img_scale`` and 99 | None is just a placeholder to be consistent with 100 | :func:`random_select`. 
101 | """ 102 | 103 | assert isinstance(img_scale, tuple) and len(img_scale) == 2 104 | min_ratio, max_ratio = ratio_range 105 | assert min_ratio <= max_ratio 106 | ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio 107 | scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) 108 | return scale, None 109 | 110 | def _random_scale(self, results): 111 | """Randomly sample an img_scale according to ``ratio_range`` and 112 | ``multiscale_mode``. 113 | If ``ratio_range`` is specified, a ratio will be sampled and 114 | multiplied with ``img_scale``. 115 | If multiple scales are specified by ``img_scale``, a scale will be 116 | sampled according to ``multiscale_mode``. 117 | Otherwise, a single scale will be used. 118 | Args: 119 | results (dict): Result dict from :obj:`dataset`. 120 | Returns: 121 | dict: Two new keys ``scale`` and ``scale_idx`` are added into 122 | ``results``, which will be used by subsequent pipelines. 123 | """ 124 | 125 | if self.ratio_range is not None: 126 | if self.img_scale is None: 127 | h, w = results['img'].shape[:2] 128 | scale, scale_idx = self.random_sample_ratio((w, h), 129 | self.ratio_range) 130 | else: 131 | scale, scale_idx = self.random_sample_ratio( 132 | self.img_scale[0], self.ratio_range) 133 | elif len(self.img_scale) == 1: 134 | scale, scale_idx = self.img_scale[0], 0 135 | elif self.multiscale_mode == 'range': 136 | scale, scale_idx = self.random_sample(self.img_scale) 137 | elif self.multiscale_mode == 'value': 138 | scale, scale_idx = self.random_select(self.img_scale) 139 | else: 140 | raise NotImplementedError 141 | 142 | results['scale'] = scale 143 | results['scale_idx'] = scale_idx 144 | 145 | def _align(self, img, size_divisor, interpolation=None): 146 | align_h = int(np.ceil(img.shape[0] / size_divisor)) * size_divisor 147 | align_w = int(np.ceil(img.shape[1] / size_divisor)) * size_divisor 148 | if interpolation is None: 149 | img = mmcv.imresize(img, (align_w, align_h)) 150 | else: 151 | img = mmcv.imresize(img, (align_w, align_h), interpolation=interpolation) 152 | return img 153 | 154 | def _resize_img(self, results): 155 | """Resize images with ``results['scale']``.""" 156 | if self.keep_ratio: 157 | img, scale_factor = mmcv.imrescale( 158 | results['img'], results['scale'], return_scale=True) 159 | #### align #### 160 | img = self._align(img, self.size_divisor) 161 | # w_scale and h_scale have a minor difference 162 | # a real fix should be done in mmcv.imrescale in the future 163 | new_h, new_w = img.shape[:2] 164 | h, w = results['img'].shape[:2] 165 | w_scale = new_w / w 166 | h_scale = new_h / h 167 | else: 168 | img, w_scale, h_scale = mmcv.imresize( 169 | results['img'], results['scale'], return_scale=True) 170 | 171 | h, w = img.shape[:2] 172 | assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \ 173 | int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \ 174 | "img size not aligned. 
h:{} w:{}".format(h, w) 175 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 176 | dtype=np.float32) 177 | results['img'] = img 178 | results['img_shape'] = img.shape 179 | results['pad_shape'] = img.shape # in case there is no padding 180 | results['scale_factor'] = scale_factor 181 | results['keep_ratio'] = self.keep_ratio 182 | 183 | def _resize_seg(self, results): 184 | """Resize semantic segmentation map with ``results['scale']``.""" 185 | for key in results.get('seg_fields', []): 186 | if self.keep_ratio: 187 | gt_seg = mmcv.imrescale( 188 | results[key], results['scale'], interpolation='nearest') 189 | gt_seg = self._align(gt_seg, self.size_divisor, interpolation='nearest') 190 | else: 191 | gt_seg = mmcv.imresize( 192 | results[key], results['scale'], interpolation='nearest') 193 | h, w = gt_seg.shape[:2] 194 | assert int(np.ceil(h / self.size_divisor)) * self.size_divisor == h and \ 195 | int(np.ceil(w / self.size_divisor)) * self.size_divisor == w, \ 196 | "gt_seg size not aligned. h:{} w:{}".format(h, w) 197 | results[key] = gt_seg 198 | 199 | def __call__(self, results): 200 | """Call function to resize images and semantic 201 | segmentation maps. 202 | Args: 203 | results (dict): Result dict from loading pipeline. 204 | Returns: 205 | dict: Resized results; 'img_shape', 'pad_shape', 'scale_factor' and 206 | 'keep_ratio' keys are added to the result dict. 207 | """ 208 | 209 | if 'scale' not in results: 210 | self._random_scale(results) 211 | self._resize_img(results) 212 | self._resize_seg(results) 213 | return results 214 | 215 | def __repr__(self): 216 | repr_str = self.__class__.__name__ 217 | repr_str += (f'(img_scale={self.img_scale}, ' 218 | f'multiscale_mode={self.multiscale_mode}, ' 219 | f'ratio_range={self.ratio_range}, ' 220 | f'keep_ratio={self.keep_ratio})') 221 | return repr_str 222 | -------------------------------------------------------------------------------- /semantic_segmentation/scripts/train_sfpn_transxnet_base.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | --master_port=$((RANDOM+10000)) \ 5 | train.py \ 6 | configs/sfpn_transxnet_base.py \ 7 | --work-dir work_dirs/sfpn_transxnet_base/ \ 8 | --launcher pytorch -------------------------------------------------------------------------------- /semantic_segmentation/scripts/train_sfpn_transxnet_small.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | --master_port=$((RANDOM+10000)) \ 5 | train.py \ 6 | configs/sfpn_transxnet_small.py \ 7 | --work-dir work_dirs/sfpn_transxnet_small/ \ 8 | --launcher pytorch -------------------------------------------------------------------------------- /semantic_segmentation/scripts/train_sfpn_transxnet_tiny.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python3 -m torch.distributed.launch \ 3 | --nproc_per_node=8 \ 4 | --master_port=$((RANDOM+10000)) \ 5 | train.py \ 6 | configs/sfpn_transxnet_tiny.py \ 7 | --work-dir work_dirs/sfpn_transxnet_tiny/ \ 8 | --launcher pytorch -------------------------------------------------------------------------------- /semantic_segmentation/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import mmcv 5 | 
import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, 8 | wrap_fp16_model) 9 | from mmcv.utils import DictAction 10 | 11 | from mmseg.apis import multi_gpu_test, single_gpu_test 12 | from mmseg.datasets import build_dataloader, build_dataset 13 | from mmseg.models import build_segmentor 14 | 15 | import mmcv_custom  # noqa: F401, imported for side effects 16 | import mmseg_custom  # noqa: F401, registers the AlignResize pipeline 17 | import transxnet  # noqa: F401, registers the TransXNet backbone 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser( 21 | description='mmseg test (and eval) a model') 22 | parser.add_argument('config', help='test config file path') 23 | parser.add_argument('checkpoint', help='checkpoint file') 24 | parser.add_argument( 25 | '--aug-test', action='store_true', help='use flip and multi-scale augmentation') 26 | parser.add_argument('--out', help='output result file in pickle format') 27 | parser.add_argument( 28 | '--format-only', 29 | action='store_true', 30 | help='Format the output results without performing evaluation. It is ' 31 | 'useful when you want to format the results to a specific format and ' 32 | 'submit them to the test server') 33 | parser.add_argument( 34 | '--eval', 35 | type=str, 36 | nargs='+', 37 | help='evaluation metrics, which depend on the dataset, e.g., "mIoU"' 38 | ' for generic datasets, and "cityscapes" for Cityscapes') 39 | parser.add_argument('--show', action='store_true', help='show results') 40 | parser.add_argument( 41 | '--show-dir', help='directory where painted images will be saved') 42 | parser.add_argument( 43 | '--gpu-collect', 44 | action='store_true', 45 | help='whether to use gpu to collect results.') 46 | parser.add_argument( 47 | '--tmpdir', 48 | help='tmp directory used for collecting results from multiple ' 49 | 'workers, available when gpu_collect is not specified') 50 | parser.add_argument( 51 | '--options', nargs='+', action=DictAction, help='custom options') 52 | parser.add_argument( 53 | '--eval-options', 54 | nargs='+', 55 | action=DictAction, 56 | help='custom options for evaluation') 57 | parser.add_argument( 58 | '--launcher', 59 | choices=['none', 'pytorch', 'slurm', 'mpi'], 60 | default='none', 61 | help='job launcher') 62 | parser.add_argument( 63 | '--opacity', 64 | type=float, 65 | default=0.5, 66 | help='Opacity of painted segmentation map. 
In (0, 1] range.') 67 | parser.add_argument('--local_rank', type=int, default=0) 68 | args = parser.parse_args() 69 | if 'LOCAL_RANK' not in os.environ: 70 | os.environ['LOCAL_RANK'] = str(args.local_rank) 71 | return args 72 | 73 | 74 | def main(): 75 | args = parse_args() 76 | 77 | assert args.out or args.eval or args.format_only or args.show \ 78 | or args.show_dir, \ 79 | ('Please specify at least one operation (save/eval/format/show the ' 80 | 'results) with the argument "--out", "--eval", ' 81 | '"--format-only", "--show" or "--show-dir"') 82 | 83 | if args.eval and args.format_only: 84 | raise ValueError('--eval and --format_only cannot both be specified') 85 | 86 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 87 | raise ValueError('The output file must be a pkl file.') 88 | 89 | cfg = mmcv.Config.fromfile(args.config) 90 | if args.options is not None: 91 | cfg.merge_from_dict(args.options) 92 | # set cudnn_benchmark 93 | if cfg.get('cudnn_benchmark', False): 94 | torch.backends.cudnn.benchmark = True 95 | if args.aug_test: 96 | # hard-coded pipeline index for multi-scale test augmentation 97 | cfg.data.test.pipeline[1].img_ratios = [ 98 | 0.5, 0.75, 1.0, 1.25, 1.5, 1.75 99 | ] 100 | cfg.data.test.pipeline[1].flip = True 101 | cfg.model.pretrained = None 102 | cfg.data.test.test_mode = True 103 | 104 | # init distributed env first, since logger depends on the dist info. 105 | if args.launcher == 'none': 106 | distributed = False 107 | else: 108 | distributed = True 109 | init_dist(args.launcher, **cfg.dist_params) 110 | 111 | # build the dataloader 112 | # TODO: support multiple images per gpu (only minor changes are needed) 113 | dataset = build_dataset(cfg.data.test) 114 | data_loader = build_dataloader( 115 | dataset, 116 | samples_per_gpu=1, 117 | workers_per_gpu=cfg.data.workers_per_gpu, 118 | dist=distributed, 119 | shuffle=False) 120 | 121 | # build the model and load checkpoint 122 | cfg.model.train_cfg = None 123 | model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) 124 | 125 | fp16_cfg = cfg.get('fp16', None) 126 | if fp16_cfg is not None: 127 | wrap_fp16_model(model) 128 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') 129 | model.CLASSES = dataset.CLASSES # checkpoint['meta']['CLASSES'] 130 | model.PALETTE = dataset.PALETTE # checkpoint['meta']['PALETTE'] 131 | 132 | efficient_test = False 133 | if args.eval_options is not None: 134 | efficient_test = args.eval_options.get('efficient_test', False) 135 | 136 | if not distributed: 137 | model = MMDataParallel(model, device_ids=[0]) 138 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir, 139 | efficient_test, args.opacity) 140 | else: 141 | model = MMDistributedDataParallel( 142 | model.cuda(), 143 | device_ids=[torch.cuda.current_device()], 144 | broadcast_buffers=False) 145 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 146 | args.gpu_collect, efficient_test) 147 | 148 | rank, _ = get_dist_info() 149 | if rank == 0: 150 | if args.out: 151 | print(f'\nwriting results to {args.out}') 152 | mmcv.dump(outputs, args.out) 153 | kwargs = {} if args.eval_options is None else args.eval_options 154 | if args.format_only: 155 | dataset.format_results(outputs, **kwargs) 156 | if args.eval: 157 | dataset.evaluate(outputs, args.eval, **kwargs) 158 | 159 | 160 | if __name__ == '__main__': 161 | main() 162 | -------------------------------------------------------------------------------- /semantic_segmentation/train.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import time 4 | import copy 5 | import argparse 6 | 7 | import mmcv 8 | import torch 9 | from mmcv.runner import init_dist 10 | from mmcv.utils import Config, DictAction, get_git_hash 11 | from mmseg import __version__ 12 | from mmseg.apis import set_random_seed, train_segmentor 13 | from mmseg.datasets import build_dataset 14 | from mmseg.models import build_segmentor 15 | from mmseg.utils import collect_env, get_root_logger 16 | 17 | # import custom utils 18 | import mmcv_custom 19 | import mmseg_custom 20 | # import models 21 | import transxnet 22 | 23 | 24 | def parse_args(): 25 | parser = argparse.ArgumentParser(description='Train a segmentor') 26 | parser.add_argument('config', help='train config file path') 27 | parser.add_argument('--work-dir', help='the dir to save logs and models') 28 | parser.add_argument( 29 | '--load-from', help='the checkpoint file to load weights from') 30 | parser.add_argument( 31 | '--resume-from', help='the checkpoint file to resume from') 32 | parser.add_argument( 33 | '--no-validate', 34 | action='store_true', 35 | help='skip checkpoint evaluation during training') 36 | group_gpus = parser.add_mutually_exclusive_group() 37 | group_gpus.add_argument( 38 | '--gpus', 39 | type=int, 40 | help='number of gpus to use ' 41 | '(only applicable to non-distributed training)') 42 | group_gpus.add_argument( 43 | '--gpu-ids', 44 | type=int, 45 | nargs='+', 46 | help='ids of gpus to use ' 47 | '(only applicable to non-distributed training)') 48 | parser.add_argument('--seed', type=int, default=None, help='random seed') 49 | parser.add_argument( 50 | '--deterministic', 51 | action='store_true', 52 | help='whether to set deterministic options for CUDNN backend.') 53 | parser.add_argument( 54 | '--options', nargs='+', action=DictAction, help='custom options') 55 | parser.add_argument( 56 | '--launcher', 57 | choices=['none', 'pytorch', 'slurm', 'mpi'], 58 | default='none', 59 | help='job launcher') 60 | parser.add_argument('--local_rank', type=int, default=0) 61 | args = parser.parse_args() 62 | if 'LOCAL_RANK' not in os.environ: 63 | os.environ['LOCAL_RANK'] = str(args.local_rank) 64 | 65 | return args 66 | 67 | 68 | def main(): 69 | args = parse_args() 70 | 71 | cfg = Config.fromfile(args.config) 72 | if args.options is not None: 73 | cfg.merge_from_dict(args.options) 74 | # set cudnn_benchmark 75 | if cfg.get('cudnn_benchmark', False): 76 | torch.backends.cudnn.benchmark = True 77 | 78 | # work_dir is determined in this priority: CLI > config file > default filename 79 | if args.work_dir is not None: 80 | # update configs according to CLI args if args.work_dir is not None 81 | cfg.work_dir = args.work_dir 82 | elif cfg.get('work_dir', None) is None: 83 | # use config filename as default work_dir if cfg.work_dir is None 84 | cfg.work_dir = osp.join('./work_dirs', 85 | osp.splitext(osp.basename(args.config))[0]) 86 | if args.load_from is not None: 87 | cfg.load_from = args.load_from 88 | if args.resume_from is not None: 89 | cfg.resume_from = args.resume_from 90 | if args.gpu_ids is not None: 91 | cfg.gpu_ids = args.gpu_ids 92 | else: 93 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 94 | 95 | # init distributed env first, since logger depends on the dist info. 
96 | if args.launcher == 'none': 97 | distributed = False 98 | else: 99 | distributed = True 100 | init_dist(args.launcher, **cfg.dist_params) 101 | 102 | # create work_dir 103 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 104 | # dump config 105 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 106 | # init the logger before other steps 107 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 108 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 109 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 110 | 111 | # init the meta dict to record some important information such as 112 | # environment info and seed, which will be logged 113 | meta = dict() 114 | # log env info 115 | env_info_dict = collect_env() 116 | env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()]) 117 | dash_line = '-' * 60 + '\n' 118 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 119 | dash_line) 120 | meta['env_info'] = env_info 121 | 122 | # log some basic info 123 | logger.info(f'Distributed training: {distributed}') 124 | logger.info(f'Config:\n{cfg.pretty_text}') 125 | 126 | # set random seeds 127 | if args.seed is not None: 128 | logger.info(f'Set random seed to {args.seed}, deterministic: ' 129 | f'{args.deterministic}') 130 | set_random_seed(args.seed, deterministic=args.deterministic) 131 | cfg.seed = args.seed 132 | meta['seed'] = args.seed 133 | meta['exp_name'] = osp.basename(args.config) 134 | 135 | model = build_segmentor( 136 | cfg.model, 137 | train_cfg=cfg.get('train_cfg'), 138 | test_cfg=cfg.get('test_cfg')) 139 | 140 | logger.info(model) 141 | 142 | datasets = [build_dataset(cfg.data.train)] 143 | if len(cfg.workflow) == 2: 144 | val_dataset = copy.deepcopy(cfg.data.val) 145 | val_dataset.pipeline = cfg.data.train.pipeline 146 | datasets.append(build_dataset(val_dataset)) 147 | if cfg.checkpoint_config is not None: 148 | # save mmseg version, config file content and class names in 149 | # checkpoints as meta data 150 | cfg.checkpoint_config.meta = dict( 151 | mmseg_version=f'{__version__}+{get_git_hash()[:7]}', 152 | config=cfg.pretty_text, 153 | CLASSES=datasets[0].CLASSES, 154 | PALETTE=datasets[0].PALETTE) 155 | # add an attribute for visualization convenience 156 | model.CLASSES = datasets[0].CLASSES 157 | train_segmentor( 158 | model, 159 | datasets, 160 | cfg, 161 | distributed=distributed, 162 | validate=(not args.no_validate), 163 | timestamp=timestamp, 164 | meta=meta) 165 | 166 | 167 | if __name__ == '__main__': 168 | main() --------------------------------------------------------------------------------