├── .gitignore ├── LICENSE ├── README.md ├── benchmark_onnx.py ├── detection ├── Dockerfile_mmdetseg ├── README.md ├── backbone │ ├── __init__.py │ └── efficientMod.py ├── checkpoint.py ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── cityscapes_detection.py │ │ │ ├── cityscapes_instance.py │ │ │ ├── coco_detection.py │ │ │ ├── coco_instance.py │ │ │ ├── coco_instance_semantic.py │ │ │ ├── deepfashion.py │ │ │ ├── lvis_v0.5_instance.py │ │ │ ├── lvis_v1_instance.py │ │ │ ├── voc0712.py │ │ │ └── wider_face.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── cascade_mask_rcnn_pvtv2_b2_fpn.py │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── cascade_rcnn_r50_fpn.py │ │ │ ├── fast_rcnn_r50_fpn.py │ │ │ ├── faster_rcnn_r50_caffe_c4.py │ │ │ ├── faster_rcnn_r50_caffe_dc5.py │ │ │ ├── faster_rcnn_r50_fpn.py │ │ │ ├── mask_rcnn_r50_caffe_c4.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── retinanet_r50_fpn.py │ │ │ ├── rpn_r50_caffe_c4.py │ │ │ ├── rpn_r50_fpn.py │ │ │ └── ssd300.py │ │ └── schedules │ │ │ ├── schedule_1x.py │ │ │ ├── schedule_20e.py │ │ │ └── schedule_2x.py │ ├── efficientMod_s.py │ └── efficientMod_s_Conv.py ├── dist_test.sh ├── dist_train.sh ├── get_flops.py ├── mmcv_custom │ ├── __init__.py │ ├── customized_text.py │ ├── layer_decay_optimizer_constructor.py │ └── runner │ │ ├── checkpoint.py │ │ ├── epoch_based_runner.py │ │ └── optimizer.py ├── mmdet_custom │ └── apis │ │ └── train.py ├── test.py ├── train.py └── video_demo.py ├── models ├── EfficientMod.py └── __init__.py ├── segmentation ├── README.md ├── align_resize.py ├── backbone │ ├── EfficientMod.py │ └── __init__.py ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ └── ade20k.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ └── fpn_r50.py │ │ └── schedules │ │ │ ├── schedule_160k.py │ │ │ ├── schedule_20k.py │ │ │ ├── schedule_40k.py │ │ │ └── schedule_80k.py │ └── sem_fpn │ │ ├── efficientMod_s.py │ │ └── efficientMod_s_Conv.py ├── dist_test.sh ├── dist_train.sh ├── get_flops.py ├── image_demo.py ├── images │ ├── 1.jpg │ ├── 2.jpg │ ├── 3.jpg │ ├── 4.jpg │ ├── 5.jpg │ ├── 6.jpg │ └── 7.jpg ├── test.py ├── tools │ ├── analyze_logs.py │ ├── benchmark.py │ ├── browse_dataset.py │ ├── convert_datasets │ │ ├── chase_db1.py │ │ ├── cityscapes.py │ │ ├── coco_stuff10k.py │ │ ├── coco_stuff164k.py │ │ ├── drive.py │ │ ├── hrf.py │ │ ├── pascal_context.py │ │ ├── stare.py │ │ └── voc_aug.py │ ├── deploy_test.py │ ├── dist_test.sh │ ├── dist_train.sh │ ├── model_converters │ │ ├── mit2mmseg.py │ │ ├── swin2mmseg.py │ │ └── vit2mmseg.py │ ├── onnx2tensorrt.py │ ├── print_config.py │ ├── publish_model.py │ ├── pytorch2onnx.py │ ├── pytorch2torchscript.py │ ├── slurm_test.sh │ ├── slurm_train.sh │ ├── test.py │ ├── torchserve │ │ ├── mmseg2torchserve.py │ │ ├── mmseg_handler.py │ │ └── test_torchserve.py │ └── train.py └── train.py ├── train.py ├── utils.py └── validate.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject 
date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Efficient Modulation for Vision Networks (ICLR 2024)
2 |
3 | ## News & TODO & Updates:
4 | - [ ] Improve performance with a better training recipe.
5 | - [ ] Simplify the model by removing unnecessary settings and renaming classes for easier understanding.
6 | - [ ] Upload a benchmark script to ease latency benchmarking.
7 |
8 | ## Image Classification
9 | ### 1. Requirements
10 |
11 | torch>=1.7.0; torchvision>=0.8.0; pyyaml; timm==0.6.13;
12 |
13 | Data preparation: ImageNet with the following folder structure; you can extract ImageNet with this [script](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4).
14 |
15 | ```
16 | │imagenet/
17 | ├──train/
18 | │ ├── n01440764
19 | │ │ ├── n01440764_10026.JPEG
20 | │ │ ├── n01440764_10027.JPEG
21 | │ │ ├── ......
22 | │ ├── ......
23 | ├──val/
24 | │ ├── n01440764
25 | │ │ ├── ILSVRC2012_val_00000293.JPEG
26 | │ │ ├── ILSVRC2012_val_00002138.JPEG
27 | │ │ ├── ......
28 | │ ├── ......
29 | ```
30 |
31 |
32 |
33 | ### 2. Pre-trained EfficientMod Models
34 | We upload the **checkpoints** (trained with distillation) and **logs** to Google Drive. Feel free to download them.
35 |
36 | | Model | #params | Image resolution | Top-1 Acc | Download |
37 | | :--- | :---: | :---: | :---: |:---: |
38 | | EfficientMod-xxs | 4.7M | 224 | 77.1 | [[checkpoint & logs]](https://drive.google.com/drive/folders/1c0dlnN7w1bHlAsKcJFhGVA2mIhoA6ZHz?usp=sharing) |
39 | | EfficientMod-xs | 6.6M | 224 | 79.4 | [[checkpoint & logs]](https://drive.google.com/drive/folders/1PPQFO891WfJRUiH58NlWOgHDEzDnwC0_?usp=share_link) |
40 | | EfficientMod-s | 12.9M | 224 | 81.9 | [[checkpoint & logs]](https://drive.google.com/drive/folders/1rJs8LcWmdTFmj-IJ0cmlVp_MxGfZFsFk?usp=share_link) |
41 | | EfficientMod-s-Conv (No Distill.) | 12.9M | 224 | 80.5 | [[checkpoint & logs]](https://drive.google.com/drive/folders/1EY637XRiDPL4AwrVGESJWsK-ZP2GhnaI?usp=share_link) |
42 |
43 | ### 3. Validation
44 |
45 | To evaluate our EfficientMod models, run:
46 |
47 | ```bash
48 | python3 validate.py /path/to/imagenet --model {model} -b 256 --checkpoint {/path/to/checkpoint}
49 | ```
50 |
51 |
52 |
53 | ### 4. Train
54 | We show how to train EfficientMod on 8 GPUs:
55 |
56 | ```bash
57 | python3 -m torch.distributed.launch --nproc_per_node=8 train.py --data {path-to-imagenet} --model {model} -b 256 --lr 4e-3 --amp --model-ema --distillation-type soft --distillation-tau 1 --auto-resume --exp_tag {experiment_tag}
58 |
59 | ```
60 |
61 | ### 5. Benchmark ONNX speed on CPU and GPU :v::v::v:
62 | We also provide a script to benchmark model latency on different platforms, which is important but often unavailable.
63 |
64 | With this script, we can benchmark **different models**, **different input resolutions**, and **different hardware** (ONNX on CPU, ONNX on GPU, PyTorch on GPU) using [ONNX Runtime](https://github.com/microsoft/onnxruntime).
65 |
66 | Meanwhile, we can **save a detailed log file** ({args.results_file}, e.g., debug.csv) that records detailed information for each benchmark run (model-related logs, data-related logs, benchmark results, and system/hardware details).
67 |
68 | #### 1. Requirements
69 |
70 | onnxruntime-gpu==1.13.1; onnx==1.13.0; tensorrt==8.5.2.2; torch>=1.7.0; torchvision>=0.8.0; timm==0.6.13; fvcore; thop; py-cpuinfo;
71 |
72 | #### 2. Run benchmark script
73 | ```bash
74 | # Please feel free to add / modify configs if necessary
75 | # Benchmark results will be printed and saved to {args.results_file}, appended as a new row.
76 | CUDA_VISIBLE_DEVICES=0 python3 benchmark_onnx.py --model {model-name} --input-size 3 224 224 --benchmark_cpu
77 | ```
78 |
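For reference, the core of such a benchmark can be reproduced in a few lines. The following is a minimal sketch, not the actual `benchmark_onnx.py`: it assumes importing this repo's `models` package registers `efficientMod_s` with timm, and the warm-up/iteration counts are illustrative.

```python
# Minimal ONNX latency benchmark sketch (assumptions: `import models` registers
# "efficientMod_s" with timm, as in this repo; iteration counts are illustrative).
import time
import torch
import timm
import onnxruntime as ort
import models  # noqa: F401  # registers the EfficientMod variants with timm

model = timm.create_model("efficientMod_s", pretrained=False).eval()
dummy = torch.randn(1, 3, 224, 224)

# Export once to ONNX, then benchmark with ONNX Runtime.
torch.onnx.export(model, dummy, "efficientMod_s.onnx", opset_version=13,
                  input_names=["input"], output_names=["output"])

# CPU benchmark; swap in "CUDAExecutionProvider" for GPU (needs onnxruntime-gpu).
sess = ort.InferenceSession("efficientMod_s.onnx",
                            providers=["CPUExecutionProvider"])
x = dummy.numpy()

for _ in range(10):  # warm-up runs, excluded from timing
    sess.run(None, {"input": x})

n_iters = 100
start = time.perf_counter()
for _ in range(n_iters):
    sess.run(None, {"input": x})
print(f"mean latency: {(time.perf_counter() - start) / n_iters * 1e3:.2f} ms / image")
```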
79 |
80 | **See the [detection](detection/) folder for object detection and instance segmentation on COCO.**
81 |
82 | **See the [segmentation](segmentation/) folder for semantic segmentation on ADE20K.**
83 |
84 | ## BibTeX
85 |
86 | @inproceedings{
87 | ma2024efficient,
88 | title={Efficient Modulation for Vision Networks},
89 | author={Xu Ma and Xiyang Dai and Jianwei Yang and Bin Xiao and Yinpeng Chen and Yun Fu and Lu Yuan},
90 | booktitle={The Twelfth International Conference on Learning Representations},
91 | year={2024},
92 | url={https://openreview.net/forum?id=ip5LHJs6QX}
93 | }
94 |
--------------------------------------------------------------------------------
/detection/Dockerfile_mmdetseg:
--------------------------------------------------------------------------------
1 | ARG PYTORCH="1.6.0"
2 | ARG CUDA="10.1"
3 | ARG CUDNN="7"
4 |
5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
6 |
7 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX"
8 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
9 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"
10 |
11 | RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
12 | && apt-get clean \
13 | && rm -rf /var/lib/apt/lists/*
14 |
15 | RUN conda clean --all
16 |
17 | # Install MMCV
18 | RUN pip install mmcv-full==1.3.14 -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
19 |
20 |
21 | # Install timm
22 | RUN pip install timm==0.4.5
23 |
24 | # Install MMDetection
25 | RUN conda clean --all
26 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection
27 | WORKDIR /mmdetection
28 | ENV FORCE_CUDA="1"
29 | RUN pip install -r requirements/build.txt
30 | RUN pip install --no-cache-dir -e .
31 |
32 | # Install MMSegmentation
33 | RUN pip install importlib-metadata==4.2
34 | RUN git clone https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation
35 | WORKDIR /mmsegmentation
36 | ENV FORCE_CUDA="1"
37 | RUN pip install -r requirements.txt
38 | RUN pip install --no-cache-dir -e .
39 |
40 | # Install apex
41 | RUN git clone https://github.com/NVIDIA/apex /apex
42 | WORKDIR /apex
43 | RUN python setup.py install --cpp_ext --cuda_ext
44 |
45 | WORKDIR /workspace
46 |
--------------------------------------------------------------------------------
/detection/README.md:
--------------------------------------------------------------------------------
1 | # Applying EfficientMod to Object Detection
2 |
3 | Our detection implementation is based on [MMDetection v2.19.0](https://github.com/open-mmlab/mmdetection/tree/v2.19.0) and [PVT detection](https://github.com/whai362/PVT/tree/v2/detection). We thank the authors for their wonderful work.
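As a rough orientation before the full configs, here is a minimal sketch of how a custom backbone such as EfficientMod is typically wired into an MMDetection Mask R-CNN config. The field values below (checkpoint path, stage widths) are placeholders, not this repo's exact settings; see `configs/efficientMod_s.py` for the authoritative configuration.

```python
# Hypothetical sketch only; the real settings live in configs/efficientMod_s.py.
_base_ = [
    '_base_/models/mask_rcnn_r50_fpn.py',
    '_base_/datasets/coco_instance.py',
    '_base_/schedules/schedule_1x.py',
    '_base_/default_runtime.py',
]
model = dict(
    pretrained=None,
    backbone=dict(
        _delete_=True,            # drop the ResNet settings inherited from the base config
        type='efficientMod_s',    # registered via detection/backbone/__init__.py
        init_cfg=dict(type='Pretrained',
                      checkpoint='/path/to/imagenet_checkpoint.pth'),  # placeholder path
    ),
    # FPN input channels must match the backbone's four stage widths
    # (placeholder values below; use the real EfficientMod-s widths).
    neck=dict(in_channels=[64, 128, 256, 512]),
)
```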
4 |
5 |
6 |
7 | ## Usage
8 |
9 | Install [MMDetection v2.19.0](https://github.com/open-mmlab/mmdetection/tree/v2.19.0) from source code,
10 |
11 | or
12 |
13 | ```
14 | pip install mmdet==2.19.0 --user
15 | ```
16 |
17 | Apex (optional):
18 | ```
19 | git clone https://github.com/NVIDIA/apex
20 | cd apex
21 | python setup.py install --cpp_ext --cuda_ext --user
22 | ```
23 |
24 |
25 | ## Data preparation
26 |
27 | Prepare COCO according to the guidelines in [MMDetection v2.19.0](https://github.com/open-mmlab/mmdetection/tree/v2.19.0).
28 |
29 |
30 | ## Results and models on COCO
31 |
32 |
33 | | Method | Backbone | Pretrain | Lr schd | box AP | mask AP | Download |
34 | |------------|----------|-------------|:-------:|:---:|:------:|-----|
35 | | Mask R-CNN | EfficientMod-s-Conv | ImageNet-1K | 1x | 42.1 | 38.5 | [checkpoint & log](https://drive.google.com/drive/folders/1EYplGBr0osoITnYlA_ImbSGgfWBFbBuf?usp=share_link) |
36 | | Mask R-CNN | EfficientMod-s | ImageNet-1K | 1x | 43.6 | 40.3 | [checkpoint & log](https://drive.google.com/drive/folders/1hiZst1cbvYiIFJ6dnPb4KUOfxq3mjzha?usp=share_link) |
37 |
38 |
39 |
40 | ## Evaluation
41 |
42 | To evaluate EfficientMod + Mask R-CNN on COCO val2017, run:
43 | ```
44 | dist_test.sh configs/{config-file} /path/to/checkpoint_file 8 --out results.pkl --eval bbox segm
45 | ```
46 |
47 |
48 | ## Training
49 | To train EfficientMod + Mask R-CNN on COCO train2017:
50 | ```
51 | dist_train.sh configs/{config-file} 8
52 | ```
53 |
--------------------------------------------------------------------------------
/detection/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from .efficientMod import *
2 | __all__ = [
3 | "efficientMod_s", "efficientMod_s_Conv"
4 | ]
5 |
--------------------------------------------------------------------------------
/detection/configs/_base_/datasets/cityscapes_detection.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'CityscapesDataset'
3 | data_root = 'data/cityscapes/'
4 | img_norm_cfg = dict(
5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6 | train_pipeline = [
7 | dict(type='LoadImageFromFile'),
8 | dict(type='LoadAnnotations', with_bbox=True),
9 | dict(
10 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
11 | dict(type='RandomFlip', flip_ratio=0.5),
12 | dict(type='Normalize', **img_norm_cfg),
13 | dict(type='Pad', size_divisor=32),
14 | dict(type='DefaultFormatBundle'),
15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
16 | ]
17 | test_pipeline = [
18 | dict(type='LoadImageFromFile'),
19 | dict(
20 | type='MultiScaleFlipAug',
21 | img_scale=(2048, 1024),
22 | flip=False,
23 | transforms=[
24 | dict(type='Resize', keep_ratio=True),
25 | dict(type='RandomFlip'),
26 | dict(type='Normalize', **img_norm_cfg),
27 | dict(type='Pad', size_divisor=32),
28 | dict(type='ImageToTensor', keys=['img']),
29 | dict(type='Collect', keys=['img']),
30 | ])
31 | ]
32 | data = dict(
33 | samples_per_gpu=1,
34 | workers_per_gpu=2,
35 | train=dict(
36 | type='RepeatDataset',
37 | times=8,
38 | dataset=dict(
39 | type=dataset_type,
40 | ann_file=data_root +
41 | 'annotations/instancesonly_filtered_gtFine_train.json',
42 | img_prefix=data_root + 'leftImg8bit/train/',
43 | pipeline=train_pipeline)),
44 | val=dict(
45 | type=dataset_type,
46 | ann_file=data_root +
47 | 'annotations/instancesonly_filtered_gtFine_val.json',
48 |
img_prefix=data_root + 'leftImg8bit/val/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 53 | 'annotations/instancesonly_filtered_gtFine_test.json', 54 | img_prefix=data_root + 'leftImg8bit/test/', 55 | pipeline=test_pipeline)) 56 | evaluation = dict(interval=1, metric='bbox') 57 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/cityscapes_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict( 10 | type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=(2048, 1024), 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | samples_per_gpu=1, 34 | workers_per_gpu=2, 35 | train=dict( 36 | type='RepeatDataset', 37 | times=8, 38 | dataset=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 41 | 'annotations/instancesonly_filtered_gtFine_train.json', 42 | img_prefix=data_root + 'leftImg8bit/train/', 43 | pipeline=train_pipeline)), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 47 | 'annotations/instancesonly_filtered_gtFine_val.json', 48 | img_prefix=data_root + 'leftImg8bit/val/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 53 | 'annotations/instancesonly_filtered_gtFine_test.json', 54 | img_prefix=data_root + 'leftImg8bit/test/', 55 | pipeline=test_pipeline)) 56 | evaluation = dict(metric=['bbox', 'segm']) 57 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | 
dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(interval=1, metric='bbox') 50 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = '/dev/shm/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1333, 800), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=4, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/instances_train2017.json', 37 | img_prefix=data_root + 'train2017/', 38 | pipeline=train_pipeline), 39 | val=dict( 40 | type=dataset_type, 41 | ann_file=data_root + 'annotations/instances_val2017.json', 42 | img_prefix=data_root + 'val2017/', 43 | pipeline=test_pipeline), 44 | test=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline)) 49 | evaluation = dict(metric=['bbox', 'segm']) 50 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/coco_instance_semantic.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CocoDataset' 3 | data_root = 'data/coco/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict( 9 | type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True), 10 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='SegRescale', scale_factor=1 / 8), 15 | dict(type='DefaultFormatBundle'), 
16 | dict( 17 | type='Collect', 18 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']), 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(1333, 800), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip', flip_ratio=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img']), 32 | dict(type='Collect', keys=['img']), 33 | ]) 34 | ] 35 | data = dict( 36 | samples_per_gpu=2, 37 | workers_per_gpu=2, 38 | train=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_train2017.json', 41 | img_prefix=data_root + 'train2017/', 42 | seg_prefix=data_root + 'stuffthingmaps/train2017/', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/instances_val2017.json', 47 | img_prefix=data_root + 'val2017/', 48 | pipeline=test_pipeline), 49 | test=dict( 50 | type=dataset_type, 51 | ann_file=data_root + 'annotations/instances_val2017.json', 52 | img_prefix=data_root + 'val2017/', 53 | pipeline=test_pipeline)) 54 | evaluation = dict(metric=['bbox', 'segm']) 55 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/deepfashion.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'DeepFashionDataset' 3 | data_root = 'data/DeepFashion/In-shop/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=(750, 1101), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(750, 1101), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | imgs_per_gpu=2, 33 | workers_per_gpu=1, 34 | train=dict( 35 | type=dataset_type, 36 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 37 | img_prefix=data_root + 'Img/', 38 | pipeline=train_pipeline, 39 | data_root=data_root), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json', 43 | img_prefix=data_root + 'Img/', 44 | pipeline=test_pipeline, 45 | data_root=data_root), 46 | test=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 49 | 'annotations/DeepFashion_segmentation_gallery.json', 50 | img_prefix=data_root + 'Img/', 51 | pipeline=test_pipeline, 52 | data_root=data_root)) 53 | evaluation = dict(interval=5, metric=['bbox', 'segm']) 54 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v0.5_instance.py: -------------------------------------------------------------------------------- 1 | # dataset 
settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV05Dataset' 4 | data_root = 'data/lvis_v0.5/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v0.5_train.json', 15 | img_prefix=data_root + 'train2017/')), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 19 | img_prefix=data_root + 'val2017/'), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v0.5_val.json', 23 | img_prefix=data_root + 'val2017/')) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/lvis_v1_instance.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | _base_ = 'coco_instance.py' 3 | dataset_type = 'LVISV1Dataset' 4 | data_root = 'data/lvis_v1/' 5 | data = dict( 6 | samples_per_gpu=2, 7 | workers_per_gpu=2, 8 | train=dict( 9 | _delete_=True, 10 | type='ClassBalancedDataset', 11 | oversample_thr=1e-3, 12 | dataset=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/lvis_v1_train.json', 15 | img_prefix=data_root)), 16 | val=dict( 17 | type=dataset_type, 18 | ann_file=data_root + 'annotations/lvis_v1_val.json', 19 | img_prefix=data_root), 20 | test=dict( 21 | type=dataset_type, 22 | ann_file=data_root + 'annotations/lvis_v1_val.json', 23 | img_prefix=data_root)) 24 | evaluation = dict(metric=['bbox', 'segm']) 25 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/voc0712.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'VOCDataset' 3 | data_root = 'data/VOCdevkit/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True), 9 | dict(type='Resize', img_scale=(1000, 600), keep_ratio=True), 10 | dict(type='RandomFlip', flip_ratio=0.5), 11 | dict(type='Normalize', **img_norm_cfg), 12 | dict(type='Pad', size_divisor=32), 13 | dict(type='DefaultFormatBundle'), 14 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 15 | ] 16 | test_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict( 19 | type='MultiScaleFlipAug', 20 | img_scale=(1000, 600), 21 | flip=False, 22 | transforms=[ 23 | dict(type='Resize', keep_ratio=True), 24 | dict(type='RandomFlip'), 25 | dict(type='Normalize', **img_norm_cfg), 26 | dict(type='Pad', size_divisor=32), 27 | dict(type='ImageToTensor', keys=['img']), 28 | dict(type='Collect', keys=['img']), 29 | ]) 30 | ] 31 | data = dict( 32 | samples_per_gpu=2, 33 | workers_per_gpu=2, 34 | train=dict( 35 | type='RepeatDataset', 36 | times=3, 37 | dataset=dict( 38 | type=dataset_type, 39 | ann_file=[ 40 | data_root + 'VOC2007/ImageSets/Main/trainval.txt', 41 | data_root + 'VOC2012/ImageSets/Main/trainval.txt' 42 | ], 43 | img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'], 44 | pipeline=train_pipeline)), 45 | val=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 48 | img_prefix=data_root + 'VOC2007/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | 
ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt', 53 | img_prefix=data_root + 'VOC2007/', 54 | pipeline=test_pipeline)) 55 | evaluation = dict(interval=1, metric='mAP') 56 | -------------------------------------------------------------------------------- /detection/configs/_base_/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'WIDERFaceDataset' 3 | data_root = 'data/WIDERFace/' 4 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile', to_float32=True), 7 | dict(type='LoadAnnotations', with_bbox=True), 8 | dict( 9 | type='PhotoMetricDistortion', 10 | brightness_delta=32, 11 | contrast_range=(0.5, 1.5), 12 | saturation_range=(0.5, 1.5), 13 | hue_delta=18), 14 | dict( 15 | type='Expand', 16 | mean=img_norm_cfg['mean'], 17 | to_rgb=img_norm_cfg['to_rgb'], 18 | ratio_range=(1, 4)), 19 | dict( 20 | type='MinIoURandomCrop', 21 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 22 | min_crop_size=0.3), 23 | dict(type='Resize', img_scale=(300, 300), keep_ratio=False), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='RandomFlip', flip_ratio=0.5), 26 | dict(type='DefaultFormatBundle'), 27 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 28 | ] 29 | test_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict( 32 | type='MultiScaleFlipAug', 33 | img_scale=(300, 300), 34 | flip=False, 35 | transforms=[ 36 | dict(type='Resize', keep_ratio=False), 37 | dict(type='Normalize', **img_norm_cfg), 38 | dict(type='ImageToTensor', keys=['img']), 39 | dict(type='Collect', keys=['img']), 40 | ]) 41 | ] 42 | data = dict( 43 | samples_per_gpu=60, 44 | workers_per_gpu=2, 45 | train=dict( 46 | type='RepeatDataset', 47 | times=2, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 'train.txt', 51 | img_prefix=data_root + 'WIDER_train/', 52 | min_size=17, 53 | pipeline=train_pipeline)), 54 | val=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'val.txt', 57 | img_prefix=data_root + 'WIDER_val/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'val.txt', 62 | img_prefix=data_root + 'WIDER_val/', 63 | pipeline=test_pipeline)) 64 | -------------------------------------------------------------------------------- /detection/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable 3 | log_config = dict( 4 | interval=50, 5 | hooks=[ 6 | dict(type='TextLoggerHook'), 7 | # dict(type='TensorboardLoggerHook') 8 | ]) 9 | # yapf:enable 10 | custom_hooks = [dict(type='NumClassCheckHook')] 11 | 12 | dist_params = dict(backend='nccl') 13 | log_level = 'INFO' 14 | load_from = None 15 | resume_from = None 16 | workflow = [('train', 1)] 17 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/cascade_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='CascadeRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 
| out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 35 | roi_head=dict( 36 | type='CascadeRoIHead', 37 | num_stages=3, 38 | stage_loss_weights=[1, 0.5, 0.25], 39 | bbox_roi_extractor=dict( 40 | type='SingleRoIExtractor', 41 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 42 | out_channels=256, 43 | featmap_strides=[4, 8, 16, 32]), 44 | bbox_head=[ 45 | dict( 46 | type='Shared2FCBBoxHead', 47 | in_channels=256, 48 | fc_out_channels=1024, 49 | roi_feat_size=7, 50 | num_classes=80, 51 | bbox_coder=dict( 52 | type='DeltaXYWHBBoxCoder', 53 | target_means=[0., 0., 0., 0.], 54 | target_stds=[0.1, 0.1, 0.2, 0.2]), 55 | reg_class_agnostic=True, 56 | loss_cls=dict( 57 | type='CrossEntropyLoss', 58 | use_sigmoid=False, 59 | loss_weight=1.0), 60 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 61 | loss_weight=1.0)), 62 | dict( 63 | type='Shared2FCBBoxHead', 64 | in_channels=256, 65 | fc_out_channels=1024, 66 | roi_feat_size=7, 67 | num_classes=80, 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[0., 0., 0., 0.], 71 | target_stds=[0.05, 0.05, 0.1, 0.1]), 72 | reg_class_agnostic=True, 73 | loss_cls=dict( 74 | type='CrossEntropyLoss', 75 | use_sigmoid=False, 76 | loss_weight=1.0), 77 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 78 | loss_weight=1.0)), 79 | dict( 80 | type='Shared2FCBBoxHead', 81 | in_channels=256, 82 | fc_out_channels=1024, 83 | roi_feat_size=7, 84 | num_classes=80, 85 | bbox_coder=dict( 86 | type='DeltaXYWHBBoxCoder', 87 | target_means=[0., 0., 0., 0.], 88 | target_stds=[0.033, 0.033, 0.067, 0.067]), 89 | reg_class_agnostic=True, 90 | loss_cls=dict( 91 | type='CrossEntropyLoss', 92 | use_sigmoid=False, 93 | loss_weight=1.0), 94 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 95 | ]), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=0, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=2000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=[ 121 | dict( 122 | assigner=dict( 123 | type='MaxIoUAssigner', 124 | pos_iou_thr=0.5, 125 | neg_iou_thr=0.5, 126 | min_pos_iou=0.5, 127 | match_low_quality=False, 128 | ignore_iof_thr=-1), 129 | sampler=dict( 130 | type='RandomSampler', 131 | num=512, 132 | pos_fraction=0.25, 133 | neg_pos_ub=-1, 134 | add_gt_as_proposals=True), 135 | pos_weight=-1, 136 | debug=False), 137 | dict( 138 | assigner=dict( 139 | type='MaxIoUAssigner', 140 | pos_iou_thr=0.6, 141 | neg_iou_thr=0.6, 142 | min_pos_iou=0.6, 143 | match_low_quality=False, 144 | ignore_iof_thr=-1), 145 | sampler=dict( 146 | type='RandomSampler', 147 | num=512, 148 | pos_fraction=0.25, 149 | 
neg_pos_ub=-1, 150 | add_gt_as_proposals=True), 151 | pos_weight=-1, 152 | debug=False), 153 | dict( 154 | assigner=dict( 155 | type='MaxIoUAssigner', 156 | pos_iou_thr=0.7, 157 | neg_iou_thr=0.7, 158 | min_pos_iou=0.7, 159 | match_low_quality=False, 160 | ignore_iof_thr=-1), 161 | sampler=dict( 162 | type='RandomSampler', 163 | num=512, 164 | pos_fraction=0.25, 165 | neg_pos_ub=-1, 166 | add_gt_as_proposals=True), 167 | pos_weight=-1, 168 | debug=False) 169 | ]), 170 | test_cfg=dict( 171 | rpn=dict( 172 | nms_pre=1000, 173 | max_per_img=1000, 174 | nms=dict(type='nms', iou_threshold=0.7), 175 | min_bbox_size=0), 176 | rcnn=dict( 177 | score_thr=0.05, 178 | nms=dict(type='nms', iou_threshold=0.5), 179 | max_per_img=100))) 180 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/fast_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | roi_head=dict( 20 | type='StandardRoIHead', 21 | bbox_roi_extractor=dict( 22 | type='SingleRoIExtractor', 23 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 24 | out_channels=256, 25 | featmap_strides=[4, 8, 16, 32]), 26 | bbox_head=dict( 27 | type='Shared2FCBBoxHead', 28 | in_channels=256, 29 | fc_out_channels=1024, 30 | roi_feat_size=7, 31 | num_classes=80, 32 | bbox_coder=dict( 33 | type='DeltaXYWHBBoxCoder', 34 | target_means=[0., 0., 0., 0.], 35 | target_stds=[0.1, 0.1, 0.2, 0.2]), 36 | reg_class_agnostic=False, 37 | loss_cls=dict( 38 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 39 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 40 | # model training and testing settings 41 | train_cfg=dict( 42 | rcnn=dict( 43 | assigner=dict( 44 | type='MaxIoUAssigner', 45 | pos_iou_thr=0.5, 46 | neg_iou_thr=0.5, 47 | min_pos_iou=0.5, 48 | match_low_quality=False, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | type='RandomSampler', 52 | num=512, 53 | pos_fraction=0.25, 54 | neg_pos_ub=-1, 55 | add_gt_as_proposals=True), 56 | pos_weight=-1, 57 | debug=False)), 58 | test_cfg=dict( 59 | rcnn=dict( 60 | score_thr=0.05, 61 | nms=dict(type='nms', iou_threshold=0.5), 62 | max_per_img=100))) 63 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | 
target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 63 | # model training and testing settings 64 | train_cfg=dict( 65 | rpn=dict( 66 | assigner=dict( 67 | type='MaxIoUAssigner', 68 | pos_iou_thr=0.7, 69 | neg_iou_thr=0.3, 70 | min_pos_iou=0.3, 71 | match_low_quality=True, 72 | ignore_iof_thr=-1), 73 | sampler=dict( 74 | type='RandomSampler', 75 | num=256, 76 | pos_fraction=0.5, 77 | neg_pos_ub=-1, 78 | add_gt_as_proposals=False), 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | rpn_proposal=dict( 83 | nms_pre=12000, 84 | max_per_img=2000, 85 | nms=dict(type='nms', iou_threshold=0.7), 86 | min_bbox_size=0), 87 | rcnn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.5, 91 | neg_iou_thr=0.5, 92 | min_pos_iou=0.5, 93 | match_low_quality=False, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=512, 98 | pos_fraction=0.25, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=True), 101 | pos_weight=-1, 102 | debug=False)), 103 | test_cfg=dict( 104 | rpn=dict( 105 | nms_pre=6000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | score_thr=0.05, 111 | nms=dict(type='nms', iou_threshold=0.5), 112 | max_per_img=100))) 113 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=4, 10 | strides=(1, 2, 2, 1), 11 | dilations=(1, 1, 1, 2), 12 | out_indices=(3, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=2048, 20 | feat_channels=2048, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', 
output_size=7, sampling_ratio=0), 38 | out_channels=2048, 39 | featmap_strides=[16]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=2048, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | rpn=dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.7, 60 | neg_iou_thr=0.3, 61 | min_pos_iou=0.3, 62 | match_low_quality=True, 63 | ignore_iof_thr=-1), 64 | sampler=dict( 65 | type='RandomSampler', 66 | num=256, 67 | pos_fraction=0.5, 68 | neg_pos_ub=-1, 69 | add_gt_as_proposals=False), 70 | allowed_border=0, 71 | pos_weight=-1, 72 | debug=False), 73 | rpn_proposal=dict( 74 | nms_pre=12000, 75 | max_per_img=2000, 76 | nms=dict(type='nms', iou_threshold=0.7), 77 | min_bbox_size=0), 78 | rcnn=dict( 79 | assigner=dict( 80 | type='MaxIoUAssigner', 81 | pos_iou_thr=0.5, 82 | neg_iou_thr=0.5, 83 | min_pos_iou=0.5, 84 | match_low_quality=False, 85 | ignore_iof_thr=-1), 86 | sampler=dict( 87 | type='RandomSampler', 88 | num=512, 89 | pos_fraction=0.25, 90 | neg_pos_ub=-1, 91 | add_gt_as_proposals=True), 92 | pos_weight=-1, 93 | debug=False)), 94 | test_cfg=dict( 95 | rpn=dict( 96 | nms=dict(type='nms', iou_threshold=0.7), 97 | nms_pre=6000, 98 | max_per_img=1000, 99 | min_bbox_size=0), 100 | rcnn=dict( 101 | score_thr=0.05, 102 | nms=dict(type='nms', iou_threshold=0.5), 103 | max_per_img=100))) 104 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | 
loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=False) 3 | model = dict( 4 | type='MaskRCNN', 5 | pretrained='open-mmlab://detectron2/resnet50_caffe', 6 | backbone=dict( 7 | type='ResNet', 8 | depth=50, 9 | num_stages=3, 10 | strides=(1, 2, 2), 11 | dilations=(1, 1, 1), 12 | out_indices=(2, ), 13 | frozen_stages=1, 14 | norm_cfg=norm_cfg, 15 | norm_eval=True, 16 | style='caffe'), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | shared_head=dict( 36 | type='ResLayer', 37 | depth=50, 38 | stage=3, 39 | stride=2, 40 | dilation=1, 41 | style='caffe', 42 | norm_cfg=norm_cfg, 43 | norm_eval=True), 44 | bbox_roi_extractor=dict( 45 | type='SingleRoIExtractor', 46 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 47 | out_channels=1024, 48 | featmap_strides=[16]), 49 | bbox_head=dict( 50 | type='BBoxHead', 51 | with_avg_pool=True, 52 | roi_feat_size=7, 53 | in_channels=2048, 54 | num_classes=80, 55 | bbox_coder=dict( 56 | type='DeltaXYWHBBoxCoder', 57 | target_means=[0., 0., 0., 0.], 58 | target_stds=[0.1, 0.1, 0.2, 0.2]), 59 | reg_class_agnostic=False, 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 62 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 63 | mask_roi_extractor=None, 64 | mask_head=dict( 65 | type='FCNMaskHead', 66 | num_convs=0, 67 | in_channels=2048, 68 | conv_out_channels=256, 69 | num_classes=80, 70 | loss_mask=dict( 71 | 
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 72 | # model training and testing settings 73 | train_cfg=dict( 74 | rpn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.7, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | match_low_quality=True, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=256, 85 | pos_fraction=0.5, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=False), 88 | allowed_border=0, 89 | pos_weight=-1, 90 | debug=False), 91 | rpn_proposal=dict( 92 | nms_pre=12000, 93 | max_per_img=2000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | assigner=dict( 98 | type='MaxIoUAssigner', 99 | pos_iou_thr=0.5, 100 | neg_iou_thr=0.5, 101 | min_pos_iou=0.5, 102 | match_low_quality=False, 103 | ignore_iof_thr=-1), 104 | sampler=dict( 105 | type='RandomSampler', 106 | num=512, 107 | pos_fraction=0.25, 108 | neg_pos_ub=-1, 109 | add_gt_as_proposals=True), 110 | mask_size=14, 111 | pos_weight=-1, 112 | debug=False)), 113 | test_cfg=dict( 114 | rpn=dict( 115 | nms_pre=6000, 116 | nms=dict(type='nms', iou_threshold=0.7), 117 | max_per_img=1000, 118 | min_bbox_size=0), 119 | rcnn=dict( 120 | score_thr=0.05, 121 | nms=dict(type='nms', iou_threshold=0.5), 122 | max_per_img=100, 123 | mask_thr_binary=0.5))) 124 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', 
use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_pre=2000, 90 | max_per_img=1000, 91 | nms=dict(type='nms', iou_threshold=0.7), 92 | min_bbox_size=0), 93 | rcnn=dict( 94 | assigner=dict( 95 | type='MaxIoUAssigner', 96 | pos_iou_thr=0.5, 97 | neg_iou_thr=0.5, 98 | min_pos_iou=0.5, 99 | match_low_quality=True, 100 | ignore_iof_thr=-1), 101 | sampler=dict( 102 | type='RandomSampler', 103 | num=512, 104 | pos_fraction=0.25, 105 | neg_pos_ub=-1, 106 | add_gt_as_proposals=True), 107 | mask_size=28, 108 | pos_weight=-1, 109 | debug=False)), 110 | test_cfg=dict( 111 | rpn=dict( 112 | nms_pre=1000, 113 | max_per_img=1000, 114 | nms=dict(type='nms', iou_threshold=0.7), 115 | min_bbox_size=0), 116 | rcnn=dict( 117 | score_thr=0.05, 118 | nms=dict(type='nms', iou_threshold=0.5), 119 | max_per_img=100, 120 | mask_thr_binary=0.5))) 121 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs='on_input', 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=80, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | anchor_generator=dict( 28 | type='AnchorGenerator', 29 | octave_base_scale=4, 30 | scales_per_octave=3, 31 | ratios=[0.5, 1.0, 2.0], 32 | strides=[8, 16, 32, 64, 128]), 33 | bbox_coder=dict( 34 | type='DeltaXYWHBBoxCoder', 35 | target_means=[.0, .0, .0, .0], 36 | target_stds=[1.0, 1.0, 1.0, 1.0]), 37 | loss_cls=dict( 38 | type='FocalLoss', 39 | use_sigmoid=True, 40 | gamma=2.0, 41 | alpha=0.25, 42 | loss_weight=1.0), 43 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 44 | # model training and testing settings 45 | train_cfg=dict( 46 | assigner=dict( 47 | type='MaxIoUAssigner', 48 | pos_iou_thr=0.5, 49 | neg_iou_thr=0.4, 50 | min_pos_iou=0, 51 | ignore_iof_thr=-1), 52 | allowed_border=-1, 53 | pos_weight=-1, 54 | debug=False), 55 | test_cfg=dict( 56 | nms_pre=1000, 57 | min_bbox_size=0, 58 | score_thr=0.05, 59 | nms=dict(type='nms', iou_threshold=0.5), 60 | max_per_img=100)) 61 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_caffe_c4.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://detectron2/resnet50_caffe', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=3, 9 | strides=(1, 2, 2), 10 | dilations=(1, 1, 1), 11 | out_indices=(2, ), 12 | frozen_stages=1, 13 | 
norm_cfg=dict(type='BN', requires_grad=False), 14 | norm_eval=True, 15 | style='caffe'), 16 | neck=None, 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=1024, 20 | feat_channels=1024, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[2, 4, 8, 16, 32], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[16]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict( 35 | rpn=dict( 36 | assigner=dict( 37 | type='MaxIoUAssigner', 38 | pos_iou_thr=0.7, 39 | neg_iou_thr=0.3, 40 | min_pos_iou=0.3, 41 | ignore_iof_thr=-1), 42 | sampler=dict( 43 | type='RandomSampler', 44 | num=256, 45 | pos_fraction=0.5, 46 | neg_pos_ub=-1, 47 | add_gt_as_proposals=False), 48 | allowed_border=0, 49 | pos_weight=-1, 50 | debug=False)), 51 | test_cfg=dict( 52 | rpn=dict( 53 | nms_pre=12000, 54 | max_per_img=2000, 55 | nms=dict(type='nms', iou_threshold=0.7), 56 | min_bbox_size=0))) 57 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/rpn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict( 37 | rpn=dict( 38 | assigner=dict( 39 | type='MaxIoUAssigner', 40 | pos_iou_thr=0.7, 41 | neg_iou_thr=0.3, 42 | min_pos_iou=0.3, 43 | ignore_iof_thr=-1), 44 | sampler=dict( 45 | type='RandomSampler', 46 | num=256, 47 | pos_fraction=0.5, 48 | neg_pos_ub=-1, 49 | add_gt_as_proposals=False), 50 | allowed_border=0, 51 | pos_weight=-1, 52 | debug=False)), 53 | test_cfg=dict( 54 | rpn=dict( 55 | nms_pre=2000, 56 | max_per_img=1000, 57 | nms=dict(type='nms', iou_threshold=0.7), 58 | min_bbox_size=0))) 59 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | in_channels=(512, 1024, 512, 256, 256, 256), 19 | num_classes=80, 
20 | anchor_generator=dict( 21 | type='SSDAnchorGenerator', 22 | scale_major=False, 23 | input_size=input_size, 24 | basesize_ratio_range=(0.15, 0.9), 25 | strides=[8, 16, 32, 64, 100, 300], 26 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 27 | bbox_coder=dict( 28 | type='DeltaXYWHBBoxCoder', 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[0.1, 0.1, 0.2, 0.2])), 31 | # model training and testing settings 32 | train_cfg=dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.5, 37 | min_pos_iou=0., 38 | ignore_iof_thr=-1, 39 | gt_max_assign_all=False), 40 | smoothl1_beta=1., 41 | allowed_border=-1, 42 | pos_weight=-1, 43 | neg_pos_ratio=3, 44 | debug=False), 45 | test_cfg=dict( 46 | nms_pre=1000, 47 | nms=dict(type='nms', iou_threshold=0.45), 48 | min_bbox_size=0, 49 | score_thr=0.02, 50 | max_per_img=200)) 51 | cudnn_benchmark = True 52 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 19]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=20) 12 | -------------------------------------------------------------------------------- /detection/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[16, 22]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=24) 12 | -------------------------------------------------------------------------------- /detection/configs/efficientMod_s.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | # model settings 8 | model = dict( 9 | backbone=dict( 10 | type='efficientMod_s', 11 | style='pytorch', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint=\ 15 | '{path-to-checkpoint}', 16 | ), 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[32, 64, 144, 312], 21 | out_channels=256, 22 | num_outs=5)) 23 | # optimizer 24 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0002, weight_decay=0.05) 25 | optimizer_config = dict(grad_clip=None) 26 | 27 | --------------------------------------------------------------------------------
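Note the `_delete_=True` key in the optimizer override above: in mmcv's config system it discards the inherited SGD settings entirely instead of merging into them. A minimal sketch of why this matters (the dict values are the ones from the schedule and config files above; the merge logic is a plain-Python illustration of mmcv's behavior, not its actual implementation):

```python
# Sketch of mmcv Config merge semantics for the optimizer override above.
base = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
override = dict(_delete_=True, type='AdamW', lr=0.0002, weight_decay=0.05)

# A plain key-by-key merge would leave a stray 'momentum' key on AdamW,
# which AdamW does not accept:
bad_merge = {**base, **{k: v for k, v in override.items() if k != '_delete_'}}
print('momentum' in bad_merge)  # True -> why _delete_=True is needed

# With _delete_=True the base dict is dropped and only the override survives:
merged = {k: v for k, v in override.items() if k != '_delete_'}
print(merged)  # {'type': 'AdamW', 'lr': 0.0002, 'weight_decay': 0.05}
```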
/detection/configs/efficientMod_s_Conv.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '_base_/models/mask_rcnn_r50_fpn.py', 3 | '_base_/datasets/coco_instance.py', 4 | '_base_/schedules/schedule_1x.py', 5 | '_base_/default_runtime.py' 6 | ] 7 | # model settings 8 | model = dict( 9 | backbone=dict( 10 | type='efficientMod_s_Conv', 11 | style='pytorch', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint=\ 15 | '{path-to-checkpoint}', 16 | ), 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[40, 80, 160, 344], 21 | out_channels=256, 22 | num_outs=5)) 23 | # optimizer 24 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0002, weight_decay=0.05) 25 | optimizer_config = dict(grad_clip=None) 26 | 27 | -------------------------------------------------------------------------------- /detection/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /detection/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /detection/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv import Config, DictAction 7 | 8 | from mmdet.models import build_detector 9 | from backbone import * 10 | try: 11 | from mmcv.cnn import get_model_complexity_info 12 | except ImportError: 13 | raise ImportError('Please upgrade mmcv to >0.6.2') 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='Get the FLOPs of a detector') 18 | parser.add_argument('config', help='train config file path') 19 | parser.add_argument( 20 | '--shape', 21 | type=int, 22 | nargs='+', 23 | default=[1280, 800], 24 | help='input image size') 25 | parser.add_argument( 26 | '--cfg-options', 27 | nargs='+', 28 | action=DictAction, 29 | help='override some settings in the used config, the key-value pair ' 30 | 'in xxx=yyy format will be merged into config file. If the value to ' 31 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 32 | 'It also allows nested list/tuple values, e.g.
key="[(a,b),(c,d)]" ' 33 | 'Note that the quotation marks are necessary and that no white space ' 34 | 'is allowed.') 35 | parser.add_argument( 36 | '--size-divisor', 37 | type=int, 38 | default=32, 39 | help='Pad the input image to the smallest size that is divisible ' 40 | 'by size_divisor; -1 means do not pad the image.') 41 | args = parser.parse_args() 42 | return args 43 | 44 | 45 | def main(): 46 | 47 | args = parse_args() 48 | 49 | if len(args.shape) == 1: 50 | h = w = args.shape[0] 51 | elif len(args.shape) == 2: 52 | h, w = args.shape 53 | else: 54 | raise ValueError('invalid input shape') 55 | ori_shape = (3, h, w) 56 | divisor = args.size_divisor 57 | if divisor > 0: 58 | h = int(np.ceil(h / divisor)) * divisor 59 | w = int(np.ceil(w / divisor)) * divisor 60 | 61 | input_shape = (3, h, w) 62 | 63 | cfg = Config.fromfile(args.config) 64 | if args.cfg_options is not None: 65 | cfg.merge_from_dict(args.cfg_options) 66 | 67 | model = build_detector( 68 | cfg.model, 69 | train_cfg=cfg.get('train_cfg'), 70 | test_cfg=cfg.get('test_cfg')) 71 | if torch.cuda.is_available(): 72 | model.cuda() 73 | model.eval() 74 | 75 | if hasattr(model, 'forward_dummy'): 76 | model.forward = model.forward_dummy 77 | else: 78 | raise NotImplementedError( 79 | 'FLOPs counter is currently not supported with {}'. 80 | format(model.__class__.__name__)) 81 | 82 | flops, params = get_model_complexity_info(model, input_shape) 83 | split_line = '=' * 30 84 | 85 | if divisor > 0 and \ 86 | input_shape != ori_shape: 87 | print(f'{split_line}\nThe size divisor padded the input shape ' 88 | f'from {ori_shape} to {input_shape}\n') 89 | print(f'{split_line}\nInput shape: {input_shape}\n' 90 | f'Flops: {flops}\nParams: {params}\n{split_line}') 91 | print('!!!Please be cautious if you use the results in papers. ' 92 | 'You may need to check if all ops are supported and verify that the ' 93 | 'flops computation is correct.') 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /detection/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 11 | # from .checkpoint import load_checkpoint 12 | from .layer_decay_optimizer_constructor import LearningRateDecayOptimizerConstructor 13 | from .customized_text import CustomizedTextLoggerHook 14 | 15 | # __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'CustomizedTextLoggerHook'] 16 | __all__ = [ 'LearningRateDecayOptimizerConstructor', 'CustomizedTextLoggerHook'] 17 | -------------------------------------------------------------------------------- /detection/mmcv_custom/customized_text.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree.
7 | 8 | 9 | import datetime 10 | from collections import OrderedDict 11 | 12 | import torch 13 | 14 | import mmcv 15 | from mmcv.runner import HOOKS 16 | from mmcv.runner import TextLoggerHook 17 | 18 | 19 | @HOOKS.register_module() 20 | class CustomizedTextLoggerHook(TextLoggerHook): 21 | """Customized Text Logger hook. 22 | 23 | This logger prints out both lr and layer_0_lr. 24 | 25 | """ 26 | 27 | def _log_info(self, log_dict, runner): 28 | # print exp name for users to distinguish experiments 29 | # at every ``interval_exp_name`` iterations and the end of each epoch 30 | if runner.meta is not None and 'exp_name' in runner.meta: 31 | if (self.every_n_iters(runner, self.interval_exp_name)) or ( 32 | self.by_epoch and self.end_of_epoch(runner)): 33 | exp_info = f'Exp name: {runner.meta["exp_name"]}' 34 | runner.logger.info(exp_info) 35 | 36 | if log_dict['mode'] == 'train': 37 | lr_str = {} 38 | for lr_type in ['lr', 'layer_0_lr']: 39 | if isinstance(log_dict[lr_type], dict): 40 | lr_str[lr_type] = [] 41 | for k, val in log_dict[lr_type].items(): 42 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}') 43 | lr_str[lr_type] = ' '.join(lr_str[lr_type]) 44 | else: 45 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}' 46 | 47 | # by epoch: Epoch [4][100/1000] 48 | # by iter: Iter [100/100000] 49 | if self.by_epoch: 50 | log_str = f'Epoch [{log_dict["epoch"]}]' \ 51 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' 52 | else: 53 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' 54 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, ' 55 | 56 | if 'time' in log_dict.keys(): 57 | self.time_sec_tot += (log_dict['time'] * self.interval) 58 | time_sec_avg = self.time_sec_tot / ( 59 | runner.iter - self.start_iter + 1) 60 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) 61 | eta_str = str(datetime.timedelta(seconds=int(eta_sec))) 62 | log_str += f'eta: {eta_str}, ' 63 | log_str += f'time: {log_dict["time"]:.3f}, ' \ 64 | f'data_time: {log_dict["data_time"]:.3f}, ' 65 | # statistic memory 66 | if torch.cuda.is_available(): 67 | log_str += f'memory: {log_dict["memory"]}, ' 68 | else: 69 | # val/test time 70 | # here 1000 is the length of the val dataloader 71 | # by epoch: Epoch[val] [4][1000] 72 | # by iter: Iter[val] [1000] 73 | if self.by_epoch: 74 | log_str = f'Epoch({log_dict["mode"]}) ' \ 75 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' 76 | else: 77 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' 78 | 79 | log_items = [] 80 | for name, val in log_dict.items(): 81 | # TODO: resolve this hack 82 | # these items have been in log_str 83 | if name in [ 84 | 'mode', 'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time', 85 | 'memory', 'epoch' 86 | ]: 87 | continue 88 | if isinstance(val, float): 89 | val = f'{val:.4f}' 90 | log_items.append(f'{name}: {val}') 91 | log_str += ', '.join(log_items) 92 | 93 | runner.logger.info(log_str) 94 | 95 | 96 | def log(self, runner): 97 | if 'eval_iter_num' in runner.log_buffer.output: 98 | # this doesn't modify runner.iter and is regardless of by_epoch 99 | cur_iter = runner.log_buffer.output.pop('eval_iter_num') 100 | else: 101 | cur_iter = self.get_iter(runner, inner_iter=True) 102 | 103 | log_dict = OrderedDict( 104 | mode=self.get_mode(runner), 105 | epoch=self.get_epoch(runner), 106 | iter=cur_iter) 107 | 108 | # record lr and layer_0_lr 109 | cur_lr = runner.current_lr() 110 | if isinstance(cur_lr, list): 111 | log_dict['layer_0_lr'] = min(cur_lr) 112 | log_dict['lr'] = max(cur_lr) 113 | else: 114 | assert
isinstance(cur_lr, dict) 115 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {} 116 | for k, lr_ in cur_lr.items(): 117 | assert isinstance(lr_, list) 118 | log_dict['layer_0_lr'].update({k: min(lr_)}) 119 | log_dict['lr'].update({k: max(lr_)}) 120 | 121 | if 'time' in runner.log_buffer.output: 122 | # statistic memory 123 | if torch.cuda.is_available(): 124 | log_dict['memory'] = self._get_max_memory(runner) 125 | 126 | log_dict = dict(log_dict, **runner.log_buffer.output) 127 | 128 | self._log_info(log_dict, runner) 129 | self._dump_log(log_dict, runner) 130 | return log_dict 131 | -------------------------------------------------------------------------------- /detection/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | import json 10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 11 | from mmcv.runner import get_dist_info 12 | 13 | 14 | def get_num_layer_layer_wise(var_name, num_max_layer=12): 15 | 16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 17 | return 0 18 | elif var_name.startswith("backbone.downsample_layers"): 19 | stage_id = int(var_name.split('.')[2]) 20 | if stage_id == 0: 21 | layer_id = 0 22 | elif stage_id == 1: 23 | layer_id = 2 24 | elif stage_id == 2: 25 | layer_id = 3 26 | elif stage_id == 3: 27 | layer_id = num_max_layer 28 | return layer_id 29 | elif var_name.startswith("backbone.stages"): 30 | stage_id = int(var_name.split('.')[2]) 31 | block_id = int(var_name.split('.')[3]) 32 | if stage_id == 0: 33 | layer_id = 1 34 | elif stage_id == 1: 35 | layer_id = 2 36 | elif stage_id == 2: 37 | layer_id = 3 + block_id // 3 38 | elif stage_id == 3: 39 | layer_id = num_max_layer 40 | return layer_id 41 | else: 42 | return num_max_layer + 1 43 | 44 | 45 | def get_num_layer_stage_wise(var_name, num_max_layer): 46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 47 | return 0 48 | elif var_name.startswith("backbone.downsample_layers"): 49 | return 0 50 | elif var_name.startswith("backbone.stages"): 51 | stage_id = int(var_name.split('.')[2]) 52 | return stage_id + 1 53 | else: 54 | return num_max_layer - 1 55 | 56 | 57 | @OPTIMIZER_BUILDERS.register_module() 58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): 59 | def add_params(self, params, module, prefix='', is_dcn_module=None): 60 | """Add all parameters of module to the params list. 61 | The parameters of the given module will be added to the list of param 62 | groups, with specific rules defined by paramwise_cfg. 63 | Args: 64 | params (list[dict]): A list of param groups, it will be modified 65 | in place. 66 | module (nn.Module): The module to be added. 67 | prefix (str): The prefix of the module 68 | is_dcn_module (int|float|None): If the current module is a 69 | submodule of DCN, `is_dcn_module` will be passed to 70 | control conv_offset layer's learning rate. Defaults to None. 
71 | """ 72 | parameter_groups = {} 73 | print(self.paramwise_cfg) 74 | num_layers = self.paramwise_cfg.get('num_layers') + 2 75 | decay_rate = self.paramwise_cfg.get('decay_rate') 76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise") 77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers)) 78 | weight_decay = self.base_wd 79 | 80 | for name, param in module.named_parameters(): 81 | if not param.requires_grad: 82 | continue # frozen weights 83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 84 | group_name = "no_decay" 85 | this_weight_decay = 0. 86 | else: 87 | group_name = "decay" 88 | this_weight_decay = weight_decay 89 | 90 | if decay_type == "layer_wise": 91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers')) 92 | elif decay_type == "stage_wise": 93 | layer_id = get_num_layer_stage_wise(name, num_layers) 94 | 95 | group_name = "layer_%d_%s" % (layer_id, group_name) 96 | 97 | if group_name not in parameter_groups: 98 | scale = decay_rate ** (num_layers - layer_id - 1) 99 | 100 | parameter_groups[group_name] = { 101 | "weight_decay": this_weight_decay, 102 | "params": [], 103 | "param_names": [], 104 | "lr_scale": scale, 105 | "group_name": group_name, 106 | "lr": scale * self.base_lr, 107 | } 108 | 109 | parameter_groups[group_name]["params"].append(param) 110 | parameter_groups[group_name]["param_names"].append(name) 111 | rank, _ = get_dist_info() 112 | if rank == 0: 113 | to_display = {} 114 | for key in parameter_groups: 115 | to_display[key] = { 116 | "param_names": parameter_groups[key]["param_names"], 117 | "lr_scale": parameter_groups[key]["lr_scale"], 118 | "lr": parameter_groups[key]["lr"], 119 | "weight_decay": parameter_groups[key]["weight_decay"], 120 | } 121 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 122 | 123 | params.extend(parameter_groups.values()) 124 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 4 fields: ``meta``, ``state_dict``, 23 | ``optimizer`` and ``amp``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import platform 4 | import shutil 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.runner import RUNNERS, EpochBasedRunner 11 | from .checkpoint import save_checkpoint 12 | 13 | try: 14 | import apex 15 | except: 16 | print('apex is not installed') 17 | 18 | 19 | @RUNNERS.register_module() 20 | class EpochBasedRunnerAmp(EpochBasedRunner): 21 | """Epoch-based Runner with AMP support. 22 | 23 | This runner train models epoch by epoch. 24 | """ 25 | 26 | def save_checkpoint(self, 27 | out_dir, 28 | filename_tmpl='epoch_{}.pth', 29 | save_optimizer=True, 30 | meta=None, 31 | create_symlink=True): 32 | """Save the checkpoint. 33 | 34 | Args: 35 | out_dir (str): The directory that checkpoints are saved. 36 | filename_tmpl (str, optional): The checkpoint filename template, 37 | which contains a placeholder for the epoch number. 38 | Defaults to 'epoch_{}.pth'. 39 | save_optimizer (bool, optional): Whether to save the optimizer to 40 | the checkpoint. Defaults to True. 41 | meta (dict, optional): The meta information to be saved in the 42 | checkpoint. Defaults to None. 43 | create_symlink (bool, optional): Whether to create a symlink 44 | "latest.pth" to point to the latest checkpoint. 45 | Defaults to True. 
46 | """ 47 | if meta is None: 48 | meta = dict(epoch=self.epoch + 1, iter=self.iter) 49 | elif isinstance(meta, dict): 50 | meta.update(epoch=self.epoch + 1, iter=self.iter) 51 | else: 52 | raise TypeError( 53 | f'meta should be a dict or None, but got {type(meta)}') 54 | if self.meta is not None: 55 | meta.update(self.meta) 56 | 57 | filename = filename_tmpl.format(self.epoch + 1) 58 | filepath = osp.join(out_dir, filename) 59 | optimizer = self.optimizer if save_optimizer else None 60 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 61 | # in some environments, `os.symlink` is not supported, you may need to 62 | # set `create_symlink` to False 63 | if create_symlink: 64 | dst_file = osp.join(out_dir, 'latest.pth') 65 | if platform.system() != 'Windows': 66 | mmcv.symlink(filename, dst_file) 67 | else: 68 | shutil.copy(filepath, dst_file) 69 | 70 | def resume(self, 71 | checkpoint, 72 | resume_optimizer=True, 73 | map_location='default'): 74 | if map_location == 'default': 75 | if torch.cuda.is_available(): 76 | device_id = torch.cuda.current_device() 77 | checkpoint = self.load_checkpoint( 78 | checkpoint, 79 | map_location=lambda storage, loc: storage.cuda(device_id)) 80 | else: 81 | checkpoint = self.load_checkpoint(checkpoint) 82 | else: 83 | checkpoint = self.load_checkpoint( 84 | checkpoint, map_location=map_location) 85 | 86 | self._epoch = checkpoint['meta']['epoch'] 87 | self._iter = checkpoint['meta']['iter'] 88 | if 'optimizer' in checkpoint and resume_optimizer: 89 | if isinstance(self.optimizer, Optimizer): 90 | self.optimizer.load_state_dict(checkpoint['optimizer']) 91 | elif isinstance(self.optimizer, dict): 92 | for k in self.optimizer.keys(): 93 | self.optimizer[k].load_state_dict( 94 | checkpoint['optimizer'][k]) 95 | else: 96 | raise TypeError( 97 | 'Optimizer should be dict or torch.optim.Optimizer ' 98 | f'but got {type(self.optimizer)}') 99 | 100 | if 'amp' in checkpoint: 101 | apex.amp.load_state_dict(checkpoint['amp']) 102 | self.logger.info('load amp state dict') 103 | 104 | self.logger.info('resumed epoch %d, iter %d', self.epoch, self.iter) 105 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/optimizer.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OptimizerHook, HOOKS 2 | try: 3 | import apex 4 | except: 5 | print('apex is not installed') 6 | 7 | 8 | @HOOKS.register_module() 9 | class DistOptimizerHook(OptimizerHook): 10 | """Optimizer hook for distributed training.""" 11 | 12 | def __init__(self, update_interval=1, grad_clip=None, coalesce=True, bucket_size_mb=-1, use_fp16=False): 13 | self.grad_clip = grad_clip 14 | self.coalesce = coalesce 15 | self.bucket_size_mb = bucket_size_mb 16 | self.update_interval = update_interval 17 | self.use_fp16 = use_fp16 18 | 19 | def before_run(self, runner): 20 | runner.optimizer.zero_grad() 21 | 22 | def after_train_iter(self, runner): 23 | runner.outputs['loss'] /= self.update_interval 24 | if self.use_fp16: 25 | with apex.amp.scale_loss(runner.outputs['loss'], runner.optimizer) as scaled_loss: 26 | scaled_loss.backward() 27 | else: 28 | runner.outputs['loss'].backward() 29 | if self.every_n_iters(runner, self.update_interval): 30 | if self.grad_clip is not None: 31 | self.clip_grads(runner.model.parameters()) 32 | runner.optimizer.step() 33 | runner.optimizer.zero_grad() -------------------------------------------------------------------------------- 
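Taken together, the `mmcv_custom` pieces above are wired in purely through config keys: the constructor and hooks are looked up by their registry names. A hedged sketch of how a config would reference them (the registry names match the decorated classes in this repo, but the concrete `paramwise_cfg` and epoch values below are illustrative assumptions, not settings shipped here):

```python
# Sketch: referencing the custom registered modules from an mmdet config.
# num_layers / decay_rate values are illustrative, not from this repo.
optimizer = dict(
    type='AdamW', lr=0.0002, weight_decay=0.05,
    constructor='LearningRateDecayOptimizerConstructor',
    paramwise_cfg=dict(num_layers=12, decay_rate=0.9, decay_type='layer_wise'))
# FP16 training via apex goes through the custom optimizer hook and runner:
optimizer_config = dict(
    type='DistOptimizerHook', update_interval=1, grad_clip=None, use_fp16=True)
runner = dict(type='EpochBasedRunnerAmp', max_epochs=12)
# The custom logger prints both the max lr and the layer_0 (most decayed) lr:
log_config = dict(
    interval=50, hooks=[dict(type='CustomizedTextLoggerHook', by_epoch=True)])
```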
/detection/video_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # usage example: 3 | # python video_demo.py fire_dance.mp4 configs/mask_rcnn_coc_b24_7_fpn_1x_coco_video.py work_dirs/mask_rcnn_coc_b24_7_fpn_1x_coco/epoch_12.pth --out result2.mp4 4 | 5 | 6 | import argparse 7 | 8 | import cv2 9 | import mmcv 10 | 11 | from mmdet.apis import inference_detector, init_detector 12 | import sys 13 | sys.path.append('../') 14 | import models 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='MMDetection video demo') 19 | parser.add_argument('video', help='Video file') 20 | parser.add_argument('config', help='Config file') 21 | parser.add_argument('checkpoint', help='Checkpoint file') 22 | parser.add_argument( 23 | '--device', default='cuda:0', help='Device used for inference') 24 | parser.add_argument( 25 | '--score-thr', type=float, default=0.5, help='Bbox score threshold') 26 | parser.add_argument('--out', type=str, help='Output video file') 27 | parser.add_argument('--show', action='store_true', help='Show video') 28 | parser.add_argument( 29 | '--wait-time', 30 | type=float, 31 | default=1, 32 | help='Display interval in seconds; 0 blocks until a key press') 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | assert args.out or args.show, \ 40 | ('Please specify at least one operation (save/show the ' 41 | 'video) with the argument "--out" or "--show"') 42 | 43 | model = init_detector(args.config, args.checkpoint, device=args.device) 44 | 45 | video_reader = mmcv.VideoReader(args.video) 46 | video_writer = None 47 | if args.out: 48 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 49 | video_writer = cv2.VideoWriter( 50 | args.out, fourcc, video_reader.fps, 51 | (video_reader.width, video_reader.height)) 52 | 53 | for frame in mmcv.track_iter_progress(video_reader): 54 | result = inference_detector(model, frame) 55 | frame = model.show_result(frame, result, score_thr=args.score_thr) 56 | if args.show: 57 | cv2.namedWindow('video', 0) 58 | mmcv.imshow(frame, 'video', args.wait_time) 59 | if args.out: 60 | video_writer.write(frame) 61 | 62 | if video_writer: 63 | video_writer.release() 64 | cv2.destroyAllWindows() 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from timm.models import * 2 | from .EfficientMod import * 3 | -------------------------------------------------------------------------------- /segmentation/README.md: -------------------------------------------------------------------------------- 1 | # Applying EfficientMod to Semantic Segmentation 2 | 3 | Our semantic segmentation implementation is based on [MMSegmentation v0.19.0](https://github.com/open-mmlab/mmsegmentation/tree/v0.19.0) and [PVT segmentation](https://github.com/whai362/PVT/tree/v2/segmentation). We thank the authors for their wonderful work. 4 | 5 | ## Usage 6 | 7 | Install MMSegmentation v0.19.0. 8 | 9 | ## Data preparation 10 | 11 | Prepare ADE20K according to the [guidelines](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/dataset_prepare.md#prepare-datasets) in MMSegmentation.
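For reference, the paths in `configs/_base_/datasets/ade20k.py` (shown later in this listing) expect the standard layout below under `data_root`, which is set there to `/dev/shm/ade/ADEChallengeData2016` and should be adjusted to your local path:

```
ADEChallengeData2016/
├── images/
│   ├── training/
│   └── validation/
└── annotations/
    ├── training/
    └── validation/
```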
12 | 13 | 14 | ## Results and models 15 | 16 | | Method | Backbone | Pretrain | Iters | mIoU | Download | 17 | | --- | --- | --- |:---:|:---:| --- | 18 | | Semantic FPN | EfficientMod-s-Conv | ImageNet-1K | 40K | 43.5 | [[checkpoint & log]](https://drive.google.com/drive/folders/1XXTCgh4o5sNrSdGmuqqPK22TvmRsaUvk?usp=share_link) | 19 | | Semantic FPN | EfficientMod-s | ImageNet-1K | 40K | 46.0 |[[checkpoint & log]](https://drive.google.com/drive/folders/1ih0zO9X1yklbsVOHSEIeNn5goleCkFxs?usp=share_link) | 20 | 21 | 22 | ## Evaluation 23 | To evaluate EfficientMod + Semantic FPN on a single node with 8 GPUs run: 24 | ``` 25 | dist_test.sh configs/sem_fpn/{configure-file}.py /path/to/checkpoint_file 8 --out results.pkl --eval mIoU 26 | ``` 27 | 28 | 29 | ## Training 30 | To train EfficientMod + Semantic FPN on a single node with 8 GPUs run: 31 | 32 | ``` 33 | dist_train.sh configs/sem_fpn/{configure-file}.py 8 34 | ``` 35 | -------------------------------------------------------------------------------- /segmentation/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .EfficientMod import * 2 | 3 | __all__ = [ "efficientMod_s", "efficientMod_s_Conv"] 4 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/datasets/ade20k.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ADE20KDataset' 3 | data_root = '/dev/shm/ade/ADEChallengeData2016' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations', reduce_zero_label=True), 10 | dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(2048, 512), 24 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 25 | flip=False, 26 | transforms=[ 27 | dict(type='AlignResize', keep_ratio=True, size_divisor=32), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | samples_per_gpu=4, 36 | workers_per_gpu=4, 37 | train=dict( 38 | type='RepeatDataset', 39 | times=50, 40 | dataset=dict( 41 | type=dataset_type, 42 | data_root=data_root, 43 | img_dir='images/training', 44 | ann_dir='annotations/training', 45 | pipeline=train_pipeline)), 46 | val=dict( 47 | type=dataset_type, 48 | data_root=data_root, 49 | img_dir='images/validation', 50 | ann_dir='annotations/validation', 51 | pipeline=test_pipeline), 52 | test=dict( 53 | type=dataset_type, 54 | data_root=data_root, 55 | img_dir='images/validation', 56 | ann_dir='annotations/validation', 57 | pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # 
yapf:disable 2 | log_config = dict( 3 | interval=50, 4 | hooks=[ 5 | dict(type='TextLoggerHook', by_epoch=False), 6 | # dict(type='TensorboardLoggerHook') 7 | ]) 8 | # yapf:enable 9 | dist_params = dict(backend='nccl') 10 | log_level = 'INFO' 11 | load_from = None 12 | resume_from = None 13 | workflow = [('train', 1)] 14 | cudnn_benchmark = True 15 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/models/fpn_r50.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='SyncBN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 1, 1), 12 | strides=(1, 2, 2, 2), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=4), 22 | decode_head=dict( 23 | type='FPNHead', 24 | in_channels=[256, 256, 256, 256], 25 | in_index=[0, 1, 2, 3], 26 | feature_strides=[4, 8, 16, 32], 27 | channels=128, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | # model training and testing settings 35 | train_cfg=dict(), 36 | test_cfg=dict(mode='whole')) 37 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_160k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=160000) 8 | checkpoint_config = dict(by_epoch=False, interval=16000) 9 | evaluation = dict(interval=16000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_20k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=20000) 8 | checkpoint_config = dict(by_epoch=False, interval=2000) 9 | evaluation = dict(interval=2000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_40k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=40000) 8 | checkpoint_config = dict(by_epoch=False, interval=4000) 9 | evaluation = dict(interval=4000, metric='mIoU') 10 | 
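These FPN schedules (including the 80k variant that follows) differ only in `max_iters` and the checkpoint/eval interval; the poly decay itself is shared. As a quick sanity check, here is a sketch of the learning-rate curve these settings imply (this mirrors mmcv's `PolyLrUpdaterHook` formula to the best of our knowledge; treat it as illustrative):

```python
# Poly LR decay as configured above, using the 40k schedule's values.
base_lr, min_lr, power, max_iters = 0.01, 1e-4, 0.9, 40000

def poly_lr(it):
    # lr shrinks from base_lr toward the min_lr floor as training progresses
    return (base_lr - min_lr) * (1 - it / max_iters) ** power + min_lr

print(poly_lr(0))      # 0.01 at the start
print(poly_lr(20000))  # ~0.0054 at the halfway point
print(poly_lr(40000))  # 0.0001 floor at the end
```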
-------------------------------------------------------------------------------- /segmentation/configs/_base_/schedules/schedule_80k.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) 3 | optimizer_config = dict() 4 | # learning policy 5 | lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) 6 | # runtime settings 7 | runner = dict(type='IterBasedRunner', max_iters=80000) 8 | checkpoint_config = dict(by_epoch=False, interval=8000) 9 | evaluation = dict(interval=8000, metric='mIoU') 10 | -------------------------------------------------------------------------------- /segmentation/configs/sem_fpn/efficientMod_s.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | type='EncoderDecoder', 9 | backbone=dict( 10 | type='efficientMod_s', 11 | style='pytorch', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint=\ 15 | '{path-to-pre-trained-checkpoint}', 16 | ), 17 | ), 18 | neck=dict(in_channels=[40,80,160,344]), 19 | decode_head=dict(num_classes=150)) 20 | 21 | 22 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 23 | # optimizer 24 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 25 | optimizer_config = dict() 26 | # learning policy 27 | lr_config = dict( policy='CosineAnnealing', 28 | warmup='linear', 29 | warmup_iters=1000, 30 | warmup_ratio=1.0 / 10, 31 | by_epoch=False, 32 | min_lr_ratio=1e-6) 33 | # runtime settings 34 | runner = dict(type='IterBasedRunner', max_iters=80000//gpu_multiples) 35 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples) 36 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU') 37 | device = 'cuda' 38 | -------------------------------------------------------------------------------- /segmentation/configs/sem_fpn/efficientMod_s_Conv.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/models/fpn_r50.py', 3 | '../_base_/datasets/ade20k.py', 4 | '../_base_/default_runtime.py' 5 | ] 6 | # model settings 7 | model = dict( 8 | type='EncoderDecoder', 9 | backbone=dict( 10 | type='efficientMod_s_Conv', 11 | style='pytorch', 12 | init_cfg=dict( 13 | type='Pretrained', 14 | checkpoint=\ 15 | '{path-to-pre-trained-checkpoint}', 16 | ), 17 | ), 18 | neck=dict(in_channels=[32, 64, 144, 312]), 19 | decode_head=dict(num_classes=150)) 20 | 21 | 22 | gpu_multiples = 2 # we use 8 gpu instead of 4 in mmsegmentation, so lr*2 and max_iters/2 23 | # optimizer 24 | optimizer = dict(type='AdamW', lr=0.0001*gpu_multiples, weight_decay=0.0001) 25 | optimizer_config = dict() 26 | # learning policy 27 | lr_config = dict( policy='CosineAnnealing', 28 | warmup='linear', 29 | warmup_iters=1000, 30 | warmup_ratio=1.0 / 10, 31 | by_epoch=False, 32 | min_lr_ratio=0) 33 | # runtime settings 34 | runner = dict(type='IterBasedRunner', max_iters=80000//gpu_multiples) 35 | checkpoint_config = dict(by_epoch=False, interval=8000//gpu_multiples) 36 | evaluation = dict(interval=8000//gpu_multiples, metric='mIoU') 37 | device = 'cuda' 38 | -------------------------------------------------------------------------------- /segmentation/dist_test.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} -------------------------------------------------------------------------------- /segmentation/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /segmentation/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config 5 | from mmcv.cnn import get_model_complexity_info 6 | 7 | from mmseg.models import build_segmentor 8 | import sys 9 | sys.path.append("..") 10 | from backbone import * 11 | import models 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Get the FLOPs of a segmentor') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--shape', 18 | type=int, 19 | nargs='+', 20 | default=[512, 512], 21 | help='input image size') 22 | args = parser.parse_args() 23 | return args 24 | 25 | 26 | def main(): 27 | 28 | args = parse_args() 29 | 30 | if len(args.shape) == 1: 31 | input_shape = (3, args.shape[0], args.shape[0]) 32 | elif len(args.shape) == 2: 33 | input_shape = (3, ) + tuple(args.shape) 34 | else: 35 | raise ValueError('invalid input shape') 36 | 37 | cfg = Config.fromfile(args.config) 38 | cfg.model.pretrained = None 39 | model = build_segmentor( 40 | cfg.model, 41 | train_cfg=cfg.get('train_cfg'), 42 | test_cfg=cfg.get('test_cfg')).cuda() 43 | model.eval() 44 | 45 | if hasattr(model, 'forward_dummy'): 46 | model.forward = model.forward_dummy 47 | else: 48 | raise NotImplementedError( 49 | 'FLOPs counter is currently not supported with {}'. 50 | format(model.__class__.__name__)) 51 | 52 | flops, params = get_model_complexity_info(model, input_shape) 53 | split_line = '=' * 30 54 | print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format( 55 | split_line, input_shape, flops, params)) 56 | print('!!!Please be cautious if you use the results in papers. ' 57 | 'You may need to check if all ops are supported and verify that the ' 58 | 'flops computation is correct.') 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /segmentation/image_demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from argparse import ArgumentParser 3 | 4 | from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot 5 | from mmseg.core.evaluation import get_palette 6 | 7 | 8 | def main(): 9 | parser = ArgumentParser() 10 | parser.add_argument('img', help='Image file') 11 | parser.add_argument('config', help='Config file') 12 | parser.add_argument('checkpoint', help='Checkpoint file') 13 | parser.add_argument('--out-file', default=None, help='Path to output file') 14 | parser.add_argument( 15 | '--device', default='cuda:0', help='Device used for inference') 16 | parser.add_argument( 17 | '--palette', 18 | default='cityscapes', 19 | help='Color palette used for segmentation map') 20 | parser.add_argument( 21 | '--opacity', 22 | type=float, 23 | default=0.5, 24 | help='Opacity of painted segmentation map. In (0, 1] range.') 25 | args = parser.parse_args() 26 | 27 | # build the model from a config file and a checkpoint file 28 | model = init_segmentor(args.config, args.checkpoint, device=args.device) 29 | # test a single image 30 | result = inference_segmentor(model, args.img) 31 | # show the results 32 | show_result_pyplot( 33 | model, 34 | args.img, 35 | result, 36 | get_palette(args.palette), 37 | opacity=args.opacity, 38 | out_file=args.out_file) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /segmentation/images/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma-xu/EfficientMod/ec04df6da43b91414f3fd9ad0ea7a1161be6157c/segmentation/images/1.jpg -------------------------------------------------------------------------------- /segmentation/images/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma-xu/EfficientMod/ec04df6da43b91414f3fd9ad0ea7a1161be6157c/segmentation/images/2.jpg -------------------------------------------------------------------------------- /segmentation/images/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma-xu/EfficientMod/ec04df6da43b91414f3fd9ad0ea7a1161be6157c/segmentation/images/3.jpg -------------------------------------------------------------------------------- /segmentation/images/4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma-xu/EfficientMod/ec04df6da43b91414f3fd9ad0ea7a1161be6157c/segmentation/images/4.jpg -------------------------------------------------------------------------------- /segmentation/images/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma-xu/EfficientMod/ec04df6da43b91414f3fd9ad0ea7a1161be6157c/segmentation/images/5.jpg -------------------------------------------------------------------------------- /segmentation/images/6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma-xu/EfficientMod/ec04df6da43b91414f3fd9ad0ea7a1161be6157c/segmentation/images/6.jpg -------------------------------------------------------------------------------- /segmentation/images/7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ma-xu/EfficientMod/ec04df6da43b91414f3fd9ad0ea7a1161be6157c/segmentation/images/7.jpg 
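These sample images pair naturally with `image_demo.py` above. A plausible invocation from the `segmentation/` directory (the palette name is an assumption about what your mmseg version's `get_palette` accepts, and `{path-to-checkpoint}` is the same placeholder used in the configs): `python image_demo.py images/1.jpg configs/sem_fpn/efficientMod_s.py {path-to-checkpoint} --palette ade20k --out-file demo_out.jpg`.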
-------------------------------------------------------------------------------- /segmentation/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import mmcv 5 | import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, 8 | wrap_fp16_model) 9 | from mmcv.utils import DictAction 10 | 11 | from mmseg.apis import multi_gpu_test, single_gpu_test 12 | from mmseg.datasets import build_dataloader, build_dataset 13 | from mmseg.models import build_segmentor 14 | from backbone import * 15 | 16 | import sys 17 | sys.path.append('../') 18 | import models 19 | from align_resize import AlignResize 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser( 23 | description='mmseg test (and eval) a model') 24 | parser.add_argument('config', help='test config file path') 25 | parser.add_argument('checkpoint', help='checkpoint file') 26 | parser.add_argument( 27 | '--aug-test', action='store_true', help='Use flip and multi-scale augmentation') 28 | parser.add_argument('--out', help='output result file in pickle format') 29 | parser.add_argument( 30 | '--format-only', 31 | action='store_true', 32 | help='Format the output results without performing evaluation. It is ' 33 | 'useful when you want to format the result to a specific format and ' 34 | 'submit it to the test server') 35 | parser.add_argument( 36 | '--eval', 37 | type=str, 38 | nargs='+', 39 | help='evaluation metrics, which depends on the dataset, e.g., "mIoU"' 40 | ' for generic datasets, and "cityscapes" for Cityscapes') 41 | parser.add_argument('--show', action='store_true', help='show results') 42 | parser.add_argument( 43 | '--show-dir', help='directory where painted images will be saved') 44 | parser.add_argument( 45 | '--gpu-collect', 46 | action='store_true', 47 | help='whether to use gpu to collect results.') 48 | parser.add_argument( 49 | '--tmpdir', 50 | help='tmp directory used for collecting results from multiple ' 51 | 'workers, available when gpu_collect is not specified') 52 | parser.add_argument( 53 | '--options', nargs='+', action=DictAction, help='custom options') 54 | parser.add_argument( 55 | '--eval-options', 56 | nargs='+', 57 | action=DictAction, 58 | help='custom options for evaluation') 59 | parser.add_argument( 60 | '--launcher', 61 | choices=['none', 'pytorch', 'slurm', 'mpi'], 62 | default='none', 63 | help='job launcher') 64 | parser.add_argument( 65 | '--opacity', 66 | type=float, 67 | default=0.5, 68 | help='Opacity of painted segmentation map.
In (0, 1] range.') 69 | parser.add_argument('--local_rank', type=int, default=0) 70 | args = parser.parse_args() 71 | if 'LOCAL_RANK' not in os.environ: 72 | os.environ['LOCAL_RANK'] = str(args.local_rank) 73 | return args 74 | 75 | 76 | def main(): 77 | args = parse_args() 78 | 79 | assert args.out or args.eval or args.format_only or args.show \ 80 | or args.show_dir, \ 81 | ('Please specify at least one operation (save/eval/format/show the ' 82 | 'results / save the results) with the argument "--out", "--eval"' 83 | ', "--format-only", "--show" or "--show-dir"') 84 | 85 | if args.eval and args.format_only: 86 | raise ValueError('--eval and --format_only cannot be both specified') 87 | 88 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 89 | raise ValueError('The output file must be a pkl file.') 90 | 91 | cfg = mmcv.Config.fromfile(args.config) 92 | if args.options is not None: 93 | cfg.merge_from_dict(args.options) 94 | # set cudnn_benchmark 95 | if cfg.get('cudnn_benchmark', False): 96 | torch.backends.cudnn.benchmark = True 97 | if args.aug_test: 98 | # hard code index 99 | cfg.data.test.pipeline[1].img_ratios = [ 100 | 0.5, 0.75, 1.0, 1.25, 1.5, 1.75 101 | ] 102 | cfg.data.test.pipeline[1].flip = True 103 | cfg.model.pretrained = None 104 | cfg.data.test.test_mode = True 105 | 106 | # init distributed env first, since logger depends on the dist info. 107 | if args.launcher == 'none': 108 | distributed = False 109 | else: 110 | distributed = True 111 | init_dist(args.launcher, **cfg.dist_params) 112 | 113 | # build the dataloader 114 | # TODO: support multiple images per gpu (only minor changes are needed) 115 | dataset = build_dataset(cfg.data.test) 116 | data_loader = build_dataloader( 117 | dataset, 118 | samples_per_gpu=1, 119 | workers_per_gpu=cfg.data.workers_per_gpu, 120 | dist=distributed, 121 | shuffle=False) 122 | 123 | # build the model and load checkpoint 124 | cfg.model.train_cfg = None 125 | model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) 126 | fp16_cfg = cfg.get('fp16', None) 127 | if fp16_cfg is not None: 128 | wrap_fp16_model(model) 129 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') 130 | model.CLASSES = dataset.CLASSES # checkpoint['meta']['CLASSES'] 131 | model.PALETTE = dataset.PALETTE # checkpoint['meta']['PALETTE'] 132 | 133 | efficient_test = False 134 | if args.eval_options is not None: 135 | efficient_test = args.eval_options.get('efficient_test', False) 136 | 137 | if not distributed: 138 | model = MMDataParallel(model, device_ids=[0]) 139 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir, 140 | efficient_test, args.opacity) 141 | else: 142 | model = MMDistributedDataParallel( 143 | model.cuda(), 144 | device_ids=[torch.cuda.current_device()], 145 | broadcast_buffers=False) 146 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 147 | args.gpu_collect, efficient_test) 148 | 149 | rank, _ = get_dist_info() 150 | if rank == 0: 151 | if args.out: 152 | print(f'\nwriting results to {args.out}') 153 | mmcv.dump(outputs, args.out) 154 | kwargs = {} if args.eval_options is None else args.eval_options 155 | if args.format_only: 156 | dataset.format_results(outputs, **kwargs) 157 | if args.eval: 158 | dataset.evaluate(outputs, args.eval, **kwargs) 159 | 160 | 161 | if __name__ == '__main__': 162 | main() 163 | -------------------------------------------------------------------------------- /segmentation/tools/analyze_logs.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | """Modified from https://github.com/open- 3 | mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py.""" 4 | import argparse 5 | import json 6 | from collections import defaultdict 7 | 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns 10 | 11 | 12 | def plot_curve(log_dicts, args): 13 | if args.backend is not None: 14 | plt.switch_backend(args.backend) 15 | sns.set_style(args.style) 16 | # if legend is None, use {filename}_{key} as legend 17 | legend = args.legend 18 | if legend is None: 19 | legend = [] 20 | for json_log in args.json_logs: 21 | for metric in args.keys: 22 | legend.append(f'{json_log}_{metric}') 23 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 24 | metrics = args.keys 25 | 26 | num_metrics = len(metrics) 27 | for i, log_dict in enumerate(log_dicts): 28 | epochs = list(log_dict.keys()) 29 | for j, metric in enumerate(metrics): 30 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 31 | plot_epochs = [] 32 | plot_iters = [] 33 | plot_values = [] 34 | # In some log files the iteration numbers are not monotonic; `pre_iter` is 35 | # used to skip those entries and avoid plotting wrong lines. 36 | pre_iter = -1 37 | for epoch in epochs: 38 | epoch_logs = log_dict[epoch] 39 | if metric not in epoch_logs.keys(): 40 | continue 41 | if metric in ['mIoU', 'mAcc', 'aAcc']: 42 | plot_epochs.append(epoch) 43 | plot_values.append(epoch_logs[metric][0]) 44 | else: 45 | for idx in range(len(epoch_logs[metric])): 46 | if pre_iter > epoch_logs['iter'][idx]: 47 | continue 48 | pre_iter = epoch_logs['iter'][idx] 49 | plot_iters.append(epoch_logs['iter'][idx]) 50 | plot_values.append(epoch_logs[metric][idx]) 51 | ax = plt.gca() 52 | label = legend[i * num_metrics + j] 53 | if metric in ['mIoU', 'mAcc', 'aAcc']: 54 | ax.set_xticks(plot_epochs) 55 | plt.xlabel('epoch') 56 | plt.plot(plot_epochs, plot_values, label=label, marker='o') 57 | else: 58 | plt.xlabel('iter') 59 | plt.plot(plot_iters, plot_values, label=label, linewidth=0.5) 60 | plt.legend() 61 | if args.title is not None: 62 | plt.title(args.title) 63 | if args.out is None: 64 | plt.show() 65 | else: 66 | print(f'save curve to: {args.out}') 67 | plt.savefig(args.out) 68 | plt.cla() 69 | 70 | 71 | def parse_args(): 72 | parser = argparse.ArgumentParser(description='Analyze Json Log') 73 | parser.add_argument( 74 | 'json_logs', 75 | type=str, 76 | nargs='+', 77 | help='path of train log in json format') 78 | parser.add_argument( 79 | '--keys', 80 | type=str, 81 | nargs='+', 82 | default=['mIoU'], 83 | help='the metric that you want to plot') 84 | parser.add_argument('--title', type=str, help='title of figure') 85 | parser.add_argument( 86 | '--legend', 87 | type=str, 88 | nargs='+', 89 | default=None, 90 | help='legend of each plot') 91 | parser.add_argument( 92 | '--backend', type=str, default=None, help='backend of plt') 93 | parser.add_argument( 94 | '--style', type=str, default='dark', help='style of plt') 95 | parser.add_argument('--out', type=str, default=None) 96 | args = parser.parse_args() 97 | return args 98 | 99 | 100 | def load_json_logs(json_logs): 101 | # Load and convert json_logs to log_dicts: each key is an epoch and each 102 | # value is a sub dict whose keys are the different metrics and whose 103 | # values are lists of the metric's values over all iterations 104 | log_dicts = [dict() for _ in json_logs] 105 | for json_log, log_dict in zip(json_logs, log_dicts): 106 | with
open(json_log, 'r') as log_file: 107 | for line in log_file: 108 | log = json.loads(line.strip()) 109 | # skip lines without `epoch` field 110 | if 'epoch' not in log: 111 | continue 112 | epoch = log.pop('epoch') 113 | if epoch not in log_dict: 114 | log_dict[epoch] = defaultdict(list) 115 | for k, v in log.items(): 116 | log_dict[epoch][k].append(v) 117 | return log_dicts 118 | 119 | 120 | def main(): 121 | args = parse_args() 122 | json_logs = args.json_logs 123 | for json_log in json_logs: 124 | assert json_log.endswith('.json') 125 | log_dicts = load_json_logs(json_logs) 126 | plot_curve(log_dicts, args) 127 | 128 | 129 | if __name__ == '__main__': 130 | main() 131 | -------------------------------------------------------------------------------- /segmentation/tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import time 4 | 5 | import torch 6 | from mmcv import Config 7 | from mmcv.parallel import MMDataParallel 8 | from mmcv.runner import load_checkpoint, wrap_fp16_model 9 | 10 | from mmseg.datasets import build_dataloader, build_dataset 11 | from mmseg.models import build_segmentor 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='MMSeg benchmark a model') 16 | parser.add_argument('config', help='test config file path') 17 | parser.add_argument('checkpoint', help='checkpoint file') 18 | parser.add_argument( 19 | '--log-interval', type=int, default=50, help='interval of logging') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | 27 | cfg = Config.fromfile(args.config) 28 | # set cudnn_benchmark 29 | torch.backends.cudnn.benchmark = False 30 | cfg.model.pretrained = None 31 | cfg.data.test.test_mode = True 32 | 33 | # build the dataloader 34 | # TODO: support multiple images per gpu (only minor changes are needed) 35 | dataset = build_dataset(cfg.data.test) 36 | data_loader = build_dataloader( 37 | dataset, 38 | samples_per_gpu=1, 39 | workers_per_gpu=cfg.data.workers_per_gpu, 40 | dist=False, 41 | shuffle=False) 42 | 43 | # build the model and load checkpoint 44 | cfg.model.train_cfg = None 45 | model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) 46 | fp16_cfg = cfg.get('fp16', None) 47 | if fp16_cfg is not None: 48 | wrap_fp16_model(model) 49 | load_checkpoint(model, args.checkpoint, map_location='cpu') 50 | 51 | model = MMDataParallel(model, device_ids=[0]) 52 | 53 | model.eval() 54 | 55 | # the first several iterations may be very slow so skip them 56 | num_warmup = 5 57 | pure_inf_time = 0 58 | total_iters = 200 59 | 60 | # benchmark with 200 image and take the average 61 | for i, data in enumerate(data_loader): 62 | 63 | torch.cuda.synchronize() 64 | start_time = time.perf_counter() 65 | 66 | with torch.no_grad(): 67 | model(return_loss=False, rescale=True, **data) 68 | 69 | torch.cuda.synchronize() 70 | elapsed = time.perf_counter() - start_time 71 | 72 | if i >= num_warmup: 73 | pure_inf_time += elapsed 74 | if (i + 1) % args.log_interval == 0: 75 | fps = (i + 1 - num_warmup) / pure_inf_time 76 | print(f'Done image [{i + 1:<3}/ {total_iters}], ' 77 | f'fps: {fps:.2f} img / s') 78 | 79 | if (i + 1) == total_iters: 80 | fps = (i + 1 - num_warmup) / pure_inf_time 81 | print(f'Overall fps: {fps:.2f} img / s') 82 | break 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | 
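The timing loop in benchmark.py above follows the usual GPU benchmarking pattern: skip a few warm-up iterations, bracket the forward pass with torch.cuda.synchronize() so queued kernels are actually counted, and average over the rest. A minimal sketch of that pattern, assuming `model` is any callable and `data_loader` any iterable of inputs (both placeholders, not repo APIs):

import time

import torch


def measure_fps(model, data_loader, num_warmup=5, total_iters=200):
    pure_inf_time = 0.0
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()     # drain previously queued kernels
        start = time.perf_counter()
        with torch.no_grad():
            model(data)
        torch.cuda.synchronize()     # wait for this forward pass to finish
        if i >= num_warmup:          # discard the slow warm-up iterations
            pure_inf_time += time.perf_counter() - start
        if i + 1 == total_iters:
            return (i + 1 - num_warmup) / pure_inf_time
    raise ValueError('data_loader yielded fewer than total_iters batches')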
-------------------------------------------------------------------------------- /segmentation/tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import warnings 4 | from pathlib import Path 5 | 6 | import mmcv 7 | import numpy as np 8 | from mmcv import Config 9 | 10 | from mmseg.datasets.builder import build_dataset 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Browse a dataset') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--show-origin', 18 | default=False, 19 | action='store_true', 20 | help='if True, omit all augmentations in the pipeline and' 21 | ' show the original image and seg map') 22 | parser.add_argument( 23 | '--skip-type', 24 | type=str, 25 | nargs='+', 26 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 27 | help='skip some unneeded pipeline steps; if `show-origin` is true, ' 28 | 'every pipeline step except `Load` will be skipped') 29 | parser.add_argument( 30 | '--output-dir', 31 | default='./output', 32 | type=str, 33 | help='If there is no display interface, you can save the results here') 34 | parser.add_argument('--show', default=False, action='store_true') 35 | parser.add_argument( 36 | '--show-interval', 37 | type=int, 38 | default=999, 39 | help='the interval of show (ms)') 40 | parser.add_argument( 41 | '--opacity', 42 | type=float, 43 | default=0.5, 44 | help='the opacity of the semantic map') 45 | args = parser.parse_args() 46 | return args 47 | 48 | 49 | def imshow_semantic(img, 50 | seg, 51 | class_names, 52 | palette=None, 53 | win_name='', 54 | show=False, 55 | wait_time=0, 56 | out_file=None, 57 | opacity=0.5): 58 | """Draw `seg` over `img`. 59 | 60 | Args: 61 | img (str or Tensor): The image to be displayed. 62 | seg (Tensor): The semantic segmentation results to draw over 63 | `img`. 64 | class_names (list[str]): Names of each class. 65 | palette (list[list[int]] | np.ndarray | None): The palette of the 66 | segmentation map. If None is given, a random palette will be 67 | generated. Default: None 68 | win_name (str): The window name. 69 | wait_time (int): Value of waitKey param. 70 | Default: 0. 71 | show (bool): Whether to show the image. 72 | Default: False. 73 | out_file (str or None): The filename to write the image. 74 | Default: None. 75 | opacity (float): Opacity of painted segmentation map. 76 | Default: 0.5. 77 | Must be in (0, 1] range.
78 | Returns: 79 | img (Tensor): Only if not `show` or `out_file` 80 | """ 81 | img = mmcv.imread(img) 82 | img = img.copy() 83 | if palette is None: 84 | palette = np.random.randint(0, 255, size=(len(class_names), 3)) 85 | palette = np.array(palette) 86 | assert palette.shape[0] == len(class_names) 87 | assert palette.shape[1] == 3 88 | assert len(palette.shape) == 2 89 | assert 0 < opacity <= 1.0 90 | color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) 91 | for label, color in enumerate(palette): 92 | color_seg[seg == label, :] = color 93 | # convert to BGR 94 | color_seg = color_seg[..., ::-1] 95 | 96 | img = img * (1 - opacity) + color_seg * opacity 97 | img = img.astype(np.uint8) 98 | # if out_file specified, do not show image in window 99 | if out_file is not None: 100 | show = False 101 | 102 | if show: 103 | mmcv.imshow(img, win_name, wait_time) 104 | if out_file is not None: 105 | mmcv.imwrite(img, out_file) 106 | 107 | if not (show or out_file): 108 | warnings.warn('show==False and out_file is not specified, only ' 109 | 'result image will be returned') 110 | return img 111 | 112 | 113 | def _retrieve_data_cfg(_data_cfg, skip_type, show_origin): 114 | if show_origin is True: 115 | # only keep pipeline of Loading data and ann 116 | _data_cfg['pipeline'] = [ 117 | x for x in _data_cfg.pipeline if 'Load' in x['type'] 118 | ] 119 | else: 120 | _data_cfg['pipeline'] = [ 121 | x for x in _data_cfg.pipeline if x['type'] not in skip_type 122 | ] 123 | 124 | 125 | def retrieve_data_cfg(config_path, skip_type, show_origin=False): 126 | cfg = Config.fromfile(config_path) 127 | train_data_cfg = cfg.data.train 128 | if isinstance(train_data_cfg, list): 129 | for _data_cfg in train_data_cfg: 130 | if 'pipeline' in _data_cfg: 131 | _retrieve_data_cfg(_data_cfg, skip_type, show_origin) 132 | elif 'dataset' in _data_cfg: 133 | _retrieve_data_cfg(_data_cfg['dataset'], skip_type, 134 | show_origin) 135 | else: 136 | raise ValueError 137 | elif 'dataset' in train_data_cfg: 138 | _retrieve_data_cfg(train_data_cfg['dataset'], skip_type, show_origin) 139 | else: 140 | _retrieve_data_cfg(train_data_cfg, skip_type, show_origin) 141 | return cfg 142 | 143 | 144 | def main(): 145 | args = parse_args() 146 | cfg = retrieve_data_cfg(args.config, args.skip_type, args.show_origin) 147 | dataset = build_dataset(cfg.data.train) 148 | progress_bar = mmcv.ProgressBar(len(dataset)) 149 | for item in dataset: 150 | filename = os.path.join(args.output_dir, 151 | Path(item['filename']).name 152 | ) if args.output_dir is not None else None 153 | imshow_semantic( 154 | item['img'], 155 | item['gt_semantic_seg'], 156 | dataset.CLASSES, 157 | dataset.PALETTE, 158 | show=args.show, 159 | wait_time=args.show_interval, 160 | out_file=filename, 161 | opacity=args.opacity, 162 | ) 163 | progress_bar.update() 164 | 165 | 166 | if __name__ == '__main__': 167 | main() 168 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/chase_db1.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
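# A minimal sketch of the overlay blending performed by imshow_semantic in
# browse_dataset.py above: each class id is painted with its palette colour,
# then alpha-blended into the image. `img` (HxWx3 uint8), `seg` (HxW int) and
# `palette` (Nx3 int) are assumed inputs; the helper name is hypothetical.
import numpy as np

def blend_segmentation(img, seg, palette, opacity=0.5):
    assert 0 < opacity <= 1.0
    color_seg = np.zeros((*seg.shape, 3), dtype=np.uint8)
    for label, color in enumerate(palette):
        color_seg[seg == label] = color          # colour every class region
    blended = img * (1 - opacity) + color_seg * opacity
    return blended.astype(np.uint8)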
2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import mmcv 9 | 10 | CHASE_DB1_LEN = 28 * 3 11 | TRAINING_LEN = 60 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert CHASE_DB1 dataset to mmsegmentation format') 17 | parser.add_argument('dataset_path', help='path of CHASEDB1.zip') 18 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 19 | parser.add_argument('-o', '--out_dir', help='output path') 20 | args = parser.parse_args() 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | dataset_path = args.dataset_path 27 | if args.out_dir is None: 28 | out_dir = osp.join('data', 'CHASE_DB1') 29 | else: 30 | out_dir = args.out_dir 31 | 32 | print('Making directories...') 33 | mmcv.mkdir_or_exist(out_dir) 34 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 35 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 36 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 37 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 38 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 39 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 40 | 41 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 42 | print('Extracting CHASEDB1.zip...') 43 | zip_file = zipfile.ZipFile(dataset_path) 44 | zip_file.extractall(tmp_dir) 45 | 46 | print('Generating training dataset...') 47 | 48 | assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ 49 | 'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN) 50 | 51 | for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 52 | img = mmcv.imread(osp.join(tmp_dir, img_name)) 53 | if osp.splitext(img_name)[1] == '.jpg': 54 | mmcv.imwrite( 55 | img, 56 | osp.join(out_dir, 'images', 'training', 57 | osp.splitext(img_name)[0] + '.png')) 58 | else: 59 | # The annotation img should be divided by 128, because some of 60 | # the annotation imgs are not standard. We should set a 61 | # threshold to convert the nonstandard annotation imgs. The 62 | # value divided by 128 is equivalent to '1 if value >= 128 63 | # else 0' 64 | mmcv.imwrite( 65 | img[:, :, 0] // 128, 66 | osp.join(out_dir, 'annotations', 'training', 67 | osp.splitext(img_name)[0] + '.png')) 68 | 69 | for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 70 | img = mmcv.imread(osp.join(tmp_dir, img_name)) 71 | if osp.splitext(img_name)[1] == '.jpg': 72 | mmcv.imwrite( 73 | img, 74 | osp.join(out_dir, 'images', 'validation', 75 | osp.splitext(img_name)[0] + '.png')) 76 | else: 77 | mmcv.imwrite( 78 | img[:, :, 0] // 128, 79 | osp.join(out_dir, 'annotations', 'validation', 80 | osp.splitext(img_name)[0] + '.png')) 81 | 82 | print('Removing the temporary files...') 83 | 84 | print('Done!') 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
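# The `// 128` trick used by chase_db1.py above (and by drive.py, hrf.py and
# stare.py below) binarises nonstandard annotation images: for uint8 pixels,
# integer division by 128 is the same as thresholding at 128. A quick check:
import numpy as np

mask = np.array([0, 64, 127, 128, 200, 255], dtype=np.uint8)
assert (mask // 128 == (mask >= 128)).all()      # 1 iff value >= 128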
2 | import argparse 3 | import os.path as osp 4 | 5 | import mmcv 6 | from cityscapesscripts.preparation.json2labelImg import json2labelImg 7 | 8 | 9 | def convert_json_to_label(json_file): 10 | label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') 11 | json2labelImg(json_file, label_file, 'trainIds') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert Cityscapes annotations to TrainIds') 17 | parser.add_argument('cityscapes_path', help='cityscapes data path') 18 | parser.add_argument('--gt-dir', default='gtFine', type=str) 19 | parser.add_argument('-o', '--out-dir', help='output path') 20 | parser.add_argument( 21 | '--nproc', default=1, type=int, help='number of process') 22 | args = parser.parse_args() 23 | return args 24 | 25 | 26 | def main(): 27 | args = parse_args() 28 | cityscapes_path = args.cityscapes_path 29 | out_dir = args.out_dir if args.out_dir else cityscapes_path 30 | mmcv.mkdir_or_exist(out_dir) 31 | 32 | gt_dir = osp.join(cityscapes_path, args.gt_dir) 33 | 34 | poly_files = [] 35 | for poly in mmcv.scandir(gt_dir, '_polygons.json', recursive=True): 36 | poly_file = osp.join(gt_dir, poly) 37 | poly_files.append(poly_file) 38 | if args.nproc > 1: 39 | mmcv.track_parallel_progress(convert_json_to_label, poly_files, 40 | args.nproc) 41 | else: 42 | mmcv.track_progress(convert_json_to_label, poly_files) 43 | 44 | split_names = ['train', 'val', 'test'] 45 | 46 | for split in split_names: 47 | filenames = [] 48 | for poly in mmcv.scandir( 49 | osp.join(gt_dir, split), '_polygons.json', recursive=True): 50 | filenames.append(poly.replace('_gtFine_polygons.json', '')) 51 | with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: 52 | f.writelines(f + '\n' for f in filenames) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() 57 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/coco_stuff164k.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | import shutil 4 | from functools import partial 5 | from glob import glob 6 | 7 | import mmcv 8 | import numpy as np 9 | from PIL import Image 10 | 11 | COCO_LEN = 123287 12 | 13 | clsID_to_trID = { 14 | 0: 0, 15 | 1: 1, 16 | 2: 2, 17 | 3: 3, 18 | 4: 4, 19 | 5: 5, 20 | 6: 6, 21 | 7: 7, 22 | 8: 8, 23 | 9: 9, 24 | 10: 10, 25 | 12: 11, 26 | 13: 12, 27 | 14: 13, 28 | 15: 14, 29 | 16: 15, 30 | 17: 16, 31 | 18: 17, 32 | 19: 18, 33 | 20: 19, 34 | 21: 20, 35 | 22: 21, 36 | 23: 22, 37 | 24: 23, 38 | 26: 24, 39 | 27: 25, 40 | 30: 26, 41 | 31: 27, 42 | 32: 28, 43 | 33: 29, 44 | 34: 30, 45 | 35: 31, 46 | 36: 32, 47 | 37: 33, 48 | 38: 34, 49 | 39: 35, 50 | 40: 36, 51 | 41: 37, 52 | 42: 38, 53 | 43: 39, 54 | 45: 40, 55 | 46: 41, 56 | 47: 42, 57 | 48: 43, 58 | 49: 44, 59 | 50: 45, 60 | 51: 46, 61 | 52: 47, 62 | 53: 48, 63 | 54: 49, 64 | 55: 50, 65 | 56: 51, 66 | 57: 52, 67 | 58: 53, 68 | 59: 54, 69 | 60: 55, 70 | 61: 56, 71 | 62: 57, 72 | 63: 58, 73 | 64: 59, 74 | 66: 60, 75 | 69: 61, 76 | 71: 62, 77 | 72: 63, 78 | 73: 64, 79 | 74: 65, 80 | 75: 66, 81 | 76: 67, 82 | 77: 68, 83 | 78: 69, 84 | 79: 70, 85 | 80: 71, 86 | 81: 72, 87 | 83: 73, 88 | 84: 74, 89 | 85: 75, 90 | 86: 76, 91 | 87: 77, 92 | 88: 78, 93 | 89: 79, 94 | 91: 80, 95 | 92: 81, 96 | 93: 82, 97 | 94: 83, 98 | 95: 84, 99 | 96: 85, 100 | 97: 86, 101 | 98: 87, 102 | 99: 88, 103 | 100: 89, 104 | 101: 90, 105 | 102: 91, 106 | 103: 92, 107 | 104: 93, 108 | 105: 94, 109 | 106: 95, 110 | 
107: 96, 111 | 108: 97, 112 | 109: 98, 113 | 110: 99, 114 | 111: 100, 115 | 112: 101, 116 | 113: 102, 117 | 114: 103, 118 | 115: 104, 119 | 116: 105, 120 | 117: 106, 121 | 118: 107, 122 | 119: 108, 123 | 120: 109, 124 | 121: 110, 125 | 122: 111, 126 | 123: 112, 127 | 124: 113, 128 | 125: 114, 129 | 126: 115, 130 | 127: 116, 131 | 128: 117, 132 | 129: 118, 133 | 130: 119, 134 | 131: 120, 135 | 132: 121, 136 | 133: 122, 137 | 134: 123, 138 | 135: 124, 139 | 136: 125, 140 | 137: 126, 141 | 138: 127, 142 | 139: 128, 143 | 140: 129, 144 | 141: 130, 145 | 142: 131, 146 | 143: 132, 147 | 144: 133, 148 | 145: 134, 149 | 146: 135, 150 | 147: 136, 151 | 148: 137, 152 | 149: 138, 153 | 150: 139, 154 | 151: 140, 155 | 152: 141, 156 | 153: 142, 157 | 154: 143, 158 | 155: 144, 159 | 156: 145, 160 | 157: 146, 161 | 158: 147, 162 | 159: 148, 163 | 160: 149, 164 | 161: 150, 165 | 162: 151, 166 | 163: 152, 167 | 164: 153, 168 | 165: 154, 169 | 166: 155, 170 | 167: 156, 171 | 168: 157, 172 | 169: 158, 173 | 170: 159, 174 | 171: 160, 175 | 172: 161, 176 | 173: 162, 177 | 174: 163, 178 | 175: 164, 179 | 176: 165, 180 | 177: 166, 181 | 178: 167, 182 | 179: 168, 183 | 180: 169, 184 | 181: 170, 185 | 255: 255 186 | } 187 | 188 | 189 | def convert_to_trainID(maskpath, out_mask_dir, is_train): 190 | mask = np.array(Image.open(maskpath)) 191 | mask_copy = mask.copy() 192 | for clsID, trID in clsID_to_trID.items(): 193 | mask_copy[mask == clsID] = trID 194 | seg_filename = osp.join( 195 | out_mask_dir, 'train2017', 196 | osp.basename(maskpath).split('.')[0] + 197 | '_labelTrainIds.png') if is_train else osp.join( 198 | out_mask_dir, 'val2017', 199 | osp.basename(maskpath).split('.')[0] + '_labelTrainIds.png') 200 | Image.fromarray(mask_copy).save(seg_filename, 'PNG') 201 | 202 | 203 | def parse_args(): 204 | parser = argparse.ArgumentParser( 205 | description=\ 206 | 'Convert COCO Stuff 164k annotations to mmsegmentation format') # noqa 207 | parser.add_argument('coco_path', help='coco stuff path') 208 | parser.add_argument('-o', '--out_dir', help='output path') 209 | parser.add_argument( 210 | '--nproc', default=16, type=int, help='number of process') 211 | args = parser.parse_args() 212 | return args 213 | 214 | 215 | def main(): 216 | args = parse_args() 217 | coco_path = args.coco_path 218 | nproc = args.nproc 219 | 220 | out_dir = args.out_dir or coco_path 221 | out_img_dir = osp.join(out_dir, 'images') 222 | out_mask_dir = osp.join(out_dir, 'annotations') 223 | 224 | mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'train2017')) 225 | mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'val2017')) 226 | 227 | if out_dir != coco_path: 228 | shutil.copytree(osp.join(coco_path, 'images'), out_img_dir) 229 | 230 | train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png')) 231 | train_list = [file for file in train_list if '_labelTrainIds' not in file] 232 | test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png')) 233 | test_list = [file for file in test_list if '_labelTrainIds' not in file] 234 | assert (len(train_list) + 235 | len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format( 236 | len(train_list), len(test_list)) 237 | 238 | if args.nproc > 1: 239 | mmcv.track_parallel_progress( 240 | partial( 241 | convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), 242 | train_list, 243 | nproc=nproc) 244 | mmcv.track_parallel_progress( 245 | partial( 246 | convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), 247 | test_list, 248 | nproc=nproc) 249 | else: 250 | 
mmcv.track_progress( 251 | partial( 252 | convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), 253 | train_list) 254 | mmcv.track_progress( 255 | partial( 256 | convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), 257 | test_list) 258 | 259 | print('Done!') 260 | 261 | 262 | if __name__ == '__main__': 263 | main() 264 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/drive.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import cv2 9 | import mmcv 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | description='Convert DRIVE dataset to mmsegmentation format') 15 | parser.add_argument( 16 | 'training_path', help='the training part of DRIVE dataset') 17 | parser.add_argument( 18 | 'testing_path', help='the testing part of DRIVE dataset') 19 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 20 | parser.add_argument('-o', '--out_dir', help='output path') 21 | args = parser.parse_args() 22 | return args 23 | 24 | 25 | def main(): 26 | args = parse_args() 27 | training_path = args.training_path 28 | testing_path = args.testing_path 29 | if args.out_dir is None: 30 | out_dir = osp.join('data', 'DRIVE') 31 | else: 32 | out_dir = args.out_dir 33 | 34 | print('Making directories...') 35 | mmcv.mkdir_or_exist(out_dir) 36 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 37 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 38 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 39 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 40 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 41 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 42 | 43 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 44 | print('Extracting training.zip...') 45 | zip_file = zipfile.ZipFile(training_path) 46 | zip_file.extractall(tmp_dir) 47 | 48 | print('Generating training dataset...') 49 | now_dir = osp.join(tmp_dir, 'training', 'images') 50 | for img_name in os.listdir(now_dir): 51 | img = mmcv.imread(osp.join(now_dir, img_name)) 52 | mmcv.imwrite( 53 | img, 54 | osp.join( 55 | out_dir, 'images', 'training', 56 | osp.splitext(img_name)[0].replace('_training', '') + 57 | '.png')) 58 | 59 | now_dir = osp.join(tmp_dir, 'training', '1st_manual') 60 | for img_name in os.listdir(now_dir): 61 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 62 | ret, img = cap.read() 63 | mmcv.imwrite( 64 | img[:, :, 0] // 128, 65 | osp.join(out_dir, 'annotations', 'training', 66 | osp.splitext(img_name)[0] + '.png')) 67 | 68 | print('Extracting test.zip...') 69 | zip_file = zipfile.ZipFile(testing_path) 70 | zip_file.extractall(tmp_dir) 71 | 72 | print('Generating validation dataset...') 73 | now_dir = osp.join(tmp_dir, 'test', 'images') 74 | for img_name in os.listdir(now_dir): 75 | img = mmcv.imread(osp.join(now_dir, img_name)) 76 | mmcv.imwrite( 77 | img, 78 | osp.join( 79 | out_dir, 'images', 'validation', 80 | osp.splitext(img_name)[0].replace('_test', '') + '.png')) 81 | 82 | now_dir = osp.join(tmp_dir, 'test', '1st_manual') 83 | if osp.exists(now_dir): 84 | for img_name in os.listdir(now_dir): 85 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 86 | ret, img = cap.read() 87 | # The annotation img should be divided by 
128, because some of 88 | # the annotation imgs are not standard. We should set a 89 | # threshold to convert the nonstandard annotation imgs. The 90 | # value divided by 128 is equivalent to '1 if value >= 128 91 | # else 0' 92 | mmcv.imwrite( 93 | img[:, :, 0] // 128, 94 | osp.join(out_dir, 'annotations', 'validation', 95 | osp.splitext(img_name)[0] + '.png')) 96 | 97 | now_dir = osp.join(tmp_dir, 'test', '2nd_manual') 98 | if osp.exists(now_dir): 99 | for img_name in os.listdir(now_dir): 100 | cap = cv2.VideoCapture(osp.join(now_dir, img_name)) 101 | ret, img = cap.read() 102 | mmcv.imwrite( 103 | img[:, :, 0] // 128, 104 | osp.join(out_dir, 'annotations', 'validation', 105 | osp.splitext(img_name)[0] + '.png')) 106 | 107 | print('Removing the temporary files...') 108 | 109 | print('Done!') 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/hrf.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os 4 | import os.path as osp 5 | import tempfile 6 | import zipfile 7 | 8 | import mmcv 9 | 10 | HRF_LEN = 15 11 | TRAINING_LEN = 5 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser( 16 | description='Convert HRF dataset to mmsegmentation format') 17 | parser.add_argument('healthy_path', help='the path of healthy.zip') 18 | parser.add_argument( 19 | 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') 20 | parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') 21 | parser.add_argument( 22 | 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') 23 | parser.add_argument( 24 | 'diabetic_retinopathy_path', 25 | help='the path of diabetic_retinopathy.zip') 26 | parser.add_argument( 27 | 'diabetic_retinopathy_manualsegm_path', 28 | help='the path of diabetic_retinopathy_manualsegm.zip') 29 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 30 | parser.add_argument('-o', '--out_dir', help='output path') 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def main(): 36 | args = parse_args() 37 | images_path = [ 38 | args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path 39 | ] 40 | annotations_path = [ 41 | args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, 42 | args.diabetic_retinopathy_manualsegm_path 43 | ] 44 | if args.out_dir is None: 45 | out_dir = osp.join('data', 'HRF') 46 | else: 47 | out_dir = args.out_dir 48 | 49 | print('Making directories...') 50 | mmcv.mkdir_or_exist(out_dir) 51 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 52 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 53 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 54 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 55 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 56 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 57 | 58 | print('Generating images...') 59 | for now_path in images_path: 60 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 61 | zip_file = zipfile.ZipFile(now_path) 62 | zip_file.extractall(tmp_dir) 63 | 64 | assert len(os.listdir(tmp_dir)) == HRF_LEN, \ 65 | 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) 66 | 67 | for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 68 | img = mmcv.imread(osp.join(tmp_dir, filename)) 69 | 
mmcv.imwrite( 70 | img, 71 | osp.join(out_dir, 'images', 'training', 72 | osp.splitext(filename)[0] + '.png')) 73 | for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 74 | img = mmcv.imread(osp.join(tmp_dir, filename)) 75 | mmcv.imwrite( 76 | img, 77 | osp.join(out_dir, 'images', 'validation', 78 | osp.splitext(filename)[0] + '.png')) 79 | 80 | print('Generating annotations...') 81 | for now_path in annotations_path: 82 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 83 | zip_file = zipfile.ZipFile(now_path) 84 | zip_file.extractall(tmp_dir) 85 | 86 | assert len(os.listdir(tmp_dir)) == HRF_LEN, \ 87 | 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) 88 | 89 | for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: 90 | img = mmcv.imread(osp.join(tmp_dir, filename)) 91 | # The annotation img should be divided by 128, because some of 92 | # the annotation imgs are not standard. We should set a 93 | # threshold to convert the nonstandard annotation imgs. The 94 | # value divided by 128 is equivalent to '1 if value >= 128 95 | # else 0' 96 | mmcv.imwrite( 97 | img[:, :, 0] // 128, 98 | osp.join(out_dir, 'annotations', 'training', 99 | osp.splitext(filename)[0] + '.png')) 100 | for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: 101 | img = mmcv.imread(osp.join(tmp_dir, filename)) 102 | mmcv.imwrite( 103 | img[:, :, 0] // 128, 104 | osp.join(out_dir, 'annotations', 'validation', 105 | osp.splitext(filename)[0] + '.png')) 106 | 107 | print('Done!') 108 | 109 | 110 | if __name__ == '__main__': 111 | main() 112 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/pascal_context.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
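# chase_db1.py and hrf.py above (and stare.py below) all split a dataset the
# same way: sort the extracted file names and cut at a fixed TRAINING_LEN, so
# the training/validation split is deterministic across runs. A sketch with
# toy file names standing in for os.listdir() output:
names = sorted(['img_03.png', 'img_01.png', 'img_04.png', 'img_02.png'])
TRAINING_LEN = 3
train, val = names[:TRAINING_LEN], names[TRAINING_LEN:]
assert train == ['img_01.png', 'img_02.png', 'img_03.png']
assert val == ['img_04.png']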
2 | import argparse 3 | import os.path as osp 4 | from functools import partial 5 | 6 | import mmcv 7 | import numpy as np 8 | from detail import Detail 9 | from PIL import Image 10 | 11 | _mapping = np.sort( 12 | np.array([ 13 | 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284, 14 | 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59, 15 | 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355, 16 | 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115 17 | ])) 18 | _key = np.array(range(len(_mapping))).astype('uint8') 19 | 20 | 21 | def generate_labels(img_id, detail, out_dir): 22 | 23 | def _class_to_index(mask, _mapping, _key): 24 | # assert the values 25 | values = np.unique(mask) 26 | for i in range(len(values)): 27 | assert (values[i] in _mapping) 28 | index = np.digitize(mask.ravel(), _mapping, right=True) 29 | return _key[index].reshape(mask.shape) 30 | 31 | mask = Image.fromarray( 32 | _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key)) 33 | filename = img_id['file_name'] 34 | mask.save(osp.join(out_dir, filename.replace('jpg', 'png'))) 35 | return osp.splitext(osp.basename(filename))[0] 36 | 37 | 38 | def parse_args(): 39 | parser = argparse.ArgumentParser( 40 | description='Convert PASCAL VOC annotations to mmsegmentation format') 41 | parser.add_argument('devkit_path', help='pascal voc devkit path') 42 | parser.add_argument('json_path', help='annoation json filepath') 43 | parser.add_argument('-o', '--out_dir', help='output path') 44 | args = parser.parse_args() 45 | return args 46 | 47 | 48 | def main(): 49 | args = parse_args() 50 | devkit_path = args.devkit_path 51 | if args.out_dir is None: 52 | out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext') 53 | else: 54 | out_dir = args.out_dir 55 | json_path = args.json_path 56 | mmcv.mkdir_or_exist(out_dir) 57 | img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages') 58 | 59 | train_detail = Detail(json_path, img_dir, 'train') 60 | train_ids = train_detail.getImgs() 61 | 62 | val_detail = Detail(json_path, img_dir, 'val') 63 | val_ids = val_detail.getImgs() 64 | 65 | mmcv.mkdir_or_exist( 66 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')) 67 | 68 | train_list = mmcv.track_progress( 69 | partial(generate_labels, detail=train_detail, out_dir=out_dir), 70 | train_ids) 71 | with open( 72 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', 73 | 'train.txt'), 'w') as f: 74 | f.writelines(line + '\n' for line in sorted(train_list)) 75 | 76 | val_list = mmcv.track_progress( 77 | partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids) 78 | with open( 79 | osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', 80 | 'val.txt'), 'w') as f: 81 | f.writelines(line + '\n' for line in sorted(val_list)) 82 | 83 | print('Done!') 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/stare.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
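# A sketch of the np.digitize remapping used by _class_to_index in
# pascal_context.py above: sparse class ids (_mapping) are compressed to
# contiguous train ids (_key). Toy ids are used here instead of the full
# PASCAL-Context table.
import numpy as np

_mapping = np.sort(np.array([0, 2, 9, 18]))
_key = np.arange(len(_mapping)).astype('uint8')

mask = np.array([[0, 9], [18, 2]])
index = np.digitize(mask.ravel(), _mapping, right=True)   # bin of each id
remapped = _key[index].reshape(mask.shape)                # 0->0, 2->1, 9->2, 18->3
assert (remapped == np.array([[0, 2], [3, 1]])).all()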
2 | import argparse 3 | import gzip 4 | import os 5 | import os.path as osp 6 | import tarfile 7 | import tempfile 8 | 9 | import mmcv 10 | 11 | STARE_LEN = 20 12 | TRAINING_LEN = 10 13 | 14 | 15 | def un_gz(src, dst): 16 | g_file = gzip.GzipFile(src) 17 | with open(dst, 'wb+') as f: 18 | f.write(g_file.read()) 19 | g_file.close() 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser( 24 | description='Convert STARE dataset to mmsegmentation format') 25 | parser.add_argument('image_path', help='the path of stare-images.tar') 26 | parser.add_argument('labels_ah', help='the path of labels-ah.tar') 27 | parser.add_argument('labels_vk', help='the path of labels-vk.tar') 28 | parser.add_argument('--tmp_dir', help='path of the temporary directory') 29 | parser.add_argument('-o', '--out_dir', help='output path') 30 | args = parser.parse_args() 31 | return args 32 | 33 | 34 | def main(): 35 | args = parse_args() 36 | image_path = args.image_path 37 | labels_ah = args.labels_ah 38 | labels_vk = args.labels_vk 39 | if args.out_dir is None: 40 | out_dir = osp.join('data', 'STARE') 41 | else: 42 | out_dir = args.out_dir 43 | 44 | print('Making directories...') 45 | mmcv.mkdir_or_exist(out_dir) 46 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) 47 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) 48 | mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) 49 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) 50 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) 51 | mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) 52 | 53 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 54 | mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) 55 | mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) 56 | 57 | print('Extracting stare-images.tar...') 58 | with tarfile.open(image_path) as f: 59 | f.extractall(osp.join(tmp_dir, 'gz')) 60 | 61 | for filename in os.listdir(osp.join(tmp_dir, 'gz')): 62 | un_gz( 63 | osp.join(tmp_dir, 'gz', filename), 64 | osp.join(tmp_dir, 'files', 65 | osp.splitext(filename)[0])) 66 | 67 | now_dir = osp.join(tmp_dir, 'files') 68 | 69 | assert len(os.listdir(now_dir)) == STARE_LEN, \ 70 | 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) 71 | 72 | for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: 73 | img = mmcv.imread(osp.join(now_dir, filename)) 74 | mmcv.imwrite( 75 | img, 76 | osp.join(out_dir, 'images', 'training', 77 | osp.splitext(filename)[0] + '.png')) 78 | 79 | for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: 80 | img = mmcv.imread(osp.join(now_dir, filename)) 81 | mmcv.imwrite( 82 | img, 83 | osp.join(out_dir, 'images', 'validation', 84 | osp.splitext(filename)[0] + '.png')) 85 | 86 | print('Removing the temporary files...') 87 | 88 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 89 | mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) 90 | mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) 91 | 92 | print('Extracting labels-ah.tar...') 93 | with tarfile.open(labels_ah) as f: 94 | f.extractall(osp.join(tmp_dir, 'gz')) 95 | 96 | for filename in os.listdir(osp.join(tmp_dir, 'gz')): 97 | un_gz( 98 | osp.join(tmp_dir, 'gz', filename), 99 | osp.join(tmp_dir, 'files', 100 | osp.splitext(filename)[0])) 101 | 102 | now_dir = osp.join(tmp_dir, 'files') 103 | 104 | assert len(os.listdir(now_dir)) == STARE_LEN, \ 105 | 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) 106 | 107 | for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: 108 | img = 
mmcv.imread(osp.join(now_dir, filename)) 109 | # The annotation img should be divided by 128, because some of 110 | # the annotation imgs are not standard. We should set a threshold 111 | # to convert the nonstandard annotation imgs. The value divided by 112 | # 128 equivalent to '1 if value >= 128 else 0' 113 | mmcv.imwrite( 114 | img[:, :, 0] // 128, 115 | osp.join(out_dir, 'annotations', 'training', 116 | osp.splitext(filename)[0] + '.png')) 117 | 118 | for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: 119 | img = mmcv.imread(osp.join(now_dir, filename)) 120 | mmcv.imwrite( 121 | img[:, :, 0] // 128, 122 | osp.join(out_dir, 'annotations', 'validation', 123 | osp.splitext(filename)[0] + '.png')) 124 | 125 | print('Removing the temporary files...') 126 | 127 | with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: 128 | mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) 129 | mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) 130 | 131 | print('Extracting labels-vk.tar...') 132 | with tarfile.open(labels_vk) as f: 133 | f.extractall(osp.join(tmp_dir, 'gz')) 134 | 135 | for filename in os.listdir(osp.join(tmp_dir, 'gz')): 136 | un_gz( 137 | osp.join(tmp_dir, 'gz', filename), 138 | osp.join(tmp_dir, 'files', 139 | osp.splitext(filename)[0])) 140 | 141 | now_dir = osp.join(tmp_dir, 'files') 142 | 143 | assert len(os.listdir(now_dir)) == STARE_LEN, \ 144 | 'len(os.listdir(now_dir)) != {}'.format(STARE_LEN) 145 | 146 | for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: 147 | img = mmcv.imread(osp.join(now_dir, filename)) 148 | mmcv.imwrite( 149 | img[:, :, 0] // 128, 150 | osp.join(out_dir, 'annotations', 'training', 151 | osp.splitext(filename)[0] + '.png')) 152 | 153 | for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: 154 | img = mmcv.imread(osp.join(now_dir, filename)) 155 | mmcv.imwrite( 156 | img[:, :, 0] // 128, 157 | osp.join(out_dir, 'annotations', 'validation', 158 | osp.splitext(filename)[0] + '.png')) 159 | 160 | print('Removing the temporary files...') 161 | 162 | print('Done!') 163 | 164 | 165 | if __name__ == '__main__': 166 | main() 167 | -------------------------------------------------------------------------------- /segmentation/tools/convert_datasets/voc_aug.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
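# stare.py above extracts in two stages: untar everything into a 'gz' folder,
# then gunzip each member into 'files'. A sketch of the un_gz helper rewritten
# with context managers (behaviourally equivalent to the GzipFile version above):
import gzip
import shutil

def un_gz(src, dst):
    with gzip.open(src, 'rb') as f_in, open(dst, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)      # stream-decompress src into dst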
2 | import argparse 3 | import os.path as osp 4 | from functools import partial 5 | 6 | import mmcv 7 | import numpy as np 8 | from PIL import Image 9 | from scipy.io import loadmat 10 | 11 | AUG_LEN = 10582 12 | 13 | 14 | def convert_mat(mat_file, in_dir, out_dir): 15 | data = loadmat(osp.join(in_dir, mat_file)) 16 | mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8) 17 | seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png')) 18 | Image.fromarray(mask).save(seg_filename, 'PNG') 19 | 20 | 21 | def generate_aug_list(merged_list, excluded_list): 22 | return list(set(merged_list) - set(excluded_list)) 23 | 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser( 27 | description='Convert PASCAL VOC annotations to mmsegmentation format') 28 | parser.add_argument('devkit_path', help='pascal voc devkit path') 29 | parser.add_argument('aug_path', help='pascal voc aug path') 30 | parser.add_argument('-o', '--out_dir', help='output path') 31 | parser.add_argument( 32 | '--nproc', default=1, type=int, help='number of process') 33 | args = parser.parse_args() 34 | return args 35 | 36 | 37 | def main(): 38 | args = parse_args() 39 | devkit_path = args.devkit_path 40 | aug_path = args.aug_path 41 | nproc = args.nproc 42 | if args.out_dir is None: 43 | out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug') 44 | else: 45 | out_dir = args.out_dir 46 | mmcv.mkdir_or_exist(out_dir) 47 | in_dir = osp.join(aug_path, 'dataset', 'cls') 48 | 49 | mmcv.track_parallel_progress( 50 | partial(convert_mat, in_dir=in_dir, out_dir=out_dir), 51 | list(mmcv.scandir(in_dir, suffix='.mat')), 52 | nproc=nproc) 53 | 54 | full_aug_list = [] 55 | with open(osp.join(aug_path, 'dataset', 'train.txt')) as f: 56 | full_aug_list += [line.strip() for line in f] 57 | with open(osp.join(aug_path, 'dataset', 'val.txt')) as f: 58 | full_aug_list += [line.strip() for line in f] 59 | 60 | with open( 61 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 62 | 'train.txt')) as f: 63 | ori_train_list = [line.strip() for line in f] 64 | with open( 65 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 66 | 'val.txt')) as f: 67 | val_list = [line.strip() for line in f] 68 | 69 | aug_train_list = generate_aug_list(ori_train_list + full_aug_list, 70 | val_list) 71 | assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format( 72 | AUG_LEN) 73 | 74 | with open( 75 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 76 | 'trainaug.txt'), 'w') as f: 77 | f.writelines(line + '\n' for line in aug_train_list) 78 | 79 | aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list) 80 | assert len(aug_list) == AUG_LEN - len( 81 | ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN - 82 | len(ori_train_list)) 83 | with open( 84 | osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'), 85 | 'w') as f: 86 | f.writelines(line + '\n' for line in aug_list) 87 | 88 | print('Done!') 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /segmentation/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 10 | 
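dist_test.sh above and dist_train.sh below start one process per GPU through torch.distributed.launch, which hands every worker a --local_rank argument; entry points such as segmentation/test.py earlier in this repo mirror that flag into the LOCAL_RANK environment variable. A minimal standalone sketch of that handshake (not part of the repo's scripts):

import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('--local_rank', type=int, default=0)
args, _ = parser.parse_known_args()
if 'LOCAL_RANK' not in os.environ:       # some launchers pass only the flag
    os.environ['LOCAL_RANK'] = str(args.local_rank)
print(f"local rank: {os.environ['LOCAL_RANK']}")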
-------------------------------------------------------------------------------- /segmentation/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/mit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_mit(ckpt): 12 | new_ckpt = OrderedDict() 13 | # Process the concat between q linear weights and kv linear weights 14 | for k, v in ckpt.items(): 15 | if k.startswith('head'): 16 | continue 17 | # patch embedding conversion 18 | elif k.startswith('patch_embed'): 19 | stage_i = int(k.split('.')[0].replace('patch_embed', '')) 20 | new_k = k.replace(f'patch_embed{stage_i}', f'layers.{stage_i-1}.0') 21 | new_v = v 22 | if 'proj.' in new_k: 23 | new_k = new_k.replace('proj.', 'projection.') 24 | # transformer encoder layer conversion 25 | elif k.startswith('block'): 26 | stage_i = int(k.split('.')[0].replace('block', '')) 27 | new_k = k.replace(f'block{stage_i}', f'layers.{stage_i-1}.1') 28 | new_v = v 29 | if 'attn.q.' in new_k: 30 | sub_item_k = k.replace('q.', 'kv.') 31 | new_k = new_k.replace('q.', 'attn.in_proj_') 32 | new_v = torch.cat([v, ckpt[sub_item_k]], dim=0) 33 | elif 'attn.kv.' in new_k: 34 | continue 35 | elif 'attn.proj.' in new_k: 36 | new_k = new_k.replace('proj.', 'attn.out_proj.') 37 | elif 'attn.sr.' in new_k: 38 | new_k = new_k.replace('sr.', 'sr.') 39 | elif 'mlp.' in new_k: 40 | string = f'{new_k}-' 41 | new_k = new_k.replace('mlp.', 'ffn.layers.') 42 | if 'fc1.weight' in new_k or 'fc2.weight' in new_k: 43 | new_v = v.reshape((*v.shape, 1, 1)) 44 | new_k = new_k.replace('fc1.', '0.') 45 | new_k = new_k.replace('dwconv.dwconv.', '1.') 46 | new_k = new_k.replace('fc2.', '4.') 47 | string += f'{new_k} {v.shape}-{new_v.shape}' 48 | # norm layer conversion 49 | elif k.startswith('norm'): 50 | stage_i = int(k.split('.')[0].replace('norm', '')) 51 | new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i-1}.2') 52 | new_v = v 53 | else: 54 | new_k = k 55 | new_v = v 56 | new_ckpt[new_k] = new_v 57 | return new_ckpt 58 | 59 | 60 | def main(): 61 | parser = argparse.ArgumentParser( 62 | description='Convert keys in official pretrained segformer to ' 63 | 'MMSegmentation style.') 64 | parser.add_argument('src', help='src model path or url') 65 | # The dst path must be a full path of the new checkpoint. 
66 | parser.add_argument('dst', help='save path') 67 | args = parser.parse_args() 68 | 69 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 70 | if 'state_dict' in checkpoint: 71 | state_dict = checkpoint['state_dict'] 72 | elif 'model' in checkpoint: 73 | state_dict = checkpoint['model'] 74 | else: 75 | state_dict = checkpoint 76 | weight = convert_mit(state_dict) 77 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 78 | torch.save(weight, args.dst) 79 | 80 | 81 | if __name__ == '__main__': 82 | main() 83 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/swin2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_swin(ckpt): 12 | new_ckpt = OrderedDict() 13 | 14 | def correct_unfold_reduction_order(x): 15 | out_channel, in_channel = x.shape 16 | x = x.reshape(out_channel, 4, in_channel // 4) 17 | x = x[:, [0, 2, 1, 3], :].transpose(1, 18 | 2).reshape(out_channel, in_channel) 19 | return x 20 | 21 | def correct_unfold_norm_order(x): 22 | in_channel = x.shape[0] 23 | x = x.reshape(4, in_channel // 4) 24 | x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel) 25 | return x 26 | 27 | for k, v in ckpt.items(): 28 | if k.startswith('head'): 29 | continue 30 | elif k.startswith('layers'): 31 | new_v = v 32 | if 'attn.' in k: 33 | new_k = k.replace('attn.', 'attn.w_msa.') 34 | elif 'mlp.' in k: 35 | if 'mlp.fc1.' in k: 36 | new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.') 37 | elif 'mlp.fc2.' in k: 38 | new_k = k.replace('mlp.fc2.', 'ffn.layers.1.') 39 | else: 40 | new_k = k.replace('mlp.', 'ffn.') 41 | elif 'downsample' in k: 42 | new_k = k 43 | if 'reduction.' in k: 44 | new_v = correct_unfold_reduction_order(v) 45 | elif 'norm.' in k: 46 | new_v = correct_unfold_norm_order(v) 47 | else: 48 | new_k = k 49 | new_k = new_k.replace('layers', 'stages', 1) 50 | elif k.startswith('patch_embed'): 51 | new_v = v 52 | if 'proj' in k: 53 | new_k = k.replace('proj', 'projection') 54 | else: 55 | new_k = k 56 | else: 57 | new_v = v 58 | new_k = k 59 | 60 | new_ckpt[new_k] = new_v 61 | 62 | return new_ckpt 63 | 64 | 65 | def main(): 66 | parser = argparse.ArgumentParser( 67 | description='Convert keys in official pretrained swin models to' 68 | 'MMSegmentation style.') 69 | parser.add_argument('src', help='src model path or url') 70 | # The dst path must be a full path of the new checkpoint. 71 | parser.add_argument('dst', help='save path') 72 | args = parser.parse_args() 73 | 74 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 75 | if 'state_dict' in checkpoint: 76 | state_dict = checkpoint['state_dict'] 77 | elif 'model' in checkpoint: 78 | state_dict = checkpoint['model'] 79 | else: 80 | state_dict = checkpoint 81 | weight = convert_swin(state_dict) 82 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 83 | torch.save(weight, args.dst) 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /segmentation/tools/model_converters/vit2mmseg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
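# A sketch of correct_unfold_reduction_order from swin2mmseg.py above: the
# official Swin patch-merging weights concatenate the four 2x2 sub-patches in
# a different order than MMSegmentation's unfold, so the input columns of the
# reduction weight are regrouped as [0, 2, 1, 3]. Toy 2x4 weight for clarity:
import torch

def correct_unfold_reduction_order(x):
    out_channel, in_channel = x.shape
    x = x.reshape(out_channel, 4, in_channel // 4)
    return x[:, [0, 2, 1, 3], :].transpose(1, 2).reshape(out_channel, in_channel)

w = torch.arange(8.0).reshape(2, 4)
assert correct_unfold_reduction_order(w).tolist() == [[0.0, 2.0, 1.0, 3.0],
                                                      [4.0, 6.0, 5.0, 7.0]]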
2 | import argparse 3 | import os.path as osp 4 | from collections import OrderedDict 5 | 6 | import mmcv 7 | import torch 8 | from mmcv.runner import CheckpointLoader 9 | 10 | 11 | def convert_vit(ckpt): 12 | 13 | new_ckpt = OrderedDict() 14 | 15 | for k, v in ckpt.items(): 16 | if k.startswith('head'): 17 | continue 18 | if k.startswith('norm'): 19 | new_k = k.replace('norm.', 'ln1.') 20 | elif k.startswith('patch_embed'): 21 | if 'proj' in k: 22 | new_k = k.replace('proj', 'projection') 23 | else: 24 | new_k = k 25 | elif k.startswith('blocks'): 26 | if 'norm' in k: 27 | new_k = k.replace('norm', 'ln') 28 | elif 'mlp.fc1' in k: 29 | new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') 30 | elif 'mlp.fc2' in k: 31 | new_k = k.replace('mlp.fc2', 'ffn.layers.1') 32 | elif 'attn.qkv' in k: 33 | new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') 34 | elif 'attn.proj' in k: 35 | new_k = k.replace('attn.proj', 'attn.attn.out_proj') 36 | else: 37 | new_k = k 38 | new_k = new_k.replace('blocks.', 'layers.') 39 | else: 40 | new_k = k 41 | new_ckpt[new_k] = v 42 | 43 | return new_ckpt 44 | 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser( 48 | description='Convert keys in timm pretrained vit models to ' 49 | 'MMSegmentation style.') 50 | parser.add_argument('src', help='src model path or url') 51 | # The dst path must be a full path of the new checkpoint. 52 | parser.add_argument('dst', help='save path') 53 | args = parser.parse_args() 54 | 55 | checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') 56 | if 'state_dict' in checkpoint: 57 | # timm checkpoint 58 | state_dict = checkpoint['state_dict'] 59 | elif 'model' in checkpoint: 60 | # deit checkpoint 61 | state_dict = checkpoint['model'] 62 | else: 63 | state_dict = checkpoint 64 | weight = convert_vit(state_dict) 65 | mmcv.mkdir_or_exist(osp.dirname(args.dst)) 66 | torch.save(weight, args.dst) 67 | 68 | 69 | if __name__ == '__main__': 70 | main() 71 | -------------------------------------------------------------------------------- /segmentation/tools/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | from mmcv import Config, DictAction 5 | 6 | from mmseg.apis import init_segmentor 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Print the whole config') 11 | parser.add_argument('config', help='config file path') 12 | parser.add_argument( 13 | '--graph', action='store_true', help='print the models graph') 14 | parser.add_argument( 15 | '--options', nargs='+', action=DictAction, help='arguments in dict') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | cfg = Config.fromfile(args.config) 25 | if args.options is not None: 26 | cfg.merge_from_dict(args.options) 27 | print(f'Config:\n{cfg.pretty_text}') 28 | # dump config 29 | cfg.dump('example.py') 30 | # dump models graph 31 | if args.graph: 32 | model = init_segmentor(args.config, device='cpu') 33 | print(f'Model graph:\n{str(model)}') 34 | with open('example-graph.txt', 'w') as f: 35 | f.writelines(str(model)) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /segmentation/tools/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
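# mit2mmseg.py, swin2mmseg.py and vit2mmseg.py above all follow one pattern:
# load a checkpoint, unwrap its state dict (under a 'state_dict' or 'model'
# key), rename the keys, drop the classification head, save. A condensed
# sketch with a hypothetical rename rule:
from collections import OrderedDict

import torch

def convert(state_dict, rename):
    new_ckpt = OrderedDict()
    for k, v in state_dict.items():
        if k.startswith('head'):             # the head is not used by mmseg
            continue
        new_ckpt[rename(k)] = v
    return new_ckpt

ckpt = {'patch_embed.proj.weight': torch.zeros(1), 'head.weight': torch.zeros(1)}
out = convert(ckpt, lambda k: k.replace('proj', 'projection'))
assert list(out) == ['patch_embed.projection.weight']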
2 | import argparse 3 | import subprocess 4 | 5 | import torch 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='Process a checkpoint to be published') 11 | parser.add_argument('in_file', help='input checkpoint filename') 12 | parser.add_argument('out_file', help='output checkpoint filename') 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | def process_checkpoint(in_file, out_file): 18 | checkpoint = torch.load(in_file, map_location='cpu') 19 | # remove optimizer for smaller file size 20 | if 'optimizer' in checkpoint: 21 | del checkpoint['optimizer'] 22 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 23 | # add the code here. 24 | torch.save(checkpoint, out_file) 25 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 26 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 27 | subprocess.Popen(['mv', out_file, final_file]) 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | process_checkpoint(args.in_file, args.out_file) 33 | 34 | 35 | if __name__ == '__main__': 36 | main() 37 | -------------------------------------------------------------------------------- /segmentation/tools/pytorch2torchscript.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | 4 | import mmcv 5 | import numpy as np 6 | import torch 7 | import torch._C 8 | import torch.serialization 9 | from mmcv.runner import load_checkpoint 10 | from torch import nn 11 | 12 | from mmseg.models import build_segmentor 13 | 14 | torch.manual_seed(3) 15 | 16 | 17 | def digit_version(version_str): 18 | digit_version = [] 19 | for x in version_str.split('.'): 20 | if x.isdigit(): 21 | digit_version.append(int(x)) 22 | elif x.find('rc') != -1: 23 | patch_version = x.split('rc') 24 | digit_version.append(int(patch_version[0]) - 1) 25 | digit_version.append(int(patch_version[1])) 26 | return digit_version 27 | 28 | 29 | def check_torch_version(): 30 | torch_minimum_version = '1.8.0' 31 | torch_version = digit_version(torch.__version__) 32 | 33 | assert (torch_version >= digit_version(torch_minimum_version)), \ 34 | f'Torch=={torch.__version__} is not support for converting to ' \ 35 | f'torchscript. Please install pytorch>={torch_minimum_version}.' 36 | 37 | 38 | def _convert_batchnorm(module): 39 | module_output = module 40 | if isinstance(module, torch.nn.SyncBatchNorm): 41 | module_output = torch.nn.BatchNorm2d(module.num_features, module.eps, 42 | module.momentum, module.affine, 43 | module.track_running_stats) 44 | if module.affine: 45 | module_output.weight.data = module.weight.data.clone().detach() 46 | module_output.bias.data = module.bias.data.clone().detach() 47 | # keep requires_grad unchanged 48 | module_output.weight.requires_grad = module.weight.requires_grad 49 | module_output.bias.requires_grad = module.bias.requires_grad 50 | module_output.running_mean = module.running_mean 51 | module_output.running_var = module.running_var 52 | module_output.num_batches_tracked = module.num_batches_tracked 53 | for name, child in module.named_children(): 54 | module_output.add_module(name, _convert_batchnorm(child)) 55 | del module 56 | return module_output 57 | 58 | 59 | def _demo_mm_inputs(input_shape, num_classes): 60 | """Create a superset of inputs needed to run test or train batches. 
61 | 
62 |     Args:
63 |         input_shape (tuple):
64 |             input batch dimensions
65 |         num_classes (int):
66 |             number of semantic classes
67 |     """
68 |     (N, C, H, W) = input_shape
69 |     rng = np.random.RandomState(0)
70 |     imgs = rng.rand(*input_shape)
71 |     segs = rng.randint(
72 |         low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8)
73 |     img_metas = [{
74 |         'img_shape': (H, W, C),
75 |         'ori_shape': (H, W, C),
76 |         'pad_shape': (H, W, C),
77 |         'filename': '.png',
78 |         'scale_factor': 1.0,
79 |         'flip': False,
80 |     } for _ in range(N)]
81 |     mm_inputs = {
82 |         'imgs': torch.FloatTensor(imgs).requires_grad_(True),
83 |         'img_metas': img_metas,
84 |         'gt_semantic_seg': torch.LongTensor(segs)
85 |     }
86 |     return mm_inputs
87 | 
88 | 
89 | def pytorch2libtorch(model,
90 |                      input_shape,
91 |                      show=False,
92 |                      output_file='tmp.pt',
93 |                      verify=False):
94 |     """Export a PyTorch model to a TorchScript model and verify that the
95 |     outputs are the same between PyTorch and TorchScript.
96 | 
97 |     Args:
98 |         model (nn.Module): PyTorch model we want to export.
99 |         input_shape (tuple): Use this input shape to construct
100 |             the corresponding dummy input and execute the model.
101 |         show (bool): Whether to print the computation graph. Default: False.
102 |         output_file (string): The path to where we store the
103 |             output TorchScript model. Default: `tmp.pt`.
104 |         verify (bool): Whether to compare the outputs between
105 |             PyTorch and TorchScript. Default: False.
106 |     """
107 |     if isinstance(model.decode_head, nn.ModuleList):
108 |         num_classes = model.decode_head[-1].num_classes
109 |     else:
110 |         num_classes = model.decode_head.num_classes
111 | 
112 |     mm_inputs = _demo_mm_inputs(input_shape, num_classes)
113 | 
114 |     imgs = mm_inputs.pop('imgs')
115 | 
116 |     # replace the original forward with forward_dummy
117 |     model.forward = model.forward_dummy
118 |     model.eval()
119 |     traced_model = torch.jit.trace(
120 |         model,
121 |         example_inputs=imgs,
122 |         check_trace=verify,
123 |     )
124 | 
125 |     if show:
126 |         print(traced_model.graph)
127 | 
128 |     traced_model.save(output_file)
129 |     print('Successfully exported TorchScript model: {}'.format(output_file))
130 | 
131 | 
132 | def parse_args():
133 |     parser = argparse.ArgumentParser(
134 |         description='Convert MMSeg to TorchScript')
135 |     parser.add_argument('config', help='test config file path')
136 |     parser.add_argument('--checkpoint', help='checkpoint file', default=None)
137 |     parser.add_argument(
138 |         '--show', action='store_true', help='show TorchScript graph')
139 |     parser.add_argument(
140 |         '--verify', action='store_true', help='verify the TorchScript model')
141 |     parser.add_argument('--output-file', type=str, default='tmp.pt')
142 |     parser.add_argument(
143 |         '--shape',
144 |         type=int,
145 |         nargs='+',
146 |         default=[512, 512],
147 |         help='input image size (height, width)')
148 |     args = parser.parse_args()
149 |     return args
150 | 
151 | 
152 | if __name__ == '__main__':
153 |     args = parse_args()
154 |     check_torch_version()
155 | 
156 |     if len(args.shape) == 1:
157 |         input_shape = (1, 3, args.shape[0], args.shape[0])
158 |     elif len(args.shape) == 2:
159 |         input_shape = (
160 |             1,
161 |             3,
162 |         ) + tuple(args.shape)
163 |     else:
164 |         raise ValueError('invalid input shape')
165 | 
166 |     cfg = mmcv.Config.fromfile(args.config)
167 |     cfg.model.pretrained = None
168 | 
169 |     # build the model and load checkpoint
170 |     cfg.model.train_cfg = None
171 |     segmentor = build_segmentor(
172 |         cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg'))
173 |     # convert SyncBN to BN
174 |     segmentor = _convert_batchnorm(segmentor)
175 | 
176 |     if args.checkpoint:
177 |         load_checkpoint(segmentor, args.checkpoint, map_location='cpu')
178 | 
179 |     # convert the PyTorch model to LibTorch model
180 |     pytorch2libtorch(
181 |         segmentor,
182 |         input_shape,
183 |         show=args.show,
184 |         output_file=args.output_file,
185 |         verify=args.verify)
186 | 
--------------------------------------------------------------------------------
/segmentation/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-4}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-4}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 | 
--------------------------------------------------------------------------------
/segmentation/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | GPUS=${GPUS:-4}
9 | GPUS_PER_NODE=${GPUS_PER_NODE:-4}
10 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
11 | SRUN_ARGS=${SRUN_ARGS:-""}
12 | PY_ARGS=${@:4}
13 | 
14 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS}
24 | 
--------------------------------------------------------------------------------
/segmentation/tools/torchserve/mmseg2torchserve.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from argparse import ArgumentParser, Namespace
3 | from pathlib import Path
4 | from tempfile import TemporaryDirectory
5 | 
6 | import mmcv
7 | 
8 | try:
9 |     from model_archiver.model_packaging import package_model
10 |     from model_archiver.model_packaging_utils import ModelExportUtils
11 | except ImportError:
12 |     package_model = None
13 | 
14 | 
15 | def mmseg2torchserve(
16 |     config_file: str,
17 |     checkpoint_file: str,
18 |     output_folder: str,
19 |     model_name: str,
20 |     model_version: str = '1.0',
21 |     force: bool = False,
22 | ):
23 |     """Converts mmsegmentation model (config + checkpoint) to TorchServe
24 |     `.mar`.
25 | 
26 |     Args:
27 |         config_file:
28 |             In MMSegmentation config format.
29 |             The contents vary for each task repository.
30 |         checkpoint_file:
31 |             In MMSegmentation checkpoint format.
32 |             The contents vary for each task repository.
33 |         output_folder:
34 |             Folder where `{model_name}.mar` will be created.
35 |             The file created will be in TorchServe archive format.
36 |         model_name:
37 |             If not None, used for naming the `{model_name}.mar` file
38 |             that will be created under `output_folder`.
39 |             If None, `{Path(checkpoint_file).stem}` will be used.
40 |         model_version:
41 |             Model's version.
42 |         force:
43 |             If True, if there is an existing `{model_name}.mar`
44 |             file under `output_folder` it will be overwritten.
45 |     """
46 |     mmcv.mkdir_or_exist(output_folder)
47 | 
48 |     config = mmcv.Config.fromfile(config_file)
49 | 
50 |     with TemporaryDirectory() as tmpdir:
51 |         config.dump(f'{tmpdir}/config.py')
52 | 
53 |         args = Namespace(
54 |             **{
55 |                 'model_file': f'{tmpdir}/config.py',
56 |                 'serialized_file': checkpoint_file,
57 |                 'handler': f'{Path(__file__).parent}/mmseg_handler.py',
58 |                 'model_name': model_name or Path(checkpoint_file).stem,
59 |                 'version': model_version,
60 |                 'export_path': output_folder,
61 |                 'force': force,
62 |                 'requirements_file': None,
63 |                 'extra_files': None,
64 |                 'runtime': 'python',
65 |                 'archive_format': 'default'
66 |             })
67 |         manifest = ModelExportUtils.generate_manifest_json(args)
68 |         package_model(args, manifest)
69 | 
70 | 
71 | def parse_args():
72 |     parser = ArgumentParser(
73 |         description='Convert mmseg models to TorchServe `.mar` format.')
74 |     parser.add_argument('config', type=str, help='config file path')
75 |     parser.add_argument('checkpoint', type=str, help='checkpoint file path')
76 |     parser.add_argument(
77 |         '--output-folder',
78 |         type=str,
79 |         required=True,
80 |         help='Folder where `{model_name}.mar` will be created.')
81 |     parser.add_argument(
82 |         '--model-name',
83 |         type=str,
84 |         default=None,
85 |         help='If not None, used for naming the `{model_name}.mar` '
86 |         'file that will be created under `output_folder`. '
87 |         'If None, `{Path(checkpoint_file).stem}` will be used.')
88 |     parser.add_argument(
89 |         '--model-version',
90 |         type=str,
91 |         default='1.0',
92 |         help='Number used for versioning.')
93 |     parser.add_argument(
94 |         '-f',
95 |         '--force',
96 |         action='store_true',
97 |         help='overwrite the existing `{model_name}.mar`')
98 |     args = parser.parse_args()
99 | 
100 |     return args
101 | 
102 | 
103 | if __name__ == '__main__':
104 |     args = parse_args()
105 | 
106 |     if package_model is None:
107 |         raise ImportError('`torch-model-archiver` is required. '
108 |                           'Try: pip install torch-model-archiver')
109 | 
110 |     mmseg2torchserve(args.config, args.checkpoint, args.output_folder,
111 |                      args.model_name, args.model_version, args.force)
112 | 
--------------------------------------------------------------------------------
/segmentation/tools/torchserve/mmseg_handler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import base64
3 | import os
4 | 
5 | import cv2
6 | import mmcv
7 | import torch
8 | from mmcv.cnn.utils.sync_bn import revert_sync_batchnorm
9 | from ts.torch_handler.base_handler import BaseHandler
10 | 
11 | from mmseg.apis import inference_segmentor, init_segmentor
12 | 
13 | 
14 | class MMsegHandler(BaseHandler):
15 | 
16 |     def initialize(self, context):
17 |         properties = context.system_properties
18 |         self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu'
19 |         self.device = torch.device(self.map_location + ':' +
20 |                                    str(properties.get('gpu_id')) if torch.cuda.
21 |                                    is_available() else self.map_location)
22 |         self.manifest = context.manifest
23 | 
24 |         model_dir = properties.get('model_dir')
25 |         serialized_file = self.manifest['model']['serializedFile']
26 |         checkpoint = os.path.join(model_dir, serialized_file)
27 |         self.config_file = os.path.join(model_dir, 'config.py')
28 | 
29 |         self.model = init_segmentor(self.config_file, checkpoint, self.device)
30 |         self.model = revert_sync_batchnorm(self.model)
31 |         self.initialized = True
32 | 
33 |     def preprocess(self, data):
34 |         images = []
35 | 
36 |         for row in data:
37 |             image = row.get('data') or row.get('body')
38 |             if isinstance(image, str):
39 |                 image = base64.b64decode(image)
40 |             image = mmcv.imfrombytes(image)
41 |             images.append(image)
42 | 
43 |         return images
44 | 
45 |     def inference(self, data, *args, **kwargs):
46 |         results = [inference_segmentor(self.model, img) for img in data]
47 |         return results
48 | 
49 |     def postprocess(self, data):
50 |         output = []
51 | 
52 |         for image_result in data:
53 |             _, buffer = cv2.imencode('.png', image_result[0].astype('uint8'))
54 |             content = buffer.tobytes()
55 |             output.append(content)
56 |         return output
57 | 
--------------------------------------------------------------------------------
/segmentation/tools/torchserve/test_torchserve.py:
--------------------------------------------------------------------------------
1 | from argparse import ArgumentParser
2 | from io import BytesIO
3 | 
4 | import matplotlib.pyplot as plt
5 | import mmcv
6 | import requests
7 | 
8 | from mmseg.apis import inference_segmentor, init_segmentor
9 | 
10 | 
11 | def parse_args():
12 |     parser = ArgumentParser(
13 |         description='Compare the results of torchserve and pytorch, '
14 |         'and visualize them.')
15 |     parser.add_argument('img', help='Image file')
16 |     parser.add_argument('config', help='Config file')
17 |     parser.add_argument('checkpoint', help='Checkpoint file')
18 |     parser.add_argument('model_name', help='The model name in the server')
19 |     parser.add_argument(
20 |         '--inference-addr',
21 |         default='127.0.0.1:8080',
22 |         help='Address and port of the inference server')
23 |     parser.add_argument(
24 |         '--result-image',
25 |         type=str,
26 |         default=None,
27 |         help='save server output in result-image')
28 |     parser.add_argument(
29 |         '--device', default='cuda:0', help='Device used for inference')
30 | 
31 |     args = parser.parse_args()
32 |     return args
33 | 
34 | 
35 | def main(args):
36 |     url = 'http://' + args.inference_addr + '/predictions/' + args.model_name
37 |     with open(args.img, 'rb') as image:
38 |         tmp_res = requests.post(url, image)
39 |     content = tmp_res.content
40 |     if args.result_image:
41 |         with open(args.result_image, 'wb') as out_image:
42 |             out_image.write(content)
43 |         plt.imshow(mmcv.imread(args.result_image, 'grayscale'))
44 |         plt.show()
45 |     else:
46 |         plt.imshow(plt.imread(BytesIO(content)))
47 |         plt.show()
48 |     model = init_segmentor(args.config, args.checkpoint, args.device)
49 |     image = mmcv.imread(args.img)
50 |     result = inference_segmentor(model, image)
51 |     plt.imshow(result[0])
52 |     plt.show()
53 | 
54 | 
55 | if __name__ == '__main__':
56 |     args = parse_args()
57 |     main(args)
58 | 
--------------------------------------------------------------------------------
/segmentation/tools/train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
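# Editorial note: a hedged usage sketch, not part of the upstream script; the
# CONFIG/WORK_DIR/NUM_GPUS values are placeholders. Single-GPU runs can call
# this entry point directly, while multi-GPU runs go through the dist_train.sh
# wrapper shipped alongside it:
#   python tools/train.py ${CONFIG} --work-dir ${WORK_DIR}
#   bash tools/dist_train.sh ${CONFIG} ${NUM_GPUS}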
2 | import argparse
3 | import copy
4 | import os
5 | import os.path as osp
6 | import time
7 | import warnings
8 | 
9 | import mmcv
10 | import torch
11 | from mmcv.cnn.utils import revert_sync_batchnorm
12 | from mmcv.runner import get_dist_info, init_dist
13 | from mmcv.utils import Config, DictAction, get_git_hash
14 | 
15 | from mmseg import __version__
16 | from mmseg.apis import set_random_seed, train_segmentor
17 | from mmseg.datasets import build_dataset
18 | from mmseg.models import build_segmentor
19 | from mmseg.utils import collect_env, get_root_logger
20 | 
21 | 
22 | def parse_args():
23 |     parser = argparse.ArgumentParser(description='Train a segmentor')
24 |     parser.add_argument('config', help='train config file path')
25 |     parser.add_argument('--work-dir', help='the dir to save logs and models')
26 |     parser.add_argument(
27 |         '--load-from', help='the checkpoint file to load weights from')
28 |     parser.add_argument(
29 |         '--resume-from', help='the checkpoint file to resume from')
30 |     parser.add_argument(
31 |         '--no-validate',
32 |         action='store_true',
33 |         help='whether not to evaluate the checkpoint during training')
34 |     group_gpus = parser.add_mutually_exclusive_group()
35 |     group_gpus.add_argument(
36 |         '--gpus',
37 |         type=int,
38 |         help='number of gpus to use '
39 |         '(only applicable to non-distributed training)')
40 |     group_gpus.add_argument(
41 |         '--gpu-ids',
42 |         type=int,
43 |         nargs='+',
44 |         help='ids of gpus to use '
45 |         '(only applicable to non-distributed training)')
46 |     parser.add_argument('--seed', type=int, default=None, help='random seed')
47 |     parser.add_argument(
48 |         '--deterministic',
49 |         action='store_true',
50 |         help='whether to set deterministic options for CUDNN backend.')
51 |     parser.add_argument(
52 |         '--options', nargs='+', action=DictAction, help='custom options')
53 |     parser.add_argument(
54 |         '--launcher',
55 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
56 |         default='none',
57 |         help='job launcher')
58 |     parser.add_argument('--local_rank', type=int, default=0)
59 |     args = parser.parse_args()
60 |     if 'LOCAL_RANK' not in os.environ:
61 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
62 | 
63 |     return args
64 | 
65 | 
66 | def main():
67 |     args = parse_args()
68 | 
69 |     cfg = Config.fromfile(args.config)
70 |     if args.options is not None:
71 |         cfg.merge_from_dict(args.options)
72 |     # set cudnn_benchmark
73 |     if cfg.get('cudnn_benchmark', False):
74 |         torch.backends.cudnn.benchmark = True
75 | 
76 |     # work_dir is determined in this priority: CLI > segment in file > filename
77 |     if args.work_dir is not None:
78 |         # update configs according to CLI args if args.work_dir is not None
79 |         cfg.work_dir = args.work_dir
80 |     elif cfg.get('work_dir', None) is None:
81 |         # use config filename as default work_dir if cfg.work_dir is None
82 |         cfg.work_dir = osp.join('./work_dirs',
83 |                                 osp.splitext(osp.basename(args.config))[0])
84 |     if args.load_from is not None:
85 |         cfg.load_from = args.load_from
86 |     if args.resume_from is not None:
87 |         cfg.resume_from = args.resume_from
88 |     if args.gpu_ids is not None:
89 |         cfg.gpu_ids = args.gpu_ids
90 |     else:
91 |         cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
92 | 
93 |     # init distributed env first, since logger depends on the dist info.
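# Editorial note (not upstream code): for any launcher other than 'none',
# init_dist builds the process group from environment variables populated by
# the launcher (parse_args above exports LOCAL_RANK for this), and gpu_ids is
# widened to the full world size below so that iteration counts derived from
# it remain correct when resuming a checkpoint.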
94 |     if args.launcher == 'none':
95 |         distributed = False
96 |     else:
97 |         distributed = True
98 |         init_dist(args.launcher, **cfg.dist_params)
99 |         # gpu_ids is used to calculate iter when resuming checkpoint
100 |         _, world_size = get_dist_info()
101 |         cfg.gpu_ids = range(world_size)
102 | 
103 | 
104 |     # cfg.device='cuda'
105 |     # create work_dir
106 |     mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
107 |     # dump config
108 |     cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
109 |     # init the logger before other steps
110 |     timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
111 |     log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
112 |     logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
113 | 
114 |     # init the meta dict to record some important information such as
115 |     # environment info and seed, which will be logged
116 |     meta = dict()
117 |     # log env info
118 |     env_info_dict = collect_env()
119 |     env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
120 |     dash_line = '-' * 60 + '\n'
121 |     logger.info('Environment info:\n' + dash_line + env_info + '\n' +
122 |                 dash_line)
123 |     meta['env_info'] = env_info
124 |     # meta['config'] = cfg.pretty_text
125 |     # log some basic info
126 |     logger.info(f'Distributed training: {distributed}')
127 |     logger.info(f'Config:\n{cfg.pretty_text}')
128 | 
129 |     # set random seeds
130 |     if args.seed is not None:
131 |         logger.info(f'Set random seed to {args.seed}, deterministic: '
132 |                     f'{args.deterministic}')
133 |         set_random_seed(args.seed, deterministic=args.deterministic)
134 |     cfg.seed = args.seed
135 |     meta['seed'] = args.seed
136 |     meta['exp_name'] = osp.basename(args.config)
137 | 
138 |     model = build_segmentor(
139 |         cfg.model,
140 |         train_cfg=cfg.get('train_cfg'),
141 |         test_cfg=cfg.get('test_cfg'))
142 |     model.init_weights()
143 | 
144 |     # SyncBN is not supported for DP
145 |     if not distributed:
146 |         warnings.warn(
147 |             'SyncBN is only supported with DDP. To be compatible with DP, '
148 |             'we convert SyncBN to BN. Please use dist_train.sh which can '
149 |             'avoid this error.')
150 |         model = revert_sync_batchnorm(model)
151 | 
152 |     logger.info(model)
153 | 
154 |     datasets = [build_dataset(cfg.data.train)]
155 |     if len(cfg.workflow) == 2:
156 |         val_dataset = copy.deepcopy(cfg.data.val)
157 |         val_dataset.pipeline = cfg.data.train.pipeline
158 |         datasets.append(build_dataset(val_dataset))
159 |     if cfg.checkpoint_config is not None:
160 |         # save mmseg version, config file content and class names in
161 |         # checkpoints as meta data
162 |         cfg.checkpoint_config.meta = dict(
163 |             mmseg_version=f'{__version__}+{get_git_hash()[:7]}',
164 |             config=cfg.pretty_text,
165 |             CLASSES=datasets[0].CLASSES,
166 |             PALETTE=datasets[0].PALETTE)
167 |     # add an attribute for visualization convenience
168 |     model.CLASSES = datasets[0].CLASSES
169 |     # passing checkpoint meta for saving best checkpoint
170 |     meta.update(cfg.checkpoint_config.meta)
171 |     train_segmentor(
172 |         model,
173 |         datasets,
174 |         cfg,
175 |         distributed=distributed,
176 |         validate=(not args.no_validate),
177 |         timestamp=timestamp,
178 |         meta=meta)
179 | 
180 | 
181 | if __name__ == '__main__':
182 |     main()
183 | 
--------------------------------------------------------------------------------
/segmentation/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import os
4 | import os.path as osp
5 | import time
6 | 
7 | import mmcv
8 | import torch
9 | from mmcv.runner import init_dist
10 | from mmcv.utils import Config, DictAction, get_git_hash
11 | 
12 | from mmseg import __version__
13 | from mmseg.apis import set_random_seed, train_segmentor
14 | from mmseg.datasets import build_dataset
15 | from mmseg.models import build_segmentor
16 | from mmseg.utils import collect_env, get_root_logger
17 | from backbone import *
18 | 
19 | import sys
20 | sys.path.append('../')
21 | import models
22 | from align_resize import AlignResize
23 | 
24 | def parse_args():
25 |     parser = argparse.ArgumentParser(description='Train a segmentor')
26 |     parser.add_argument('config', help='train config file path')
27 |     parser.add_argument('--work-dir', help='the dir to save logs and models')
28 |     parser.add_argument(
29 |         '--load-from', help='the checkpoint file to load weights from')
30 |     parser.add_argument(
31 |         '--resume-from', help='the checkpoint file to resume from')
32 |     parser.add_argument(
33 |         '--no-validate',
34 |         action='store_true',
35 |         help='whether not to evaluate the checkpoint during training')
36 |     group_gpus = parser.add_mutually_exclusive_group()
37 |     group_gpus.add_argument(
38 |         '--gpus',
39 |         type=int,
40 |         help='number of gpus to use '
41 |         '(only applicable to non-distributed training)')
42 |     group_gpus.add_argument(
43 |         '--gpu-ids',
44 |         type=int,
45 |         nargs='+',
46 |         help='ids of gpus to use '
47 |         '(only applicable to non-distributed training)')
48 |     parser.add_argument('--seed', type=int, default=None, help='random seed')
49 |     parser.add_argument(
50 |         '--deterministic',
51 |         action='store_true',
52 |         help='whether to set deterministic options for CUDNN backend.')
53 |     parser.add_argument(
54 |         '--options', nargs='+', action=DictAction, help='custom options')
55 |     parser.add_argument(
56 |         '--launcher',
57 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
58 |         default='none',
59 |         help='job launcher')
60 |     parser.add_argument('--local_rank', type=int, default=0)
61 |     args = parser.parse_args()
62 |     if 'LOCAL_RANK' not in os.environ:
63 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
64 | 
65 |     return args
66 | 
67 | 
68 | def main():
69 |     args = parse_args()
70 | 
71 |     cfg = Config.fromfile(args.config)
72 |     if args.options is not None:
73 |         cfg.merge_from_dict(args.options)
74 |     # set cudnn_benchmark
75 |     if cfg.get('cudnn_benchmark', False):
76 |         torch.backends.cudnn.benchmark = True
77 | 
78 |     # work_dir is determined in this priority: CLI > segment in file > filename
79 |     if args.work_dir is not None:
80 |         # update configs according to CLI args if args.work_dir is not None
81 |         cfg.work_dir = args.work_dir
82 |     elif cfg.get('work_dir', None) is None:
83 |         # use config filename as default work_dir if cfg.work_dir is None
84 |         cfg.work_dir = osp.join('./work_dirs',
85 |                                 osp.splitext(osp.basename(args.config))[0])
86 |     if args.load_from is not None:
87 |         cfg.load_from = args.load_from
88 |     if args.resume_from is not None:
89 |         cfg.resume_from = args.resume_from
90 |     if args.gpu_ids is not None:
91 |         cfg.gpu_ids = args.gpu_ids
92 |     else:
93 |         cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
94 | 
95 |     # init distributed env first, since logger depends on the dist info.
96 |     if args.launcher == 'none':
97 |         distributed = False
98 |     else:
99 |         distributed = True
100 |         init_dist(args.launcher, **cfg.dist_params)
101 | 
102 |     # create work_dir
103 |     mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
104 |     # dump config
105 |     cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
106 |     # init the logger before other steps
107 |     timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
108 |     log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
109 |     logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
110 | 
111 |     # init the meta dict to record some important information such as
112 |     # environment info and seed, which will be logged
113 |     meta = dict()
114 |     # log env info
115 |     env_info_dict = collect_env()
116 |     env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
117 |     dash_line = '-' * 60 + '\n'
118 |     logger.info('Environment info:\n' + dash_line + env_info + '\n' +
119 |                 dash_line)
120 |     meta['env_info'] = env_info
121 | 
122 |     # log some basic info
123 |     logger.info(f'Distributed training: {distributed}')
124 |     logger.info(f'Config:\n{cfg.pretty_text}')
125 | 
126 |     # set random seeds
127 |     if args.seed is not None:
128 |         logger.info(f'Set random seed to {args.seed}, deterministic: '
129 |                     f'{args.deterministic}')
130 |         set_random_seed(args.seed, deterministic=args.deterministic)
131 |     cfg.seed = args.seed
132 |     meta['seed'] = args.seed
133 |     meta['exp_name'] = osp.basename(args.config)
134 | 
135 |     model = build_segmentor(
136 |         cfg.model,
137 |         train_cfg=cfg.get('train_cfg'),
138 |         test_cfg=cfg.get('test_cfg'))
139 | 
140 |     logger.info(model)
141 | 
142 |     datasets = [build_dataset(cfg.data.train)]
143 |     if len(cfg.workflow) == 2:
144 |         val_dataset = copy.deepcopy(cfg.data.val)
145 |         val_dataset.pipeline = cfg.data.train.pipeline
146 |         datasets.append(build_dataset(val_dataset))
147 |     if cfg.checkpoint_config is not None:
148 |         # save mmseg version, config file content and class names in
149 |         # checkpoints as meta data
150 |         cfg.checkpoint_config.meta = dict(
151 |             mmseg_version=f'{__version__}+{get_git_hash()[:7]}',
152 |             config=cfg.pretty_text,
153 |             CLASSES=datasets[0].CLASSES,
154 |             PALETTE=datasets[0].PALETTE)
155 |     # add an attribute for visualization convenience
156 |     model.CLASSES = datasets[0].CLASSES
157 |     train_segmentor(
158 |         model,
159 |         datasets,
160 |         cfg,
161 |         distributed=distributed,
162 |         validate=(not args.no_validate),
163 |         timestamp=timestamp,
164 |         meta=meta)
165 | 
166 | 
167 | if __name__ == '__main__':
168 |     main()
169 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | # Modified from the timm and Swin repos.
2 | 
3 | """ CUDA / AMP utils
4 | 
5 | Hacked together by / Copyright 2020 Ross Wightman
6 | """
7 | import os
8 | import torch
9 | from torch.nn import functional as F
10 | 
11 | try:
12 |     from apex import amp
13 |     has_apex = True
14 | except ImportError:
15 |     amp = None
16 |     has_apex = False
17 | 
18 | from timm.utils.clip_grad import dispatch_clip_grad
19 | 
20 | 
21 | class ApexScalerAccum:
22 |     state_dict_key = "amp"
23 | 
24 |     def __call__(self, loss, optimizer, clip_grad=None, clip_mode='norm', parameters=None, create_graph=False,
25 |                  update_grad=True):
26 |         with amp.scale_loss(loss, optimizer) as scaled_loss:
27 |             scaled_loss.backward(create_graph=create_graph)
28 |         if update_grad:
29 |             if clip_grad is not None:
30 |                 dispatch_clip_grad(amp.master_params(optimizer), clip_grad, mode=clip_mode)
31 |             optimizer.step()
32 | 
33 |     def state_dict(self):
34 |         if 'state_dict' in amp.__dict__:
35 |             return amp.state_dict()
36 | 
37 |     def load_state_dict(self, state_dict):
38 |         if 'load_state_dict' in amp.__dict__:
39 |             amp.load_state_dict(state_dict)
40 | 
41 | 
42 | class NativeScalerAccum:
43 |     state_dict_key = "amp_scaler"
44 | 
45 |     def __init__(self):
46 |         self._scaler = torch.cuda.amp.GradScaler()
47 | 
48 |     def __call__(self, loss, optimizer, clip_grad=None, clip_mode='norm', parameters=None, create_graph=False,
49 |                  update_grad=True):
50 |         self._scaler.scale(loss).backward(create_graph=create_graph)
51 |         if update_grad:
52 |             if clip_grad is not None:
53 |                 assert parameters is not None
54 |                 self._scaler.unscale_(optimizer)  # unscale the gradients of optimizer's assigned params in-place
55 |                 dispatch_clip_grad(parameters, clip_grad, mode=clip_mode)
56 |             self._scaler.step(optimizer)
57 |             self._scaler.update()
58 | 
59 |     def state_dict(self):
60 |         return self._scaler.state_dict()
61 | 
62 |     def load_state_dict(self, state_dict):
63 |         self._scaler.load_state_dict(state_dict)
64 | 
65 | 
66 | class DistillationLoss(torch.nn.Module):
67 |     """
68 |     This module wraps a standard criterion and adds an extra knowledge distillation loss by
69 |     taking a teacher model prediction and using it as additional supervision.
70 |     """
71 | 
72 |     def __init__(self, base_criterion: torch.nn.Module, teacher_model: torch.nn.Module,
73 |                  distillation_type: str, alpha: float, tau: float, num_classes: int = 1000):
74 |         super().__init__()
75 |         self.base_criterion = base_criterion
76 |         self.teacher_model = teacher_model
77 |         assert distillation_type in ['none', 'soft', 'hard']
78 |         self.distillation_type = distillation_type
79 |         self.alpha = alpha
80 |         self.tau = tau
81 |         self.num_classes = num_classes
82 | 
83 |     def forward(self, inputs, outputs, labels):
84 |         """
85 |         Args:
86 |             inputs: The original inputs that are fed to the teacher model
87 |             outputs: the outputs of the model to be trained. It is expected to be
88 |                 either a Tensor, or a Tuple[Tensor, Tensor], with the original output
89 |                 in the first position and the distillation predictions as the second output
90 |             labels: the labels for the base criterion
91 |         """
92 |         base_loss = self.base_criterion(outputs, labels)
93 |         if self.distillation_type == 'none':
94 |             return base_loss
95 | 
96 |         # don't backprop through the teacher
97 |         with torch.no_grad():
98 |             teacher_outputs = self.teacher_model(inputs)
99 | 
100 |         if self.distillation_type == 'soft':
101 |             T = self.tau
102 |             # taken from https://github.com/peterliht/knowledge-distillation-pytorch/blob/master/model/net.py#L100
103 |             # with slight modifications
104 |             distillation_loss = F.kl_div(
105 |                 F.log_softmax(outputs / T, dim=1),
106 |                 F.log_softmax(teacher_outputs / T, dim=1),
107 |                 reduction="batchmean",
108 |                 log_target=True
109 |             ) * (T * T)
110 |         elif self.distillation_type == 'hard':
111 |             distillation_loss = F.cross_entropy(
112 |                 outputs, teacher_outputs.argmax(dim=1))
113 | 
114 |         loss = base_loss * (1 - self.alpha) + distillation_loss * self.alpha
115 |         # if args.local_rank == 0:
116 |         #     print(f"loss: {loss} | base_loss: {base_loss} | distillation_loss: {distillation_loss}")
117 |         return loss
118 | 
119 | 
120 | def auto_resume_helper(output_dir):
121 |     if not os.path.exists(output_dir):
122 |         return None
123 |     checkpoints = os.listdir(output_dir)
124 |     checkpoints = [ckpt for ckpt in checkpoints if ckpt.endswith('pth.tar')]
125 |     print(f"All checkpoints found in {output_dir}: {checkpoints}")
126 |     if len(checkpoints) > 0 and "last.pth.tar" in checkpoints:
127 |         auto_resume_file = os.path.join(output_dir, "last.pth.tar")
128 |     # if len(checkpoints) > 0:
129 |     #     latest_checkpoint = max([os.path.join(output_dir, d) for d in checkpoints], key=os.path.getmtime)
130 |     #     print(f"The latest checkpoint found: {latest_checkpoint}")
131 |     #     auto_resume_file = latest_checkpoint
132 |     else:
133 |         auto_resume_file = None
134 |     return auto_resume_file
135 | 
--------------------------------------------------------------------------------
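Editorial note: a minimal, self-contained sketch of the soft-distillation objective
implemented by DistillationLoss above; the tensor shapes, alpha, and T below are
illustrative assumptions, not values taken from this repo.

import torch
import torch.nn.functional as F

# Illustrative shapes and hyperparameters (assumed).
student_logits = torch.randn(4, 1000)
teacher_logits = torch.randn(4, 1000)
labels = torch.randint(0, 1000, (4,))
alpha, T = 0.5, 1.0  # distillation weight and temperature

base_loss = F.cross_entropy(student_logits, labels)
# KL divergence between temperature-softened log-distributions; the T * T
# factor keeps gradient magnitudes roughly constant as T changes.
distillation_loss = F.kl_div(
    F.log_softmax(student_logits / T, dim=1),
    F.log_softmax(teacher_logits / T, dim=1),
    reduction="batchmean",
    log_target=True,
) * (T * T)
loss = base_loss * (1 - alpha) + distillation_loss * alpha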