├── .gitignore ├── LICENSE ├── README.md ├── assets └── framework.png ├── det ├── README.md ├── _init_paths.py ├── cfgs │ ├── res101.yml │ ├── res101_ls.yml │ ├── res50.yml │ └── vgg16.yml ├── demo.py ├── lib │ ├── datasets │ │ ├── VOCdevkit-matlab-wrapper │ │ │ ├── get_voc_opts.m │ │ │ ├── voc_eval.m │ │ │ └── xVOCap.m │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── ds_utils.py │ │ ├── factory.py │ │ ├── imagenet.py │ │ ├── imdb.py │ │ ├── pascal_voc.py │ │ ├── pascal_voc_rbg.py │ │ ├── tools │ │ │ └── mcg_munge.py │ │ ├── vg.py │ │ ├── vg_eval.py │ │ └── voc_eval.py │ ├── make.sh │ ├── model │ │ ├── __init__.py │ │ ├── csrc │ │ │ ├── ROIAlign.h │ │ │ ├── ROIPool.h │ │ │ ├── cpu │ │ │ │ ├── ROIAlign_cpu.cpp │ │ │ │ ├── nms_cpu.cpp │ │ │ │ └── vision.h │ │ │ ├── cuda │ │ │ │ ├── ROIAlign_cuda.cu │ │ │ │ ├── ROIPool_cuda.cu │ │ │ │ ├── nms.cu │ │ │ │ └── vision.h │ │ │ ├── nms.h │ │ │ └── vision.cpp │ │ ├── faster_rcnn │ │ │ ├── __init__.py │ │ │ ├── faster_rcnn.py │ │ │ ├── resnet.py │ │ │ ├── style_hallucination.py │ │ │ └── vgg16.py │ │ ├── nms │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── nms │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── make.sh │ │ │ ├── nms_cpu.py │ │ │ ├── nms_gpu.py │ │ │ ├── nms_kernel.cu │ │ │ ├── nms_wrapper.py │ │ │ └── src │ │ │ │ ├── nms_cuda.h │ │ │ │ ├── nms_cuda_kernel.cu │ │ │ │ └── nms_cuda_kernel.h │ │ ├── roi_align │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_align.py │ │ │ └── src │ │ │ │ ├── roi_align.c │ │ │ │ ├── roi_align.h │ │ │ │ ├── roi_align_cuda.c │ │ │ │ ├── roi_align_cuda.h │ │ │ │ ├── roi_align_kernel.cu │ │ │ │ └── roi_align_kernel.h │ │ ├── roi_crop │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize │ │ │ │ │ └── __init__.py │ │ │ │ └── roi_crop │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ ├── crop_resize.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ ├── make.sh │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ ├── gridgen.py │ │ │ │ └── roi_crop.py │ │ │ └── src │ │ │ │ ├── roi_crop.c │ │ │ │ ├── roi_crop.h │ │ │ │ ├── roi_crop_cuda.c │ │ │ │ ├── roi_crop_cuda.h │ │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ │ └── roi_crop_cuda_kernel.h │ │ ├── roi_layers │ │ │ ├── __init__.py │ │ │ ├── nms.py │ │ │ ├── roi_align.py │ │ │ └── roi_pool.py │ │ ├── roi_pooling │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pooling │ │ │ │ │ └── __init__.py │ │ │ ├── build.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── roi_pool.py │ │ │ └── src │ │ │ │ ├── roi_pooling.c │ │ │ │ ├── roi_pooling.h │ │ │ │ ├── roi_pooling_cuda.c │ │ │ │ ├── roi_pooling_cuda.h │ │ │ │ ├── roi_pooling_kernel.cu │ │ │ │ └── roi_pooling_kernel.h │ │ ├── rpn │ │ │ ├── __init__.py │ │ │ ├── anchor_target_layer.py │ │ │ ├── bbox_transform.py │ │ │ ├── generate_anchors.py │ │ │ ├── proposal_layer.py │ │ │ ├── proposal_target_layer_cascade.py │ │ │ └── rpn.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bbox.pyx │ │ │ ├── blob.py │ │ │ ├── config.py │ │ │ ├── logger.py │ │ │ └── net_utils.py │ ├── pycocotools │ │ ├── UPSTREAM_REV │ │ ├── __init__.py │ │ ├── _mask.c │ │ ├── _mask.pyx │ │ ├── coco.py │ │ ├── 
cocoeval.py │ │ ├── license.txt │ │ ├── mask.py │ │ ├── maskApi.c │ │ └── maskApi.h │ ├── roi_data_layer │ │ ├── __init__.py │ │ ├── minibatch.py │ │ ├── roibatchLoader.py │ │ └── roidb.py │ └── setup.py ├── requirements.txt ├── scripts │ └── test.sh ├── test.py └── train.py ├── imcls ├── README.md ├── data │ ├── JigsawLoader.py │ ├── StandardDataset.py │ ├── __init__.py │ ├── concat_dataset.py │ ├── correct_txt_lists │ │ ├── art_painting_crossval_kfold.txt │ │ ├── art_painting_test_kfold.txt │ │ ├── art_painting_train_kfold.txt │ │ ├── cartoon_crossval_kfold.txt │ │ ├── cartoon_test_kfold.txt │ │ ├── cartoon_train_kfold.txt │ │ ├── photo_crossval_kfold.txt │ │ ├── photo_test_kfold.txt │ │ ├── photo_train_kfold.txt │ │ ├── sketch_crossval_kfold.txt │ │ ├── sketch_test_kfold.txt │ │ └── sketch_train_kfold.txt │ ├── data_helper.py │ └── txt_lists │ │ ├── CALTECH_test.txt │ │ ├── CALTECH_train.txt │ │ ├── LABELME_test.txt │ │ ├── LABELME_train.txt │ │ ├── PASCAL_test.txt │ │ ├── PASCAL_train.txt │ │ ├── SUN_test.txt │ │ ├── SUN_train.txt │ │ ├── amazon10_test.txt │ │ ├── amazon10_train.txt │ │ ├── amazon_test.txt │ │ ├── amazon_train.txt │ │ ├── art_pada_test.txt │ │ ├── art_painting_test.txt │ │ ├── art_painting_train.txt │ │ ├── cartoon_test.txt │ │ ├── cartoon_train.txt │ │ ├── clipart_pada_test.txt │ │ ├── dslr10_test.txt │ │ ├── dslr10_train.txt │ │ ├── dslr_test.txt │ │ ├── dslr_train.txt │ │ ├── jhuit_test_test.txt │ │ ├── jhuit_train_train.txt │ │ ├── mnist_m_test.txt │ │ ├── mnist_train.txt │ │ ├── photo_test.txt │ │ ├── photo_train.txt │ │ ├── product_pada_test.txt │ │ ├── realworld_pada_test.txt │ │ ├── sketch_test.txt │ │ ├── sketch_train.txt │ │ ├── svhn_test.txt │ │ ├── synth_digits_test.txt │ │ ├── usps_test.txt │ │ ├── webcam10_test.txt │ │ ├── webcam10_train.txt │ │ ├── webcam_test.txt │ │ └── webcam_train.txt ├── models │ ├── __init__.py │ ├── augnet.py │ ├── model_factory.py │ ├── model_utils.py │ ├── resnet.py │ ├── resnet_l2d.py │ └── style_hallucination.py ├── optimizer │ ├── __init__.py │ └── optimizer_helper.py ├── requirements.txt ├── scripts │ ├── train_erm.sh │ ├── train_l2d.sh │ └── train_rsc.sh ├── train_shade.py ├── train_shade_l2d.py └── utils │ ├── Logger.py │ ├── __init__.py │ ├── contrastive_loss.py │ ├── fps.py │ ├── loss_utils.py │ ├── tf_logger.py │ ├── util.py │ └── vis.py └── sseg ├── README.md ├── configs ├── _base_ │ ├── datasets │ │ ├── cityscapes_half_512x512.py │ │ ├── gta_to_cityscapes_512x512.py │ │ ├── gta_to_cityscapes_rsc_512x512.py │ │ ├── uda_cityscapes_to_acdc_512x512.py │ │ ├── uda_cityscapes_to_darkzurich_512x512.py │ │ ├── uda_gta_to_cityscapes_512x512.py │ │ └── uda_synthia_to_cityscapes_512x512.py │ ├── default_runtime.py │ ├── models │ │ ├── daformer_aspp_mitb5.py │ │ ├── daformer_conv1_mitb5.py │ │ ├── daformer_isa_mitb5.py │ │ ├── daformer_sepaspp_bottleneck_mitb5.py │ │ ├── daformer_sepaspp_mitb5.py │ │ ├── daformer_sepaspp_mitb5_shm.py │ │ ├── danet_r50-d8.py │ │ ├── deeplabv2_r50-d8.py │ │ ├── deeplabv2red_r50-d8.py │ │ ├── deeplabv3plus_r50-d8.py │ │ ├── isanet_r50-d8.py │ │ ├── segformer.py │ │ ├── segformer_b5.py │ │ ├── segformer_r101.py │ │ ├── upernet_ch256_mit.py │ │ └── upernet_mit.py │ ├── schedules │ │ ├── adamw.py │ │ ├── poly10.py │ │ └── poly10warm.py │ └── uda │ │ ├── dacs.py │ │ ├── dacs_a999_fdthings.py │ │ ├── dacs_fd.py │ │ └── dacs_fdthings.py └── dgformer │ ├── gta2cs_source.py │ ├── gta2cs_source_bdd.py │ ├── gta2cs_source_cityscapes.py │ ├── gta2cs_source_mapillary.py │ └── gta2cs_source_rsc_shade.py ├── 
mmseg ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── ddp_wrapper.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ └── metrics.py │ ├── seg │ │ ├── __init__.py │ │ ├── builder.py │ │ └── sampler │ │ │ ├── __init__.py │ │ │ ├── base_pixel_sampler.py │ │ │ └── ohem_pixel_sampler.py │ ├── shm_init_hook.py │ └── utils │ │ ├── __init__.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── acdc.py │ ├── bdd100k.py │ ├── builder.py │ ├── cityscapes.py │ ├── custom.py │ ├── dark_zurich.py │ ├── dataset_wrappers.py │ ├── gta.py │ ├── mapillary.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── test_time_aug.py │ │ └── transforms.py │ ├── synthia.py │ └── uda_dataset.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── mix_transformer.py │ │ ├── resnest.py │ │ ├── resnet.py │ │ └── resnext.py │ ├── builder.py │ ├── decode_heads │ │ ├── __init__.py │ │ ├── aspp_head.py │ │ ├── da_head.py │ │ ├── daformer_head.py │ │ ├── decode_head.py │ │ ├── dlv2_head.py │ │ ├── fcn_head.py │ │ ├── isa_head.py │ │ ├── psp_head.py │ │ ├── segformer_head.py │ │ ├── sep_aspp_head.py │ │ └── uper_head.py │ ├── dg │ │ ├── __init__.py │ │ ├── dacs.py │ │ └── uda_decorator.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── cross_entropy_loss.py │ │ └── utils.py │ ├── necks │ │ ├── __init__.py │ │ └── segformer_adapter.py │ ├── segmentors │ │ ├── __init__.py │ │ ├── base.py │ │ └── encoder_decoder.py │ └── utils │ │ ├── __init__.py │ │ ├── ckpt_convert.py │ │ ├── dacs_transforms.py │ │ ├── make_divisible.py │ │ ├── res_layer.py │ │ ├── self_attention_block.py │ │ ├── shape_convert.py │ │ ├── style_hallucination.py │ │ └── visualization.py ├── ops │ ├── __init__.py │ ├── encoding.py │ └── wrappers.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── logger.py │ └── utils.py └── version.py ├── requirements.txt ├── resources ├── color_palette.png ├── demo.gif ├── license_dacs ├── license_mmseg ├── license_segformer └── uda_over_time.png ├── run_experiments.py ├── scripts └── test_dg.sh ├── setup.cfg └── tools ├── __init__.py ├── convert_datasets ├── cityscapes.py ├── gta.py ├── mapillary.py └── synthia.py ├── download_checkpoints.sh ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | MANIFEST 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *.cover 46 | .hypothesis/ 47 | .pytest_cache/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | db.sqlite3 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # Environments 84 | .env 85 | .venv 86 | env/ 87 | venv/ 88 | ENV/ 89 | env.bak/ 90 | venv.bak/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | .vscode 106 | .idea 107 | 108 | # custom 109 | *.pkl 110 | *.pkl.json 111 | *.log.json 112 | work_dirs/ 113 | mmseg/.mim 114 | 115 | # Pytorch 116 | *.pth 117 | 118 | euler_log.txt 119 | jobs/ 120 | configs/tmp_param.json 121 | configs/generated/ 122 | *.pdf 123 | *.pgf 124 | 125 | rsync.sh 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Yuyang Zhao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /assets/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/assets/framework.png -------------------------------------------------------------------------------- /det/README.md: -------------------------------------------------------------------------------- 1 | ## SHADE on Domain Generalized Object Detection 2 | 3 | This is the implementation of SHADE on domain generalized object detection. The code is based on [faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch/tree/pytorch-1.0). 
4 | 5 | ### Setup Environment 6 | 7 | We use Python 3.8.5 and PyTorch 1.7.1 with CUDA 11.0. 8 | ```shell 9 | conda create -n dgdet python=3.8.5 10 | conda activate dgdet 11 | pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | Compile the CUDA dependencies using the following commands: 16 | 17 | ```shell 18 | cd lib 19 | python setup.py build develop 20 | ``` 21 | 22 | ### Data Preparation 23 | 24 | **Urban-scene Detection**: We use the Urban-scene Detection benchmark, which covers the Daytime-Sunny, Night-Sunny, Dusk-Rainy, Night-Rainy, and Daytime-Foggy weather conditions. 25 | 26 | The dataset is available at [[Download Link](https://drive.google.com/drive/folders/1IIUnUrJrvFgPzU8D6KtV0CXa8k1eBV9B)]. 27 | 28 | File structure: 29 | ``` 30 | DGDet/ 31 | |–– daytimeclear/ 32 | |–– daytimefoggy/ 33 | |–– nightrainy/ 34 | |–– duskrainy/ 35 | |–– nightclear/ 36 | ``` 37 | 38 | Download and rename the dataset, and then modify the data root in `det/lib/datasets/factory.py`. 39 | 40 | ### Pretrained Model 41 | 42 | **NOTE**: We use the Caffe-pretrained ResNet-101 as our backbone. You can download it from [[Dropbox](https://www.dropbox.com/s/iev3tkbz5wyyuz9/resnet101_caffe.pth?dl=0), [VT Server](https://filebox.ece.vt.edu/~jw2yang/faster-rcnn/pretrained-base-models/resnet101_caffe.pth)] and put it into `data/pretrained_model`. 43 | 44 | 45 | ### Train 46 | 47 | ```shell 48 | python train.py --cuda --no_freeze --detect_all --color_tf --add_classifier 49 | ``` 50 | 51 | 52 | ### Test 53 | 54 | ``` 55 | bash test.sh ${OUT_DIR} 9 56 | ``` 57 | 58 | 59 | ### Citation 60 | 61 | ``` 62 | @inproceedings{zhao2022shade, 63 | title={Style-Hallucinated Dual Consistency Learning for Domain Generalized Semantic Segmentation}, 64 | author={Zhao, Yuyang and Zhong, Zhun and Zhao, Na and Sebe, Nicu and Lee, Gim Hee}, 65 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 66 | year={2022}} 67 | 68 | @article{zhao2022shadevdg, 69 | title={Style-Hallucinated Dual Consistency Learning: A Unified Framework for Visual Domain Generalization}, 70 | author={Zhao, Yuyang and Zhong, Zhun and Zhao, Na and Sebe, Nicu and Lee, Gim Hee}, 71 | journal={arXiv preprint arXiv:2212.09068}, 72 | year={2022}} 73 | ``` 74 | 75 | 76 | ### Acknowledgements 77 | 78 | This project is based on the following open-source projects. We thank their 79 | authors for making the source code publicly available.
80 | 81 | * [faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch/tree/pytorch-1.0) 82 | * [Single-DGOD](https://github.com/AmingWu/Single-DGOD) 83 | -------------------------------------------------------------------------------- /det/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /det/cfgs/res101.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | DOUBLE_BIAS: False 13 | LEARNING_RATE: 0.001 14 | TEST: 15 | HAS_RPN: True 16 | POOLING_SIZE: 7 17 | POOLING_MODE: align 18 | CROP_RESIZE_WITH_MAX_POOL: False 19 | -------------------------------------------------------------------------------- /det/cfgs/res101_ls.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | SCALES: [800] 13 | DOUBLE_BIAS: False 14 | LEARNING_RATE: 0.001 15 | TEST: 16 | HAS_RPN: True 17 | SCALES: [800] 18 | MAX_SIZE: 1200 19 | RPN_POST_NMS_TOP_N: 1000 20 | POOLING_SIZE: 7 21 | POOLING_MODE: align 22 | CROP_RESIZE_WITH_MAX_POOL: False 23 | -------------------------------------------------------------------------------- /det/cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | # IMS_PER_BATCH: 1 5 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 6 | RPN_POSITIVE_OVERLAP: 0.7 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | DISPLAY: 20 11 | BATCH_SIZE: 256 12 | WEIGHT_DECAY: 0.0001 13 | DOUBLE_BIAS: False 14 | SNAPSHOT_PREFIX: res50_faster_rcnn 15 | TEST: 16 | HAS_RPN: True 17 | POOLING_MODE: crop 18 | -------------------------------------------------------------------------------- /det/cfgs/vgg16.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: vgg16 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | BATCH_SIZE: 256 10 | LEARNING_RATE: 0.01 11 | TEST: 12 | HAS_RPN: True 13 | POOLING_MODE: align 14 | CROP_RESIZE_WITH_MAX_POOL: False 15 | -------------------------------------------------------------------------------- /det/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory 
not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /det/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /det/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /det/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /det/lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | 
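    # The dot with v = [1, 1e3, 1e6, 1e9] packs the four rounded coordinates
    # into a single base-1000 integer, so identical boxes collapse to the same
    # hash (exact as long as every scaled coordinate stays below 1000).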
hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /det/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | # from datasets.coco import coco 16 | from datasets.imagenet import imagenet 17 | from datasets.vg import vg 18 | 19 | import numpy as np 20 | 21 | # Set up voc_<year>_<split>_<weather> 22 | for year in ['2007']: 23 | for split in ['train', 'val', 'trainval', 'test']: 24 | for weather in ['daytimeclear','daytimefoggy', 'duskrainy', 'nightrainy', 25 | 'nightclear']: 26 | devkit_path = "/data/yyzhao/DGDet" 27 | name = 'voc_{}_{}_{}'.format(year, split, weather) 28 | __sets[name] = (lambda split=split, year=year, weather=weather, devkit_path=devkit_path: pascal_voc(split, year, weather, devkit_path)) 29 | 30 | # Set up ImageNet.
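# NOTE: the keyword-argument defaults in these registration lambdas (e.g.
# split=split) bind each loop variable at definition time; without them every
# entry in __sets would close over the final loop values.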
31 | for split in ['train', 'val', 'val1', 'val2', 'test']: 32 | name = 'imagenet_{}'.format(split) 33 | devkit_path = 'data/imagenet/ILSVRC/devkit' 34 | data_path = 'data/imagenet/ILSVRC' 35 | __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path)) 36 | 37 | def get_imdb(name): 38 | """Get an imdb (image database) by name.""" 39 | if name not in __sets: 40 | raise KeyError('Unknown dataset: {}'.format(name)) 41 | return __sets[name]() 42 | 43 | 44 | def list_imdbs(): 45 | """List all registered imdbs.""" 46 | return list(__sets.keys()) 47 | -------------------------------------------------------------------------------- /det/lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 14 | """ 15 | 16 | def munge(src_dir): 17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 19 | 20 | files = os.listdir(src_dir) 21 | for fn in files: 22 | base, ext = os.path.splitext(fn) 23 | # first 14 chars / first 22 chars / all chars + .mat 24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 25 | first = base[:14] 26 | second = base[:22] 27 | dst_dir = os.path.join('MCG', 'mat', first, second) 28 | if not os.path.exists(dst_dir): 29 | os.makedirs(dst_dir) 30 | src = os.path.join(src_dir, fn) 31 | dst = os.path.join(dst_dir, fn) 32 | print('MV: {} -> {}'.format(src, dst)) 33 | os.rename(src, dst) 34 | 35 | if __name__ == '__main__': 36 | # src_dir should look something like: 37 | # src_dir = 'MCG-COCO-val2014-boxes' 38 | src_dir = sys.argv[1] 39 | munge(src_dir) 40 | -------------------------------------------------------------------------------- /det/lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | export CUDA_PATH=/usr/local/cuda/ 6 | # You may also want to add the following 7 | #export C_INCLUDE_PATH=/opt/cuda/include 8 | 9 | export CXXFLAGS="-std=c++11" 10 | export CFLAGS="-std=c99" 11 | 12 | python setup.py build_ext --inplace 13 | rm -rf build 14 | 15 | # CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 16 | # -gencode arch=compute_35,code=sm_35 \ 17 | # -gencode arch=compute_50,code=sm_50 \ 18 | # -gencode arch=compute_52,code=sm_52 \ 19 | # -gencode arch=compute_60,code=sm_60 \ 20 | # -gencode arch=compute_61,code=sm_61 \ 21 | # -gencode arch=compute_86,code=sm_86" 22 | 23 | CUDA_ARCH="-gencode arch=compute_86,code=sm_86" 24 | 25 | # compile NMS 26 | cd model/nms/src 27 | echo "Compiling nms kernels by nvcc..."
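# -x cu makes nvcc treat the input as CUDA source regardless of its extension,
# and -Xcompiler -fPIC is required because the resulting objects are linked
# into Python extension modules.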
28 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 29 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 30 | 31 | cd ../ 32 | python build.py 33 | 34 | # compile roi_pooling 35 | cd ../../ 36 | cd model/roi_pooling/src 37 | echo "Compiling roi pooling kernels by nvcc..." 38 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 39 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 40 | cd ../ 41 | python build.py 42 | 43 | # compile roi_align 44 | cd ../../ 45 | cd model/roi_align/src 46 | echo "Compiling roi align kernels by nvcc..." 47 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 48 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 49 | cd ../ 50 | python build.py 51 | 52 | # compile roi_crop 53 | cd ../../ 54 | cd model/roi_crop/src 55 | echo "Compiling roi crop kernels by nvcc..." 56 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 57 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 58 | cd ../ 59 | python build.py -------------------------------------------------------------------------------- /det/lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/__init__.py -------------------------------------------------------------------------------- /det/lib/model/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /det/lib/model/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
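// Dispatch-only header: routes to the CUDA kernels when the tensors live on
// the GPU; the CPU paths below are unimplemented and fail via AT_ERROR.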
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /det/lib/model/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template <typename scalar_t> 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data<uint8_t>(); 30 | auto order = order_t.data<int64_t>(); 31 | auto x1 = x1_t.data<scalar_t>(); 32 | auto y1 = y1_t.data<scalar_t>(); 33 | auto x2 = x2_t.data<scalar_t>(); 34 | auto y2 = y2_t.data<scalar_t>(); 35 | auto areas = areas_t.data<scalar_t>(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /det/lib/model/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /det/lib/model/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& input, 33 | const at::Tensor& rois, 34 | const at::Tensor& argmax, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 44 | 45 | 46 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 47 | const int height, 48 | const int width); 49 | -------------------------------------------------------------------------------- /det/lib/model/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
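// Dispatch-only header: for GPU tensors the boxes and scores are concatenated
// into one (N, 5) tensor for nms_cuda; otherwise the call falls through to the
// templated CPU implementation in nms_cpu.cpp.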
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /det/lib/model/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("nms", &nms, "non-maximum suppression"); 9 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 10 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 11 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 12 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 13 | } 14 | -------------------------------------------------------------------------------- /det/lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /det/lib/model/faster_rcnn/style_hallucination.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | import torch.distributions as tdist 5 | import torch.nn.functional as F 6 | import ipdb 7 | 8 | class StyleHallucination(nn.Module): 9 | ''' 10 | Style Hallucination Module. 11 | Reference: 12 | Zhao et al. Style-Hallucinated Dual Consistency Learning for Domain Generalized Semantic Segmentation. ECCV 2022. 
13 | https://arxiv.org/pdf/2204.02548.pdf 14 | ''' 15 | def __init__(self, concentration_coeff, base_style_num): 16 | super().__init__() 17 | self.concentration = torch.tensor([concentration_coeff]*base_style_num, device='cuda') 18 | self._dirichlet = tdist.dirichlet.Dirichlet(concentration=self.concentration) 19 | 20 | self.register_buffer("proto_mean", torch.zeros((base_style_num, base_style_num), requires_grad=False)) 21 | self.register_buffer("proto_std", torch.zeros((base_style_num, base_style_num), requires_grad=False)) 22 | 23 | def forward(self, x): 24 | # ipdb.set_trace() 25 | B,C,H,W = x.size() 26 | x_mean = x.mean(dim=[2,3], keepdim=True) # B,C,1,1 27 | x_std = x.std(dim=[2,3], keepdim=True) + 1e-7 # B,C,1,1 28 | x_mean, x_std = x_mean.detach(), x_std.detach() 29 | 30 | x_norm = (x - x_mean) / x_std 31 | 32 | combine_weights = self._dirichlet.sample((B,)) # B,C 33 | combine_weights = combine_weights.detach() 34 | 35 | new_mean = combine_weights @ self.proto_mean.data # B,C 36 | new_std = combine_weights @ self.proto_std.data 37 | 38 | x_new = x_norm * new_std.unsqueeze(-1).unsqueeze(-1) + new_mean.unsqueeze(-1).unsqueeze(-1) 39 | 40 | return x, x_new 41 | -------------------------------------------------------------------------------- /det/lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | import math 15 | import torchvision.models as models 16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 17 | import pdb 18 | 19 | class vgg16(_fasterRCNN): 20 | def __init__(self, classes, pretrained=False, class_agnostic=False): 21 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth' 22 | self.dout_base_model = 512 23 | self.pretrained = pretrained 24 | self.class_agnostic = class_agnostic 25 | 26 | _fasterRCNN.__init__(self, classes, class_agnostic) 27 | 28 | def _init_modules(self): 29 | vgg = models.vgg16() 30 | if self.pretrained: 31 | print("Loading pretrained weights from %s" %(self.model_path)) 32 | state_dict = torch.load(self.model_path) 33 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()}) 34 | 35 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) 36 | 37 | # not using the last maxpool layer 38 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 39 | 40 | # Fix the layers before conv3: 41 | for layer in range(10): 42 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False 43 | 44 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model) 45 | 46 | self.RCNN_top = vgg.classifier 47 | 48 | # not using the last maxpool layer 49 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes) 50 | 51 | if self.class_agnostic: 52 | self.RCNN_bbox_pred = nn.Linear(4096, 4) 53 | else: 54 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) 55 | 56 | def _head_to_tail(self, pool5): 57 | 58 | pool5_flat = pool5.view(pool5.size(0), -1) 59 | fc7 = self.RCNN_top(pool5_flat) 60 | 61 | return fc7 62 | 63 | 
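# A minimal sketch of how the StyleHallucination module defined above in
# style_hallucination.py can be wired into a backbone: the hallucinated view is
# generated from shallow features during training, and both views then share
# the deeper layers. The wrapper name, the insertion point right after the
# stem, and the hyper-parameter values below are assumptions for illustration,
# not necessarily this repo's exact wiring (see model/faster_rcnn/resnet.py for
# the actual integration). Note that StyleHallucination assumes CUDA.
import torch
import torch.nn as nn
from model.faster_rcnn.style_hallucination import StyleHallucination

class StemWithSHM(nn.Module):
    def __init__(self, stem, concentration_coeff=0.01, base_style_num=64):
        super().__init__()
        self.stem = stem  # e.g. the first conv block, producing base_style_num channels
        # proto_mean/proto_std are zero-initialized (base_style_num, C) buffers;
        # they still need to be filled with real style prototypes (e.g.
        # farthest-point-sampled source styles, cf. imcls/utils/fps.py) before
        # the hallucinated styles are meaningful.
        self.shm = StyleHallucination(concentration_coeff, base_style_num)

    def forward(self, x):
        feat = self.stem(x)
        if self.training:
            feat, feat_new = self.shm(feat)            # original + new-style view
            feat = torch.cat([feat, feat_new], dim=0)  # both pass through later layers
        return feat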
-------------------------------------------------------------------------------- /det/lib/model/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /det/lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /det/lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /det/lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /det/lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /det/lib/model/nms/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling stnm kernels by nvcc..." 
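# NOTE: -arch=sm_52 below targets Maxwell-era GPUs; change it to match your
# card's compute capability (the top-level det/lib/make.sh uses sm_86).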
7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /det/lib/model/nms/nms_cpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import torch 5 | 6 | def nms_cpu(dets, thresh): 7 | dets = dets.numpy() 8 | x1 = dets[:, 0] 9 | y1 = dets[:, 1] 10 | x2 = dets[:, 2] 11 | y2 = dets[:, 3] 12 | scores = dets[:, 4] 13 | 14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 15 | order = scores.argsort()[::-1] 16 | 17 | keep = [] 18 | while order.size > 0: 19 | i = order.item(0) 20 | keep.append(i) 21 | xx1 = np.maximum(x1[i], x1[order[1:]]) 22 | yy1 = np.maximum(y1[i], y1[order[1:]]) 23 | xx2 = np.minimum(x2[i], x2[order[1:]]) 24 | yy2 = np.minimum(y2[i], y2[order[1:]]) 25 | 26 | w = np.maximum(0.0, xx2 - xx1 + 1) 27 | h = np.maximum(0.0, yy2 - yy1 + 1) 28 | inter = w * h 29 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 30 | 31 | inds = np.where(ovr <= thresh)[0] 32 | order = order[inds + 1] 33 | 34 | return torch.IntTensor(keep) 35 | 36 | 37 | -------------------------------------------------------------------------------- /det/lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 | -------------------------------------------------------------------------------- /det/lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | import torch 8 | from model.utils.config import cfg 9 | if torch.cuda.is_available(): 10 | from model.nms.nms_gpu import nms_gpu 11 | from model.nms.nms_cpu import nms_cpu 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | if dets.shape[0] == 0: 16 | return [] 17 | # ---numpy version--- 18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # ---pytorch version--- 20 | 21 | return nms_gpu(dets, thresh) if force_cpu == False else nms_cpu(dets, thresh) 22 | -------------------------------------------------------------------------------- /det/lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 | -------------------------------------------------------------------------------- /det/lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out,
float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | sources = ['src/roi_align.c'] 7 | headers = ['src/roi_align.h'] 8 | extra_objects = [] 9 | #sources = [] 10 | #headers = [] 11 | defines = [] 12 | with_cuda = False 13 | 14 | this_file = os.path.dirname(os.path.realpath(__file__)) 15 | print(this_file) 16 | 17 | if torch.cuda.is_available(): 18 | print('Including CUDA code.') 19 | sources += ['src/roi_align_cuda.c'] 20 | headers += ['src/roi_align_cuda.h'] 21 | defines += [('WITH_CUDA', None)] 22 | with_cuda = True 23 | 24 | extra_objects = ['src/roi_align_kernel.cu.o'] 25 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 26 | 27 | ffi = create_extension( 28 | '_ext.roi_align', 29 | headers=headers, 30 | sources=sources, 31 | define_macros=defines, 32 | relative_to=__file__, 33 | with_cuda=with_cuda, 34 | extra_objects=extra_objects 35 | ) 36 | 37 | if __name__ == '__main__': 38 | ffi.build() 39 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | 
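        # spatial_scale maps ROI coordinates from input-image space onto the
        # feature map (e.g. 1/16.0 for a VGG16 conv5 feature map).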
self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | roi_align.roi_align_forward(self.aligned_height, 30 | self.aligned_width, 31 | self.spatial_scale, features, 32 | rois, output) 33 | # raise NotImplementedError 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | assert(self.feature_size is not None and grad_output.is_cuda) 39 | 40 | batch_size, num_channels, data_height, data_width = self.feature_size 41 | 42 | grad_input = self.rois.new(batch_size, num_channels, data_height, 43 | data_width).zero_() 44 | roi_align.roi_align_backward_cuda(self.aligned_height, 45 | self.aligned_width, 46 | self.spatial_scale, grad_output, 47 | self.rois, grad_input) 48 | 49 | # print grad_input 50 | 51 | return grad_input, None 52 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 
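        # Like RoIAlignAvg: forward aligns to an (H+1) x (W+1) grid and then
        # applies a stride-1 2x2 max-pool to get back to (H, W).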
34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/src/roi_align.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); 3 | 4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale, 5 | THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, 
stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /det/lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_crop/_ext/crop_resize/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._crop_resize import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/_ext/roi_crop/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('decice %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 = torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward decice %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | 
roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width, lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, self.width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid.type_as(self.batchgrid[i]) # torch.Tensor has no astype(); type_as() casts the base grid to the batch tensor's dtype/device 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) # the bmm already covers the whole batch, so each pass of this loop computes the same result 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert output.get_device() == input1.get_device(), "output and input1 must be on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must be on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 |
grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /det/lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | 
/*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /det/lib/model/roi_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from .nms import nms 4 | from .roi_align import ROIAlign 5 | from .roi_align import roi_align 6 | from .roi_pool import ROIPool 7 | from .roi_pool import roi_pool 8 | 9 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool"] 10 | -------------------------------------------------------------------------------- /det/lib/model/roi_layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from model import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /det/lib/model/roi_layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from model import _C 9 | 10 | import pdb 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | output = _C.roi_align_forward(input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None 44 | 45 | 46 | roi_align = _ROIAlign.apply 47 | 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | def forward(self, input, rois): 57 | return roi_align( 58 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 59 | ) 60 | 61 | def __repr__(self): 62 | tmpstr = self.__class__.__name__ + "(" 63 | tmpstr += "output_size=" + str(self.output_size) 64 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 65 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 66 | tmpstr += ")" 67 | return tmpstr 68 | -------------------------------------------------------------------------------- /det/lib/model/roi_layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from model import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_pooling/_ext/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | extra_objects = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | this_file = os.path.dirname(os.path.realpath(__file__)) 14 | 
print(this_file) 15 | 16 | if torch.cuda.is_available(): 17 | print('Including CUDA code.') 18 | sources += ['src/roi_pooling_cuda.c'] 19 | headers += ['src/roi_pooling_cuda.h'] 20 | defines += [('WITH_CUDA', None)] 21 | with_cuda = True 22 | extra_objects = ['src/roi_pooling.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_pooling', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def 
__init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "roi_pooling_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 
80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /det/lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /det/lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /det/lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/det/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /det/lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | # from scipy.misc import imread, imresize 12 | import cv2 13 | 14 | try: 15 | xrange # Python 2 16 | except NameError: 17 | xrange = range # Python 3 18 | 19 | 20 | def im_list_to_blob(ims): 21 | """Convert a list of images into a network input. 22 | 23 | Assumes images are already prepared (means subtracted, BGR order, ...). 
24 | """ 25 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 26 | num_images = len(ims) 27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 28 | dtype=np.float32) 29 | for i in xrange(num_images): 30 | im = ims[i] 31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 32 | 33 | return blob 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | im_size_min = np.min(im_shape[0:2]) 43 | im_size_max = np.max(im_shape[0:2]) 44 | im_scale = float(target_size) / float(im_size_min) 45 | # Prevent the biggest axis from being more than MAX_SIZE 46 | # if np.round(im_scale * im_size_max) > max_size: 47 | # im_scale = float(max_size) / float(im_size_max) 48 | # im = imresize(im, im_scale) 49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 50 | interpolation=cv2.INTER_LINEAR) 51 | 52 | return im, im_scale 53 | -------------------------------------------------------------------------------- /det/lib/model/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.misc 5 | try: 6 | from StringIO import StringIO # Python 2.7 7 | except ImportError: 8 | from io import BytesIO # Python 3.x 9 | 10 | 11 | class Logger(object): 12 | 13 | def __init__(self, log_dir): 14 | """Create a summary writer logging to log_dir.""" 15 | self.writer = tf.summary.FileWriter(log_dir) 16 | 17 | def scalar_summary(self, tag, value, step): 18 | """Log a scalar variable.""" 19 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 20 | self.writer.add_summary(summary, step) 21 | 22 | def image_summary(self, tag, images, step): 23 | """Log a list of images.""" 24 | 25 | img_summaries = [] 26 | for i, img in enumerate(images): 27 | # Write the image to a string 28 | try: 29 | s = StringIO() 30 | except: 31 | s = BytesIO() 32 | scipy.misc.toimage(img).save(s, format="png") 33 | 34 | # Create an Image object 35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 36 | height=img.shape[0], 37 | width=img.shape[1]) 38 | # Create a Summary value 39 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 40 | 41 | # Create and write Summary 42 | summary = tf.Summary(value=img_summaries) 43 | self.writer.add_summary(summary, step) 44 | 45 | def histo_summary(self, tag, values, step, bins=1000): 46 | """Log a histogram of the tensor of values.""" 47 | 48 | # Create a histogram using numpy 49 | counts, bin_edges = np.histogram(values, bins=bins) 50 | 51 | # Fill the fields of the histogram proto 52 | hist = tf.HistogramProto() 53 | hist.min = float(np.min(values)) 54 | hist.max = float(np.max(values)) 55 | hist.num = int(np.prod(values.shape)) 56 | hist.sum = float(np.sum(values)) 57 | hist.sum_squares = float(np.sum(values**2)) 58 | 59 | # Drop the start of the first bin 60 | bin_edges = bin_edges[1:] 61 | 62 | # Add bin edges and counts 63 | for edge in bin_edges: 64 | hist.bucket_limit.append(edge) 65 | for c in counts: 66 | hist.bucket.append(c) 67 | 68 | # Create and write Summary 69 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 70 | self.writer.add_summary(summary, step) 71 | self.writer.flush() 72 | 
-------------------------------------------------------------------------------- /det/lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /det/lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /det/lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 27 | -------------------------------------------------------------------------------- /det/lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /det/lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /det/lib/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "model", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "model._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="faster_rcnn", 61 | version="0.1", 62 | description="object detection in pytorch", 63 | packages=find_packages(exclude=("configs", "tests",)), 64 | # install_requires=requirements, 65 | ext_modules=get_extensions(), 66 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 67 | ) 68 | -------------------------------------------------------------------------------- /det/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | cffi 3 | opencv-python 4 | scipy 5 | msgpack 6 | easydict 7 | matplotlib 8 | pyyaml 9 | tensorboardX 10 | imageio 11 | ipdb 12 | tqdm -------------------------------------------------------------------------------- /det/scripts/test.sh: -------------------------------------------------------------------------------- 1 | 2 | ## test 3 | OUT_DIR=$1 4 | CHECK_ID=$2 5 | 6 | 7 | DATASET=duskrainy 8 | python test.py --dataset_test voc_2007_train_${DATASET} --checkepoch ${CHECK_ID} --cuda --output_dir ${OUT_DIR} 9 | 10 | DATASET=nightrainy 11 | python test.py --dataset_test voc_2007_train_${DATASET} --checkepoch ${CHECK_ID} --cuda --output_dir ${OUT_DIR} 12 | 13 | DATASET=daytimefoggy 14 | python test.py --dataset_test voc_2007_train_${DATASET} --checkepoch ${CHECK_ID} --cuda --output_dir ${OUT_DIR} 15 | 16 | DATASET=nightclear 17 | ## 18 | python test.py --dataset_test voc_2007_train_${DATASET} --checkepoch ${CHECK_ID} --cuda --output_dir ${OUT_DIR} 19 | -------------------------------------------------------------------------------- /imcls/README.md: -------------------------------------------------------------------------------- 1 | ## SHADE on Domain Generalized Image Classification 2 | 3 | This is the implementation of SHADE on domain generalized image classification. 4 | 5 | ### Setup Environment 6 | 7 | We use python 3.8.5, and pytorch 1.7.1 with cuda 11.0. 
8 | ```shell 9 | conda create -n dgcls python=3.8.5 10 | conda activate dgcls 11 | pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | 16 | ### Data Preparation 17 | 18 | **PACS**: We use the PACS benchmark. 19 | 20 | The dataset is available at [[Download Link](https://drive.google.com/file/d/1m4X4fROCCXMO0lRLrr6Zz9Vb3974NWhE/view)]. 21 | 22 | File structure: 23 | ``` 24 | pacs/ 25 | |–– images/ 26 | |–– splits/ 27 | ``` 28 | 29 | 30 | ### Run 31 | 32 | ERM+SHADE: 33 | ```shell 34 | bash scripts/train_erm.sh 35 | ``` 36 | 37 | RSC+SHADE: 38 | ```shell 39 | bash scripts/train_rsc.sh 40 | ``` 41 | 42 | L2D+SHADE: 43 | ```shell 44 | bash scripts/train_l2d.sh 45 | ``` 46 | 47 | 48 | 49 | ### Citation 50 | 51 | ``` 52 | @inproceedings{zhao2022shade, 53 | title={Style-Hallucinated Dual Consistency Learning for Domain Generalized Semantic Segmentation}, 54 | author={Zhao, Yuyang and Zhong, Zhun and Zhao, Na and Sebe, Nicu and Lee, Gim Hee}, 55 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 56 | year={2022}} 57 | 58 | @article{zhao2022shadevdg, 59 | title={Style-Hallucinated Dual Consistency Learning: A Unified Framework for Visual Domain Generalization}, 60 | author={Zhao, Yuyang and Zhong, Zhun and Zhao, Na and Sebe, Nicu and Lee, Gim Hee}, 61 | journal={arXiv preprint arXiv:2212.09068}, 62 | year={2022}} 63 | ``` 64 | 65 | 66 | ### Acknowledgements 67 | 68 | This project is based on the following open-source projects. We thank their 69 | authors for making the source code publicly available. 70 | 71 | * [RSC](https://github.com/DeLightCMU/RSC) 72 | * [Learning_to_diversify](https://github.com/BUserName/Learning_to_diversify) 73 | -------------------------------------------------------------------------------- /imcls/data/StandardDataset.py: -------------------------------------------------------------------------------- 1 | from torchvision import datasets 2 | from torchvision import transforms 3 | 4 | 5 | def get_dataset(path, mode, image_size): 6 | if mode == "train": 7 | img_transform = transforms.Compose([ 8 | transforms.RandomResizedCrop(image_size, scale=(0.7, 1.0)), 9 | transforms.RandomHorizontalFlip(), 10 | transforms.ToTensor(), 11 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[1/256., 1/256., 1/256.]) # std=[1/256., 1/256., 1/256.] #[0.229, 0.224, 0.225] 12 | ]) 13 | else: 14 | img_transform = transforms.Compose([ 15 | transforms.Resize(image_size), 16 | # transforms.CenterCrop(image_size), 17 | transforms.ToTensor(), 18 | transforms.Normalize([0.485, 0.456, 0.406], std=[1/256., 1/256., 1/256.]) # std=[1/256., 1/256., 1/256.]
19 | ]) 20 | return datasets.ImageFolder(path, transform=img_transform) 21 | -------------------------------------------------------------------------------- /imcls/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/imcls/data/__init__.py -------------------------------------------------------------------------------- /imcls/data/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | import warnings 3 | 4 | from torch.utils.data import Dataset 5 | 6 | # This is a small variant of the ConcatDataset class, which also returns dataset index 7 | from data.JigsawLoader import JigsawTestDatasetMultiple 8 | 9 | 10 | class ConcatDataset(Dataset): 11 | """ 12 | Dataset to concatenate multiple datasets. 13 | Purpose: useful to assemble different existing datasets, possibly 14 | large-scale datasets as the concatenation operation is done in an 15 | on-the-fly manner. 16 | 17 | Arguments: 18 | datasets (sequence): List of datasets to be concatenated 19 | """ 20 | 21 | @staticmethod 22 | def cumsum(sequence): 23 | r, s = [], 0 24 | for e in sequence: 25 | l = len(e) 26 | r.append(l + s) 27 | s += l 28 | return r 29 | 30 | def isMulti(self): 31 | return isinstance(self.datasets[0], JigsawTestDatasetMultiple) 32 | 33 | def __init__(self, datasets): 34 | super(ConcatDataset, self).__init__() 35 | assert len(datasets) > 0, 'datasets should not be an empty iterable' 36 | self.datasets = list(datasets) 37 | self.cumulative_sizes = self.cumsum(self.datasets) 38 | 39 | def __len__(self): 40 | return self.cumulative_sizes[-1] 41 | 42 | def __getitem__(self, idx): 43 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 44 | if dataset_idx == 0: 45 | sample_idx = idx 46 | else: 47 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 48 | return self.datasets[dataset_idx][sample_idx], dataset_idx 49 | 50 | @property 51 | def cummulative_sizes(self): 52 | warnings.warn("cummulative_sizes attribute is renamed to " 53 | "cumulative_sizes", DeprecationWarning, stacklevel=2) 54 | return self.cumulative_sizes 55 | -------------------------------------------------------------------------------- /imcls/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/imcls/models/__init__.py -------------------------------------------------------------------------------- /imcls/models/model_factory.py: -------------------------------------------------------------------------------- 1 | from models import resnet 2 | 3 | nets_map = { 4 | 'resnet18': resnet.resnet18, 5 | 'resnet50': resnet.resnet50, 6 | } 7 | 8 | 9 | def get_network(name): 10 | if name not in nets_map: 11 | raise ValueError('Name of network unknown %s' % name) 12 | 13 | def get_network_fn(**kwargs): 14 | return nets_map[name](**kwargs) 15 | 16 | return get_network_fn 17 | -------------------------------------------------------------------------------- /imcls/models/model_utils.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | 4 | class GradientKillerLayer(Function): 5 | @staticmethod 6 | def forward(ctx, x, **kwargs): 7 | return x.view_as(x) 8 | 9 | @staticmethod 10 | def backward(ctx, grad_output): 11 | return 
None, None 12 | 13 | 14 | class ReverseLayerF(Function): 15 | @staticmethod 16 | def forward(ctx, x, lambda_val): 17 | ctx.lambda_val = lambda_val 18 | 19 | return x.view_as(x) 20 | 21 | @staticmethod 22 | def backward(ctx, grad_output): 23 | output = grad_output.neg() * ctx.lambda_val 24 | 25 | return output, None -------------------------------------------------------------------------------- /imcls/models/style_hallucination.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | import torch.distributions as tdist 5 | import torch.nn.functional as F 6 | 7 | 8 | class StyleHallucination(nn.Module): 9 | ''' 10 | Style Hallucination Module. 11 | Reference: 12 | Zhao et al. Style-Hallucinated Dual Consistency Learning for Domain Generalized Semantic Segmentation. ECCV 2022. 13 | https://arxiv.org/pdf/2204.02548.pdf 14 | ''' 15 | def __init__(self, concentration_coeff, base_style_num): 16 | super().__init__() 17 | self.concentration = torch.tensor([concentration_coeff]*base_style_num, device='cuda') 18 | self._dirichlet = tdist.dirichlet.Dirichlet(concentration=self.concentration) 19 | 20 | self.register_buffer("proto_mean", torch.zeros((base_style_num, base_style_num), requires_grad=False)) 21 | self.register_buffer("proto_std", torch.zeros((base_style_num, base_style_num), requires_grad=False)) 22 | 23 | 24 | def forward(self, x): 25 | B,C,H,W = x.size() 26 | x_mean = x.mean(dim=[2,3], keepdim=True) # B,C,1,1 27 | x_std = x.std(dim=[2,3], keepdim=True) + 1e-7 # B,C,1,1 28 | x_mean, x_std = x_mean.detach(), x_std.detach() 29 | 30 | x_norm = (x - x_mean) / x_std 31 | 32 | combine_weights = self._dirichlet.sample((B,)) # B,C 33 | combine_weights = combine_weights.detach() 34 | 35 | new_mean = combine_weights @ self.proto_mean.data # B,C 36 | new_std = combine_weights @ self.proto_std.data 37 | 38 | x_new = x_norm * new_std.unsqueeze(-1).unsqueeze(-1) + new_mean.unsqueeze(-1).unsqueeze(-1) 39 | 40 | return x, x_new 41 | -------------------------------------------------------------------------------- /imcls/optimizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/imcls/optimizer/__init__.py -------------------------------------------------------------------------------- /imcls/optimizer/optimizer_helper.py: -------------------------------------------------------------------------------- 1 | from torch import optim 2 | 3 | 4 | def get_optim_and_scheduler(network, epochs, lr, train_all, nesterov=False): 5 | if train_all: 6 | params = network.parameters() 7 | else: 8 | params = network.get_params(lr) 9 | optimizer = optim.SGD(params, weight_decay=.0005, momentum=.9, nesterov=nesterov, lr=lr) 10 | #optimizer = optim.Adam(params, lr=lr) 11 | step_size = int(epochs * .8) 12 | scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size) 13 | print("Step size: %d" % step_size) 14 | return optimizer, scheduler 15 | -------------------------------------------------------------------------------- /imcls/requirements.txt: -------------------------------------------------------------------------------- 1 | flake8==3.7.9 2 | yapf==0.29.0 3 | isort==4.3.21 4 | yacs 5 | gdown 6 | tb-nightly 7 | future 8 | scipy 9 | scikit-learn 10 | tqdm 11 | ftfy 12 | regex 13 | wilds==1.2.2 14 | tabulate 15 | ipdb 
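The StyleHallucination module in imcls/models/style_hallucination.py above is the heart of SHADE, so a small smoke test (not repository code) may help make its contract concrete. The import path, the concentration value, and the randomly filled prototypes are all hypothetical; during training the `proto_mean`/`proto_std` buffers are populated from farthest-point-sampled feature statistics (see utils/fps.py).

```python
# Hypothetical smoke test for StyleHallucination; all values are illustrative.
import torch
from models.style_hallucination import StyleHallucination  # assumed import path

# base_style_num must equal the channel dim of the incoming features, since
# the prototype buffers are (base_style_num, base_style_num) matrices.
shm = StyleHallucination(concentration_coeff=1.0 / 64, base_style_num=64)
shm = shm.cuda()  # the module builds its Dirichlet on 'cuda', so a GPU is required

# Fill the prototype buffers with random statistics just for this demo.
shm.proto_mean.normal_()
shm.proto_std.uniform_(0.1, 1.0)

feat = torch.randn(8, 64, 56, 56, device='cuda')  # B,C,H,W with C == base_style_num
orig, hallucinated = shm(feat)
print(orig.shape, hallucinated.shape)  # both torch.Size([8, 64, 56, 56])
```

`hallucinated` keeps the normalized content of `feat` but re-styles it with a Dirichlet mixture of the stored prototypes, giving the second view that SHADE's consistency losses consume.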
-------------------------------------------------------------------------------- /imcls/scripts/train_erm.sh: --------------------------------------------------------------------------------
1 | PY_ARGS=${@:1}
2 | 
3 | python -W ignore train_shade.py --SHM --norsc --sets a-all ${PY_ARGS}
4 | 
5 | python -W ignore train_shade.py --SHM --norsc --sets c-all ${PY_ARGS}
6 | 
7 | python -W ignore train_shade.py --SHM --norsc --sets p-all ${PY_ARGS}
8 | 
9 | python -W ignore train_shade.py --SHM --norsc --sets s-all ${PY_ARGS}
-------------------------------------------------------------------------------- /imcls/scripts/train_l2d.sh: --------------------------------------------------------------------------------
1 | PY_ARGS=${@:1}
2 | 
3 | python -W ignore train_shade_l2d.py --task PACS --SHM --sets a-all ${PY_ARGS}
4 | 
5 | python -W ignore train_shade_l2d.py --task PACS --SHM --sets c-all ${PY_ARGS}
6 | 
7 | python -W ignore train_shade_l2d.py --task PACS --SHM --sets p-all ${PY_ARGS}
8 | 
9 | python -W ignore train_shade_l2d.py --task PACS --SHM --sets s-all ${PY_ARGS}
10 | 
-------------------------------------------------------------------------------- /imcls/scripts/train_rsc.sh: --------------------------------------------------------------------------------
1 | PY_ARGS=${@:1}
2 | 
3 | python -W ignore train_shade.py --SHM --sets a-all ${PY_ARGS}
4 | 
5 | python -W ignore train_shade.py --SHM --sets c-all ${PY_ARGS}
6 | 
7 | python -W ignore train_shade.py --SHM --sets p-all ${PY_ARGS}
8 | 
9 | python -W ignore train_shade.py --SHM --sets s-all ${PY_ARGS}
-------------------------------------------------------------------------------- /imcls/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/imcls/utils/__init__.py -------------------------------------------------------------------------------- /imcls/utils/fps.py: --------------------------------------------------------------------------------
1 | 
2 | import torch
3 | import numpy as np
4 | 
5 | def farthest_point_sample_tensor(point, npoint):
6 |     """
7 |     A sampling algorithm from the `"PointNet++: Deep Hierarchical Feature
8 |     Learning on Point Sets in a Metric Space"
9 |     <https://arxiv.org/abs/1706.02413>`_ paper, which iteratively samples the
10 |     point most distant from the already selected points.
11 | 
12 |     Input:
13 |         point: point data for sampling, [N, D]
14 |         npoint: number of samples
15 |     Return:
16 |         point: sampled points, [npoint, D]; centroids: sampled point indices, [npoint]
17 |     """
18 |     device = point.device
19 |     N, D = point.shape
20 |     xyz = point
21 |     centroids = torch.zeros((npoint,), device=device)  # selected indices
22 |     distance = torch.ones((N,), device=device) * 1e10  # distance to nearest selected point
23 |     farthest = np.random.randint(0, N)  # random initial centroid
24 |     for i in range(npoint):
25 |         centroids[i] = farthest
26 |         centroid = xyz[farthest, :]
27 |         dist = torch.sum((xyz - centroid) ** 2, dim=-1)
28 |         mask = dist < distance
29 |         distance[mask] = dist[mask]
30 |         farthest = torch.argmax(distance, dim=-1)
31 |     point = point[centroids.long()]
32 |     return point, centroids.long()
33 | 
34 | 
35 | 
36 | 
37 | 
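A quick sanity check of farthest_point_sample_tensor above — a minimal sketch assuming it is run from imcls/ so that utils.fps is importable, as the training scripts do (the first centroid is picked at random, so results vary across runs):

import torch
from utils.fps import farthest_point_sample_tensor  # repo-local module above

pts = torch.randn(1000, 2)                     # N=1000 points, D=2
sampled, idx = farthest_point_sample_tensor(pts, npoint=16)
print(sampled.shape, idx.shape)                # torch.Size([16, 2]) torch.Size([16])
assert torch.equal(sampled, pts[idx])          # sampled points are just pts indexed by idx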
-------------------------------------------------------------------------------- /imcls/utils/loss_utils.py: --------------------------------------------------------------------------------
1 | 
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import ipdb
7 | 
8 | def masked_feat_dist(f1, f2, mask=None):
9 |     '''
10 |     f1: seg model features 2B,C,H,W
11 |     f2: imgnet model features 2B,C,H,W
12 |     '''
13 | 
14 |     feat_diff = F.mse_loss(f1, f2, reduction='none') # 2B,C,H,W
15 |     pw_feat_dist = feat_diff.mean(1)  # mean over channels -> per-pixel distance
16 | 
17 |     if mask is not None:
18 |         pw_feat_dist = pw_feat_dist[mask.squeeze(1)]
19 |     return torch.mean(pw_feat_dist)
20 | 
21 | imnet_feature_dist_lambda=0.005
22 | imnet_feature_dist_classes=[6, 7, 11, 12, 13, 14, 15, 16, 17, 18]
23 | imnet_feature_dist_scale_min_ratio=0.75
24 | 
25 | def calc_feat_dist(gt, feat_imnet, feat, num_classes):
26 |     # lay = -1
27 |     '''
28 |     gt B,H,W
29 |     feat_imnet B,C,H,W
30 |     feat B,C,H,W
31 |     '''
32 |     if imnet_feature_dist_classes is not None:
33 |         fdclasses = torch.tensor(imnet_feature_dist_classes, device=gt.device)
34 |         scale_factor = gt.shape[-1] // feat.shape[-1]
35 |         gt_rescaled = downscale_label_ratio(gt, scale_factor,
36 |                                             imnet_feature_dist_scale_min_ratio,
37 |                                             num_classes,
38 |                                             255).long().detach()
39 |         # ipdb.set_trace()
40 |         fdist_mask = torch.any(gt_rescaled[..., None] == fdclasses, -1) # ...None == unsqueeze(-1)
41 |         if not fdist_mask.sum():
42 |             return torch.tensor(0., device=gt.device)
43 |         feat_dist = masked_feat_dist(feat, feat_imnet, fdist_mask)
44 | 
45 |     else:
46 |         feat_dist = masked_feat_dist(feat, feat_imnet)
47 | 
48 |     if torch.isnan(feat_dist):
49 |         ipdb.set_trace()  # debug hook
50 | 
51 |     return feat_dist
52 | 
53 | 
54 | 
55 | def downscale_label_ratio(gt,
56 |                           scale_factor,
57 |                           min_ratio,
58 |                           n_classes,
59 |                           ignore_index=255):
60 |     assert scale_factor > 1
61 |     bs, orig_h, orig_w = gt.shape
62 |     # assert orig_c == 1
63 |     trg_h, trg_w = orig_h // scale_factor, orig_w // scale_factor
64 |     ignore_substitute = n_classes
65 | 
66 |     out = gt.clone()  # otherwise the next line would modify the original gt
67 |     out[out == ignore_index] = ignore_substitute
68 |     out = F.one_hot(
69 |         out, num_classes=n_classes + 1).permute(0, 3, 1, 2)
70 |     assert list(out.shape) == [bs, n_classes + 1, orig_h, orig_w], out.shape
71 |     out = F.avg_pool2d(out.float(), kernel_size=scale_factor)
72 |     gt_ratio, out = torch.max(out, dim=1, keepdim=True)
73 |     out[out == ignore_substitute] = ignore_index
74 |     out[gt_ratio < min_ratio] = ignore_index
75 |     assert list(out.shape) == [bs, 1, trg_h, trg_w], out.shape
76 |     return out
77 | 
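downscale_label_ratio is the subtle piece here: labels are one-hot encoded, average-pooled, and a downscaled cell only keeps the majority class if that class covers at least min_ratio of the cell. A hand-checked sketch, again assuming the module is importable as utils.loss_utils:

import torch
from utils.loss_utils import downscale_label_ratio  # repo-local module above

gt = torch.zeros(1, 8, 8, dtype=torch.long)  # B,H,W label map, all class 0
gt[:, :, 5:] = 1                             # rightmost 3 columns are class 1
small = downscale_label_ratio(gt, scale_factor=4, min_ratio=0.8, n_classes=2)
# Each output cell covers a 4x4 block. Left blocks are pure class 0 -> 0;
# right blocks are 75% class 1, below the 80% purity bar -> ignore (255).
print(small.squeeze())  # tensor([[  0, 255], [  0, 255]])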
-------------------------------------------------------------------------------- /imcls/utils/tf_logger.py: --------------------------------------------------------------------------------
1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
2 | # Uses the tf.compat.v1 summary API throughout so it also runs under TF 2.x;
3 | # PIL replaces scipy.misc.toimage, which has been removed from SciPy.
4 | import tensorflow as tf
5 | import numpy as np
6 | from io import BytesIO
7 | from PIL import Image
8 | 
9 | 
10 | class TFLogger(object):
11 | 
12 |     def __init__(self, log_dir):
13 |         """Create a summary writer logging to log_dir."""
14 |         self.writer = tf.compat.v1.summary.FileWriter(log_dir)
15 | 
16 |     def scalar_summary(self, tag, value, step):
17 |         """Log a scalar variable."""
18 |         summary = tf.compat.v1.Summary(value=[tf.compat.v1.Summary.Value(tag=tag, simple_value=value)])
19 |         self.writer.add_summary(summary, step)
20 | 
21 |     def image_summary(self, tag, images, step):
22 |         """Log a list of images."""
23 | 
24 |         img_summaries = []
25 |         for i, img in enumerate(images):
26 |             # Encode the image as PNG bytes
27 |             s = BytesIO()
28 |             Image.fromarray(np.uint8(img)).save(s, format="PNG")
29 | 
30 |             # Create an Image object
31 |             img_sum = tf.compat.v1.Summary.Image(encoded_image_string=s.getvalue(),
32 |                                                  height=img.shape[0],
33 |                                                  width=img.shape[1])
34 |             # Create a Summary value
35 |             img_summaries.append(tf.compat.v1.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
36 | 
37 |         # Create and write Summary
38 |         summary = tf.compat.v1.Summary(value=img_summaries)
39 |         self.writer.add_summary(summary, step)
40 | 
41 |     def histo_summary(self, tag, values, step, bins=1000):
42 |         """Log a histogram of the tensor of values."""
43 | 
44 |         # Create a histogram using numpy
45 |         counts, bin_edges = np.histogram(values, bins=bins)
46 | 
47 |         # Fill the fields of the histogram proto
48 |         hist = tf.compat.v1.HistogramProto()
49 |         hist.min = float(np.min(values))
50 |         hist.max = float(np.max(values))
51 |         hist.num = int(np.prod(values.shape))
52 |         hist.sum = float(np.sum(values))
53 |         hist.sum_squares = float(np.sum(values**2))
54 | 
55 |         # Drop the start of the first bin
56 |         bin_edges = bin_edges[1:]
57 | 
58 |         # Add bin edges and counts
59 |         for edge in bin_edges:
60 |             hist.bucket_limit.append(edge)
61 |         for c in counts:
62 |             hist.bucket.append(c)
63 | 
64 |         # Create and write Summary
65 |         summary = tf.compat.v1.Summary(value=[tf.compat.v1.Summary.Value(tag=tag, histo=hist)])
66 |         self.writer.add_summary(summary, step)
67 |         self.writer.flush()
68 | 
-------------------------------------------------------------------------------- /imcls/utils/vis.py: --------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | 
3 | def view_training(logger, title):
4 |     fig, ax1 = plt.subplots()
5 |     for k,v in logger.losses.items():
6 |         ax1.plot(v, label=k)
7 |     l = len(v)
8 |     updates = l / len(logger.val_acc["class"])
9 |     plt.legend()
10 |     ax2 = ax1.twinx()
11 |     for k,v in logger.val_acc.items():
12 | 
ax2.plot(range(0,l,int(updates)), v, label="Test %s" % k) 13 | plt.legend() 14 | plt.title(title + " last acc %.2f:" % logger.val_acc["class"][-1]) 15 | plt.show() -------------------------------------------------------------------------------- /sseg/configs/_base_/datasets/cityscapes_half_512x512.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | # Modifications: Half image resolution 3 | 4 | # dataset settings 5 | dataset_type = 'CityscapesDataset' 6 | data_root = 'data/cityscapes/' 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | crop_size = (512, 512) 10 | train_pipeline = [ 11 | dict(type='LoadImageFromFile'), 12 | dict(type='LoadAnnotations'), 13 | dict(type='Resize', img_scale=(1024, 512)), 14 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 15 | dict(type='RandomFlip', prob=0.5), 16 | dict(type='PhotoMetricDistortion'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 21 | ] 22 | test_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(1024, 512), 27 | # MultiScaleFlipAug is disabled by not providing img_ratios and 28 | # setting flip=False 29 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 30 | flip=False, 31 | transforms=[ 32 | dict(type='Resize', keep_ratio=True), 33 | dict(type='RandomFlip'), 34 | dict(type='Normalize', **img_norm_cfg), 35 | dict(type='ImageToTensor', keys=['img']), 36 | dict(type='Collect', keys=['img']), 37 | ]) 38 | ] 39 | data = dict( 40 | samples_per_gpu=2, 41 | workers_per_gpu=4, 42 | train=dict( 43 | type=dataset_type, 44 | data_root=data_root, 45 | img_dir='leftImg8bit/train', 46 | ann_dir='gtFine/train', 47 | pipeline=train_pipeline), 48 | val=dict( 49 | type=dataset_type, 50 | data_root=data_root, 51 | img_dir='leftImg8bit/val', 52 | ann_dir='gtFine/val', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | img_dir='leftImg8bit/val', 58 | ann_dir='gtFine/val', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /sseg/configs/_base_/datasets/gta_to_cityscapes_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = '/ssd/yyzhao/data/CityScapes' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(1280, 720)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1024, 512), 24 | # MultiScaleFlipAug is disabled by not providing img_ratios and 25 | # setting flip=False 26 | # img_ratios=[0.5, 0.75, 
1.0, 1.25, 1.5, 1.75], 27 | flip=False, 28 | transforms=[ 29 | dict(type='Resize', keep_ratio=True), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='ImageToTensor', keys=['img']), 33 | dict(type='Collect', keys=['img']), 34 | ]) 35 | ] 36 | data = dict( 37 | samples_per_gpu=2, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='GTADataset', 41 | data_root='/ssd/yyzhao/data/GTAV', 42 | img_dir='images', 43 | ann_dir='labels', 44 | pipeline=train_pipeline), 45 | val=dict( 46 | type='CityscapesDataset', 47 | data_root='/ssd/yyzhao/data/CityScapes', 48 | img_dir='leftImg8bit/val', 49 | ann_dir='gtFine/val', 50 | pipeline=test_pipeline), 51 | test=dict( 52 | type='CityscapesDataset', 53 | data_root='/ssd/yyzhao/data/CityScapes', 54 | img_dir='leftImg8bit/val', 55 | ann_dir='gtFine/val', 56 | pipeline=test_pipeline)) 57 | -------------------------------------------------------------------------------- /sseg/configs/_base_/datasets/gta_to_cityscapes_rsc_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = '/ssd/yyzhao/data/CityScapes' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(1280, 720)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | dict(type='PhotoMetricDistortion'), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1024, 512), 24 | # MultiScaleFlipAug is disabled by not providing img_ratios and 25 | # setting flip=False 26 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 27 | flip=False, 28 | transforms=[ 29 | dict(type='Resize', keep_ratio=True), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='ImageToTensor', keys=['img']), 33 | dict(type='Collect', keys=['img']), 34 | ]) 35 | ] 36 | data = dict( 37 | samples_per_gpu=2, 38 | workers_per_gpu=4, 39 | train=dict( 40 | type='DGRSCDataset', 41 | source=dict( 42 | type='GTADataset', 43 | data_root='/ssd/yyzhao/data/GTAV', 44 | img_dir='images', 45 | ann_dir='labels', 46 | pipeline=train_pipeline)), 47 | 48 | val=dict( 49 | type='CityscapesDataset', 50 | data_root='/ssd/yyzhao/data/CityScapes', 51 | img_dir='leftImg8bit/val', 52 | ann_dir='gtFine/val', 53 | pipeline=test_pipeline), 54 | test=dict( 55 | type='CityscapesDataset', 56 | data_root='/ssd/yyzhao/data/CityScapes', 57 | img_dir='leftImg8bit/val', 58 | ann_dir='gtFine/val', 59 | pipeline=test_pipeline)) 60 | -------------------------------------------------------------------------------- /sseg/configs/_base_/datasets/uda_cityscapes_to_acdc_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (512, 512) 5 | cityscapes_train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(1024, 512)), 
9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | acdc_train_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='Resize', img_scale=(960, 540)), # original 1920x1080 20 | dict(type='RandomCrop', crop_size=crop_size), 21 | dict(type='RandomFlip', prob=0.5), 22 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 25 | dict(type='DefaultFormatBundle'), 26 | dict(type='Collect', keys=['img']), 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict( 31 | type='MultiScaleFlipAug', 32 | img_scale=(960, 540), # original 1920x1080 33 | # MultiScaleFlipAug is disabled by not providing img_ratios and 34 | # setting flip=False 35 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 36 | flip=False, 37 | transforms=[ 38 | dict(type='Resize', keep_ratio=True), 39 | dict(type='RandomFlip'), 40 | dict(type='Normalize', **img_norm_cfg), 41 | dict(type='ImageToTensor', keys=['img']), 42 | dict(type='Collect', keys=['img']), 43 | ]) 44 | ] 45 | data = dict( 46 | samples_per_gpu=2, 47 | workers_per_gpu=4, 48 | train=dict( 49 | type='UDADataset', 50 | source=dict( 51 | type='CityscapesDataset', 52 | data_root='data/cityscapes/', 53 | img_dir='leftImg8bit/train', 54 | ann_dir='gtFine/train', 55 | pipeline=cityscapes_train_pipeline), 56 | target=dict( 57 | type='ACDCDataset', 58 | data_root='data/acdc/', 59 | img_dir='rgb_anon/train', 60 | ann_dir='gt/train', 61 | pipeline=acdc_train_pipeline)), 62 | val=dict( 63 | type='ACDCDataset', 64 | data_root='data/acdc/', 65 | img_dir='rgb_anon/val', 66 | ann_dir='gt/val', 67 | pipeline=test_pipeline), 68 | test=dict( 69 | type='ACDCDataset', 70 | data_root='data/acdc/', 71 | img_dir='rgb_anon/val', 72 | ann_dir='gt/val', 73 | pipeline=test_pipeline)) 74 | -------------------------------------------------------------------------------- /sseg/configs/_base_/datasets/uda_cityscapes_to_darkzurich_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | img_norm_cfg = dict( 3 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 4 | crop_size = (512, 512) 5 | cityscapes_train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations'), 8 | dict(type='Resize', img_scale=(1024, 512)), 9 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 10 | dict(type='RandomFlip', prob=0.5), 11 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 16 | ] 17 | dark_zurich_train_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict(type='Resize', img_scale=(960, 540)), # original 1920x1080 20 | dict(type='RandomCrop', crop_size=crop_size), 21 | dict(type='RandomFlip', prob=0.5), 22 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 23 | dict(type='Normalize', **img_norm_cfg), 24 | dict(type='Pad', size=crop_size, 
pad_val=0, seg_pad_val=255), 25 | dict(type='DefaultFormatBundle'), 26 | dict(type='Collect', keys=['img']), 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict( 31 | type='MultiScaleFlipAug', 32 | img_scale=(960, 540), # original 1920x1080 33 | # MultiScaleFlipAug is disabled by not providing img_ratios and 34 | # setting flip=False 35 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 36 | flip=False, 37 | transforms=[ 38 | dict(type='Resize', keep_ratio=True), 39 | dict(type='RandomFlip'), 40 | dict(type='Normalize', **img_norm_cfg), 41 | dict(type='ImageToTensor', keys=['img']), 42 | dict(type='Collect', keys=['img']), 43 | ]) 44 | ] 45 | data = dict( 46 | samples_per_gpu=2, 47 | workers_per_gpu=4, 48 | train=dict( 49 | type='UDADataset', 50 | source=dict( 51 | type='CityscapesDataset', 52 | data_root='data/cityscapes/', 53 | img_dir='leftImg8bit/train', 54 | ann_dir='gtFine/train', 55 | pipeline=cityscapes_train_pipeline), 56 | target=dict( 57 | type='DarkZurichDataset', 58 | data_root='data/dark_zurich/', 59 | img_dir='rgb_anon/train/night/', 60 | ann_dir='gt/train/night/', 61 | pipeline=dark_zurich_train_pipeline)), 62 | val=dict( 63 | type='DarkZurichDataset', 64 | data_root='data/dark_zurich/', 65 | img_dir='rgb_anon/val', 66 | ann_dir='gt/val', 67 | pipeline=test_pipeline), 68 | test=dict( 69 | type='DarkZurichDataset', 70 | data_root='data/dark_zurich/', 71 | img_dir='rgb_anon/val', 72 | ann_dir='gt/val', 73 | pipeline=test_pipeline)) 74 | -------------------------------------------------------------------------------- /sseg/configs/_base_/datasets/uda_gta_to_cityscapes_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = '/ssd/yyzhao/data/CityScapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | gta_train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(1280, 720)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | cityscapes_train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='LoadAnnotations'), 22 | dict(type='Resize', img_scale=(1024, 512)), 23 | dict(type='RandomCrop', crop_size=crop_size), 24 | dict(type='RandomFlip', prob=0.5), 25 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 28 | dict(type='DefaultFormatBundle'), 29 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 30 | ] 31 | test_pipeline = [ 32 | dict(type='LoadImageFromFile'), 33 | dict( 34 | type='MultiScaleFlipAug', 35 | img_scale=(1024, 512), 36 | # MultiScaleFlipAug is disabled by not providing img_ratios and 37 | # setting flip=False 38 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 39 | flip=False, 40 | transforms=[ 41 | dict(type='Resize', keep_ratio=True), 42 | dict(type='RandomFlip'), 43 | dict(type='Normalize', **img_norm_cfg), 44 | dict(type='ImageToTensor', keys=['img']), 45 | dict(type='Collect', 
keys=['img']), 46 | ]) 47 | ] 48 | data = dict( 49 | samples_per_gpu=2, 50 | workers_per_gpu=4, 51 | train=dict( 52 | type='UDADataset', 53 | source=dict( 54 | type='GTADataset', 55 | data_root='/ssd/yyzhao/data/GTAV', 56 | img_dir='images', 57 | ann_dir='labels', 58 | pipeline=gta_train_pipeline), 59 | target=dict( 60 | type='CityscapesDataset', 61 | data_root='/ssd/yyzhao/data/CityScapes', 62 | img_dir='leftImg8bit/train', 63 | ann_dir='gtFine/train', 64 | pipeline=cityscapes_train_pipeline)), 65 | val=dict( 66 | type='CityscapesDataset', 67 | data_root='/ssd/yyzhao/data/CityScapes', 68 | img_dir='leftImg8bit/val', 69 | ann_dir='gtFine/val', 70 | pipeline=test_pipeline), 71 | test=dict( 72 | type='CityscapesDataset', 73 | data_root='/ssd/yyzhao/data/CityScapes', 74 | img_dir='leftImg8bit/val', 75 | ann_dir='gtFine/val', 76 | pipeline=test_pipeline)) 77 | -------------------------------------------------------------------------------- /sseg/configs/_base_/datasets/uda_synthia_to_cityscapes_512x512.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'CityscapesDataset' 3 | data_root = 'data/cityscapes/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | crop_size = (512, 512) 7 | synthia_train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadAnnotations'), 10 | dict(type='Resize', img_scale=(1280, 760)), 11 | dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), 12 | dict(type='RandomFlip', prob=0.5), 13 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 18 | ] 19 | cityscapes_train_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='LoadAnnotations'), 22 | dict(type='Resize', img_scale=(1024, 512)), 23 | dict(type='RandomCrop', crop_size=crop_size), 24 | dict(type='RandomFlip', prob=0.5), 25 | # dict(type='PhotoMetricDistortion'), # is applied later in dacs.py 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), 28 | dict(type='DefaultFormatBundle'), 29 | dict(type='Collect', keys=['img', 'gt_semantic_seg']), 30 | ] 31 | test_pipeline = [ 32 | dict(type='LoadImageFromFile'), 33 | dict( 34 | type='MultiScaleFlipAug', 35 | img_scale=(1024, 512), 36 | # MultiScaleFlipAug is disabled by not providing img_ratios and 37 | # setting flip=False 38 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 39 | flip=False, 40 | transforms=[ 41 | dict(type='Resize', keep_ratio=True), 42 | dict(type='RandomFlip'), 43 | dict(type='Normalize', **img_norm_cfg), 44 | dict(type='ImageToTensor', keys=['img']), 45 | dict(type='Collect', keys=['img']), 46 | ]) 47 | ] 48 | data = dict( 49 | samples_per_gpu=2, 50 | workers_per_gpu=4, 51 | train=dict( 52 | type='UDADataset', 53 | source=dict( 54 | type='SynthiaDataset', 55 | data_root='data/synthia/', 56 | img_dir='RGB', 57 | ann_dir='GT/LABELS', 58 | pipeline=synthia_train_pipeline), 59 | target=dict( 60 | type='CityscapesDataset', 61 | data_root='data/cityscapes/', 62 | img_dir='leftImg8bit/train', 63 | ann_dir='gtFine/train', 64 | pipeline=cityscapes_train_pipeline)), 65 | val=dict( 66 | type='CityscapesDataset', 67 | data_root='data/cityscapes/', 68 | img_dir='leftImg8bit/val', 69 | ann_dir='gtFine/val', 70 | 
pipeline=test_pipeline), 71 | test=dict( 72 | type='CityscapesDataset', 73 | data_root='data/cityscapes/', 74 | img_dir='leftImg8bit/val', 75 | ann_dir='gtFine/val', 76 | pipeline=test_pipeline)) 77 | -------------------------------------------------------------------------------- /sseg/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | # yapf:disable 4 | log_config = dict( 5 | interval=50, 6 | hooks=[ 7 | dict(type='TextLoggerHook', by_epoch=False), 8 | # dict(type='TensorboardLoggerHook') 9 | ]) 10 | # yapf:enable 11 | dist_params = dict(backend='nccl') 12 | log_level = 'INFO' 13 | load_from = None 14 | resume_from = None 15 | workflow = [('train', 1)] 16 | cudnn_benchmark = True 17 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/daformer_aspp_mitb5.py: -------------------------------------------------------------------------------- 1 | # DAFormer w/o DSC in Tab. 7 2 | 3 | _base_ = ['daformer_conv1_mitb5.py'] 4 | 5 | norm_cfg = dict(type='BN', requires_grad=True) 6 | model = dict( 7 | decode_head=dict( 8 | decoder_params=dict( 9 | fusion_cfg=dict( 10 | _delete_=True, 11 | type='aspp', 12 | sep=False, 13 | dilations=(1, 6, 12, 18), 14 | pool=False, 15 | act_cfg=dict(type='ReLU'), 16 | norm_cfg=norm_cfg)))) 17 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/daformer_conv1_mitb5.py: -------------------------------------------------------------------------------- 1 | # This is the same as SegFormer but with 256 embed_dims 2 | # SegF. with C_e=256 in Tab. 7 3 | 4 | # model settings 5 | norm_cfg = dict(type='BN', requires_grad=True) 6 | find_unused_parameters = True 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained='pretrained/mit_b5.pth', 10 | backbone=dict(type='mit_b5', style='pytorch'), 11 | decode_head=dict( 12 | type='DAFormerHead', 13 | in_channels=[64, 128, 320, 512], 14 | in_index=[0, 1, 2, 3], 15 | channels=256, 16 | dropout_ratio=0.1, 17 | num_classes=19, 18 | norm_cfg=norm_cfg, 19 | align_corners=False, 20 | decoder_params=dict( 21 | embed_dims=256, 22 | embed_cfg=dict(type='mlp', act_cfg=None, norm_cfg=None), 23 | embed_neck_cfg=dict(type='mlp', act_cfg=None, norm_cfg=None), 24 | fusion_cfg=dict( 25 | type='conv', 26 | kernel_size=1, 27 | act_cfg=dict(type='ReLU'), 28 | norm_cfg=norm_cfg), 29 | ), 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | # model training and testing settings 33 | train_cfg=dict(), 34 | test_cfg=dict(mode='whole')) 35 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/daformer_isa_mitb5.py: -------------------------------------------------------------------------------- 1 | # ISA Fusion in Tab. 
7 2 | 3 | _base_ = ['daformer_conv1_mitb5.py'] 4 | 5 | norm_cfg = dict(type='BN', requires_grad=True) 6 | model = dict( 7 | decode_head=dict( 8 | decoder_params=dict( 9 | fusion_cfg=dict( 10 | _delete_=True, 11 | type='isa', 12 | isa_channels=256, 13 | key_query_num_convs=1, 14 | down_factor=(8, 8), 15 | act_cfg=dict(type='ReLU'), 16 | norm_cfg=norm_cfg)))) 17 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/daformer_sepaspp_bottleneck_mitb5.py: -------------------------------------------------------------------------------- 1 | # Context only at F4 in Tab. 7 2 | 3 | _base_ = ['daformer_conv1_mitb5.py'] 4 | 5 | norm_cfg = dict(type='BN', requires_grad=True) 6 | model = dict( 7 | neck=dict(type='SegFormerAdapter', scales=[8]), 8 | decode_head=dict( 9 | decoder_params=dict( 10 | embed_neck_cfg=dict( 11 | _delete_=True, 12 | type='rawconv_and_aspp', 13 | kernel_size=1, 14 | sep=True, 15 | dilations=(1, 6, 12, 18), 16 | pool=False, 17 | act_cfg=dict(type='ReLU'), 18 | norm_cfg=norm_cfg)))) 19 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/daformer_sepaspp_mitb5.py: -------------------------------------------------------------------------------- 1 | # DAFormer (with context-aware feature fusion) in Tab. 7 2 | 3 | _base_ = ['daformer_conv1_mitb5.py'] 4 | 5 | norm_cfg = dict(type='BN', requires_grad=True) 6 | model = dict( 7 | decode_head=dict( 8 | decoder_params=dict( 9 | fusion_cfg=dict( 10 | _delete_=True, 11 | type='aspp', 12 | sep=True, 13 | dilations=(1, 6, 12, 18), 14 | pool=False, 15 | act_cfg=dict(type='ReLU'), 16 | norm_cfg=norm_cfg)))) 17 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/daformer_sepaspp_mitb5_shm.py: -------------------------------------------------------------------------------- 1 | # DAFormer (with context-aware feature fusion) in Tab. 
7 2 | 3 | _base_ = ['daformer_sepaspp_mitb5.py'] 4 | 5 | norm_cfg = dict(type='BN', requires_grad=True) 6 | model = dict( 7 | backbone=dict(type='mit_b5', 8 | style='pytorch', 9 | shm_cfg=dict( 10 | concentration_coeff=0.0156, 11 | base_style_num=64, 12 | layer=1, 13 | )), 14 | ) 15 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/danet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | # Modifications: 3 | # - BN instead of SyncBN 4 | # - Removed auxiliary decoder 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN', requires_grad=True) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='open-mmlab://resnet50_v1c', 11 | backbone=dict( 12 | type='ResNetV1c', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | dilations=(1, 1, 2, 4), 17 | strides=(1, 2, 1, 1), 18 | norm_cfg=norm_cfg, 19 | norm_eval=False, 20 | style='pytorch', 21 | contract_dilation=True), 22 | decode_head=dict( 23 | type='DAHead', 24 | in_channels=2048, 25 | in_index=3, 26 | channels=512, 27 | pam_channels=64, 28 | dropout_ratio=0.1, 29 | num_classes=19, 30 | norm_cfg=norm_cfg, 31 | align_corners=False, 32 | loss_decode=dict( 33 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 34 | # model training and testing settings 35 | train_cfg=dict(), 36 | test_cfg=dict(mode='whole')) 37 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/deeplabv2_r50-d8.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=True) 3 | model = dict( 4 | type='EncoderDecoder', 5 | pretrained='open-mmlab://resnet50_v1c', 6 | backbone=dict( 7 | type='ResNetV1c', 8 | depth=50, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | dilations=(1, 1, 2, 4), 12 | strides=(1, 2, 1, 1), 13 | norm_cfg=norm_cfg, 14 | norm_eval=False, 15 | style='pytorch', 16 | contract_dilation=True), 17 | decode_head=dict( 18 | type='DLV2Head', 19 | in_channels=2048, 20 | in_index=3, 21 | dilations=(6, 12, 18, 24), 22 | num_classes=19, 23 | align_corners=False, 24 | init_cfg=dict( 25 | type='Normal', std=0.01, override=dict(name='aspp_modules')), 26 | loss_decode=dict( 27 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 28 | # model training and testing settings 29 | train_cfg=dict(), 30 | test_cfg=dict(mode='whole')) 31 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/deeplabv2red_r50-d8.py: -------------------------------------------------------------------------------- 1 | _base_ = ['deeplabv2_r50-d8.py'] 2 | # Previous UDA methods only use the dilation rates 6 and 12 for DeepLabV2. 
3 | # This might be a bit hidden as it is caused by a return statement WITHIN 4 | # a loop over the dilation rates: 5 | # https://github.com/wasidennis/AdaptSegNet/blob/fca9ff0f09dab45d44bf6d26091377ac66607028/model/deeplab.py#L116 6 | model = dict(decode_head=dict(dilations=(6, 12))) 7 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/deeplabv3plus_r50-d8.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | # Modifications: 3 | # - BN instead of SyncBN 4 | # - Removed auxiliary decoder 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN', requires_grad=True) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='open-mmlab://resnet50_v1c', 11 | backbone=dict( 12 | type='ResNetV1c', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | dilations=(1, 1, 2, 4), 17 | strides=(1, 2, 1, 1), 18 | norm_cfg=norm_cfg, 19 | norm_eval=False, 20 | style='pytorch', 21 | contract_dilation=True), 22 | decode_head=dict( 23 | type='DepthwiseSeparableASPPHead', 24 | in_channels=2048, 25 | in_index=3, 26 | channels=512, 27 | dilations=(1, 12, 24, 36), 28 | c1_in_channels=256, 29 | c1_channels=48, 30 | dropout_ratio=0.1, 31 | num_classes=19, 32 | norm_cfg=norm_cfg, 33 | align_corners=False, 34 | loss_decode=dict( 35 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 36 | # model training and testing settings 37 | train_cfg=dict(), 38 | test_cfg=dict(mode='whole')) 39 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/isanet_r50-d8.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | # Modifications: 3 | # - BN instead of SyncBN 4 | # - Removed auxiliary decoder 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN', requires_grad=True) 8 | model = dict( 9 | type='EncoderDecoder', 10 | pretrained='open-mmlab://resnet50_v1c', 11 | backbone=dict( 12 | type='ResNetV1c', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | dilations=(1, 1, 2, 4), 17 | strides=(1, 2, 1, 1), 18 | norm_cfg=norm_cfg, 19 | norm_eval=False, 20 | style='pytorch', 21 | contract_dilation=True), 22 | decode_head=dict( 23 | type='ISAHead', 24 | in_channels=2048, 25 | in_index=3, 26 | channels=512, 27 | isa_channels=256, 28 | down_factor=(8, 8), 29 | dropout_ratio=0.1, 30 | num_classes=19, 31 | norm_cfg=norm_cfg, 32 | align_corners=False, 33 | loss_decode=dict( 34 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 35 | # model training and testing settings 36 | train_cfg=dict(), 37 | test_cfg=dict(mode='whole')) 38 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/segformer.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/NVlabs/SegFormer 2 | # Modifications: BN instead of SyncBN 3 | 4 | # model settings 5 | norm_cfg = dict(type='BN', requires_grad=True) 6 | find_unused_parameters = True 7 | model = dict( 8 | type='EncoderDecoder', 9 | pretrained=None, 10 | backbone=dict(type='IMTRv21_5', style='pytorch'), 11 | decode_head=dict( 12 | type='SegFormerHead', 13 | in_channels=[64, 128, 320, 512], 14 | in_index=[0, 1, 2, 3], 15 | channels=128, 16 | dropout_ratio=0.1, 17 | num_classes=19, 18 | 
norm_cfg=norm_cfg, 19 | align_corners=False, 20 | decoder_params=dict(), 21 | loss_decode=dict( 22 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 23 | # model training and testing settings 24 | train_cfg=dict(), 25 | test_cfg=dict(mode='whole')) 26 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/segformer_b5.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/NVlabs/SegFormer 2 | # Modifications: BN instead of SyncBN 3 | 4 | _base_ = ['../../_base_/models/segformer.py'] 5 | 6 | # model settings 7 | norm_cfg = dict(type='BN', requires_grad=True) 8 | find_unused_parameters = True 9 | model = dict( 10 | type='EncoderDecoder', 11 | pretrained='pretrained/mit_b5.pth', 12 | backbone=dict(type='mit_b5', style='pytorch'), 13 | decode_head=dict( 14 | type='SegFormerHead', 15 | in_channels=[64, 128, 320, 512], 16 | in_index=[0, 1, 2, 3], 17 | channels=128, 18 | dropout_ratio=0.1, 19 | num_classes=19, 20 | norm_cfg=norm_cfg, 21 | align_corners=False, 22 | decoder_params=dict(embed_dim=768, conv_kernel_size=1), 23 | loss_decode=dict( 24 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 25 | # model training and testing settings 26 | train_cfg=dict(), 27 | test_cfg=dict(mode='whole')) 28 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/segformer_r101.py: -------------------------------------------------------------------------------- 1 | _base_ = ['../../_base_/models/segformer.py'] 2 | 3 | # model settings 4 | norm_cfg = dict(type='BN', requires_grad=True) 5 | find_unused_parameters = True 6 | model = dict( 7 | type='EncoderDecoder', 8 | pretrained='open-mmlab://resnet101_v1c', 9 | backbone=dict( 10 | type='ResNetV1c', 11 | depth=101, 12 | num_stages=4, 13 | out_indices=(0, 1, 2, 3), 14 | dilations=(1, 1, 2, 4), 15 | strides=(1, 2, 1, 1), 16 | norm_cfg=norm_cfg, 17 | norm_eval=False, 18 | style='pytorch', 19 | contract_dilation=True), 20 | decode_head=dict( 21 | type='SegFormerHead', 22 | in_channels=[256, 512, 1024, 2048], 23 | in_index=[0, 1, 2, 3], 24 | channels=128, 25 | dropout_ratio=0.1, 26 | num_classes=19, 27 | norm_cfg=norm_cfg, 28 | align_corners=False, 29 | decoder_params=dict(embed_dim=768, conv_kernel_size=1), 30 | loss_decode=dict( 31 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 32 | # model training and testing settings 33 | train_cfg=dict(), 34 | test_cfg=dict(mode='whole')) 35 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/upernet_ch256_mit.py: -------------------------------------------------------------------------------- 1 | _base_ = ['upernet_mit.py'] 2 | 3 | model = dict(decode_head=dict(channels=256, )) 4 | -------------------------------------------------------------------------------- /sseg/configs/_base_/models/upernet_mit.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | norm_cfg = dict(type='BN', requires_grad=True) 3 | find_unused_parameters = True 4 | model = dict( 5 | type='EncoderDecoder', 6 | pretrained=None, 7 | backbone=dict(type='IMTRv21_5', style='pytorch'), 8 | decode_head=dict( 9 | type='UPerHead', 10 | in_channels=[64, 128, 320, 512], 11 | in_index=[0, 1, 2, 3], 12 | pool_scales=(1, 2, 3, 6), 13 | channels=512, 14 | dropout_ratio=0.1, 15 | num_classes=19, 16 | norm_cfg=norm_cfg, 17 | 
align_corners=False, 18 | loss_decode=dict( 19 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), 20 | # model training and testing settings 21 | train_cfg=dict(), 22 | test_cfg=dict(mode='whole')) 23 | -------------------------------------------------------------------------------- /sseg/configs/_base_/schedules/adamw.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict( 3 | type='AdamW', lr=0.00006, betas=(0.9, 0.999), weight_decay=0.01) 4 | optimizer_config = dict() 5 | -------------------------------------------------------------------------------- /sseg/configs/_base_/schedules/poly10.py: -------------------------------------------------------------------------------- 1 | # learning policy 2 | lr_config = dict(policy='poly', power=1.0, min_lr=1e-4, by_epoch=False) 3 | -------------------------------------------------------------------------------- /sseg/configs/_base_/schedules/poly10warm.py: -------------------------------------------------------------------------------- 1 | # learning policy 2 | lr_config = dict( 3 | policy='poly', 4 | warmup='linear', 5 | warmup_iters=1500, 6 | warmup_ratio=1e-6, 7 | power=1.0, 8 | min_lr=0.0, 9 | by_epoch=False) 10 | -------------------------------------------------------------------------------- /sseg/configs/_base_/uda/dacs.py: -------------------------------------------------------------------------------- 1 | # Baseline UDA 2 | uda = dict( 3 | type='DACS', 4 | alpha=0.99, 5 | pseudo_threshold=0.968, 6 | pseudo_weight_ignore_top=0, 7 | pseudo_weight_ignore_bottom=0, 8 | imnet_feature_dist_lambda=0, 9 | imnet_feature_dist_classes=None, 10 | imnet_feature_dist_scale_min_ratio=None, 11 | mix='class', 12 | blur=True, 13 | color_jitter_strength=0.2, 14 | color_jitter_probability=0.2, 15 | debug_img_interval=1000, 16 | print_grad_magnitude=False, 17 | ) 18 | use_ddp_wrapper = True 19 | -------------------------------------------------------------------------------- /sseg/configs/_base_/uda/dacs_a999_fdthings.py: -------------------------------------------------------------------------------- 1 | # UDA with Thing-Class ImageNet Feature Distance + Increased Alpha 2 | _base_ = ['dacs.py'] 3 | uda = dict( 4 | alpha=0.999, 5 | imnet_feature_dist_lambda=0.005, 6 | imnet_feature_dist_classes=[6, 7, 11, 12, 13, 14, 15, 16, 17, 18], 7 | imnet_feature_dist_scale_min_ratio=0.75, 8 | ) 9 | -------------------------------------------------------------------------------- /sseg/configs/_base_/uda/dacs_fd.py: -------------------------------------------------------------------------------- 1 | # UDA with ImageNet Feature Distance 2 | _base_ = ['dacs.py'] 3 | uda = dict(imnet_feature_dist_lambda=0.005, ) 4 | -------------------------------------------------------------------------------- /sseg/configs/_base_/uda/dacs_fdthings.py: -------------------------------------------------------------------------------- 1 | # UDA with Thing-Class ImageNet Feature Distance 2 | _base_ = ['dacs.py'] 3 | uda = dict( 4 | imnet_feature_dist_lambda=0.005, 5 | imnet_feature_dist_classes=[6, 7, 11, 12, 13, 14, 15, 16, 17, 18], 6 | imnet_feature_dist_scale_method='ratio', 7 | imnet_feature_dist_scale_min_ratio=0.75, 8 | ) 9 | -------------------------------------------------------------------------------- /sseg/configs/dgformer/gta2cs_source.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/default_runtime.py', 3 | # DAFormer Network 
Architecture 4 | '../_base_/models/daformer_sepaspp_mitb5.py', 5 | # GTA->Cityscapes Data Loading 6 | # '../_base_/datasets/uda_gta_to_cityscapes_512x512.py', 7 | '../_base_/datasets/gta_to_cityscapes_512x512.py', 8 | # Basic UDA Self-Training 9 | # '../_base_/uda/dacs.py', 10 | # AdamW Optimizer 11 | '../_base_/schedules/adamw.py', 12 | # Linear Learning Rate Warmup with Subsequent Linear Decay 13 | '../_base_/schedules/poly10warm.py' 14 | ] 15 | # Random Seed 16 | seed = 0 17 | # Modifications to Basic UDA 18 | uda = dict( 19 | type='DGSource', 20 | alpha=0.99, 21 | pseudo_threshold=0.968, 22 | pseudo_weight_ignore_top=0, 23 | pseudo_weight_ignore_bottom=0, 24 | imnet_feature_dist_lambda=0, 25 | imnet_feature_dist_classes=None, 26 | imnet_feature_dist_scale_min_ratio=None, 27 | mix='class', 28 | blur=True, 29 | color_jitter_strength=0.2, 30 | color_jitter_probability=0.2, 31 | debug_img_interval=1000, 32 | print_grad_magnitude=False, 33 | ) 34 | use_ddp_wrapper = True 35 | 36 | # data = dict( 37 | # train=dict( 38 | # # Rare Class Sampling 39 | # rare_class_sampling=dict( 40 | # min_pixels=3000, class_temp=0.01, min_crop_ratio=0.5))) 41 | 42 | # Optimizer Hyperparameters 43 | optimizer_config = None 44 | optimizer = dict( 45 | lr=6e-05, 46 | paramwise_cfg=dict( 47 | custom_keys=dict( 48 | head=dict(lr_mult=10.0), 49 | pos_block=dict(decay_mult=0.0), 50 | norm=dict(decay_mult=0.0)))) 51 | n_gpus = 1 52 | runner = dict(type='IterBasedRunner', max_iters=40000) 53 | # Logging Configuration 54 | checkpoint_config = dict(by_epoch=False, interval=40000, max_keep_ckpts=1) 55 | evaluation = dict(interval=4000, metric='mIoU') 56 | # Meta Information for Result Analysis 57 | name = 'gta2cs_dg_source' 58 | exp = 'basic' 59 | name_dataset = 'gta2cityscapes' 60 | name_architecture = 'daformer_sepaspp_mitb5' 61 | name_encoder = 'mitb5' 62 | name_decoder = 'daformer_sepaspp' 63 | name_uda = 'dg_source' 64 | name_opt = 'adamw_6e-05_pmTrue_poly10warm_1x2_40k' 65 | -------------------------------------------------------------------------------- /sseg/configs/dgformer/gta2cs_source_bdd.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/default_runtime.py', 3 | # DAFormer Network Architecture 4 | '../_base_/models/daformer_sepaspp_mitb5.py', 5 | # GTA->Cityscapes Data Loading 6 | # '../_base_/datasets/uda_gta_to_cityscapes_512x512.py', 7 | '../_base_/datasets/gta_to_cityscapes_rsc_512x512.py', 8 | # Basic UDA Self-Training 9 | # '../_base_/uda/dacs.py', 10 | # AdamW Optimizer 11 | '../_base_/schedules/adamw.py', 12 | # Linear Learning Rate Warmup with Subsequent Linear Decay 13 | '../_base_/schedules/poly10warm.py' 14 | ] 15 | # Random Seed 16 | seed = 0 17 | # Modifications to Basic UDA 18 | uda = dict( 19 | type='DGSource', 20 | alpha=0.99, 21 | pseudo_threshold=0.968, 22 | pseudo_weight_ignore_top=0, 23 | pseudo_weight_ignore_bottom=0, 24 | imnet_feature_dist_lambda=0, 25 | imnet_feature_dist_classes=None, 26 | imnet_feature_dist_scale_min_ratio=None, 27 | mix='class', 28 | blur=True, 29 | color_jitter_strength=0.2, 30 | color_jitter_probability=0.2, 31 | debug_img_interval=1000, 32 | print_grad_magnitude=False, 33 | ) 34 | use_ddp_wrapper = True 35 | 36 | img_norm_cfg = dict( 37 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 38 | crop_size = (512, 512) 39 | 40 | test_pipeline = [ 41 | dict(type='LoadImageFromFile'), 42 | dict( 43 | type='MultiScaleFlipAug', 44 | img_scale=(1024, 512), 45 | # MultiScaleFlipAug is 
disabled by not providing img_ratios and 46 | # setting flip=False 47 | # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], 48 | flip=False, 49 | transforms=[ 50 | dict(type='Resize', keep_ratio=True), 51 | dict(type='RandomFlip'), 52 | dict(type='Normalize', **img_norm_cfg), 53 | dict(type='ImageToTensor', keys=['img']), 54 | dict(type='Collect', keys=['img']), 55 | ]) 56 | ] 57 | 58 | data = dict( 59 | train=dict( 60 | # Rare Class Sampling 61 | rare_class_sampling=dict( 62 | min_pixels=3000, class_temp=0.01, min_crop_ratio=0.5)), 63 | val=dict( 64 | type='BDD100KDataset', 65 | data_root='/ssd/yyzhao/data/bdd100k', 66 | img_dir='images/10k/val', 67 | ann_dir='labels/sem_seg/masks/val', 68 | pipeline=test_pipeline), 69 | test=dict( 70 | type='BDD100KDataset', 71 | data_root='/ssd/yyzhao/data/bdd100k', 72 | img_dir='images/10k/val', 73 | ann_dir='labels/sem_seg/masks/val', 74 | pipeline=test_pipeline),) 75 | 76 | # Optimizer Hyperparameters 77 | optimizer_config = None 78 | optimizer = dict( 79 | lr=6e-05, 80 | paramwise_cfg=dict( 81 | custom_keys=dict( 82 | head=dict(lr_mult=10.0), 83 | pos_block=dict(decay_mult=0.0), 84 | norm=dict(decay_mult=0.0)))) 85 | n_gpus = 1 86 | runner = dict(type='IterBasedRunner', max_iters=40000) 87 | # Logging Configuration 88 | checkpoint_config = dict(by_epoch=False, interval=40000, max_keep_ckpts=1) 89 | evaluation = dict(interval=4000, metric='mIoU') 90 | # Meta Information for Result Analysis 91 | name = 'gta2cs_dg_source' 92 | exp = 'basic' 93 | name_dataset = 'gta2bdd' 94 | name_architecture = 'daformer_sepaspp_mitb5' 95 | name_encoder = 'mitb5' 96 | name_decoder = 'daformer_sepaspp' 97 | name_uda = 'dg_source' 98 | name_opt = 'adamw_6e-05_pmTrue_poly10warm_1x2_40k' 99 | -------------------------------------------------------------------------------- /sseg/configs/dgformer/gta2cs_source_cityscapes.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/default_runtime.py', 3 | # DAFormer Network Architecture 4 | '../_base_/models/daformer_sepaspp_mitb5.py', 5 | # GTA->Cityscapes Data Loading 6 | # '../_base_/datasets/uda_gta_to_cityscapes_512x512.py', 7 | '../_base_/datasets/gta_to_cityscapes_512x512.py', 8 | # Basic UDA Self-Training 9 | # '../_base_/uda/dacs.py', 10 | # AdamW Optimizer 11 | '../_base_/schedules/adamw.py', 12 | # Linear Learning Rate Warmup with Subsequent Linear Decay 13 | '../_base_/schedules/poly10warm.py' 14 | ] 15 | # Random Seed 16 | seed = 0 17 | # Modifications to Basic UDA 18 | uda = dict( 19 | type='DGSource', 20 | alpha=0.99, 21 | pseudo_threshold=0.968, 22 | pseudo_weight_ignore_top=0, 23 | pseudo_weight_ignore_bottom=0, 24 | imnet_feature_dist_lambda=0, 25 | imnet_feature_dist_classes=None, 26 | imnet_feature_dist_scale_min_ratio=None, 27 | mix='class', 28 | blur=True, 29 | color_jitter_strength=0.2, 30 | color_jitter_probability=0.2, 31 | debug_img_interval=1000, 32 | print_grad_magnitude=False, 33 | ) 34 | use_ddp_wrapper = True 35 | 36 | # data = dict( 37 | # train=dict( 38 | # # Rare Class Sampling 39 | # rare_class_sampling=dict( 40 | # min_pixels=3000, class_temp=0.01, min_crop_ratio=0.5))) 41 | 42 | # Optimizer Hyperparameters 43 | optimizer_config = None 44 | optimizer = dict( 45 | lr=6e-05, 46 | paramwise_cfg=dict( 47 | custom_keys=dict( 48 | head=dict(lr_mult=10.0), 49 | pos_block=dict(decay_mult=0.0), 50 | norm=dict(decay_mult=0.0)))) 51 | n_gpus = 1 52 | runner = dict(type='IterBasedRunner', max_iters=40000) 53 | # Logging Configuration 54 | 
checkpoint_config = dict(by_epoch=False, interval=40000, max_keep_ckpts=1) 55 | evaluation = dict(interval=4000, metric='mIoU') 56 | # Meta Information for Result Analysis 57 | name = 'gta2cs_dg_source' 58 | exp = 'basic' 59 | name_dataset = 'gta2cityscapes' 60 | name_architecture = 'daformer_sepaspp_mitb5' 61 | name_encoder = 'mitb5' 62 | name_decoder = 'daformer_sepaspp' 63 | name_uda = 'dg_source' 64 | name_opt = 'adamw_6e-05_pmTrue_poly10warm_1x2_40k' 65 | -------------------------------------------------------------------------------- /sseg/configs/dgformer/gta2cs_source_rsc_shade.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/default_runtime.py', 3 | # DAFormer Network Architecture 4 | '../_base_/models/daformer_sepaspp_mitb5_shm.py', 5 | # GTA->Cityscapes Data Loading 6 | # '../_base_/datasets/uda_gta_to_cityscapes_512x512.py', 7 | '../_base_/datasets/gta_to_cityscapes_rsc_512x512.py', 8 | # Basic UDA Self-Training 9 | # '../_base_/uda/dacs.py', 10 | # AdamW Optimizer 11 | '../_base_/schedules/adamw.py', 12 | # Linear Learning Rate Warmup with Subsequent Linear Decay 13 | '../_base_/schedules/poly10warm.py' 14 | ] 15 | # Random Seed 16 | seed = 0 17 | # Modifications to Basic UDA 18 | uda = dict( 19 | type='DGSource', 20 | mix='class', 21 | blur=True, 22 | color_jitter_strength=0.2, 23 | color_jitter_probability=0.2, 24 | debug_img_interval=1000, 25 | print_grad_magnitude=False, 26 | ### shade 27 | SHM=True, 28 | sc_weight=10.0, 29 | # Thing-Class Feature Distance 30 | imnet_feature_dist_lambda=0.005, 31 | imnet_feature_dist_classes=[6, 7, 11, 12, 13, 14, 15, 16, 17, 18], 32 | imnet_feature_dist_scale_min_ratio=0.75, 33 | ) 34 | use_ddp_wrapper = True 35 | 36 | data = dict( 37 | train=dict( 38 | # Rare Class Sampling 39 | rare_class_sampling=dict( 40 | min_pixels=3000, class_temp=0.01, min_crop_ratio=0.5))) 41 | 42 | # Optimizer Hyperparameters 43 | optimizer_config = None 44 | optimizer = dict( 45 | lr=6e-05, 46 | paramwise_cfg=dict( 47 | custom_keys=dict( 48 | head=dict(lr_mult=10.0), 49 | pos_block=dict(decay_mult=0.0), 50 | norm=dict(decay_mult=0.0)))) 51 | n_gpus = 1 52 | runner = dict(type='IterBasedRunner', max_iters=40000) 53 | # Logging Configuration 54 | checkpoint_config = dict(by_epoch=False, interval=40000, max_keep_ckpts=1) 55 | evaluation = dict(interval=4000, metric='mIoU') 56 | ### shm update 57 | shm_hook = dict( 58 | interval=4000, 59 | num_data=100000 60 | ) 61 | # Meta Information for Result Analysis 62 | name = 'gta2cs_dg_source_rsc_shade' 63 | exp = 'basic' 64 | name_dataset = 'gta2cityscapes' 65 | name_architecture = 'daformer_sepaspp_mitb5' 66 | name_encoder = 'mitb5' 67 | name_decoder = 'daformer_sepaspp' 68 | name_uda = 'dg_source' 69 | name_opt = 'adamw_6e-05_pmTrue_poly10warm_1x2_40k' 70 | -------------------------------------------------------------------------------- /sseg/mmseg/__init__.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from .version import __version__, version_info 4 | 5 | MMCV_MIN = '1.3.7' 6 | MMCV_MAX = '1.4.0' 7 | 8 | 9 | def digit_version(version_str): 10 | digit_version = [] 11 | for x in version_str.split('.'): 12 | if x.isdigit(): 13 | digit_version.append(int(x)) 14 | elif x.find('rc') != -1: 15 | patch_version = x.split('rc') 16 | digit_version.append(int(patch_version[0]) - 1) 17 | digit_version.append(int(patch_version[1])) 18 | return digit_version 19 | 20 | 21 | mmcv_min_version = 
digit_version(MMCV_MIN) 22 | mmcv_max_version = digit_version(MMCV_MAX) 23 | mmcv_version = digit_version(mmcv.__version__) 24 | 25 | 26 | assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \ 27 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 28 | f'Please install mmcv>={mmcv_min_version}, <={mmcv_max_version}.' 29 | 30 | __all__ = ['__version__', 'version_info'] 31 | -------------------------------------------------------------------------------- /sseg/mmseg/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import inference_segmentor, init_segmentor, show_result_pyplot 2 | from .test import multi_gpu_test, single_gpu_test 3 | from .train import get_root_logger, set_random_seed, train_segmentor 4 | 5 | __all__ = [ 6 | 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', 7 | 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', 8 | 'show_result_pyplot' 9 | ] 10 | -------------------------------------------------------------------------------- /sseg/mmseg/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * # noqa: F401, F403 2 | from .seg import * # noqa: F401, F403 3 | from .utils import * # noqa: F401, F403 -------------------------------------------------------------------------------- /sseg/mmseg/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import get_classes, get_palette 2 | from .eval_hooks import DistEvalHook, EvalHook 3 | from .metrics import eval_metrics, mean_dice, mean_fscore, mean_iou 4 | 5 | __all__ = [ 6 | 'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore', 7 | 'eval_metrics', 'get_classes', 'get_palette' 8 | ] 9 | -------------------------------------------------------------------------------- /sseg/mmseg/core/seg/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_pixel_sampler 2 | from .sampler import BasePixelSampler, OHEMPixelSampler 3 | 4 | __all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] 5 | -------------------------------------------------------------------------------- /sseg/mmseg/core/seg/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | PIXEL_SAMPLERS = Registry('pixel sampler') 4 | 5 | 6 | def build_pixel_sampler(cfg, **default_args): 7 | """Build pixel sampler for segmentation map.""" 8 | return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args) 9 | -------------------------------------------------------------------------------- /sseg/mmseg/core/seg/sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_pixel_sampler import BasePixelSampler 2 | from .ohem_pixel_sampler import OHEMPixelSampler 3 | 4 | __all__ = ['BasePixelSampler', 'OHEMPixelSampler'] 5 | -------------------------------------------------------------------------------- /sseg/mmseg/core/seg/sampler/base_pixel_sampler.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | from abc import ABCMeta, abstractmethod 4 | 5 | 6 | class BasePixelSampler(metaclass=ABCMeta): 7 | """Base class of pixel sampler.""" 8 | 9 | def __init__(self, 
**kwargs): 10 | pass 11 | 12 | @abstractmethod 13 | def sample(self, seg_logit, seg_label): 14 | """Placeholder for sample function.""" 15 | -------------------------------------------------------------------------------- /sseg/mmseg/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .misc import add_prefix 2 | 3 | __all__ = ['add_prefix'] 4 | -------------------------------------------------------------------------------- /sseg/mmseg/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | 4 | def add_prefix(inputs, prefix): 5 | """Add prefix for dict. 6 | 7 | Args: 8 | inputs (dict): The input dict with str keys. 9 | prefix (str): The prefix to add. 10 | 11 | Returns: 12 | 13 | dict: The dict with keys updated with ``prefix``. 14 | """ 15 | 16 | outputs = dict() 17 | for name, value in inputs.items(): 18 | outputs[f'{prefix}.{name}'] = value 19 | 20 | return outputs 21 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .acdc import ACDCDataset 2 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset 3 | from .cityscapes import CityscapesDataset 4 | from .custom import CustomDataset 5 | from .dark_zurich import DarkZurichDataset 6 | from .dataset_wrappers import ConcatDataset, RepeatDataset 7 | from .gta import GTADataset 8 | from .synthia import SynthiaDataset 9 | from .uda_dataset import UDADataset, DGRSCDataset 10 | from .mapillary import MapillaryDataset 11 | from .bdd100k import BDD100KDataset 12 | 13 | __all__ = [ 14 | 'CustomDataset', 15 | 'build_dataloader', 16 | 'ConcatDataset', 17 | 'RepeatDataset', 18 | 'DATASETS', 19 | 'build_dataset', 20 | 'PIPELINES', 21 | 'CityscapesDataset', 22 | 'GTADataset', 23 | 'SynthiaDataset', 24 | 'UDADataset', 25 | 'ACDCDataset', 26 | 'DarkZurichDataset', 27 | 'DGRSCDataset', 28 | 'MapillaryDataset', 29 | 'BDD100KDataset' 30 | ] 31 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/acdc.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .cityscapes import CityscapesDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class ACDCDataset(CityscapesDataset): 7 | 8 | def __init__(self, **kwargs): 9 | super(ACDCDataset, self).__init__( 10 | img_suffix='_rgb_anon.png', 11 | seg_map_suffix='_gt_labelTrainIds.png', 12 | **kwargs) 13 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/bdd100k.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .cityscapes import CityscapesDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class BDD100KDataset(CityscapesDataset): 7 | 8 | def __init__(self, **kwargs): 9 | super(BDD100KDataset, self).__init__( 10 | img_suffix='.jpg', 11 | seg_map_suffix='.png', 12 | **kwargs) 13 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/dark_zurich.py: -------------------------------------------------------------------------------- 1 | from .builder import DATASETS 2 | from .cityscapes import CityscapesDataset 3 | 4 | 5 | @DATASETS.register_module() 6 | class 
DarkZurichDataset(CityscapesDataset): 7 | 8 | def __init__(self, **kwargs): 9 | super(DarkZurichDataset, self).__init__( 10 | img_suffix='_rgb_anon.png', 11 | seg_map_suffix='_gt_labelTrainIds.png', 12 | **kwargs) 13 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 4 | 5 | from .builder import DATASETS 6 | 7 | 8 | @DATASETS.register_module() 9 | class ConcatDataset(_ConcatDataset): 10 | """A wrapper of concatenated dataset. 11 | 12 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 13 | concat the group flag for image aspect ratio. 14 | 15 | Args: 16 | datasets (list[:obj:`Dataset`]): A list of datasets. 17 | """ 18 | 19 | def __init__(self, datasets): 20 | super(ConcatDataset, self).__init__(datasets) 21 | self.CLASSES = datasets[0].CLASSES 22 | self.PALETTE = datasets[0].PALETTE 23 | 24 | 25 | @DATASETS.register_module() 26 | class RepeatDataset(object): 27 | """A wrapper of repeated dataset. 28 | 29 | The length of repeated dataset will be `times` larger than the original 30 | dataset. This is useful when the data loading time is long but the dataset 31 | is small. Using RepeatDataset can reduce the data loading time between 32 | epochs. 33 | 34 | Args: 35 | dataset (:obj:`Dataset`): The dataset to be repeated. 36 | times (int): Repeat times. 37 | """ 38 | 39 | def __init__(self, dataset, times): 40 | self.dataset = dataset 41 | self.times = times 42 | self.CLASSES = dataset.CLASSES 43 | self.PALETTE = dataset.PALETTE 44 | self._ori_len = len(self.dataset) 45 | 46 | def __getitem__(self, idx): 47 | """Get item from original dataset.""" 48 | return self.dataset[idx % self._ori_len] 49 | 50 | def __len__(self): 51 | """The length is multiplied by ``times``""" 52 | return self.times * self._ori_len 53 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/gta.py: -------------------------------------------------------------------------------- 1 | from . import CityscapesDataset 2 | from .builder import DATASETS 3 | from .custom import CustomDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class GTADataset(CustomDataset): 8 | CLASSES = CityscapesDataset.CLASSES 9 | PALETTE = CityscapesDataset.PALETTE 10 | 11 | def __init__(self, **kwargs): 12 | assert kwargs.get('split') in [None, 'train'] 13 | if 'split' in kwargs: 14 | kwargs.pop('split') 15 | super(GTADataset, self).__init__( 16 | img_suffix='.png', 17 | seg_map_suffix='_labelTrainIds.png', 18 | split=None, 19 | **kwargs) 20 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/mapillary.py: -------------------------------------------------------------------------------- 1 | from . 
import CityscapesDataset 2 | from .builder import DATASETS 3 | from .custom import CustomDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class MapillaryDataset(CityscapesDataset): 8 | 9 | def __init__(self, **kwargs): 10 | super(MapillaryDataset, self).__init__( 11 | img_suffix='.jpg', 12 | seg_map_suffix='_labelTrainIds.png', 13 | **kwargs) 14 | 15 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, 3 | Transpose, to_tensor) 4 | from .loading import LoadAnnotations, LoadImageFromFile 5 | from .test_time_aug import MultiScaleFlipAug 6 | from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, 7 | PhotoMetricDistortion, RandomCrop, RandomFlip, 8 | RandomRotate, Rerange, Resize, RGB2Gray, SegRescale) 9 | 10 | __all__ = [ 11 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 12 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 13 | 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 14 | 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', 15 | 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray' 16 | ] 17 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | import collections 4 | 5 | from mmcv.utils import build_from_cfg 6 | 7 | from ..builder import PIPELINES 8 | 9 | 10 | @PIPELINES.register_module() 11 | class Compose(object): 12 | """Compose multiple transforms sequentially. 13 | 14 | Args: 15 | transforms (Sequence[dict | callable]): Sequence of transform object or 16 | config dict to be composed. 17 | """ 18 | 19 | def __init__(self, transforms): 20 | assert isinstance(transforms, collections.abc.Sequence) 21 | self.transforms = [] 22 | for transform in transforms: 23 | if isinstance(transform, dict): 24 | transform = build_from_cfg(transform, PIPELINES) 25 | self.transforms.append(transform) 26 | elif callable(transform): 27 | self.transforms.append(transform) 28 | else: 29 | raise TypeError('transform must be callable or a dict') 30 | 31 | def __call__(self, data): 32 | """Call function to apply transforms sequentially. 33 | 34 | Args: 35 | data (dict): A result dict contains the data to transform. 36 | 37 | Returns: 38 | dict: Transformed data. 39 | """ 40 | 41 | for t in self.transforms: 42 | data = t(data) 43 | if data is None: 44 | return None 45 | return data 46 | 47 | def __repr__(self): 48 | format_string = self.__class__.__name__ + '(' 49 | for t in self.transforms: 50 | format_string += '\n' 51 | format_string += f' {t}' 52 | format_string += '\n)' 53 | return format_string 54 | -------------------------------------------------------------------------------- /sseg/mmseg/datasets/synthia.py: -------------------------------------------------------------------------------- 1 | from . 
import CityscapesDataset 2 | from .builder import DATASETS 3 | from .custom import CustomDataset 4 | 5 | 6 | @DATASETS.register_module() 7 | class SynthiaDataset(CustomDataset): 8 | CLASSES = CityscapesDataset.CLASSES 9 | PALETTE = CityscapesDataset.PALETTE 10 | 11 | def __init__(self, **kwargs): 12 | assert kwargs.get('split') in [None, 'train'] 13 | if 'split' in kwargs: 14 | kwargs.pop('split') 15 | super(SynthiaDataset, self).__init__( 16 | img_suffix='.png', 17 | seg_map_suffix='_labelTrainIds.png', 18 | split=None, 19 | **kwargs) 20 | -------------------------------------------------------------------------------- /sseg/mmseg/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, UDA, 3 | build_backbone, build_head, build_loss, build_segmentor) 4 | from .decode_heads import * # noqa: F401,F403 5 | from .losses import * # noqa: F401,F403 6 | from .necks import * # noqa: F401,F403 7 | from .segmentors import * # noqa: F401,F403 8 | from .dg import * # noqa: F401,F403 9 | 10 | __all__ = [ 11 | 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'UDA', 'build_backbone', 12 | 'build_head', 'build_loss', 'build_segmentor' 13 | ] 14 | -------------------------------------------------------------------------------- /sseg/mmseg/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .mix_transformer import (MixVisionTransformer, mit_b0, mit_b1, mit_b2, 2 | mit_b3, mit_b4, mit_b5) 3 | from .resnest import ResNeSt 4 | from .resnet import ResNet, ResNetV1c, ResNetV1d 5 | from .resnext import ResNeXt 6 | 7 | __all__ = [ 8 | 'ResNet', 9 | 'ResNetV1c', 10 | 'ResNetV1d', 11 | 'ResNeXt', 12 | 'ResNeSt', 13 | 'MixVisionTransformer', 14 | 'mit_b0', 15 | 'mit_b1', 16 | 'mit_b2', 17 | 'mit_b3', 18 | 'mit_b4', 19 | 'mit_b5', 20 | ] 21 | -------------------------------------------------------------------------------- /sseg/mmseg/models/builder.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | # Modifications: Support UDA models 3 | 4 | import warnings 5 | 6 | from mmcv.cnn import MODELS as MMCV_MODELS 7 | from mmcv.cnn.bricks.registry import ATTENTION as MMCV_ATTENTION 8 | from mmcv.utils import Registry 9 | 10 | MODELS = Registry('models', parent=MMCV_MODELS) 11 | ATTENTION = Registry('attention', parent=MMCV_ATTENTION) 12 | 13 | BACKBONES = MODELS 14 | NECKS = MODELS 15 | HEADS = MODELS 16 | LOSSES = MODELS 17 | SEGMENTORS = MODELS 18 | UDA = MODELS 19 | 20 | 21 | def build_backbone(cfg): 22 | """Build backbone.""" 23 | return BACKBONES.build(cfg) 24 | 25 | 26 | def build_neck(cfg): 27 | """Build neck.""" 28 | return NECKS.build(cfg) 29 | 30 | 31 | def build_head(cfg): 32 | """Build head.""" 33 | return HEADS.build(cfg) 34 | 35 | 36 | def build_loss(cfg): 37 | """Build loss.""" 38 | return LOSSES.build(cfg) 39 | 40 | 41 | def build_train_model(cfg, train_cfg=None, test_cfg=None): 42 | """Build model.""" 43 | if train_cfg is not None or test_cfg is not None: 44 | warnings.warn( 45 | 'train_cfg and test_cfg is deprecated, ' 46 | 'please specify them in model', UserWarning) 47 | assert cfg.model.get('train_cfg') is None or train_cfg is None, \ 48 | 'train_cfg specified in both outer field and model field ' 49 | assert cfg.model.get('test_cfg') is None or test_cfg is None, \ 50 | 
'test_cfg specified in both outer field and model field ' 51 | if 'uda' in cfg: 52 | cfg.uda['model'] = cfg.model 53 | cfg.uda['max_iters'] = cfg.runner.max_iters 54 | return UDA.build( 55 | cfg.uda, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 56 | else: 57 | return SEGMENTORS.build( 58 | cfg.model, 59 | default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 60 | 61 | 62 | def build_segmentor(cfg, train_cfg=None, test_cfg=None): 63 | """Build segmentor.""" 64 | if train_cfg is not None or test_cfg is not None: 65 | warnings.warn( 66 | 'train_cfg and test_cfg is deprecated, ' 67 | 'please specify them in model', UserWarning) 68 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 69 | 'train_cfg specified in both outer field and model field ' 70 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 71 | 'test_cfg specified in both outer field and model field ' 72 | return SEGMENTORS.build( 73 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 74 | -------------------------------------------------------------------------------- /sseg/mmseg/models/decode_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .aspp_head import ASPPHead 2 | from .da_head import DAHead 3 | from .daformer_head import DAFormerHead 4 | from .dlv2_head import DLV2Head 5 | from .fcn_head import FCNHead 6 | from .isa_head import ISAHead 7 | from .psp_head import PSPHead 8 | from .segformer_head import SegFormerHead 9 | from .sep_aspp_head import DepthwiseSeparableASPPHead 10 | from .uper_head import UPerHead 11 | 12 | __all__ = [ 13 | 'FCNHead', 14 | 'PSPHead', 15 | 'ASPPHead', 16 | 'UPerHead', 17 | 'DepthwiseSeparableASPPHead', 18 | 'DAHead', 19 | 'DLV2Head', 20 | 'SegFormerHead', 21 | 'DAFormerHead', 22 | 'ISAHead', 23 | ] 24 | -------------------------------------------------------------------------------- /sseg/mmseg/models/decode_heads/dlv2_head.py: -------------------------------------------------------------------------------- 1 | from ..builder import HEADS 2 | from .aspp_head import ASPPModule 3 | from .decode_head import BaseDecodeHead 4 | 5 | 6 | @HEADS.register_module() 7 | class DLV2Head(BaseDecodeHead): 8 | 9 | def __init__(self, dilations=(6, 12, 18, 24), **kwargs): 10 | assert 'channels' not in kwargs 11 | assert 'dropout_ratio' not in kwargs 12 | assert 'norm_cfg' not in kwargs 13 | kwargs['channels'] = 1 14 | kwargs['dropout_ratio'] = 0 15 | kwargs['norm_cfg'] = None 16 | super(DLV2Head, self).__init__(**kwargs) 17 | del self.conv_seg 18 | assert isinstance(dilations, (list, tuple)) 19 | self.dilations = dilations 20 | self.aspp_modules = ASPPModule( 21 | dilations, 22 | self.in_channels, 23 | self.num_classes, 24 | conv_cfg=self.conv_cfg, 25 | norm_cfg=None, 26 | act_cfg=None) 27 | 28 | def forward(self, inputs): 29 | """Forward function.""" 30 | # for f in inputs: 31 | # mmcv.print_log(f'{f.shape}', 'mmseg') 32 | x = self._transform_inputs(inputs) 33 | aspp_outs = self.aspp_modules(x) 34 | out = aspp_outs[0] 35 | for i in range(len(aspp_outs) - 1): 36 | out += aspp_outs[i + 1] 37 | return out 38 | -------------------------------------------------------------------------------- /sseg/mmseg/models/decode_heads/fcn_head.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv.cnn import ConvModule 6 | 7 | from ..builder import HEADS 8 
| from .decode_head import BaseDecodeHead 9 | 10 | 11 | @HEADS.register_module() 12 | class FCNHead(BaseDecodeHead): 13 | """Fully Convolution Networks for Semantic Segmentation. 14 | 15 | This head is the implementation of `FCNNet <https://arxiv.org/abs/1411.4038>`_. 16 | 17 | Args: 18 | num_convs (int): Number of convs in the head. Default: 2. 19 | kernel_size (int): The kernel size for convs in the head. Default: 3. 20 | concat_input (bool): Whether to concatenate the input and output of convs 21 | before the classification layer. 22 | dilation (int): The dilation rate for convs in the head. Default: 1. 23 | """ 24 | 25 | def __init__(self, 26 | num_convs=2, 27 | kernel_size=3, 28 | concat_input=True, 29 | dilation=1, 30 | **kwargs): 31 | assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int) 32 | self.num_convs = num_convs 33 | self.concat_input = concat_input 34 | self.kernel_size = kernel_size 35 | super(FCNHead, self).__init__(**kwargs) 36 | if num_convs == 0: 37 | assert self.in_channels == self.channels 38 | 39 | conv_padding = (kernel_size // 2) * dilation 40 | convs = [] 41 | convs.append( 42 | ConvModule( 43 | self.in_channels, 44 | self.channels, 45 | kernel_size=kernel_size, 46 | padding=conv_padding, 47 | dilation=dilation, 48 | conv_cfg=self.conv_cfg, 49 | norm_cfg=self.norm_cfg, 50 | act_cfg=self.act_cfg)) 51 | for i in range(num_convs - 1): 52 | convs.append( 53 | ConvModule( 54 | self.channels, 55 | self.channels, 56 | kernel_size=kernel_size, 57 | padding=conv_padding, 58 | dilation=dilation, 59 | conv_cfg=self.conv_cfg, 60 | norm_cfg=self.norm_cfg, 61 | act_cfg=self.act_cfg)) 62 | if num_convs == 0: 63 | self.convs = nn.Identity() 64 | else: 65 | self.convs = nn.Sequential(*convs) 66 | if self.concat_input: 67 | self.conv_cat = ConvModule( 68 | self.in_channels + self.channels, 69 | self.channels, 70 | kernel_size=kernel_size, 71 | padding=kernel_size // 2, 72 | conv_cfg=self.conv_cfg, 73 | norm_cfg=self.norm_cfg, 74 | act_cfg=self.act_cfg) 75 | 76 | def forward(self, inputs): 77 | """Forward function.""" 78 | x = self._transform_inputs(inputs) 79 | output = self.convs(x) 80 | if self.concat_input: 81 | output = self.conv_cat(torch.cat([x, output], dim=1)) 82 | output = self.cls_seg(output) 83 | return output 84 | -------------------------------------------------------------------------------- /sseg/mmseg/models/decode_heads/segformer_head.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/NVlabs/SegFormer 2 | # Modifications: Model construction with loop 3 | # --------------------------------------------------------------- 4 | # Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
5 | # 6 | # This work is licensed under the NVIDIA Source Code License 7 | # --------------------------------------------------------------- 8 | 9 | import torch 10 | import torch.nn as nn 11 | from mmcv.cnn import ConvModule 12 | 13 | from mmseg.ops import resize 14 | from ..builder import HEADS 15 | from .decode_head import BaseDecodeHead 16 | 17 | 18 | class MLP(nn.Module): 19 | """Linear Embedding.""" 20 | 21 | def __init__(self, input_dim=2048, embed_dim=768): 22 | super().__init__() 23 | self.proj = nn.Linear(input_dim, embed_dim) 24 | 25 | def forward(self, x): 26 | x = x.flatten(2).transpose(1, 2).contiguous() 27 | x = self.proj(x) 28 | return x 29 | 30 | 31 | @HEADS.register_module() 32 | class SegFormerHead(BaseDecodeHead): 33 | """ 34 | SegFormer: Simple and Efficient Design for Semantic Segmentation with 35 | Transformers 36 | """ 37 | 38 | def __init__(self, **kwargs): 39 | super(SegFormerHead, self).__init__( 40 | input_transform='multiple_select', **kwargs) 41 | 42 | decoder_params = kwargs['decoder_params'] 43 | embedding_dim = decoder_params['embed_dim'] 44 | conv_kernel_size = decoder_params['conv_kernel_size'] 45 | 46 | self.linear_c = {} 47 | for i, in_channels in zip(self.in_index, self.in_channels): 48 | self.linear_c[str(i)] = MLP( 49 | input_dim=in_channels, embed_dim=embedding_dim) 50 | self.linear_c = nn.ModuleDict(self.linear_c) 51 | 52 | self.linear_fuse = ConvModule( 53 | in_channels=embedding_dim * len(self.in_index), 54 | out_channels=embedding_dim, 55 | kernel_size=conv_kernel_size, 56 | padding=0 if conv_kernel_size == 1 else conv_kernel_size // 2, 57 | norm_cfg=kwargs['norm_cfg']) 58 | 59 | self.linear_pred = nn.Conv2d( 60 | embedding_dim, self.num_classes, kernel_size=1) 61 | 62 | def forward(self, inputs): 63 | x = inputs 64 | n, _, h, w = x[-1].shape 65 | # for f in x: 66 | # print(f.shape) 67 | 68 | _c = {} 69 | for i in self.in_index: 70 | # mmcv.print_log(f'{i}: {x[i].shape}, {self.linear_c[str(i)]}') 71 | _c[i] = self.linear_c[str(i)](x[i]).permute(0, 2, 1).contiguous() 72 | _c[i] = _c[i].reshape(n, -1, x[i].shape[2], x[i].shape[3]) 73 | if i != 0: 74 | _c[i] = resize( 75 | _c[i], 76 | size=x[0].size()[2:], 77 | mode='bilinear', 78 | align_corners=False) 79 | 80 | _c = self.linear_fuse(torch.cat(list(_c.values()), dim=1)) 81 | 82 | if self.dropout is not None: 83 | x = self.dropout(_c) 84 | else: 85 | x = _c 86 | x = self.linear_pred(x) 87 | 88 | return x 89 | -------------------------------------------------------------------------------- /sseg/mmseg/models/dg/__init__.py: -------------------------------------------------------------------------------- 1 | from mmseg.models.dg.dacs import DGSource 2 | 3 | __all__ = ['DGSource'] 4 | -------------------------------------------------------------------------------- /sseg/mmseg/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 3 | cross_entropy, mask_cross_entropy) 4 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 5 | 6 | __all__ = [ 7 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 8 | 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', 9 | 'weight_reduce_loss', 'weighted_loss' 10 | ] 11 | -------------------------------------------------------------------------------- /sseg/mmseg/models/losses/accuracy.py: 
-------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | import torch.nn as nn 4 | 5 | 6 | def accuracy(pred, target, topk=1, thresh=None): 7 | """Calculate accuracy according to the prediction and target. 8 | 9 | Args: 10 | pred (torch.Tensor): The model prediction, shape (N, num_class, ...) 11 | target (torch.Tensor): The target of each prediction, shape (N, ...) 12 | topk (int | tuple[int], optional): If the predictions in ``topk`` 13 | match the target, the predictions will be regarded as 14 | correct ones. Defaults to 1. 15 | thresh (float, optional): If not None, predictions with scores under 16 | this threshold are considered incorrect. Defaults to None. 17 | 18 | Returns: 19 | float | tuple[float]: If the input ``topk`` is a single integer, 20 | the function will return a single float as accuracy. If 21 | ``topk`` is a tuple containing multiple integers, the 22 | function will return a tuple containing accuracies of 23 | each ``topk`` number. 24 | """ 25 | assert isinstance(topk, (int, tuple)) 26 | if isinstance(topk, int): 27 | topk = (topk, ) 28 | return_single = True 29 | else: 30 | return_single = False 31 | 32 | maxk = max(topk) 33 | if pred.size(0) == 0: 34 | accu = [pred.new_tensor(0.) for i in range(len(topk))] 35 | return accu[0] if return_single else accu 36 | assert pred.ndim == target.ndim + 1 37 | assert pred.size(0) == target.size(0) 38 | assert maxk <= pred.size(1), \ 39 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 40 | pred_value, pred_label = pred.topk(maxk, dim=1) 41 | # transpose to shape (maxk, N, ...) 42 | pred_label = pred_label.transpose(0, 1) 43 | correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) 44 | if thresh is not None: 45 | # Only prediction values larger than thresh are counted as correct 46 | correct = correct & (pred_value > thresh).t() 47 | res = [] 48 | for k in topk: 49 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 50 | res.append(correct_k.mul_(100.0 / target.numel())) 51 | return res[0] if return_single else res 52 | 53 | 54 | class Accuracy(nn.Module): 55 | """Accuracy calculation module.""" 56 | 57 | def __init__(self, topk=(1, ), thresh=None): 58 | """Module to calculate the accuracy. 59 | 60 | Args: 61 | topk (tuple, optional): The criterion used to calculate the 62 | accuracy. Defaults to (1,). 63 | thresh (float, optional): If not None, predictions with scores 64 | under this threshold are considered incorrect. Defaults to None. 65 | """ 66 | super().__init__() 67 | self.topk = topk 68 | self.thresh = thresh 69 | 70 | def forward(self, pred, target): 71 | """Forward function to calculate accuracy. 72 | 73 | Args: 74 | pred (torch.Tensor): Prediction of models. 75 | target (torch.Tensor): Target for each prediction. 76 | 77 | Returns: 78 | tuple[float]: The accuracies under different topk criterions.
79 | """ 80 | return accuracy(pred, target, self.topk, self.thresh) 81 | -------------------------------------------------------------------------------- /sseg/mmseg/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .segformer_adapter import SegFormerAdapter 2 | 3 | __all__ = ['SegFormerAdapter'] 4 | -------------------------------------------------------------------------------- /sseg/mmseg/models/necks/segformer_adapter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from mmseg.ops import resize 5 | from ..builder import NECKS 6 | 7 | 8 | @NECKS.register_module() 9 | class SegFormerAdapter(nn.Module): 10 | 11 | def __init__(self, out_layers=[3], scales=[4]): 12 | super(SegFormerAdapter, self).__init__() 13 | self.out_layers = out_layers 14 | self.scales = scales 15 | 16 | def forward(self, x): 17 | _c = {} 18 | for i, s in zip(self.out_layers, self.scales): 19 | if s == 1: 20 | _c[i] = x[i] 21 | else: 22 | _c[i] = resize( 23 | x[i], scale_factor=s, mode='bilinear', align_corners=False) 24 | # mmcv.print_log(f'{i}: {x[i].shape}, {_c[i].shape}', 'mmseg') 25 | 26 | x[-1] = torch.cat(list(_c.values()), dim=1) 27 | return x 28 | -------------------------------------------------------------------------------- /sseg/mmseg/models/segmentors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseSegmentor 2 | from .encoder_decoder import EncoderDecoder 3 | 4 | __all__ = ['BaseSegmentor', 'EncoderDecoder'] 5 | -------------------------------------------------------------------------------- /sseg/mmseg/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .ckpt_convert import mit_convert 2 | from .make_divisible import make_divisible 3 | from .res_layer import ResLayer 4 | from .self_attention_block import SelfAttentionBlock 5 | from .shape_convert import nchw_to_nlc, nlc_to_nchw 6 | from .style_hallucination import StyleHallucination 7 | 8 | __all__ = [ 9 | 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'mit_convert', 10 | 'nchw_to_nlc', 'nlc_to_nchw', 'StyleHallucination' 11 | ] 12 | -------------------------------------------------------------------------------- /sseg/mmseg/models/utils/ckpt_convert.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def mit_convert(ckpt): 9 | new_ckpt = OrderedDict() 10 | # Process the concat between q linear weights and kv linear weights 11 | for k, v in ckpt.items(): 12 | if k.startswith('head'): 13 | continue 14 | elif k.startswith('patch_embed'): 15 | stage_i = int(k.split('.')[0].replace('patch_embed', '')) 16 | new_k = k.replace(f'patch_embed{stage_i}', f'layers.{stage_i-1}.0') 17 | new_v = v 18 | if 'proj.' in new_k: 19 | new_k = new_k.replace('proj.', 'projection.') 20 | elif k.startswith('block'): 21 | stage_i = int(k.split('.')[0].replace('block', '')) 22 | new_k = k.replace(f'block{stage_i}', f'layers.{stage_i-1}.1') 23 | new_v = v 24 | if 'attn.q.' in new_k: 25 | sub_item_k = k.replace('q.', 'kv.') 26 | new_k = new_k.replace('q.', 'attn.in_proj_') 27 | new_v = torch.cat([v, ckpt[sub_item_k]], dim=0) 28 | elif 'attn.kv.' in new_k: 29 | continue 30 | elif 'attn.proj.' 
in new_k: 31 | new_k = new_k.replace('proj.', 'attn.out_proj.') 32 | elif 'attn.sr.' in new_k: 33 | new_k = new_k.replace('sr.', 'sr.') # intentional no-op: spatial-reduction ('sr') weights keep their name 34 | elif 'mlp.' in new_k: 35 | string = f'{new_k}-' 36 | new_k = new_k.replace('mlp.', 'ffn.layers.') 37 | if 'fc1.weight' in new_k or 'fc2.weight' in new_k: 38 | new_v = v.reshape((*v.shape, 1, 1)) 39 | new_k = new_k.replace('fc1.', '0.') 40 | new_k = new_k.replace('dwconv.dwconv.', '1.') 41 | new_k = new_k.replace('fc2.', '4.') 42 | string += f'{new_k} {v.shape}-{new_v.shape}' 43 | # print(string) 44 | elif k.startswith('norm'): 45 | stage_i = int(k.split('.')[0].replace('norm', '')) 46 | new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i-1}.2') 47 | new_v = v 48 | else: 49 | new_k = k 50 | new_v = v 51 | new_ckpt[new_k] = new_v 52 | return new_ckpt 53 | -------------------------------------------------------------------------------- /sseg/mmseg/models/utils/make_divisible.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | 4 | def make_divisible(value, divisor, min_value=None, min_ratio=0.9): 5 | """Make divisible function. 6 | 7 | This function rounds the channel number to the nearest value that can be 8 | divisible by the divisor. It is taken from the original tf repo. It ensures 9 | that all layers have a channel number that is divisible by divisor. It can 10 | be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa 11 | 12 | Args: 13 | value (int): The original channel number. 14 | divisor (int): The divisor to fully divide the channel number. 15 | min_value (int): The minimum value of the output channel. 16 | Default: None, which means the minimum value equals the divisor. 17 | min_ratio (float): The minimum ratio of the rounded channel number to 18 | the original channel number. Default: 0.9. 19 | 20 | Returns: 21 | int: The modified output channel number. 22 | """ 23 | 24 | if min_value is None: 25 | min_value = divisor 26 | new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) 27 | # Make sure that round down does not go down by more than (1-min_ratio). 28 | if new_value < min_ratio * value: 29 | new_value += divisor 30 | return new_value 31 |
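A quick worked check of the rounding rule above (editor's sketch, not part of the repository; it assumes the package is installed so that `make_divisible` is importable from `mmseg.models.utils`):

    from mmseg.models.utils import make_divisible

    assert make_divisible(33, 8) == 32   # 33 rounds to the nearest multiple of 8
    assert make_divisible(20, 16) == 32  # 16 < 0.9 * 20, so one extra divisor is added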
29 | """ 30 | assert len(x.shape) == 4 31 | return x.flatten(2).transpose(1, 2).contiguous() 32 | -------------------------------------------------------------------------------- /sseg/mmseg/models/utils/style_hallucination.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.distributions as tdist 6 | import torch.nn.functional as F 7 | import random 8 | 9 | import ipdb 10 | 11 | class StyleHallucination(nn.Module): 12 | ''' 13 | Style Hallucination Module. 14 | Reference: 15 | Zhao et al. Style-Hallucinated Dual Consistency Learning for Domain Generalized Semantic Segmentation. ECCV 2022. 16 | https://arxiv.org/pdf/2204.02548.pdf 17 | ''' 18 | def __init__(self, concentration_coeff, base_style_num, mode='shm'): 19 | super().__init__() 20 | self.concentration = torch.tensor([concentration_coeff]*base_style_num, device='cuda') 21 | self._dirichlet = tdist.dirichlet.Dirichlet(concentration=self.concentration) 22 | self.mode = mode 23 | self.register_buffer("proto_mean", torch.zeros((base_style_num, base_style_num), requires_grad=False)) 24 | self.register_buffer("proto_std", torch.zeros((base_style_num, base_style_num), requires_grad=False)) 25 | 26 | def forward(self, x): 27 | # ipdb.set_trace() 28 | B,C,H,W = x.size() 29 | x_mean = x.mean(dim=[2,3], keepdim=True) # B,C,1,1 30 | x_std = x.std(dim=[2,3], keepdim=True) + 1e-7 # B,C,1,1 31 | x_mean, x_std = x_mean.detach(), x_std.detach() 32 | 33 | x_norm = (x - x_mean) / x_std 34 | 35 | combine_weights = self._dirichlet.sample((B,)) # B,C 36 | combine_weights = combine_weights.detach() 37 | 38 | new_mean = combine_weights @ self.proto_mean.data # B,C 39 | new_std = combine_weights @ self.proto_std.data 40 | 41 | x_new = x_norm * new_std.unsqueeze(-1).unsqueeze(-1) + new_mean.unsqueeze(-1).unsqueeze(-1) 42 | 43 | return x, x_new 44 | 45 | 46 | -------------------------------------------------------------------------------- /sseg/mmseg/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .encoding import Encoding 2 | from .wrappers import Upsample, resize 3 | 4 | __all__ = ['Upsample', 'resize', 'Encoding'] 5 | -------------------------------------------------------------------------------- /sseg/mmseg/ops/encoding.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | 8 | class Encoding(nn.Module): 9 | """Encoding Layer: a learnable residual encoder. 10 | 11 | Input is of shape (batch_size, channels, height, width). 12 | Output is of shape (batch_size, num_codes, channels). 13 | 14 | Args: 15 | channels: dimension of the features or feature channels 16 | num_codes: number of code words 17 | """ 18 | 19 | def __init__(self, channels, num_codes): 20 | super(Encoding, self).__init__() 21 | # init codewords and smoothing factor 22 | self.channels, self.num_codes = channels, num_codes 23 | std = 1. 
-------------------------------------------------------------------------------- /sseg/mmseg/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .encoding import Encoding 2 | from .wrappers import Upsample, resize 3 | 4 | __all__ = ['Upsample', 'resize', 'Encoding'] 5 | -------------------------------------------------------------------------------- /sseg/mmseg/ops/encoding.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | 8 | class Encoding(nn.Module): 9 | """Encoding Layer: a learnable residual encoder. 10 | 11 | Input is of shape (batch_size, channels, height, width). 12 | Output is of shape (batch_size, num_codes, channels). 13 | 14 | Args: 15 | channels: dimension of the features or feature channels 16 | num_codes: number of code words 17 | """ 18 | 19 | def __init__(self, channels, num_codes): 20 | super(Encoding, self).__init__() 21 | # init codewords and smoothing factor 22 | self.channels, self.num_codes = channels, num_codes 23 | std = 1. / ((num_codes * channels)**0.5) 24 | # [num_codes, channels] 25 | self.codewords = nn.Parameter( 26 | torch.empty(num_codes, channels, 27 | dtype=torch.float).uniform_(-std, std), 28 | requires_grad=True) 29 | # [num_codes] 30 | self.scale = nn.Parameter( 31 | torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), 32 | requires_grad=True) 33 | 34 | @staticmethod 35 | def scaled_l2(x, codewords, scale): 36 | num_codes, channels = codewords.size() 37 | batch_size = x.size(0) 38 | reshaped_scale = scale.view((1, 1, num_codes)) 39 | expanded_x = x.unsqueeze(2).expand( 40 | (batch_size, x.size(1), num_codes, channels)) 41 | reshaped_codewords = codewords.view((1, 1, num_codes, channels)) 42 | 43 | scaled_l2_norm = reshaped_scale * ( 44 | expanded_x - reshaped_codewords).pow(2).sum(dim=3) 45 | return scaled_l2_norm 46 | 47 | @staticmethod 48 | def aggregate(assignment_weights, x, codewords): 49 | num_codes, channels = codewords.size() 50 | reshaped_codewords = codewords.view((1, 1, num_codes, channels)) 51 | batch_size = x.size(0) 52 | 53 | expanded_x = x.unsqueeze(2).expand( 54 | (batch_size, x.size(1), num_codes, channels)) 55 | encoded_feat = (assignment_weights.unsqueeze(3) * 56 | (expanded_x - reshaped_codewords)).sum(dim=1) 57 | return encoded_feat 58 | 59 | def forward(self, x): 60 | assert x.dim() == 4 and x.size(1) == self.channels 61 | # [batch_size, channels, height, width] 62 | batch_size = x.size(0) 63 | # [batch_size, height x width, channels] 64 | x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() 65 | # assignment_weights: [batch_size, height x width, num_codes] 66 | assignment_weights = F.softmax( 67 | self.scaled_l2(x, self.codewords, self.scale), dim=2) 68 | # aggregate 69 | encoded_feat = self.aggregate(assignment_weights, x, self.codewords) 70 | return encoded_feat 71 | 72 | def __repr__(self): 73 | repr_str = self.__class__.__name__ 74 | repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ 75 | f'x{self.channels})' 76 | return repr_str 77 | -------------------------------------------------------------------------------- /sseg/mmseg/ops/wrappers.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | import warnings 4 | 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | def resize(input, 10 | size=None, 11 | scale_factor=None, 12 | mode='nearest', 13 | align_corners=None, 14 | warning=True): 15 | if warning: 16 | if size is not None and align_corners: 17 | input_h, input_w = tuple(int(x) for x in input.shape[2:]) 18 | output_h, output_w = tuple(int(x) for x in size) 19 | if output_h > input_h or output_w > input_w: 20 | if ((output_h > 1 and output_w > 1 and input_h > 1 21 | and input_w > 1) and (output_h - 1) % (input_h - 1) 22 | and (output_w - 1) % (input_w - 1)): 23 | warnings.warn( 24 | f'When align_corners={align_corners}, ' 25 | 'the output would be more aligned if ' 26 | f'input size {(input_h, input_w)} is `x+1` and ' 27 | f'out size {(output_h, output_w)} is `nx+1`') 28 | return F.interpolate(input, size, scale_factor, mode, align_corners) 29 | 30 | 31 | class Upsample(nn.Module): 32 | 33 | def __init__(self, 34 | size=None, 35 | scale_factor=None, 36 | mode='nearest', 37 | align_corners=None): 38 | super(Upsample, self).__init__() 39 | self.size = size 40 | if isinstance(scale_factor, tuple): 41 | self.scale_factor = tuple(float(factor) for factor in scale_factor) 42 | else: 43 | self.scale_factor = 
float(scale_factor) if scale_factor else None 44 | self.mode = mode 45 | self.align_corners = align_corners 46 | 47 | def forward(self, x): 48 | if not self.size: 49 | size = [int(t * self.scale_factor) for t in x.shape[-2:]] 50 | else: 51 | size = self.size 52 | return resize(x, size, None, self.mode, self.align_corners) 53 | -------------------------------------------------------------------------------- /sseg/mmseg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .logger import get_root_logger 3 | 4 | __all__ = ['get_root_logger', 'collect_env'] 5 | -------------------------------------------------------------------------------- /sseg/mmseg/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | # Modifications: Add code archive generation 3 | 4 | import os 5 | import tarfile 6 | 7 | from mmcv.utils import collect_env as collect_base_env 8 | from mmcv.utils import get_git_hash 9 | 10 | import mmseg 11 | 12 | 13 | def collect_env(): 14 | """Collect the information of the running environments.""" 15 | env_info = collect_base_env() 16 | env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' 17 | 18 | return env_info 19 | 20 | 21 | def is_source_file(x): 22 | if x.isdir() or x.name.endswith(('.py', '.sh', '.yml', '.json', '.txt')) \ 23 | and '.mim' not in x.name and 'jobs/' not in x.name: 24 | # print(x.name) 25 | return x 26 | else: 27 | return None 28 | 29 | 30 | def gen_code_archive(out_dir, file='code.tar.gz'): 31 | archive = os.path.join(out_dir, file) 32 | os.makedirs(os.path.dirname(archive), exist_ok=True) 33 | with tarfile.open(archive, mode='w:gz') as tar: 34 | tar.add('.', filter=is_source_file) 35 | return archive 36 | 37 | 38 | if __name__ == '__main__': 39 | for name, val in collect_env().items(): 40 | print('{}: {}'.format(name, val)) 41 | -------------------------------------------------------------------------------- /sseg/mmseg/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | 3 | import logging 4 | 5 | from mmcv.utils import get_logger 6 | 7 | 8 | def get_root_logger(log_file=None, log_level=logging.INFO): 9 | """Get the root logger. 10 | 11 | The logger will be initialized if it has not been initialized. By default a 12 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 13 | also be added. The name of the root logger is the top-level package name, 14 | e.g., "mmseg". 15 | 16 | Args: 17 | log_file (str | None): The log filename. If specified, a FileHandler 18 | will be added to the root logger. 19 | log_level (int): The root logger level. Note that only the process of 20 | rank 0 is affected, while other processes will set the level to 21 | "Error" and be silent most of the time. 22 | 23 | Returns: 24 | logging.Logger: The root logger. 
25 | """ 26 | 27 | logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level) 28 | 29 | return logger 30 | -------------------------------------------------------------------------------- /sseg/mmseg/utils/utils.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn.functional as F 6 | 7 | 8 | @contextlib.contextmanager 9 | def np_local_seed(seed): 10 | state = np.random.get_state() 11 | np.random.seed(seed) 12 | try: 13 | yield 14 | finally: 15 | np.random.set_state(state) 16 | 17 | 18 | def downscale_label_ratio(gt, 19 | scale_factor, 20 | min_ratio, 21 | n_classes, 22 | ignore_index=255): 23 | assert scale_factor > 1 24 | bs, orig_c, orig_h, orig_w = gt.shape 25 | assert orig_c == 1 26 | trg_h, trg_w = orig_h // scale_factor, orig_w // scale_factor 27 | ignore_substitute = n_classes 28 | 29 | out = gt.clone() # otw. next line would modify original gt 30 | out[out == ignore_index] = ignore_substitute 31 | out = F.one_hot( 32 | out.squeeze(1), num_classes=n_classes + 1).permute(0, 3, 1, 2) 33 | assert list(out.shape) == [bs, n_classes + 1, orig_h, orig_w], out.shape 34 | out = F.avg_pool2d(out.float(), kernel_size=scale_factor) 35 | gt_ratio, out = torch.max(out, dim=1, keepdim=True) 36 | out[out == ignore_substitute] = ignore_index 37 | out[gt_ratio < min_ratio] = ignore_index 38 | assert list(out.shape) == [bs, 1, trg_h, trg_w], out.shape 39 | return out 40 | -------------------------------------------------------------------------------- /sseg/mmseg/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | 3 | __version__ = '0.16.0' 4 | 5 | 6 | def parse_version_info(version_str): 7 | version_info = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | version_info.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | version_info.append(int(patch_version[0])) 14 | version_info.append(f'rc{patch_version[1]}') 15 | return tuple(version_info) 16 | 17 | 18 | version_info = parse_version_info(__version__) 19 | -------------------------------------------------------------------------------- /sseg/requirements.txt: -------------------------------------------------------------------------------- 1 | cityscapesscripts==2.2.0 2 | cycler==0.10.0 3 | gdown==4.2.0 4 | humanfriendly==9.2 5 | kiwisolver==1.2.0 6 | kornia==0.5.8 7 | matplotlib==3.4.2 8 | numpy==1.19.2 9 | opencv-python==4.4.0.46 10 | pandas==1.1.3 11 | Pillow==8.3.1 12 | prettytable==2.1.0 13 | pyparsing==2.4.7 14 | pytz==2020.1 15 | PyYAML==5.4.1 16 | scipy==1.6.3 17 | seaborn==0.11.1 18 | timm==0.3.2 19 | torch==1.7.1+cu110 20 | torchvision==0.8.2+cu110 21 | tqdm==4.48.2 22 | typing-extensions==3.7.4.3 23 | wcwidth==0.2.5 24 | yapf==0.31.0 25 | ipdb 26 | -------------------------------------------------------------------------------- /sseg/resources/color_palette.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/sseg/resources/color_palette.png -------------------------------------------------------------------------------- /sseg/resources/demo.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/sseg/resources/demo.gif -------------------------------------------------------------------------------- /sseg/resources/license_dacs: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 vikolss 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /sseg/resources/uda_over_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/sseg/resources/uda_over_time.png -------------------------------------------------------------------------------- /sseg/run_experiments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | import subprocess 5 | import uuid 6 | from datetime import datetime 7 | 8 | import torch 9 | from mmcv import Config, get_git_hash 10 | from tools import train 11 | 12 | 13 | def run_command(command): 14 | p = subprocess.Popen( 15 | command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True) 16 | for line in iter(p.stdout.readline, b''): 17 | print(line.decode('utf-8'), end='') 18 | 19 | 20 | def rsync(src, dst): 21 | rsync_cmd = f'rsync -a {src} {dst}' 22 | print(rsync_cmd) 23 | run_command(rsync_cmd) 24 | 25 | 26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser() 28 | group = parser.add_mutually_exclusive_group(required=True) 29 | group.add_argument( 30 | '--exp', 31 | type=int, 32 | default=None, 33 | help='Experiment id as defined in experiment.py', 34 | ) 35 | group.add_argument( 36 | '--config', 37 | default=None, 38 | help='Path to config file', 39 | ) 40 | parser.add_argument( 41 | '--machine', type=str, choices=['local'], default='local') 42 | parser.add_argument('--debug', action='store_true') 43 | args = parser.parse_args() 44 | assert (args.config is None) != (args.exp is None), \ 45 | 'Either config or exp has to be defined.' 
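# Editor's note on the flow below: for each run this script writes a small
# generated child config under GEN_CONFIG_DIR that inherits the chosen base
# config via '_base_' and pins a unique name, work_dir, and git revision, so
# a finished experiment can be reproduced from its generated JSON alone.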
46 | 47 | GEN_CONFIG_DIR = 'configs/generated/' 48 | JOB_DIR = 'jobs' 49 | cfgs, config_files = [], [] 50 | 51 | # Training with Predefined Config 52 | if args.config is not None: 53 | cfg = Config.fromfile(args.config) 54 | # Specify Name and Work Directory 55 | exp_name = f'{args.machine}-{cfg["exp"]}' 56 | unique_name = f'{datetime.now().strftime("%y%m%d_%H%M")}_' \ 57 | f'{cfg["name"]}_{str(uuid.uuid4())[:5]}' 58 | child_cfg = { 59 | '_base_': args.config.replace('configs', '../..'), 60 | 'name': unique_name, 61 | 'work_dir': os.path.join('work_dirs', exp_name, unique_name), 62 | 'git_rev': get_git_hash() 63 | } 64 | cfg_out_file = f"{GEN_CONFIG_DIR}/{exp_name}/{child_cfg['name']}.json" 65 | os.makedirs(os.path.dirname(cfg_out_file), exist_ok=True) 66 | assert not os.path.isfile(cfg_out_file) 67 | with open(cfg_out_file, 'w') as of: 68 | json.dump(child_cfg, of, indent=4) 69 | config_files.append(cfg_out_file) 70 | cfgs.append(cfg) 71 | 72 | if args.machine == 'local': 73 | for i, cfg in enumerate(cfgs): 74 | print('Run job {}'.format(cfg['name'])) 75 | train.main([config_files[i]]) 76 | torch.cuda.empty_cache() 77 | else: 78 | raise NotImplementedError(args.machine) 79 | -------------------------------------------------------------------------------- /sseg/scripts/test_dg.sh: -------------------------------------------------------------------------------- 1 | 2 | TEST_ROOT=$1 3 | DATASET=$2 4 | CHECKPOINT_FILE="${TEST_ROOT}/latest.pth" 5 | SHOW_DIR="${TEST_ROOT}/preds/" 6 | CONFIG_FILE="configs/dgformer/gta2cs_source_${DATASET}.py" 7 | echo 'Dataset:' $DATASET 8 | echo 'Config File:' $CONFIG_FILE 9 | echo 'Checkpoint File:' $CHECKPOINT_FILE 10 | echo 'Predictions Output Directory:' $SHOW_DIR 11 | 12 | python -m tools.test ${CONFIG_FILE} ${CHECKPOINT_FILE} --eval mIoU --show-dir ${SHOW_DIR} --opacity 1 13 | 14 | -------------------------------------------------------------------------------- /sseg/setup.cfg: -------------------------------------------------------------------------------- 1 | # Obtained from: https://github.com/open-mmlab/mmsegmentation/tree/v0.16.0 2 | # Modifications: Update known_third_party 3 | 4 | [yapf] 5 | based_on_style = pep8 6 | blank_line_before_nested_class_or_def = true 7 | split_before_expression_after_opening_paren = true 8 | 9 | [isort] 10 | line_length = 79 11 | multi_line_output = 0 12 | known_standard_library = setuptools 13 | known_first_party = mmseg 14 | known_third_party = PIL,cityscapesscripts,cv2,kornia,matplotlib,mmcv,numpy,prettytable,seaborn,timm,torch,tqdm 15 | no_lines_before = STDLIB,LOCALFOLDER 16 | default_section = THIRDPARTY 17 | -------------------------------------------------------------------------------- /sseg/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HeliosZhao/SHADE-VisualDG/954bfc4e2d5dbaca5ea9551a657cacf73478b660/sseg/tools/__init__.py -------------------------------------------------------------------------------- /sseg/tools/download_checkpoints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Instructions for Manual Download: 4 | # 5 | # Please, download the [MiT weights](https://drive.google.com/drive/folders/1b7bwrInTW4VLEm27YawHOAMSMikga2Ia?usp=sharing) 6 | # pretrained on ImageNet-1K provided by the official 7 | # [SegFormer repository](https://github.com/NVlabs/SegFormer) and put them in a 8 | # folder `pretrained/` within this project. 
For most of the experiments, only 9 | # mit_b5.pth is necessary. 10 | # 11 | 12 | # Automatic Downloads: 13 | set -e # exit when any command fails 14 | mkdir -p pretrained/ 15 | cd pretrained/ 16 | gdown --id 1d7I50jVjtCddnhpf-lqj8-f13UyCzoW1 # MiT-B5 weights 17 | cd ../ 18 | 19 | 20 | --------------------------------------------------------------------------------
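Once the weights are in `pretrained/`, a quick sanity check is to load them and run the backbone key conversion shipped in this repository. A minimal sketch (editor's example, not part of the repository; it assumes `pretrained/mit_b5.pth` was downloaded by the script above and that the checkpoint stores the weights either flat or under a `state_dict` key):

    import torch
    from mmseg.models.utils import mit_convert

    ckpt = torch.load('pretrained/mit_b5.pth', map_location='cpu')
    state = ckpt.get('state_dict', ckpt)  # unwrap if nested
    converted = mit_convert(state)  # e.g. patch_embed1.proj.* -> layers.0.0.projection.*
    print(f'{len(state)} keys in, {len(converted)} keys out')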